
    }$                         S r SSKrSSKrSSKJrJr  SSKJr  SSKrSSK	J
r
  SSKJr  SSKJr  \" \5      rS rS	 rS
 rS rS rS r\S:X  a  \" 5         gg)z%
Convert old style SBCS model to new
    N)ArgumentDefaultsHelpFormatterArgumentParser)ascii_letters)__version__)	LANGUAGES)SingleByteCharSetModelc                 J    U R                  5       n U R                  SS5      n U $ )z-Convert name to proper Python constant format-_)upperreplace)charset_names    1lib/third_party/chardet/convert_language_model.pynormalize_namer   +   s*      %%'L''S1L    c           
      r    [        [        U S   5      5      nU S   nU S   n[        U S   U S   USUUUS9nU$ )z@Create a SingleByteCharSetModel object representing the charset.char_to_order_maptypical_positive_ratiokeep_english_letterr   languageN)r   r   r   language_modelr   keep_ascii_lettersalphabet)dict	enumerater   )	old_modelr   char_to_order	pos_ratior   
curr_models         r   convert_sbcs_modelr    4   sa     9-@#ABCM23I"#89'~.:&'(-	J r   c           	         [        U  S3US9  [        UR                  5       5       HB  u  pE[        [	        U45      5      n UR                  U5      n[        SU< SU< SU< 3US9  MD     [        SUS9  g ! [         a    S n N3f = f)N = {filez     : ,  # }
)printsorteditemsbytes	bytearraydecodeUnicodeError)var_name	order_mapr   output_filecharorder
char_bytesunicode_chars           r   print_char_to_orderr6   H   s    	XJe
;/ioo/09dW-.
	 %,,\:L 	dXRyl-=>[Q 1 
%k"  	 L	 s   A88BBc           
      d   [        SUS9  [        U  S3US9  [        UR                  5       5       Hr  u  pEU(       a  XC;  a  M  [        SX4   < SU< 3US9  [        UR                  5       5       H$  u  pgXc;  a  M  [        SX6   < SU< SU< 3US9  M&     [        S	US9  Mt     [        S
US9  g )Nz6# 3: Positive
# 2: Likely
# 1: Unlikely
# 0: Negative
r#   r"   z    z: {  # z        r%   r&   z    },r'   )r(   r)   r*   )r/   r   r1   
char_ranks
first_charsub_dictsecond_char
likelihoods           r   print_language_modelr=   T   s    	D 
XJe
;/ &~';';'= >
:7Z+.hznEKX'-hnn.>'?#K,:25R
~U/#  (@ 	h[) !? 
%k"r   c           
         U R                  5       n [        R                  " U 5      nU(       d  [        SU  S35      eSU R	                  5        S3n[
        R                  R                  [
        R                  R                  SUS-   5      5      (       d  [        SU  S35        g	[        [        U5      n[        S
U  S35        [        SUR                   35        [        SUR                   35        0 n0 n0 n[        U5       H  nSU;   a  SU;  d  M  [        X75      nUS   n	[        SU	 35        [        R                   R#                  5         [%        XR                  5      XI'   XI   R&                  R)                  5        HY  u  pUS:  a  M  [+        [-        U
45      5      n UR/                  U	5      nX;  a
  XU'   XU'   MC  X]   U:w  d  MM  [        SU 35      e   M     [        X0R                  5        S35      n0 n[3        SS5       HF  nUU;  a  M  UU   n0 UU'   [3        SS5       H!  nUU;  a  M  UU   nUUS-  U-      UU   U'   M#     MH     [        SU  S35        [        R                   R#                  5         [5        SU R	                  5        S3SSS9 nU R7                  5       n[        SUS9  U S3n[9        UUUU5        [        SUS9  UR)                  5        H  u  n	n[;        U	5      nU SU S 3n[=        UUR&                  U	U5        U SU S!3nUR&                  R?                  5         [A        U5      RC                  S"U5      RC                  S#U5      RC                  S$S%S&[E        U5      S'-   -  -   5      n[        U S(U S
3US9  M     S	S	S	5        g	! [0         a     GM2  f = f! , (       d  f       g	= f))z:Convert old SingleByteCharSetModels for the given languagezUnknown language: z[. If you are adding a model for a new language, you must first update metadata/languages.pylangmodelchardetz.pyz	Skipping z' because it does not have an old model.N
zA
----------------------------------------------------------------zKeep ASCII Letters: z
Alphabet: Model	LangModelr   zConverting charset model for @   zUnstable character ranking for    zWriting output file for z

zmodel.pywzutf-8)encodingz<from chardet.sbcharsetprober import SingleByteCharSetModel

r#   _LANG_MODELz# 255: Undefined characters that did not exist in training text
# 254: Carriage/Return
# 253: symbol (punctuation) that does not belong to word
# 252: 0 - 9
# 251: Control characters

# Character Mapping Table(s):r   _CHAR_TO_ORDER_MODELNonez{}z, z,
    z = )#titler   get
ValueErrorlowerospathexistsjoinr(   getattrrA   	use_asciir   dirsysstdoutflushr    r   r*   r+   r,   r-   UnicodeDecodeErrorrangeopenr   r=   r   r6   clearreprr   len)r   lang_metadatalang_mod_namelang_modcharset_modelsr8   order_to_charsr/   r   r   byte_hexr3   r2   r5   old_lang_modelr   i	lang_charj
lang_char2r1   
upper_langlm_name
sbcs_modelnormal_namechar_to_order_namesbcs_model_namesbcs_model_reprs                               r   convert_models_for_langru   k   s    ~~HMM(+M 
 + 
 	

 8>>+,E2M77>>"'',,y-%2GHII	(#JKLw.H	
XJXY 
 !8!8 9
:;	J}--.
/0 NJNM8#8(CH/	 0-l^<=

'9--(
$  .;MMSSUOH rzH;/0D#{{<8 -+0<((4u%)U2 #B<.!QRR  V "> X..*:);9'EFNN1b\N""1%	$&y!q"A&'*J4BAFa<4PN9%j1	   
$XJd
34JJ	X^^%&h/w	G;^^%
L	

  L,Wnk:N, 	
 )7(<(<(>$L*(6K$/=*^!L",,	 "-Qzl&AO((..0Z )12s?/Cb/H(I IK	  _%S(9<;O% )?) 
H	G3 & 2 
H	Gs   )N*%C<N<*
N98N9<
O
c                  J   [        [        [        S9n U R                  SSSS9  U R                  SS[        S9  U R                  5       nUR                  (       d,  [        [        [        R                  " 5       5      5      Ul        UR                   H  n[        U5        M     g )	N)descriptionformatter_classr   zThe name of the language the input documents are in. Also the name of the language the generated model will detect. If no language is specified, models for all languages known to chardet will be trained.*)helpnargsz	--versionversion)actionr|   )r   __doc__r   add_argumentr   
parse_argsr   listr)   r   keysru   )parserargsr   s      r   mainr      s    -JF 
    I{KD==VINN$456MM) "r   __main__)r~   rS   rZ   argparser   r   stringr   rA   chardet.versionr   chardet.metadata.languagesr   chardet.sbcharsetproberr   setr   r    r6   r=   ru   r   __name__ r   r   <module>r      se   2 
 
 B    ' 0 : M"(	##.pPf*. zF r   