
                            S r SSKrSSKJr  SSKJr  SSKJrJrJ	r	J
r
JrJr  SSKJr  SSKJr   SSKJr  SSKJrJrJrJr  S	rSSKrSSKrSSKJr  1 Skr1 Skr S r!\RD                  RG                  S\!" 5       5      S 5       r$\RD                  RG                  S\!" 5       5      S 5       r%\(       a   " S S\&5      r'\RD                  RP                  \" \RR                  " SS9\RT                  " / SQ5      \RV                  " 5       5      \" SS9S 5       5       5       r,\" \RR                  " SS9\RT                  " / SQ5      \RV                  " 5       5      \" SS9S 5       5       r-gg! \ a    S
r GN/f = f)z~
Run chardet on a bunch of documents and see that we get the correct encodings.

:author: Dan Blanchard
:author: Ian Cordasco
    N)ndiff)listdir)dirnameisdirjoinrealpathrelpathsplitext)pformat)	normalize)	VerbosityassumegivensettingsTF)	LANGUAGES>   
iso-8859-2
iso-8859-6windows-1250windows-1254windows-1256>   #tests/iso-8859-9-turkish/_ude_1.txt#tests/iso-8859-9-turkish/_ude_2.txt%tests/iso-8859-9-turkish/subtitle.srt+tests/iso-8859-9-turkish/divxplanet.com.xml2tests/iso-8859-9-turkish/wikitop_tr_ISO-8859-9.txtc            	   #     #    [        [        [        [        [        5      5      S5      5      n [        U 5       GH!  n[        X5      n[        U5      (       d  M!  UR                  5       n[        [        R                  " 5       5       HB  nSUR                  5       -   nUR                  U5      (       d  M.  UR                  U5      S   n  O   U[        ;   a  M  [        U5       Hq  n[        U5      S   R                  5       nUS;  a  M'  [        X%5      nXq4nU[        ;   a.  [         R"                  " US[         R$                  R&                  06nUv   Ms     GM$     g7f)zGYields tuples of paths and encodings to use for test_encoding_detectiontests-r      )z.htmlz.txtz.xmlz.srtmarksN)r	   r   r   r   __file__r   r   lowersortedr   keysendswith
rpartitionMISSING_ENCODINGSr
   EXPECTED_FAILURESpytestparammarkxfail)		base_pathencodingpathlanguagepostfix	file_nameext	full_path	test_cases	            +platform/gsutil/third_party/chardet/test.pygen_test_paramsr7   .   s    WXh%78'BCII&I(T{{>>#y~~/0HHNN,,G  ))#..w7:	 1 (( I9%a(..0C;;T-I!+I--"LL)M6;;;L;LM	O '! 's   B0E 6B*E zfile_name, encodingc                    [        U S5       nUR                  5       n[        R                  " U5      n UR	                  U5      n UR	                  US   5      nS S S 5        W(       a   US   =(       d    SR                  5       U:H  nOSn[        SW5      n[        SW5      nU(       d  XV:w  a  SR                  [        R                  " US5      5      S-   nSR                  [        R                  " US5      5      S-   n	SR                  [        UR                  S5      U	R                  S5      5       V
s/ s H  n
U
R                  S	5      (       a  M  U
PM     sn
S S
 5      n[        R                   " WSS9nOSnSnU/nU(       d   SU SU SU  SU S[#        U5       3
5       eg ! [
         a    Sn GNtf = f! [
        [        [        4 a    Sn GN}f = f! , (       d  f       GN= fs  sn
f )Nrb r.   FNFKC
d   T    )ignore_threshold	Expected 
, but got  for z/.  First 20 lines with character differences: 

All encodings: openreadchardetdetectdecodeLookupErrorUnicodeDecodeError	TypeErrorr"   r   r   textwrapwrapr   
splitlines
startswith
detect_allr   r2   r.   finput_bytesresultexpected_unicodedetected_unicodeencoding_matchwrapped_expectedwrapped_detectedlinediffall_encodingss                r6   test_encoding_detectionr_   L   s   	i	!ffh,	"*11(;	"*11&2DE 
  ,299;xG !)9: )9:.B99X]]3CS%IJTQ99X]]3CS%IJTQww "$//57G7R7RSW7XD s+	  r
  **;N 
H:ZxuYK @//3f 5!-01	3>?  	"!	" /; 	"!	" 
	.sR   'GF+F>9G.G.+F;7G:F;;G>GGGG
G+c                    [        U S5       nUR                  5       n[        R                  " USS9n UR	                  U5      n UR	                  US   5      nS S S 5        W(       a   US   =(       d    SR                  5       U:H  nOSn[        SW5      n[        SW5      nU(       d  XV:w  a  SR                  [        R                  " US	5      5      S-   nSR                  [        R                  " US	5      5      S-   n	SR                  [        UR                  S5      U	R                  S5      5       V
s/ s H  n
U
R                  S
5      (       a  M  U
PM     sn
S S 5      n[        R                   " WSSS9nOSnSnU/nU(       d   SU SU SU  SU S[#        U5       3
5       eg ! [
         a    Sn GNuf = f! [
        [        [        4 a    Sn GN~f = f! , (       d  f       GN= fs  sn
f )Nr9   T)should_rename_legacyr:   r.   FNFKDr<   r=   r>   r?   )r@   ra   rA   rB   rC   z-.  First 20 lines of character differences: 
rD   rE   rS   s                r6   %test_encoding_detection_rename_legacyrc   y   s	   	i	!ffh$G	"*11(;	"*11&2DE 
  ,299;xG !)9: )9:.B99X]]3CS%IJTQ99X]]3CS%IJTQww "$//57G7R7RSW7XD s+	  r
  **$T
  
H:ZxuYK @--1F 3!-01	3>C  	"!	" /; 	"!	" 
	.sR   &GF+F>8G.G.+F;7G:F;;G>GGGG
G+c                       \ rS rSrSrg)JustALengthIssue    N)__name__
__module____qualname____firstlineno____static_attributes__rg       r6   re   re      s    rm   re   r   )min_size)asciizutf-8zutf-16zutf-32z
iso-8859-7z
iso-8859-8zwindows-1255   )max_examplesc                   ^ ^  T R                  T5      n[        R                  " W5      S   nUcf  [
        R                  " [        5         [        [        R                  " 5       US9[        [        R                  SS9UU 4S j5       5       nS S S 5        g g ! [         a    [        S5         Nf = f! , (       d  f       g = f)NFr.   )random2   )	verbosityrq   c                    >  TU -   R                  T5      n[        R                  " W5      nU(       a  US   b
  [        5       eg g ! [         a    [        S5         NFf = f)NFr.   )encodeUnicodeEncodeErrorr   rH   rI   re   )suffixextendedrV   enctxts      r6   string_poisons_following_text^test_never_fails_to_detect_if_there_is_a_valid_encoding.<locals>.string_poisons_following_text   sb    &$'&L#8#8#= %^^H5F&"4"@.00 #Av . &u&s   A AA)rw   rx   r   rH   rI   r)   raisesre   r   sttextr   r   quiet)r|   r{   rnddatadetectedr}   s   ``    r6   7test_never_fails_to_detect_if_there_is_a_valid_encodingr      s    $	::c?D >>$'
3/0rwwy-IOO"E1 F .1	 10  " 	5M	 10s   B AB3B0/B03
Cc                      U R                  U5      n [        R                  " W5      n[        R
                  " U5      nUS   US   S   :X  d   eg ! [         a    [        S5         NVf = f! [         a  n[        W SW 35      UeS nAff = f)NFr.   r   z != )rw   rx   r   rH   rI   rR   	ExceptionRuntimeError)r|   r{   _r   rV   resultsexcs          r6   +test_detect_all_and_detect_one_should_agreer      s    "	::c?D	B^^D)F((.G*%J)???? " 	5M	  	B&gY78cA	Bs(   A =A- A*)A*-
B7BB).__doc__rN   difflibr   osr   os.pathr   r   r   r   r	   r
   pprintr   unicodedatar   hypothesis.strategies
strategiesr   
hypothesisr   r   r   r   HAVE_HYPOTHESISImportErrorr)   rH   chardet.metadata.languagesr   r'   r(   r7   r+   parametrizer_   rc   r   re   r,   r   sampled_fromrandomsr   r   rg   rm   r6   <module>r      s      E E  !&==O   0  < .0AB) C)X .0AB+ C+\ 9  [[



	
 	

 31   "1( 


	
 	

 3
B   
Bu g  Os   E( (E43E4