
    $                         S r SSKrSSKrSSKrSSKJr  SSKJr  SSKJ	r	J
r
JrJrJrJr  SSKr SSKrSr1 Skr1 S	krS
 rS r\SS4S jrS r\S:X  a  \" 5         gg! \ a    Sr N2f = f)z~
Run chardet on a bunch of documents and see that we get the correct encodings.

:author: Dan Blanchard
:author: Ian Cordasco
    N)defaultdict)listdir)dirnameisdirjoinrealpathrelpathsplitextTF>   
iso-8859-2
iso-8859-6windows-1250windows-1254windows-1256>   %tests/iso-8859-9-turkish/subtitle.srt&tests/iso-8859-7-greek/disabled.gr.xml+tests/iso-8859-9-turkish/divxplanet.com.xml2tests/iso-8859-9-turkish/wikitop_tr_ISO-8859-9.txtc                      [        [        S5      (       a  Sn U $ [        R                  R                  S5      (       a  Sn U $ [        R                  S:X  a  Sn U $ Sn U $ )z"Return what kind of Python this ispypy_version_infoPyPyjavaJythoncli
IronPythonCPython)hasattrsysplatform
startswith)pyimpls    ,platform/gsutil/third_party/chardet/bench.pyget_py_implr"   +   se    s'(( M 
	 	 	(	(
 M	 
	 M M    c            	   #     #    [        [        [        [        [        5      5      S5      5      n [        U 5       H  n[        X5      n[        U5      (       d  M   UR                  5       nS H/  nUR                  U5      (       d  M  UR                  U5      S   n  O   U[        ;   a  Mq  [        U5       HC  n[        U5      S   R                  5       nUS;  a  M'  [        X$5      nU[        ;   a  M>  Xa4v   ME     M     g7f)z1Yields filenames to use for timing chardet.detecttests)z-arabicz
-bulgarianz	-cyrillicz-greekz-hebrewz
-hungarianz-turkishr      )z.htmlz.txtz.xmlz.srtN)r	   r   r   r   __file__r   r   lowerendswith
rpartitionMISSING_ENCODINGSr
   EXPECTED_FAILURES)	base_pathencodingpathpostfix	file_nameext	full_paths          r!   get_test_filesr4   8   s     WXh%78'BCII&I(T{{>>#
G   ))#..w7:
 (( I9%a(..0C;;T-I--%% '/ 's   B DA;D
   c           
         [        SU R                   SU R                   S[        5        S[        R
                   35        [        S5        SnSn[        [        5      n[        [        5      n[        5        H  u  pxUS-  n[        US5       n	U	R                  5       n
S S S 5        [        R                  " 5       n[        U5       H  nU R                  W
5        M     [        R                  " 5       U-
  nU(       a  [        SU S	X-   S
35        O([        SSS9  [        R                  R!                  5         X=-  nXX==   U-  ss'   Xh==   S-  ss'   M     [        S5        [#        UR%                  5       5       H  nX&U   -  XX   -  n[        U S	U 35        M!     X$-  U-  n[        SU SU S35        g ! , (       d  f       GN$= f)NzBenchmarking  z on zP--------------------------------------------------------------------------------r   r&   rbzAverage time for z: s. )endz$
Calls per second for each encoding:z
Total time: zs (z calls per second))print__name____version__r"   r   versionr   floatintr4   openreadtimerangedetectstdoutflushsortedkeys)chardet_modverbose	num_iters
total_time	num_filesencoding_timesencoding_num_filesr3   r.   finput_bytesstart_
bench_timecalls_per_secs                  r!   	benchmarkrY   \   s   	
,,-Q{/F/F.G Hm_Ackk]	, 
(OJI 'N$S)-/	Q	)T"a&&(K #		y!A{+ "YY[5(
%i[:3I2J!LM#2JJ 
 J. $)$  0" 

12>..01844~7OO 	 	
"]O,-	 2
 )J6M	N:,c-8J
KL- #"s   F==
G	c                     [         R                  " S[         R                  S9n U R                  SSSSS9  U R                  SS	S
[        SS9  U R                  SSSSS9  U R                  5       nUR                  (       a,  [        (       d!  [        S5        [        R                  " S5        [        UR                  (       a  [        O[        UR                  UR                  S9  g )NzHTimes how long it takes to process each file in test set multiple times.)descriptionformatter_classz-cz
--cchardet
store_truezCRun benchmarks for cChardet instead of chardet, if it is installed.)actionhelpz-iz--iterationsz$Number of times to process each filer5   )r_   typedefaultz-vz	--verbosez/Prints out the timing for each individual file.)r_   r^   z:You must pip install cchardet if you want to benchmark it.r&   )rL   rM   rN   )argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentrB   
parse_argscchardetHAVE_CCHARDETr=   r   exitrY   chardetrM   
iterations)parserargss     r!   mainrn      s    $$ >>F
 R	   3   >	   D}}]]JK $H7//r#   __main__)__doc__rb   r   rE   collectionsr   osr   os.pathr   r   r   r   r	   r
   rj   rg   rh   	Exceptionr+   r,   r"   r4   rY   rn   r>    r#   r!   <module>rv      s     
  #  E E M  
!&H "5B "MJ#L zF c  Ms   A A)(A)