
    *4                         S SK r S SKJr  S SKJr  S SKJr  S SKJr  S SK	J
r
  S SKJrJrJrJrJrJrJr  SS	KJrJr  SS
KJr  SSKJrJrJr   " S S5      r " S S5      r\\\4   r \\    r! " S S5      r"g)    N)Counter)aliases)sha256)dumps)sub)AnyDictIteratorListOptionalTupleUnion   )NOT_PRINTABLE_PATTERNTOO_BIG_SEQUENCE)
mess_ratio)	iana_nameis_multi_byte_encodingunicode_rangec                      \ rS rSr S+S\S\S\S\SSS	\\   4S
 jjr	S\
S\4S jrS\
S\4S jr\S\4S j5       r\S\4S j5       r\S\4S j5       r\S\4S j5       rS\4S jrS\4S jrS,S jr\S\4S j5       r\S\\   4S j5       r\S\4S j5       r\S\4S j5       r\S\\   4S j5       r\S\4S j5       r\S\4S j5       r\S\4S j5       r\S\4S j5       r\S\4S j5       r \S\4S  j5       r!\S\S    4S! j5       r"\S\4S" j5       r#\S\\   4S# j5       r$\S\\   4S$ j5       r%S-S% jr&S-S& jr'S.S'\S\4S( jjr(\S\4S) j5       r)S*r*g)/CharsetMatch   Npayloadguessed_encodingmean_mess_ratiohas_sig_or_bom	languagesCoherenceMatchesdecoded_payloadc                     Xl         X l        X0l        XPl        X@l        S U l        / U l        SU l        S U l        S U l	        X`l
        g )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string)selfr   r   r   r   r   r   s          4platform/bq/third_party/charset_normalizer/models.py__init__CharsetMatch.__init__   sK      ) /#-#%("# $&    otherreturnc                     [        U[        5      (       dA  [        SR                  [	        UR
                  5      [	        U R
                  5      5      5      eU R                  UR                  :H  =(       a    U R                  UR                  :H  $ )Nz&__eq__ cannot be invoked on {} and {}.)
isinstancer   	TypeErrorformatstr	__class__encodingfingerprintr-   r2   s     r.   __eq__CharsetMatch.__eq__)   sl    %..8??(#dnn*= 
 }}.X43C3CuGXGX3XXr1   c                    [        U[        5      (       d  [        e[        U R                  UR                  -
  5      n[        U R
                  UR
                  -
  5      nUS:  aX  US:  aR  US:X  a3  U R
                  UR
                  :X  a  U R                  UR                  :  $ U R
                  UR
                  :  $ U R                  UR                  :  $ )zA
Implemented to make sorted available upon CharsetMatches items.
g{Gz?g{Gz?r!   )r5   r   
ValueErrorabschaos	coherencemulti_byte_usage)r-   r2   chaos_differencecoherence_differences       r.   __lt__CharsetMatch.__lt__2   s     %..tzzEKK78"4>>EOO#CD d"';d'B3&4>>U__+L,,u/E/EEE>>EOO33zzEKK''r1   c                 \    S[        [        U 5      5      [        U R                  5      -  -
  $ )N      ?)lenr8   rawr-   s    r.   rD   CharsetMatch.multi_byte_usageE   s"    ST^c$((m333r1   c                 b    [         R                  " S[        5        [        [	        U 5      S5      $ )z
Check once again chaos in decoded text, except this time, with full content.
Use with caution, this can be very slow.
Notice: Will be removed in 3.0
z=chaos_secondary_pass is deprecated and will be removed in 3.0rJ   )warningswarnDeprecationWarningr   r8   rM   s    r.   chaos_secondary_pass!CharsetMatch.chaos_secondary_passI   s)     	K	
 #d)S))r1   c                 :    [         R                  " S[        5        g)za
Coherence ratio on the first non-latin language detected if ANY.
Notice: Will be removed in 3.0
z<coherence_non_latin is deprecated and will be removed in 3.0r!   )rP   rQ   rR   rM   s    r.   coherence_non_latin CharsetMatch.coherence_non_latinV   s     	J	
 r1   c                     [         R                  " S[        5        [        [        S[        U 5      R                  5       5      n[        UR                  5       5      $ )zG
Word counter instance on decoded text.
Notice: Will be removed in 3.0
z2w_counter is deprecated and will be removed in 3.0 )	rP   rQ   rR   r   r   r8   lowerr   split)r-   string_printable_onlys     r.   	w_counterCharsetMatch.w_counterb   sH     	@BT	
 !$$93D	@Q R,22455r1   c                     U R                   c&  [        U R                  U R                  S5      U l         U R                   $ )Nstrict)r,   r8   r"   r#   rM   s    r.   __str__CharsetMatch.__str__p   s.    <<t}}dnnhGDL||r1   c                 N    SR                  U R                  U R                  5      $ )Nz<CharsetMatch '{}' bytes({})>)r7   r:   r;   rM   s    r.   __repr__CharsetMatch.__repr__v   s    .55dmmTEUEUVVr1   c                     [        U[        5      (       a  X:X  a$  [        SR                  UR                  5      5      eS Ul        U R                  R                  U5        g )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r5   r   r@   r7   r9   r,   r(   appendr<   s     r.   add_submatchCharsetMatch.add_submatchy   sP    %..%-MTTOO  E"r1   c                     U R                   $ N)r#   rM   s    r.   r:   CharsetMatch.encoding   s    ~~r1   c                     / n[         R                  " 5        HK  u  p#U R                  U:X  a  UR                  U5        M(  U R                  U:X  d  M:  UR                  U5        MM     U$ )zr
Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
)r   itemsr:   rg   )r-   also_known_asups       r.   encoding_aliasesCharsetMatch.encoding_aliases   sW    
 MMODA}}!$$Q'!#$$Q'	 $
 r1   c                     U R                   $ rk   r&   rM   s    r.   bomCharsetMatch.bom       ###r1   c                     U R                   $ rk   ru   rM   s    r.   byte_order_markCharsetMatch.byte_order_mark   rx   r1   c                 J    U R                    Vs/ s H  oS   PM	     sn$ s  snf )z
Return the complete list of possible languages found in decoded sequence.
Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
r   r%   )r-   es     r.   r   CharsetMatch.languages   s"     #oo.o!o...s    c                 ,   U R                   (       dr  SU R                  ;   a  gSSKJnJn  [        U R                  5      (       a  U" U R                  5      OU" U R                  5      n[        U5      S:X  d  SU;   a  gUS   $ U R                   S   S   $ )zz
Most probable language found in decoded sequence. If none were detected or inferred, the property will return
"Unknown".
asciiEnglishr   )encoding_languagesmb_encoding_languageszLatin BasedUnknown)r%   could_be_from_charsetcharset_normalizer.cdr   r   r   r:   rK   )r-   r   r   r   s       r.   languageCharsetMatch.language   s      $444  X *$--88 &dmm4'6  9~"my&@ Q<q!!$$r1   c                     U R                   $ rk   )r$   rM   s    r.   rB   CharsetMatch.chaos   s    $$$r1   c                 J    U R                   (       d  gU R                   S   S   $ )Nr!   r   r   r}   rM   s    r.   rC   CharsetMatch.coherence   s     q!!$$r1   c                 0    [        U R                  S-  SS9$ Nd      )ndigits)roundrB   rM   s    r.   percent_chaosCharsetMatch.percent_chaos   s    TZZ#%q11r1   c                 0    [        U R                  S-  SS9$ r   )r   rC   rM   s    r.   percent_coherenceCharsetMatch.percent_coherence   s    T^^c)155r1   c                     U R                   $ )z
Original untouched bytes.
)r"   rM   s    r.   rL   CharsetMatch.raw   s    
 }}r1   c                     U R                   $ rk   )r(   rM   s    r.   submatchCharsetMatch.submatch   s    ||r1   c                 2    [        U R                  5      S:  $ Nr   )rK   r(   rM   s    r.   has_submatchCharsetMatch.has_submatch   s    4<< 1$$r1   c                    U R                   b  U R                   $ [        U 5       Vs/ s H  n[        U5      PM     nn[        [	        U Vs1 s H  o3(       d  M  UiM     sn5      5      U l         U R                   $ s  snf s  snf rk   )r'   r8   r   sortedlist)r-   chardetected_rangesrs       r.   	alphabetsCharsetMatch.alphabets   s}    +''' -0I
,5DM$I 	 
  &d+L!!A+L&MN###
 ,Ms   A<
BBc                 t    U R                   /U R                   Vs/ s H  oR                  PM     sn-   $ s  snf )z
The complete list of encoding that output the exact SAME str result and therefore could be the originating
encoding.
This list does include the encoding available in property 'encoding'.
)r#   r(   r:   )r-   ms     r.   r   "CharsetMatch.could_be_from_charset   s.     t||"D|!::|"DDD"Ds   5c                     U $ z.
Kept for BC reasons. Will be removed in 3.0.
 rM   s    r.   firstCharsetMatch.first   	     r1   c                     U $ r   r   rM   s    r.   bestCharsetMatch.best   r   r1   r:   c                     U R                   b  U R                   U:w  a&  Xl         [        U 5      R                  US5      U l        U R                  $ )z
Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
Any errors will be simply ignored by the encoder NOT replaced.
replace)r+   r8   encoder*   )r-   r:   s     r.   outputCharsetMatch.output  sH    
   (D,A,AX,M$,!#&t9#3#3Hi#HD ###r1   c                 P    [        U R                  5       5      R                  5       $ )zg
Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
)r   r   	hexdigestrM   s    r.   r;   CharsetMatch.fingerprint  s    
 dkkm$..00r1   )r#   r&   r%   r(   r)   r$   r+   r*   r"   r,   r'   rk   )r2   r   r3   N)r3   r   )utf_8)+__name__
__module____qualname____firstlineno__bytesr8   floatboolr   r/   objectr=   rG   propertyrD   rS   rV   r   r]   ra   rd   rh   r:   r   rr   rv   rz   r   r   rB   rC   r   r   rL   r   r   r   r   r   r   r   r;   __static_attributes__r   r1   r.   r   r      s    *.'' ' 	'
 ' &' "#'2YF Yt Y(F (t (& 4% 4 4 
*e 
* 
* 	U 	 	 67 6 6 W# W	# #   
$s) 
 
 $T $ $ $ $ $ /49 / / %# % %6 %u % % %5 % %
 2u 2 2 65 6 6 U   $~.   %d % % 	$49 	$ 	$ EtCy E E	$s 	$ 	$ 1S 1 1r1   r   c                       \ rS rSrSrSS\\   4S jjrS\\   4S jr	S\
\\4   S\4S	 jrS\4S
 jrS\4S jrS\SS4S jrS\S   4S jrS\S   4S jrSrg)CharsetMatchesi  z
Container with every CharsetMatch items ordered by default from most probable to the less one.
Act like a list(iterable) but does not implements all related methods.
Nresultsc                 B    U(       a  [        U5      U l        g / U l        g rk   )r   _results)r-   r   s     r.   r/   CharsetMatches.__init__  s    +2wr1   r3   c              #   :   #    U R                    H  nUv   M	     g 7frk   r   )r-   results     r.   __iter__CharsetMatches.__iter__  s     mmFL $s   itemc                     [        U[        5      (       a  U R                  U   $ [        U[        5      (       a2  [	        US5      nU R                   H  nXR
                  ;   d  M  Us  $    [        e)z
Retrieve a single item either by its position or encoding name (alias may be used here).
Raise KeyError upon invalid index or encoding not present in results.
F)r5   intr   r8   r   r   KeyError)r-   r   r   s      r.   __getitem__CharsetMatches.__getitem__#  s_    
 dC  ==&&dC  T5)D--777!M ( r1   c                 ,    [        U R                  5      $ rk   rK   r   rM   s    r.   __len__CharsetMatches.__len__1  s    4==!!r1   c                 2    [        U R                  5      S:  $ r   r   rM   s    r.   __bool__CharsetMatches.__bool__4  s    4==!A%%r1   c                    [        U[        5      (       d-  [        SR                  [	        UR
                  5      5      5      e[        UR                  5      [        ::  a\  U R                   HL  nUR                  UR                  :X  d  M  UR                  UR                  :X  d  M;  UR                  U5          g   U R                  R                  U5        [        U R                  5      U l	        g)zf
Insert a single match. Will be inserted accordingly to preserve sort.
Can be inserted as a submatch.
z-Cannot append instance '{}' to CharsetMatchesN)r5   r   r@   r7   r8   r9   rK   rL   r   r   r;   rB   rh   rg   r   )r-   r   matchs      r.   rg   CharsetMatches.append7  s    
 $--?FF'  txx=,,$$(8(88U[[DJJ=V&&t, ' 	T"t}}-r1   r   c                 D    U R                   (       d  gU R                   S   $ )zA
Simply return the first match. Strict equivalent to matches[0].
Nr   r   rM   s    r.   r   CharsetMatches.bestK  s     }}}}Qr1   c                 "    U R                  5       $ )z@
Redundant method, call the method best(). Kept for BC reasons.
)r   rM   s    r.   r   CharsetMatches.firstS  s     yy{r1   r   rk   )r   r   r   r   __doc__r   r   r/   r
   r   r   r   r8   r   r   r   r   rg   r   r   r   r   r   r1   r.   r   r     s    
;\ 2 ;(<0 c3h L " "&$ &.< .D .( h~.  x/ r1   r   c                       \ rS rSrS\S\\   S\\   S\\   S\S\\   S\S	\S
\S\\   S\4S jr	\
S\\\4   4S j5       rS\4S jrSrg)CliDetectionResulti^  pathr:   rr   alternative_encodingsr   r   r   rB   rC   unicode_pathis_preferredc                     Xl         Xl        X l        X0l        X@l        XPl        X`l        Xpl        Xl        Xl	        Xl
        g rk   )r   r   r:   rr   r   r   r   r   rB   rC   r   )r-   r   r:   rr   r   r   r   r   rB   rC   r   r   s               r.   r/   CliDetectionResult.__init___  s@     	(  0%:" ",
"(r1   r3   c                     U R                   U R                  U R                  U R                  U R                  U R
                  U R                  U R                  U R                  U R                  U R                  S.$ )Nr   r:   rr   r   r   r   r   rB   rC   r   r   r   rM   s    r.   __dict__CliDetectionResult.__dict__y  se     II $ 5 5%)%?%?"11ZZ -- --
 	
r1   c                 ,    [        U R                  SSS9$ )NT   )ensure_asciiindent)r   r   rM   s    r.   to_jsonCliDetectionResult.to_json  s    T]]a@@r1   )r   r   rB   rC   r:   rr   r   r   r   r   r   N)r   r   r   r   r8   r   r   r   r   r/   r   r	   r   r   r   r   r   r1   r.   r   r   ^  s    )) 3-) s)	)
  $Cy) ) 9) ) ) ) sm) )4 
$sCx. 
 
A Ar1   r   )#rP   collectionsr   encodings.aliasesr   hashlibr   jsonr   rer   typingr   r	   r
   r   r   r   r   constantr   r   mdr   utilsr   r   r   r   r   r8   r   CoherenceMatchr   r   r   r1   r.   <module>r     sm      %    D D D =  C CD1 D1NA AH sEz"' ,A ,Ar1   