
    "C              
          S SK Jr  S SKJrJr  SSKJrJr  SSKJ	r	J
r
JrJrJrJrJrJrJrJrJrJrJrJrJr   " S S5      r " S S	\5      r " S
 S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r S\\!   S\\!   S\"4S jr#\" SS9 S#S\!S\$S \"S\$4S! jj5       r%g")$    )	lru_cache)ListOptional   )COMMON_SAFE_ASCII_CHARACTERSUNICODE_SECONDARY_RANGE_KEYWORD)is_accentuatedis_asciiis_case_variableis_cjkis_emoticon	is_hangulis_hiraganais_katakanais_latinis_punctuationis_separator	is_symbolis_thairemove_accentunicode_rangec                   ^    \ rS rSrSrS\S\4S jrS\SS4S jrSS jr	\
S\4S	 j5       rS
rg)MessDetectorPlugin   zm
Base abstract class used for mess detection plugins.
All detectors MUST extend and implement given methods.
	characterreturnc                     [         e)z0
Determine if given character should be fed in.
NotImplementedErrorselfr   s     0platform/bq/third_party/charset_normalizer/md.pyeligibleMessDetectorPlugin.eligible   
     "!    Nc                     [         e)zq
The main routine to be executed upon character.
Insert the logic in witch the text would be considered chaotic.
r   r    s     r"   feedMessDetectorPlugin.feed%   s
    
 "!r&   c                     [         e)z2
Permit to reset the plugin to the initial state.
r   r!   s    r"   resetMessDetectorPlugin.reset,   r%   r&   c                     [         e)zm
Compute the chaos ratio based on what your feed() has seen.
Must NOT be lower than 0.; No restriction gt 0.
r   r+   s    r"   ratioMessDetectorPlugin.ratio2   s
     "!r&    r   N)__name__
__module____qualname____firstlineno____doc__strboolr#   r(   r,   propertyfloatr/   __static_attributes__r1   r&   r"   r   r      sM    
"# "$ ""c "d "" "u " "r&   r   c                   d    \ rS rSrSS jrS\S\4S jrS\SS4S jrSS jr	\
S\4S	 j5       rS
rg) TooManySymbolOrPunctuationPlugin;   r   Nc                 J    SU l         SU l        SU l        S U l        SU l        g )Nr   F)_punctuation_count_symbol_count_character_count_last_printable_char_frenzy_symbol_in_wordr+   s    r"   __init__)TooManySymbolOrPunctuationPlugin.__init__<   s*    "# !$(!&+#r&   r   c                 "    UR                  5       $ Nisprintabler    s     r"   r#   )TooManySymbolOrPunctuationPlugin.eligibleD       $$&&r&   c                 D   U =R                   S-  sl         XR                  :w  av  U[        ;  al  [        U5      (       a  U =R                  S-  sl        OFUR                  5       SL a3  [        U5      (       a#  [        U5      SL a  U =R                  S-  sl        Xl        g )Nr   F   )	rC   rD   r   r   rA   isdigitr   r   rB   r    s     r"   r(   %TooManySymbolOrPunctuationPlugin.feedG   s    " 222!==i((''1,'!!#u,i((	*e3""a'"$-!r&   c                 .    SU l         SU l        SU l        g Nr   )rA   rC   rB   r+   s    r"   r,   &TooManySymbolOrPunctuationPlugin.resetY   s    "# !r&   c                     U R                   S:X  a  gU R                  U R                  -   U R                   -  nUS:  a  U$ S$ )Nr           g333333?)rC   rA   rB   )r!   ratio_of_punctuations     r"   r/   &TooManySymbolOrPunctuationPlugin.ratio^   sO      A% ##d&8&88!! " (<s'B#KKr&   )rC   rE   rD   rA   rB   r2   r3   r4   r5   r6   rF   r8   r9   r#   r(   r,   r:   r;   r/   r<   r1   r&   r"   r>   r>   ;   sP    ,'# '$ '.c .d .$
 Lu L Lr&   r>   c                   d    \ rS rSrSS jrS\S\4S jrS\SS4S jrSS jr	\
S\4S	 j5       rS
rg)TooManyAccentuatedPluginj   r   Nc                      SU l         SU l        g rS   rC   _accentuated_countr+   s    r"   rF   !TooManyAccentuatedPlugin.__init__k        !"#r&   r   c                 "    UR                  5       $ rI   )isalphar    s     r"   r#   !TooManyAccentuatedPlugin.eligibleo   s      ""r&   c                 z    U =R                   S-  sl         [        U5      (       a  U =R                  S-  sl        g g Nr   )rC   r	   r_   r    s     r"   r(   TooManyAccentuatedPlugin.feedr   s4    ")$$##q(# %r&   c                      SU l         SU l        g rS   r^   r+   s    r"   r,   TooManyAccentuatedPlugin.resetx   ra   r&   c                 j    U R                   S:X  a  gU R                  U R                   -  nUS:  a  U$ S$ )Nr   rV   gffffff?r^   )r!   ratio_of_accentuations     r"   r/   TooManyAccentuatedPlugin.ratio|   sA      A%##d&;&;; 	 )>(E$N3Nr&   )r_   rC   r2   rY   r1   r&   r"   r[   r[   j   sP    $## #$ #)c )d )$ Ou O Or&   r[   c                   d    \ rS rSrSS jrS\S\4S jrS\SS4S jrSS jr	\
S\4S	 j5       rS
rg)UnprintablePlugin   r   Nc                      SU l         SU l        g rS   )_unprintable_countrC   r+   s    r"   rF   UnprintablePlugin.__init__   s    "# !r&   r   c                     gNTr1   r    s     r"   r#   UnprintablePlugin.eligible       r&   c                     UR                  5       SL a.  UR                  5       SL a  US:w  a  U =R                  S-  sl        U =R                  S-  sl        g )NFr   )isspacerK   rq   rC   r    s     r"   r(   UnprintablePlugin.feed   sN    5(%%'50V###q(#"r&   c                     SU l         g rS   )rq   r+   s    r"   r,   UnprintablePlugin.reset   s
    "#r&   c                 \    U R                   S:X  a  gU R                  S-  U R                   -  $ )Nr   rV      rC   rq   r+   s    r"   r/   UnprintablePlugin.ratio   s/      A%''!+t/D/DDDr&   r   r2   rY   r1   r&   r"   rn   rn      sP    "# $ #c #d #$ Eu E Er&   rn   c                   d    \ rS rSrSS jrS\S\4S jrS\SS4S jrSS jr	\
S\4S	 j5       rS
rg)SuspiciousDuplicateAccentPlugin   r   Nc                 .    SU l         SU l        S U l        g rS   _successive_countrC   _last_latin_characterr+   s    r"   rF   (SuspiciousDuplicateAccentPlugin.__init__   s    !" !%)"r&   r   c                 F    UR                  5       =(       a    [        U5      $ rI   )rc   r   r    s     r"   r#   (SuspiciousDuplicateAccentPlugin.eligible   s      ":x	'::r&   c                    U =R                   S-  sl         U R                  b  [        U5      (       a  [        U R                  5      (       a  UR                  5       (       a4  U R                  R                  5       (       a  U =R                  S-  sl        [        U5      [        U R                  5      :X  a  U =R                  S-  sl        Xl        g rf   )rC   r   r	   isupperr   r   r    s     r"   r(   $SuspiciousDuplicateAccentPlugin.feed   s    "%%1i((^D<V<V-W-W$$&&4+E+E+M+M+O+O**a/* +}..0  **a/*%."r&   c                 .    SU l         SU l        S U l        g rS   r   r+   s    r"   r,   %SuspiciousDuplicateAccentPlugin.reset   s    !" !%)"r&   c                 \    U R                   S:X  a  gU R                  S-  U R                   -  $ )Nr   rV   rO   )rC   r   r+   s    r"   r/   %SuspiciousDuplicateAccentPlugin.ratio   s/      A%&&*d.C.CCCr&   )rC   r   r   r2   rY   r1   r&   r"   r   r      sP    *;# ;$ ;/c /d /*
 Du D Dr&   r   c                   d    \ rS rSrSS jrS\S\4S jrS\SS4S jrSS jr	\
S\4S	 j5       rS
rg)SuspiciousRange   r   Nc                 .    SU l         SU l        S U l        g rS   )"_suspicious_successive_range_countrC   _last_printable_seenr+   s    r"   rF   SuspiciousRange.__init__   s    23/ !$(!r&   r   c                 "    UR                  5       $ rI   rJ   r    s     r"   r#   SuspiciousRange.eligible   rM   r&   c                 Z   U =R                   S-  sl         UR                  5       (       d  [        U5      (       d
  U[        ;   a  S U l        g U R                  c  Xl        g [        U R                  5      n[        U5      n[        X#5      (       a  U =R                  S-  sl        Xl        g rf   )rC   ry   r   r   r   r    is_suspiciously_successive_ranger   )r!   r   unicode_range_aunicode_range_bs       r"   r(   SuspiciousRange.feed   s    " i((88(,D%$$,(1%'%%
 (	2+OMM33q83$-!r&   c                 .    SU l         SU l        S U l        g rS   )rC   r   r   r+   s    r"   r,   SuspiciousRange.reset   s     !23/$(!r&   c                 n    U R                   S:X  a  gU R                  S-  U R                   -  nUS:  a  gU$ )Nr   rV   rO   g?)rC   r   )r!   ratio_of_suspicious_range_usages     r"   r/   SuspiciousRange.ratio   sG      A% 33a7!!+"' +S0..r&   )rC   r   r   r2   rY   r1   r&   r"   r   r      sM    )
'# '$ '.c .d .2)
 /u / /r&   r   c                   d    \ rS rSrSS jrS\S\4S jrS\SS4S jrSS jr	\
S\4S	 j5       rS
rg)SuperWeirdWordPlugin   r   Nc                 t    SU l         SU l        SU l        SU l        SU l        SU l        SU l        SU l        g )Nr   F )_word_count_bad_word_count_is_current_word_bad_foreign_long_watchrC   _bad_character_count_buffer_buffer_accent_countr+   s    r"   rF   SuperWeirdWordPlugin.__init__   sA     $)!#(  !$%!$%!r&   r   c                     grt   r1   r    s     r"   r#   SuperWeirdWordPlugin.eligible	  rv   r&   c                    UR                  5       (       a  SR                  U R                  U/5      U l        [        U5      (       a  U =R                  S-  sl        U R
                  SL a[  [        U5      SL aM  [        U5      SL a?  [        U5      SL a1  [        U5      SL a#  [        U5      SL a  [        U5      SL a  SU l        g U R                  (       d  g UR                  5       (       d   [        U5      (       d  [        U5      (       a  U R                  (       a  U =R                  S-  sl        [!        U R                  5      nU =R"                  U-  sl        US:  a  U R                  U-  S:  a  SU l        US:  a  U R
                  (       a  SU l        U R$                  (       aD  U =R&                  S-  sl        U =R(                  [!        U R                  5      -  sl        SU l        SU l        SU l        SU l        g US	;  aB  UR+                  5       SL a.  [-        U5      (       a  SU l        U =R                  U-  sl        g g g g )
Nr   r   FT   g(\?   r   >   -<=>)rc   joinr   r	   r   r   r   r   r   r   r   r   ry   r   r   r   lenrC   r   r   r   rP   r   )r!   r   buffer_lengths      r"   r(   SuperWeirdWordPlugin.feed  s   77DLL)#<=DLi(())Q.)((E1Y'509%.i(E1	*e3	*e3I&%/+/(||>)#<#<Y@W@Wll!-M!!]2!!d&?&?-&ORV&V,0)"t'?'?,0)(($$)$))S->>),1)',D$DL()D%11!!#u,)$$(,D%LLI%L % - 2r&   c                 f    SU l         SU l        SU l        SU l        SU l        SU l        SU l        g )Nr   Fr   )r   r   r   r   r   rC   r   r+   s    r"   r,   SuperWeirdWordPlugin.reset;  s9    $)!#(   !$%!r&   c                 V    U R                   S::  a  gU R                  U R                  -  $ )N
   rV   )r   r   rC   r+   s    r"   r/   SuperWeirdWordPlugin.ratioD  s*    r!((4+@+@@@r&   )r   r   r   r   rC   r   r   r   r2   rY   r1   r&   r"   r   r      sQ    
&# $ -&c -&d -&^& Au A Ar&   r   c                   h    \ rS rSrSrSS jrS\S\4S jrS\SS4S jr	SS	 jr
\S\4S
 j5       rSrg)CjkInvalidStopPluginiL  u   
GB(Chinese) based encoding often render the stop incorrectly when the content does not fit and
can be easily detected. Searching for the overuse of '丅' and '丄'.
r   Nc                      SU l         SU l        g rS   _wrong_stop_count_cjk_character_countr+   s    r"   rF   CjkInvalidStopPlugin.__init__R      !"$%!r&   r   c                     grt   r1   r    s     r"   r#   CjkInvalidStopPlugin.eligibleV  rv   r&   c                     US;   a  U =R                   S-  sl         g [        U5      (       a  U =R                  S-  sl        g g )N)u   丅u   丄r   )r   r   r   r    s     r"   r(   CjkInvalidStopPlugin.feedY  s?    &""a'")%%*% r&   c                      SU l         SU l        g rS   r   r+   s    r"   r,   CjkInvalidStopPlugin.reset`  r   r&   c                 V    U R                   S:  a  gU R                  U R                   -  $ )N   rV   r   r   r+   s    r"   r/   CjkInvalidStopPlugin.ratiod  s*    $$r)%%(A(AAAr&   r   r2   )r3   r4   r5   r6   r7   rF   r8   r9   r#   r(   r,   r:   r;   r/   r<   r1   r&   r"   r   r   L  sU    
&# $ +c +d +& Bu B Br&   r   c                   d    \ rS rSrSS jrS\S\4S jrS\SS4S jrSS jr	\
S\4S	 j5       rS
rg)ArchaicUpperLowerPluginik  r   Nc                 f    SU l         SU l        SU l        SU l        SU l        S U l        SU l        g )NFr   T)_buf_character_count_since_last_sep_successive_upper_lower_count#_successive_upper_lower_count_finalrC   _last_alpha_seen_current_ascii_onlyr+   s    r"   rF    ArchaicUpperLowerPlugin.__init__l  s9    	/0,-.*340 ! $#' r&   r   c                     grt   r1   r    s     r"   r#    ArchaicUpperLowerPlugin.eligibley  rv   r&   c                    UR                  5       =(       a    [        U5      nUSL nU(       a  U R                  S:  a  U R                  S::  aA  UR                  5       SL a.  U R                  SL a  U =R
                  U R                  -  sl        SU l        SU l        S U l        SU l        U =R                  S-  sl	        SU l        g U R                  SL a  [        U5      SL a  SU l        U R                  b  UR                  5       (       a  U R                  R                  5       (       d4  UR                  5       (       aS  U R                  R                  5       (       a4  U R                  SL a  U =R                  S-  sl        SU l        OSU l        OSU l        U =R                  S-  sl	        U =R                  S-  sl        Xl        g )NFr   @   r   TrO   )rc   r   r   rP   r   r   r   r   r   rC   r
   r   islower)r!   r   is_concerned	chunk_seps       r"   r(   ArchaicUpperLowerPlugin.feed|  s    ((*J/?	/J E)	==A44:%%'50,,588668 23D.34D0$(D!DI!!Q&!'+D$##t+0Cu0L',D$  ,!!##(=(=(E(E(G(G!!##(=(=(E(E(G(G99$66!;6 %DI $DI!	",,1, )r&   c                 f    SU l         SU l        SU l        SU l        S U l        SU l        SU l        g )Nr   FT)rC   r   r   r   r   r   r   r+   s    r"   r,   ArchaicUpperLowerPlugin.reset  s9     !/0,-.*340 $	#' r&   c                 V    U R                   S:X  a  gU R                  U R                   -  $ )Nr   rV   )rC   r   r+   s    r"   r/   ArchaicUpperLowerPlugin.ratio  s*      A%77$:O:OOOr&   )r   rC   r   r   r   r   r   r2   rY   r1   r&   r"   r   r   k  sQ    (# $ (*c (*d (*T( Pu P Pr&   r   r   r   r   c                    U b  Uc  gX:X  a  gSU ;   a  SU;   a  gSU ;   d  SU;   a  gU R                  S5      UR                  S5      p2U H  nU[        ;   a  M  XC;   d  M    g   U S;   US;   peU(       d  U(       a  SU ;   d  SU;   a  gU(       a  U(       a  gSU ;   d  SU;   a  SU ;   d  SU;   a  gU S	:X  d  US	:X  a  gSU ;   d  SU;   d  U S
;   a   US
;   a  SU ;   d  SU;   a  gSU ;   d  SU;   a  gg)zY
Determine if two Unicode range seen next to each other can be considered as suspicious.
TFLatin	Emoticons )HiraganaKatakanaCJKHangulzBasic Latin)r   r   PunctuationForms)splitr   )r   r   keywords_range_akeywords_range_belrange_a_jp_charsrange_b_jp_charss          r"   r   r     sC    /"9)/!g&@o%)G)8)>)>*S! ' 00!	  	
	

 	33 ' +O#u'? 0?"h/&AO#u'?m+-/O 	 E_$<3377O+}/Oo%O)Cr&   i   )maxsizedecoded_sequencemaximum_thresholddebugc                 J   [         R                  5        Vs/ s H	  o3" 5       PM     nn[        U 5      nSnUS:  a  SnOUS::  a  SnOSn[        U [	        SU5      5       H{  u  pU H,  n
U
R                  U5      (       d  M  U
R                  U5        M.     U	S:  a  X-  S:X  d
  XS-
  :X  d  MO  [        U Vs/ s H  oR                  PM     sn5      nXa:  d  M{    O   U(       a)  U H#  n[        UR                  UR                  5        M%     [        US	5      $ s  snf s  snf )
zo
Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
rV   i       i   r      r   r      )r   __subclasses__r   zipranger#   r(   sumr/   print	__class__round)r   r   r   md_class	detectorslengthmean_mess_ratio!intermediary_mean_mess_ratio_calcr   indexdetectordts               r"   
mess_ratior    s    $6#D#D#F#Fx
#F   !"FO|,.)	4,.),/) 0%62BC	!H  ++i( "
 AI%CqHqj !i"@i88i"@AO3 D B",,)  !$$A. #As   D8D 
N)g?F)&	functoolsr   typingr   r   constantr   r   utilsr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r>   r[   rn   r   r   r   r   r   r8   r9   r   r;   r  r1   r&   r"   <module>r     s    ! S    &" "D,L'9 ,L^O1 O8E* E8!D&8 !DH3/( 3/lMA- MA`B- B>IP0 IPX;c];5=c];	;| 4IN'%'%.3'%BF'%
'% '%r&   