
    M                        S r SSKrSSKrSSKJr  SSKJr  S/r\R                  " S5      r
\R                  " S5      r\R                  " S5      r\R                  " S	5      r\R                  " S
5      r\R                  " S5      r\R                  " S5      r\R                  " S5      r\R                  " S5      r\R                  " S5      r\R                  " S5      r\R                  " S\R*                  5      r\R                  " S\R*                  5      r\R                  " S\R*                  5      r\R                  " S5      r\R                  " S5      rS rS r " S S\R:                  5      rg)zA parser for HTML and XHTML.    N)unescape)html5
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z6&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;=]?z	<[a-zA-Z]z
</[a-zA-Z]>z--!?>z-?>z0([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*a{  
  (
    (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
   )
  ([\t\n\r\f ]*=[\t\n\r\f ]*        # value indicator
    ('[^']*'                        # LITA-enclosed value
    |"[^"]*"                        # LIT-enclosed value
    |(?!['"])[^>\t\n\r\f ]*         # bare value
    )
   )?
  (?:[\t\n\r\f ]|/(?!>))*           # possibly followed by a space
a  
  [a-zA-Z][^\t\n\r\f />]*           # tag name
  [\t\n\r\f /]*                     # optional whitespace before attribute name
  (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
    (?:[\t\n\r\f ]*=[\t\n\r\f ]*    # value indicator
      (?:'[^']*'                    # LITA-enclosed value
        |"[^"]*"                    # LIT-enclosed value
        |(?!['"])[^>\t\n\r\f ]*     # bare value
       )
     )?
    [\t\n\r\f /]*                   # possibly followed by a space
   )*
   >?
aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                     U R                  S5      nUR                  S5      (       a  [        U5      $ UR                  S5      (       d  USS  [        ;   a  [        U5      $ U$ )Nr   &#=   )group
startswithr   endswithhtml5_entities)matchrefs     8platform/bundledpythonunix/lib/python3.13/html/parser.py_replace_attr_charrefr   \   sU    
++a.C
~~d} <<QRN!:}J    c                 6    [         R                  [        U 5      $ N)attr_charrefsubr   )ss    r   _unescape_attrvaluer   h   s    1155r   c                      ^  \ rS rSrSrSrSrSS.U 4S jjrU 4S jrS	 r	S
 r
SrS rSS.S jrS rS rS rS$S jrS%S jrS rS rS rS rS rS rS rS rS rS rS rS  rS! rS" r S#r!U =r"$ )&r   l   a  Find tags and other markup and call handler functions.

Usage:
    p = HTMLParser()
    p.feed(data)
    ...
    p.close()

Start tags are handled by calling self.handle_starttag() or
self.handle_startendtag(); end tags by self.handle_endtag().  The
data between tags is passed from the parser to the derived class
by calling self.handle_data() with the data as argument (the data
may be split up in arbitrary chunks).  If convert_charrefs is
True the character references are converted automatically to the
corresponding Unicode character (and self.handle_data() is no
longer split in chunks), otherwise they are passed by calling
self.handle_entityref() or self.handle_charref() with the string
containing respectively the named or numeric reference as the
argument.
)scriptstyle)textareatitleT)convert_charrefsc                N   > [         TU ]  5         Xl        U R                  5         g)zInitialize and reset this instance.

If convert_charrefs is True (the default), all character references
are automatically converted to the corresponding Unicode characters.
N)super__init__r    reset)selfr    	__class__s     r   r#   HTMLParser.__init__   s     	 0

r   c                 p   > SU l         SU l        [        U l        SU l        SU l        [        TU ]  5         g)z1Reset this instance.  Loses all unprocessed data. z???NT)rawdatalasttaginteresting_normalinteresting
cdata_elem
_escapabler"   r$   )r%   r&   s    r   r$   HTMLParser.reset   s0    -r   c                 N    U R                   U-   U l         U R                  S5        g)zyFeed data to the parser.

Call this as often as you want, with as little or as much text
as you want (may include '\n').
r   N)r*   goaheadr%   datas     r   feedHTMLParser.feed   s     ||d*Qr   c                 &    U R                  S5        g)zHandle any buffered data.r
   N)r2   r%   s    r   closeHTMLParser.close   s    Qr   Nc                     U R                   $ )z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_textr8   s    r   get_starttag_textHTMLParser.get_starttag_text   s    ###r   F	escapablec                   UR                  5       U l        X l        U(       aZ  U R                  (       dI  [        R
                  " SU R                  -  [        R                  [        R                  -  5      U l        g [        R
                  " SU R                  -  [        R                  [        R                  -  5      U l        g )Nz&|</%s(?=[\t\n\r\f />])z</%s(?=[\t\n\r\f />]))	lowerr.   r/   r    recompile
IGNORECASEASCIIr-   )r%   elemr@   s      r   set_cdata_modeHTMLParser.set_cdata_mode   s}    **,#T22!zz*Dt*V*,--*@ BD  "zz*BT__*T*,--*@ BDr   c                 6    [         U l        S U l        SU l        g )NT)r,   r-   r.   r/   r8   s    r   clear_cdata_modeHTMLParser.clear_cdata_mode   s    -r   c                    U R                   nSn[        U5      nX4:  Ga  U R                  (       a|  U R                  (       dk  UR	                  SU5      nUS:  aR  UR                  S[        X4S-
  5      5      nUS:  a,  [        R                  " S5      R                  X&5      (       d  GO3UnOHU R                  R                  X#5      nU(       a  UR                  5       nOU R                  (       a  GOUnX5:  aR  U R                  (       a.  U R                  (       a  U R                  [        X#U 5      5        OU R                  X#U 5        U R                  X55      nX4:X  a  GOyUR                   nU" SU5      (       Gaw  ["        R%                  X#5      (       a  U R'                  U5      n	OU" SU5      (       a  U R)                  U5      n	OU" SU5      (       a  U R+                  U5      n	OiU" SU5      (       a  U R-                  U5      n	OIU" S	U5      (       a  U R/                  U5      n	O)US
-   U:  d  U(       a  U R                  S5        US
-   n	OGOU	S:  Ga  U(       d  GOy["        R%                  X#5      (       a  GOaU" SU5      (       aP  US-   U:X  a  U R                  S5        GO7[0        R%                  X#5      (       a  GOU R3                  X#S-   S  5        GOU" SU5      (       aK  UnS H,  n
UR5                  XS-   5      (       d  M  U[        U
5      -  n  O   U R3                  X#S-   U 5        OU" SU5      (       a  U R7                  X#S-   S  5        OX#US-    R9                  5       S:X  a  U R;                  X#S-   S  5        OUU" S	U5      (       a  U R3                  X#S-   S  5        O0U" SU5      (       a  U R=                  X#S-   S  5        O[?        S5      eUn	U R                  X95      nGOU" SU5      (       a  [@        R%                  X#5      nU(       a^  URC                  5       SS nU RE                  U5        URG                  5       n	U" SU	S
-
  5      (       d  U	S
-
  n	U R                  X95      nGMd  SX#S  ;   a*  U R                  X#US-    5        U R                  X3S-   5      nGO+U" SU5      (       Ga  [H        R%                  X#5      nU(       a\  URC                  S
5      nU RK                  U5        URG                  5       n	U" SU	S
-
  5      (       d  U	S
-
  n	U R                  X95      nGM  [L        R%                  X#5      nU(       aI  U(       aA  URC                  5       X#S  :X  a+  URG                  5       n	X::  a  Un	U R                  X3S
-   5      nO?US
-   U:  a&  U R                  S5        U R                  X3S
-   5      nOO S5       eX4:  a  GM  U(       ah  X4:  ac  U R                  (       a.  U R                  (       a  U R                  [        X#U 5      5        OU R                  X#U 5        U R                  X45      nX#S  U l         g )Nr   <&"   z[\t\n\r\f ;]</<!--<?<!r
      )z--!z---   	<![CDATA[   	   	<!doctypezwe should not get here!r   ;zinteresting.search() lied)'r*   lenr    r.   findrfindmaxrC   rD   searchr-   startr/   handle_datar   	updateposr   starttagopenr   parse_starttagparse_endtagparse_commentparse_piparse_html_declaration
endtagopenhandle_commentr   unknown_declrB   handle_decl	handle_piAssertionErrorcharrefr   handle_charrefend	entityrefhandle_entityref
incomplete)r%   rt   r*   injampposr   r   ksuffixnames               r   r2   HTMLParser.goahead   sj   ,,Le$$T__LLa(q5 %]]3At=F!JJ7>>wOOA((//;AAu((T__$$Xgl%;<$$Wq\2q$Avu ++J#q!!%%g11++A.Aa(())!,A****1-Aa((a(Aa((33A6A!eq[C$$S)AAq5#))'55#D!,,q5A: ,,T2'--g99  !//!>#FA..&8F&//!<< !S[ 0 % '9 ++GaCN;#K33))'A#$-8 1Q3--/;>((17#D!,,++GaCDM:#D!,,wst}5,-FGGANN1(D!$$g1 ;;=2.D''-		A%c1Q3//Eq,Agbk)((1Q38 NN1c2C##!3 ;;q>D))$/		A%c1Q3//Eq,A"((4u{{};!IIK6 !A NN1!e4!eq[ $$S)qa%0A555qw ez 15$$  'A,!78  1.q$Ar{r   c                 p   U R                   nX!US-    S:X  d   S5       eX!US-    S:X  a  U R                  U5      $ X!US-    S:X  a  U R                  U5      $ X!US-    R                  5       S:X  a7  UR	                  S	US-   5      nUS
:X  a  g
U R                  X!S-   U 5        US-   $ U R                  U5      $ )NrU   rT   z+unexpected call to parse_html_declaration()rW   rR   rZ   rX   r[   r   r\   r
   )r*   ri   parse_marked_sectionrB   r_   ro   parse_bogus_comment)r%   rx   r*   gtposs       r   rk   !HTMLParser.parse_html_declarationI  s    ,,1~% 	D )C 	D%QqS>V#%%a((qs^{*,,Q//qs^!!#{2LLac*E{WqS/07N++A..r   c                 N   U R                   nUR                  SU5      (       d   S5       e[        R                  X1S-   5      nU(       d   [        R                  X1S-   5      nU(       d  gU(       a&  UR                  5       nU R                  X1S-   U 5        UR                  5       $ )NrR   z"unexpected call to parse_comment()rW   r\   )	r*   r   commentcloserb   commentabruptcloser   rc   rm   rt   )r%   rx   reportr*   r   rz   s         r   ri   HTMLParser.parse_comment^  s    ,,!!&!,,R.RR,##GqS1&,,Wc:EA!Q0yy{r   c                     U R                   nX1US-    S;   d   S5       eUR                  SUS-   5      nUS:X  a  gU(       a  U R                  X1S-   U 5        US-   $ )NrU   )rT   rQ   z(unexpected call to parse_bogus_comment()r   r\   r
   )r*   r_   rm   )r%   rx   r   r*   poss        r   r   HTMLParser.parse_bogus_commentm  sq    ,,1~- 	I 1H 	I-ll3!$"9!C 01Qwr   c                     U R                   nX!US-    S:X  d   S5       e[        R                  X!S-   5      nU(       d  gUR                  5       nU R	                  X!S-   U 5        UR                  5       nU$ )NrU   rS   zunexpected call to parse_pi()r\   )r*   picloserb   rc   rp   rt   r%   rx   r*   r   rz   s        r   rj   HTMLParser.parse_piy  sn    ,,1~%F'FF%w!,KKMwsA'IIKr   c                    S U l         U R                  U5      nUS:  a  U$ U R                  nX1U U l         / n[        R	                  X1S-   5      nU(       d   S5       eUR                  5       nUR                  S5      R                  5       =U l        nXb:  a  [        R	                  X65      nU(       d  OUR                  SSS5      u  pnU
(       d  S nO0US S Ss=:X  a	  USS  :X  d  O  US S Ss=:X  a	  USS  :X  a  O  OUSS nU(       a  [        U5      nUR                  U	R                  5       U45        UR                  5       nXb:  a  M  X6U R                  5       nUS	;  a  U R                  X1U 5        U$ UR                  S
5      (       a  U R                  Xt5        U$ U R!                  Xt5        XpR"                  ;   a  U R%                  U5        U$ XpR&                  ;   a  U R%                  USS9  U$ )Nr   r
   z#unexpected call to parse_starttag()rU   rY   'r\   ")r   />r   Tr?   )r<   check_for_whole_start_tagr*   tagfind_tolerantr   rt   r   rB   r+   attrfind_tolerantr   appendstriprd   r   handle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSrH   RCDATA_CONTENT_ELEMENTS)r%   rx   endposr*   attrsr   r|   tagmattrnamerest	attrvaluert   s                r   rg   HTMLParser.parse_starttag  s     $//2A:M,,&0  &&w!4;;;uIIK"[[^1133sj!''3A()1a(8%HI 	2A$8)BC.82A#7237%aO	/	:	LL(..*I67A j %%'k!Wv./M<<##C/    ,111##C(  444##C4#8r   c                     U R                   n[        R                  X!S-   5      nU(       d   eUR                  5       nX$S-
     S:w  a  gU$ )Nr
   r   r\   )r*   locatetagendr   rt   r   s        r   r   $HTMLParser.check_for_whole_start_tag  sG    ,,""7aC0uIIKQ3<3r   c                 .   U R                   nX!US-    S:X  d   S5       eUR                  SUS-   5      S:  a  g[        R                  X!5      (       d$  X!S-   US-    S:X  a  US-   $ U R	                  U5      $ [
        R                  X!S-   5      nU(       d   eUR                  5       nX$S-
     S:w  a  g[        R                  X!S-   5      nU(       d   eUR                  S5      R                  5       nU R                  U5        U R                  5         U$ )	NrU   rQ   zunexpected call to parse_endtagr   r   r\   rY   r
   )r*   r_   rl   r   r   r   rt   r   r   rB   handle_endtagrK   )r%   rx   r*   r   rz   r   s         r   rh   HTMLParser.parse_endtag  s    ,,1~%H'HH%<<QqS!A%++s1Q33&s
//22""7aC0uIIKQ3<3 !&&w!4ukk!n""$3r   c                 H    U R                  X5        U R                  U5        g r   )r   r   r%   r   r   s      r   r   HTMLParser.handle_startendtag  s    S(3r   c                     g r    r   s      r   r   HTMLParser.handle_starttag      r   c                     g r   r   )r%   r   s     r   r   HTMLParser.handle_endtag  r   r   c                     g r   r   r%   r~   s     r   rs   HTMLParser.handle_charref  r   r   c                     g r   r   r   s     r   rv   HTMLParser.handle_entityref  r   r   c                     g r   r   r3   s     r   rd   HTMLParser.handle_data  r   r   c                     g r   r   r3   s     r   rm   HTMLParser.handle_comment  r   r   c                     g r   r   )r%   decls     r   ro   HTMLParser.handle_decl  r   r   c                     g r   r   r3   s     r   rp   HTMLParser.handle_pi  r   r   c                     g r   r   r3   s     r   rn   HTMLParser.unknown_decl  r   r   )__starttag_textr/   r.   r    r-   r+   r*   )T)r
   )#__name__
__module____qualname____firstlineno____doc__r   r   r#   r$   r5   r9   r<   r=   rH   rK   r2   rk   ri   r   rj   rg   r   rh   r   r   r   rs   rv   rd   rm   ro   rp   rn   __static_attributes____classcell__)r&   s   @r   r   r   l   s    * 13+/   O$ 16 BG#X/*		,`< 
 r   )r   rC   _markupbasehtmlr   html.entitiesr   r   __all__rD   r,   rw   ru   rr   r   rf   rl   r   r   r   r   VERBOSEr   r   locatestarttagend_tolerant	endendtag
endtagfindr   r   
ParserBaser   r   r   r   <module>r      sY   " 
   1 . ZZ' ZZ%
JJ>?	
**@
AzzSTzz+&ZZ%

**S/zz(#ZZ'  ::QR JJ   ZZ  zz  ZZ   ZZ ) ZZ  JJsO	ZZ>?

6V'' Vr   