
    /                         S r SSKJr  SSKrSSKJr  SSKJr  SSKJ	r	  SSK
JrJrJr  S	 r " S
 S5      r " S S5      r " S S5      r " S S5      rS"S jrS r " S S5      r " S S\5      rS rS rS rS rS rS rS rS r S  r!S! r"g)#z$This module implements a CYK parser.    )defaultdictN   )
ParseError)Token)Tree)TerminalNonTerminalSymbolc                 b    [        U [        5      (       d   eU R                  UR                  :H  $ N)
isinstanceTnametype)tss     #lib/third_party/lark/parsers/cyk.pymatchr      s(    a66QVV    c                   J   ^  \ rS rSrSrU 4S jrS rS rS rS r	S r
S	rU =r$ )
Rule   zContext-free grammar rule.c                    > [         [        U ]  5         [        U[        5      (       d   U5       e[        S U 5       5      (       d   U5       eXl        X l        X0l        X@l	        g )Nc              3   p   #    U  H,  n[        U[        5      =(       d    [        U[        5      v   M.     g 7fr   )r   NTr   .0xs     r   	<genexpr> Rule.__init__.<locals>.<genexpr>   s&     F#Q:a$8
1a(88#s   46)
superr   __init__r   r   alllhsrhsweightalias)selfr$   r%   r&   r'   	__class__s        r   r"   Rule.__init__   sX    dD"$#r""'C'"F#FFFKKF
r   c                 z    [        U R                  5      < SSR                  S U R                   5       5      < 3$ )Nz ->  c              3   8   #    U  H  n[        U5      v   M     g 7fr   strr   s     r   r   Rule.__str__.<locals>.<genexpr>"   s     4NXSVVX   )r/   r$   joinr%   r(   s    r   __str__Rule.__str__!   s(     ]CHH4NTXX4N,NOOr   c                     [        U 5      $ r   r.   r3   s    r   __repr__Rule.__repr__$       4yr   c                 V    [        U R                  [        U R                  5      45      $ r   )hashr$   tupler%   r3   s    r   __hash__Rule.__hash__'   s    TXXuTXX/00r   c                 t    U R                   UR                   :H  =(       a    U R                  UR                  :H  $ r   )r$   r%   r(   others     r   __eq__Rule.__eq__*   s'    xx599$>UYY)>>r   c                     X:X  + $ r    r@   s     r   __ne__Rule.__ne__-   s    ""r   )r'   r$   r%   r&   )__name__
__module____qualname____firstlineno____doc__r"   r4   r7   r=   rB   rF   __static_attributes____classcell__r)   s   @r   r   r      s*    $P1?# #r   r   c                   0    \ rS rSrSrS rS rS rS rSr	g)	Grammar1   zContext-free grammar.c                 $    [        U5      U l        g r   )	frozensetrules)r(   rU   s     r   r"   Grammar.__init__4   s    u%
r   c                 4    U R                   UR                   :H  $ r   rU   r@   s     r   rB   Grammar.__eq__7   s    zzU[[((r   c                 d    SSR                  [        S U R                   5       5      5      -   S-   $ )N
c              3   8   #    U  H  n[        U5      v   M     g 7fr   )reprr   s     r   r   "Grammar.__str__.<locals>.<genexpr>;   s     &C
1tAww
r1   )r2   sortedrU   r3   s    r   r4   Grammar.__str__:   s*    dii&C

&C CDDtKKr   c                     [        U 5      $ r   r.   r3   s    r   r7   Grammar.__repr__=   r9   r   rX   N)
rH   rI   rJ   rK   rL   r"   rB   r4   r7   rM   rE   r   r   rQ   rQ   1   s    &)Lr   rQ   c                   (    \ rS rSrSrSS jrS rSrg)RuleNodeB   z@A node in the parse tree, which also contains the full rhs rule.c                 (    Xl         X l        X0l        g r   )rulechildrenr&   )r(   rg   rh   r&   s       r   r"   RuleNode.__init__E   s    	 r   c                     S[        U R                  R                  5      < SSR                  S U R                   5       5      < S3$ )Nz	RuleNode(z, [z, c              3   8   #    U  H  n[        U5      v   M     g 7fr   r.   r   s     r   r   $RuleNode.__repr__.<locals>.<genexpr>K   s     EdVcQRc!ffVcr1   z]))r]   rg   r$   r2   rh   r3   s    r   r7   RuleNode.__repr__J   s0    '+DIIMM':DIIEdVZVcVcEd<deer   )rh   rg   r&   Nr   )rH   rI   rJ   rK   rL   r"   r7   rM   rE   r   r   rd   rd   B   s    J
fr   rd   c                   >   ^  \ rS rSrSrU 4S jrS rS rS rSr	U =r
$ )ParserO   zParser wrapper.c                    > [         [        U ]  5         U Vs0 s H  o"U_M     snU l        U Vs/ s H  o R	                  U5      PM     nn[        [        U5      5      U l        g s  snf s  snf r   )r!   rp   r"   
orig_rules_to_ruleto_cnfrQ   grammar)r(   rU   rg   r)   s      r   r"   Parser.__init__R   s[    fd$&278%$:%8167t$7gen- 97s
   A&A+c                 2   [        UR                  [        5      (       d   e[        S UR                   5       5      (       d   e[        UR                  UR                  UR                  R                  (       a  UR                  R                  US9$ SUS9$ )z?Converts a lark rule, (lhs, rhs, callback, options), to a Rule.c              3   B   #    U  H  n[        U[        5      v   M     g 7fr   )r   r
   r   s     r   r   "Parser._to_rule.<locals>.<genexpr>[   s     F2EQ:a((2E   r   r&   r'   )r   originr   r#   	expansionr   optionspriority)r(   	lark_rules     r   rt   Parser._to_ruleX   s    )**B////F)2E2EFFFFFi111:1B1B1K1K9$$-- 	QR 	r   c                 &  ^ T(       d   e[        T5      m[        XR                  5      u  p4[        U4S jUS[	        U5      S-
  4    5       5      (       a  [        S5      eUS[	        U5      S-
  4   T   nU R                  [        U5      5      $ )z(Parses input, which is a list of tokens.c              3   @   >#    U  H  oR                   T:g  v   M     g 7fr   )r$   )r   rstarts     r   r   Parser.parse.<locals>.<genexpr>h   s     F'E!uu~'Es   r      zParsing failed.)r   _parserv   r#   lenr   _to_tree
revert_cnf)r(   	tokenizedr   tabletreesparses     `   r   r   Parser.parsea   s    u5	i6FuaY!1C-D'EFFF.//q#i.1,-.u5}}Z.//r   c                    U R                   UR                  R                     n/ nUR                   Hv  n[	        U[
        5      (       a"  UR                  U R                  U5      5        M:  [	        UR                  [        5      (       d   eUR                  UR                  5        Mx     [        UR                  U5      nX%l        U$ )z.Converts a RuleNode parse tree to a lark Tree.)rs   rg   r'   rh   r   rd   appendr   r   r   r   r}   )r(   	rule_node	orig_rulerh   childr   s         r   r   Parser._to_treem   s    OOINN$8$89	''E%**e 45!%**e4444

+ ( !!8,r   )rv   rs   )rH   rI   rJ   rK   rL   r"   rt   r   r   rM   rN   rO   s   @r   rp   rp   O   s    .
0 r   rp   c                 "   [        U [        5      (       aS  [        SUS-  -  [        U R                  R
                  5      -   5        U R                   H  n[        X!S-   5        M     g [        SUS-  -  [        U R                  5      -   5        g )Nr,   r   r   )	r   rd   printr/   rg   r$   rh   print_parser   )nodeindentr   s      r   r   r   |   sl    $!!cVaZ 3tyy}}#556]]Ez* # 	cVaZ 3tvv;./r   c                    [        [        5      n[        [        5      n[        U 5       H  u  pEUR                  R                  5        H  u  pg[        Xe5      (       d  M  U H  nX$U4   R                  U5        UR                  X4U4   ;  d-  UR                  X4U4   UR                     R                  :  d  MY  [        U[        U5      /UR                  S9X4U4   UR                  '   M     M     M     [        S[        U 5      S-   5       GHt  n	[        [        U 5      U	-
  S-   5       GHQ  n[        US-   XI-   5       GH7  n
XJS-
  4nXU	-   S-
  4n[        R                  " X+   X,   5       GH  u  pUR                   R#                  UR                  UR                  4/ 5       H  nX$XI-   S-
  4   R                  U5        X;   UR                     nX<   UR                     nUR                  UR                  -   UR                  -   nUR                  X4XI-   S-
  4   ;  d(  UX4XI-   S-
  4   UR                     R                  :  d  M  [        XU/US9X4XI-   S-
  4   UR                  '   M     GM     GM:     GMT     GMw     X#4$ )z*Parses sentence 's' using CNF grammar 'g'.r&   r   r   )r   setdict	enumerateterminal_rulesitemsr   addr$   r&   rd   r   ranger   	itertoolsproductnonterminal_rulesget)r   gr   r   iwterminalrU   rg   lpspan1span2r1r2r1_treer2_treerule_total_weights                     r   r   r      sB    E E! //557OHX!!!Da&M%%d+!f5eFmDHH&=&D&DD2:4!A$PTP[P[2\!fdhh/	 "  8  1c!fqj!s1vzA~&A1q5!%(E
EAI'//elKFB ! 3 3 7 78H" M!%!)n-11$7"',rvv"6"',rvv"6,0KK'..,H7>>,Y) HHEaeai.,AA05QUQY3H3R3Y3YY>FtW^M_hy>zEaeai.1$((; !N L ) ' "  <r   c                   8   ^  \ rS rSrSrU 4S jrS rS rSrU =r	$ )
CnfWrapper   zgCNF wrapper for grammar.

Validates that the input grammar is CNF and provides helper data structures.
c                   > [         [        U ]  5         Xl        UR                  U l        [        [        5      U l        [        [        5      U l        U R                   GH(  n[        UR                  [        5      (       d   U5       e[        UR                  5      S;  a  [        S5      e[        UR                  5      S:X  aO  [        UR                  S   [        5      (       a-  U R                  UR                  S      R!                  U5        M  [        UR                  5      S:X  aU  [#        S UR                   5       5      (       a4  U R                  [%        UR                  5         R!                  U5        GM$   U5       e   g )N)r   r   zCYK doesn't support empty rulesr   r   r   c              3   B   #    U  H  n[        U[        5      v   M     g 7fr   )r   r   r   s     r   r   &CnfWrapper.__init__.<locals>.<genexpr>   s     (JEqAr):):Er{   )r!   r   r"   rv   rU   r   listr   r   r   r$   r   r   r%   r   r   r   r#   r<   )r(   rv   r   r)   s      r   r"   CnfWrapper.__init__   s   j$(*]]
)$/!,T!2AaeeR((+!+(155z' !BCC155zQ:aeeAh#:#:##AEE!H-44Q7QUUqS(JAEE(J%J%J&&uQUU|4;;A>au r   c                 4    U R                   UR                   :H  $ r   )rv   r@   s     r   rB   CnfWrapper.__eq__   s    ||u}},,r   c                 ,    [        U R                  5      $ r   )r]   rv   r3   s    r   r7   CnfWrapper.__repr__   s    DLL!!r   )rv   r   rU   r   )
rH   rI   rJ   rK   rL   r"   rB   r7   rM   rN   rO   s   @r   r   r      s    
 $-" "r   r   c                   J   ^  \ rS rSrSrU 4S jrS r\R                  rSr	U =r
$ )UnitSkipRule   z@A rule that records NTs that were skipped during transformation.c                 :   > [         [        U ]  XXE5        X0l        g r   )r!   r   r"   skipped_rules)r(   r$   r%   r   r&   r'   r)   s         r   r"   UnitSkipRule.__init__   s    lD*3VC*r   c                 l    [        U[        U 5      5      =(       a    U R                  UR                  :H  $ r   )r   r   r   r@   s     r   rB   UnitSkipRule.__eq__   s)    %d,Z1C1CuGZGZ1ZZr   )r   )rH   rI   rJ   rK   rL   r"   rB   r   r=   rM   rN   rO   s   @r   r   r      s    J+[ }}Hr   r   c                 8   / n[        U [        5      (       a  X R                  -  nUR                  U5        [        U[        5      (       a  X!R                  -  n[        U R                  UR
                  UU R                  UR                  -   U R                  S9$ )Nr|   )r   r   r   r   r$   r%   r&   r'   )	unit_ruletarget_ruler   s      r   build_unit_skipruler      s    M)\**000%+|,,222	{&--0B0BB)//[ [r   c                     U R                    HD  n[        UR                  5      S:X  d  M  [        UR                  S   [        5      (       d  MB  Us  $    g)zDReturns a non-terminal unit rule from 'g', or None if there is none.r   r   N)rU   r   r%   r   r   )r   rg   s     r   get_any_nt_unit_ruler      s>    txx=A*TXXa[""="=K  r   c                 ,   U R                    Vs/ s H  o"U:w  d  M
  UPM     nnU R                    Vs/ s H#  o"R                  UR                  S   :X  d  M!  UPM%     nnX4 Vs/ s H  n[        X5      PM     sn-  n[	        U5      $ s  snf s  snf s  snf )zFRemoves 'rule' from 'g' without changing the language produced by 'g'.r   )rU   r$   r%   r   rQ   )r   rg   r   	new_rulesrefsrefs         r   _remove_unit_ruler      s|    GG1GqDyGI1ww7w!%%488A;"6AwD7DADS%d0DAAI9 27As   	BB BB"Bc           
   #   X  #    [        U R                  5      S-   SR                  S U R                   5       5      -   nSU-  S-   n[	        U R                  U R                  S   [        US-  5      /U R                  U R                  S9v   [        S[        U R                  5      S	-
  5       H9  n[	        [        X#-  5      U R                  U   [        X#S-   -  5      /SS
S9v   M;     [	        [        U[        U R                  5      S	-
  -  5      U R                  SS SS
S9v   g7f)z4Splits a rule whose len(rhs) > 2 into shorter rules.___c              3   8   #    U  H  n[        U5      v   M     g 7fr   r.   r   s     r   r   _split.<locals>.<genexpr>  s     .Hx!s1vvxr1   z__SP_%sz_%dr   r   r|   r   SplitN)
r/   r$   r2   r%   r   r   r&   r'   r   r   )rg   rule_str	rule_namer   s       r   _splitr      s     488}t#chh.Htxx.H&HHHX&.I
txx$((1+r)a-'89$++UYU_U_
``1c$((ma'(2im$txx{ByE7J4K&LUV^eff )
r)s488}q012DHHRSM!SZ
[[s   D(D*c                 4  ^ U R                    VVs1 s H/  oR                    H  n[        U[        5      (       d  M  UiM     M1     nnnU Vs0 s H%  oD[	        [        S[        U5      -  5      U/SSS9_M'     nn/ nU R                    H  m[        TR                  5      S:  a  [        S TR                   5       5      (       a  TR                   Vs/ s H(  n[        U[        5      (       a  XR   R                  OUPM*     nnUR                  [	        TR                  UTR                  TR                  S95        UR                  U4S jUR                  5        5       5        M  UR                  T5        M     [        U5      $ s  snnf s  snf s  snf )z/Applies the TERM rule on 'g' (see top comment).z__T_%sr   Termr|   r   c              3   B   #    U  H  n[        U[        5      v   M     g 7fr   )r   r   r   s     r   r   _term.<locals>.<genexpr>  s     $Hx!Z1%5%5xr{   c              3   P   >#    U  H  u  pUTR                   ;   d  M  Uv   M     g 7fr   )r%   )r   kvrg   s      r   r   r     s     L?41a488mQQ?s   &	&)rU   r%   r   r   r   r   r/   r   anyr$   r   r&   r'   extendr   rQ   )r   rg   r   all_tr   t_rulesr   new_rhss    `      r   _termr   	  s1   77I74HHq
1a8HQHQ7EITYZTYq$r(SV+,qc!6JJTYGZItxx=1$Htxx$H!H!HJN((S(QAq)9)9wz~~q@(GST$((GDKKtzzZ[L7==?LLT"  9 JZ Ts   (F

F
,F/Fc                     / nU R                    H=  n[        UR                  5      S:  a  U[        U5      -  nM,  UR	                  U5        M?     [        U5      $ )z.Applies the BIN rule to 'g' (see top comment).r   )rU   r   r%   r   r   rQ   )r   r   rg   s      r   _binr     sN    Itxx=1%IT"	 
 9r   c                 h    [        U 5      nU(       a  [        X5      n [        U 5      nU(       a  M  U $ )z/Applies the UNIT rule to 'g' (see top comment).)r   r   )r   nt_unit_rules     r   _unitr   #  s1    '*L
a.+A. , Hr   c                 R    [        [        [        U 5      5      5      n [        U 5      $ )z>Creates a CNF grammar from a general context-free grammar 'g'.)r   r   r   r   )r   s    r   ru   ru   ,  s    d58nAa=r   c                 
   U(       d  [        [        XXES9X4S9$ XBS   R                  -
  n[        [        XS   R                  /XES9[	        US   R                  UUSS  UUS   R                  US   R
                  5      /US9$ )Nr|   r   r   r   )rd   r   r&   r$   unroll_unit_skipruler'   )r$   orig_rhsr   rh   r&   r'   s         r   r   r   2  s    S6Gaa*111Q'++,VI$]1%5%9%98 -ab 18 -a 0 7 7q9I9O9OQL  	r   c                 "   [        U [        5      (       a  U $ U R                  R                  R                  R                  S5      (       a  U R                  S   $ / n[        [        U R                  5       Hm  n[        U[        5      (       aD  UR                  R                  R                  R                  S5      (       a  XR                  -  nM\  UR                  U5        Mo     [        U R                  [        5      (       at  [        U R                  R                  U R                  R                  U R                  R                  UU R                  R                  U R                  R                   5      $ [        U R                  U5      $ )zDReverts a parse tree (RuleNode) to its original non-CNF form (Node).__T_r   __SP_)r   r   rg   r$   r   
startswithrh   mapr   rd   r   r   r   r%   r   r&   r'   )r   rh   r   s      r   r   r   ?  s   $yy}}$$V,,}}QT]]3E%**uzz~~/B/B/M/Mg/V/VNN*& 4 dii..'		tyy}}$(II$;$;X$(II$4$4diiooG G DIIx00r   rn   )#rL   collectionsr   r   
exceptionsr   lexerr   treer   rv   r   r   r	   r   r
   r   r   rQ   rd   rp   r   r   r   r   r   r   r   r   r   r   r   ru   r   r   rE   r   r   <module>r      s    * $  #   > >
# #8 "	f 	f* *Z0"f" ">
4 
[\
1r   