HEX

File: //proc/self/root/lib64/python3.9/html/__pycache__/parser.cpython-39.opt-1.pyc
a

�i�R�@s�dZddlZddlZddlmZdgZe�d�Ze�d�Ze�d�Z	e�d�Z
e�d	�Ze�d
�Ze�d�Z
e�d�Ze�d
�Ze�d�Ze�dej�Ze�dej�Ze�dej�Ze�d�Ze�d�ZGdd�dej�ZdS)zA parser for HTML and XHTML.�N)�unescape�
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]z
</[a-zA-Z]�>z--!?>z-?>z0([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*a{
  (
    (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
   )
  ([\t\n\r\f ]*=[\t\n\r\f ]*        # value indicator
    ('[^']*'                        # LITA-enclosed value
    |"[^"]*"                        # LIT-enclosed value
    |(?!['"])[^>\t\n\r\f ]*         # bare value
    )
   )?
  (?:[\t\n\r\f ]|/(?!>))*           # possibly followed by a space
a
  [a-zA-Z][^\t\n\r\f />]*           # tag name
  [\t\n\r\f /]*                     # optional whitespace before attribute name
  (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
    (?:[\t\n\r\f ]*=[\t\n\r\f ]*    # value indicator
      (?:'[^']*'                    # LITA-enclosed value
        |"[^"]*"                    # LIT-enclosed value
        |(?!['"])[^>\t\n\r\f ]*     # bare value
       )
     )?
    [\t\n\r\f /]*                   # possibly followed by a space
   )*
   >?
aF
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c@seZdZdZdZdZddd�dd�Zd	d
�Zdd�Zd
d�Z	dZ
dd�Zdd�dd�Zdd�Z
d>dd�Zdd�Zdd�Zd?dd�Zd@d d!�Zd"d#�Zd$d%�Zd&d'�Zd(d)�Zd*d+�Zd,d-�Zd.d/�Zd0d1�Zd2d3�Zd4d5�Zd6d7�Zd8d9�Zd:d;�Zd<d=�Z dS)AraEFind tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )Zscript�styleZxmpZiframeZnoembedZnoframes)Ztextarea�titleTF)�convert_charrefs�	scriptingcCs||_||_|��dS)azInitialize and reset this instance.

        If convert_charrefs is true (the default), all character references
        are automatically converted to the corresponding Unicode characters.

        If *scripting* is false (the default), the content of the
        ``noscript`` element is parsed normally; if it's true,
        it's returned as is without being parsed.
        N)rr�reset)�selfrr�r�#/usr/lib64/python3.9/html/parser.py�__init__vs
zHTMLParser.__init__cCs4d|_d|_t|_d|_d|_d|_tj�	|�dS)z1Reset this instance.  Loses all unprocessed data.�z???NT)
�rawdata�lasttag�interesting_normal�interesting�
cdata_elem�_support_cdata�
_escapable�_markupbase�
ParserBaser	�r
rrrr	�szHTMLParser.resetcCs|j||_|�d�dS)z�Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        rN)r�goahead�r
�datarrr�feed�szHTMLParser.feedcCs|�d�dS)zHandle any buffered data.�N)rrrrr�close�szHTMLParser.closeNcCs|jS)z)Return full source of start tag: '<...>'.)�_HTMLParser__starttag_textrrrr�get_starttag_text�szHTMLParser.get_starttag_text��	escapablecCsp|��|_||_|jdkr(t�d�|_nD|rP|jsPt�d|jtjtjB�|_nt�d|jtjtjB�|_dS)N�	plaintextz\Zz&|</%s(?=[\t\n\r\f />])z</%s(?=[\t\n\r\f />]))	�lowerrr�re�compilerr�
IGNORECASE�ASCII)r
�elemr"rrr�set_cdata_mode�s



�
�zHTMLParser.set_cdata_modecCst|_d|_d|_dS)NT)rrrrrrrr�clear_cdata_mode�szHTMLParser.clear_cdata_modecCs
||_dS)aEnable or disable support of the CDATA sections.
        If enabled, "<[CDATA[" starts a CDATA section which ends with "]]>".
        If disabled, "<[CDATA[" starts a bogus comments which ends with ">".

        This method is not called by default. Its purpose is to be called
        in custom handle_starttag() and handle_endtag() methods, with
        value that depends on the adjusted current node.
        See https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
        for details.
        N)r)r
�flagrrr�_set_support_cdata�szHTMLParser._set_support_cdatacCs4|j}d}t|�}||k�r�|jrv|jsv|�d|�}|dkr�|�dt||d��}|dkrpt�d��	||�sp�q�|}n*|j
�	||�}|r�|��}n|jr��q�|}||kr�|jr�|jr�|�
t|||���n|�
|||��|�||�}||kr��q�|j}|d|��r<t�||��r"|�|�}	n�|d|��r:|�|�}	nt|d|��rR|�|�}	n\|d|��rj|�|�}	nD|d	|��r�|�|�}	n,|d
|k�s�|�r�|�
d�|d
}	n�q�|	dk�r.|�sq�t�||��rԐnV|d|��r$|d|k�r�|�
d�n&t�||��r
n|�||dd���n|d|��r||}dD]*}
|�|
|d
��r8|t|
�8}�qd�q8|�||d
|��n�|d|��r�|j�r�|�||dd��n�|||d���dk�r�|�||dd��nP|d	|��r�|�||dd��n,|d|��r"|�||dd��nt d��|}	|�||	�}q|d|��r�t!�||�}|�r�|�"�dd�}|�#|�|�$�}	|d|	d
��s�|	d
}	|�||	�}qn<d||d�v�r�|�
|||d��|�||d�}�q�q|d|�rt%�||�}|�r@|�"d
�}|�&|�|�$�}	|d|	d
��s2|	d
}	|�||	�}qt'�||�}|�r�|�r�|�"�||d�k�r�|�$�}	|	|k�r�|}	|�||d
�}�q�n.|d
|k�r�|�
d�|�||d
�}n�q�qq|�r"||k�r"|j�r|j�r|�
t|||���n|�
|||��|�||�}||d�|_dS)Nr�<�&�"z[\t\n\r\f ;]z</�<!--z<?z<!r�)z--!z--�-��	<![CDATA[��	�	<!doctypezwe should not get here!z&#����;)(r�lenrr�find�rfind�maxr%r&�searchr�startr�handle_datarZ	updatepos�
startswith�starttagopen�match�parse_starttag�parse_endtag�
parse_comment�parse_pi�parse_html_declaration�
endtagopen�handle_comment�endswithr�unknown_declr$�handle_decl�	handle_pi�AssertionError�charref�group�handle_charref�end�	entityref�handle_entityref�
incomplete)r
rTr�i�n�jZampposrDrB�k�suffix�namerrrr�s�
�










zHTMLParser.goaheadcCsX|j}|||d�dkr$|�|�S|||d�dkrx|jrx|�d|d�}|dkrZdS|�||d|��|dS|||d���d	kr�|�d
|d�}|dkr�dS|�||d|��|dS|||d�d
k�rJ|�d
|d�}|dk�r�dS||ddk�r,|�||d|d��n|�||d|��|dS|�|�SdS)Nr4r1r7r5z]]>rr9r6r8rr2rz<![�])	rrGrr<rMr$rNrK�parse_bogus_comment)r
rXrrZZgtposrrrrINs0

z!HTMLParser.parse_html_declarationcCs\|j}t�||d�}|s2t�||d�}|s2dS|rT|��}|�||d|��|��S)Nr4r9)r�commentcloser?�commentabruptcloserDr@rKrT)r
rX�reportrrDrZrrrrGpszHTMLParser.parse_commentrcCsD|j}|�d|d�}|dkr"dS|r<|�||d|��|dS)Nrr2r9r)rr<rK)r
rXrbr�posrrrr_szHTMLParser.parse_bogus_commentcCsH|j}t�||d�}|sdS|��}|�||d|��|��}|S)Nr2r9)r�picloser?r@rOrT�r
rXrrDrZrrrrH�szHTMLParser.parse_picCsd|_|�|�}|dkr|S|j}|||�|_g}t�||d�}|��}|�d���|_}||k�r t	�||�}|s~�q |�ddd�\}	}
}|
s�d}nZ|dd�dkr�|dd�ks�n|dd�dkr�|dd�kr�nn|dd�}|�rt
|�}|�|	��|f�|��}q`|||���}|dv�r�|�
�\}
}d	|jv�rz|
|j�d	�}
t|j�|j�d	�}n|t|j�}|�|||��|S|�d
��r�|�||�n^|�||�||jv�s�|j�r�|dk�s�|dk�r�|j|d
d�n||jv�r|j|dd�|S)Nrrr2r6�'r9�")r�/>�
rhZnoscriptr#Fr!T)r�check_for_whole_start_tagr�tagfind_tolerantrDrTrRr$r�attrfind_tolerantr�append�stripZgetpos�countr;r=rArL�handle_startendtag�handle_starttag�CDATA_CONTENT_ELEMENTSrr*�RCDATA_CONTENT_ELEMENTS)r
rX�endposr�attrsrDr[�tag�m�attrname�restZ	attrvaluerT�lineno�offsetrrrrE�sh

&�
�



����zHTMLParser.parse_starttagcCs6|j}t�||d�}|��}||ddkr2dS|S)Nrrr9)r�locatetagendrDrTrerrrrj�sz$HTMLParser.check_for_whole_start_tagcCs�|j}|�d|d�dkrdSt�||�sT||d|d�dkrJ|dS|�|�St�||d�}|��}||ddkr�dSt�||d�}|�d��	�}|�
|�|��|S)Nrr2rr9r6r)rr<rJrDr_r|rTrkrRr$�
handle_endtagr+)r
rXrrDrZrvrrrrF�s 

zHTMLParser.parse_endtagcCs|�||�|�|�dS�N)rqr}�r
rvrurrrrp�szHTMLParser.handle_startendtagcCsdSr~rrrrrrq�szHTMLParser.handle_starttagcCsdSr~r)r
rvrrrr}szHTMLParser.handle_endtagcCsdSr~r�r
r]rrrrSszHTMLParser.handle_charrefcCsdSr~rr�rrrrV	szHTMLParser.handle_entityrefcCsdSr~rrrrrrA
szHTMLParser.handle_datacCsdSr~rrrrrrKszHTMLParser.handle_commentcCsdSr~r)r
ZdeclrrrrNszHTMLParser.handle_declcCsdSr~rrrrrrOszHTMLParser.handle_picCsdSr~rrrrrrMszHTMLParser.unknown_decl)T)T)r)!�__name__�
__module__�__qualname__�__doc__rrrsr
r	rrrr r*r+r-rrIrGr_rHrErjrFrprqr}rSrVrArKrNrOrMrrrrrZs>
	

"

9
)r�r%rZhtmlr�__all__r&rrWrUrQrCrJrdr`rark�VERBOSErlr|Zlocatestarttagend_tolerantZ	endendtagZ
endtagfindrrrrrr�<module>s4











�
��