
    hC\                        S r SSKrSSKrSSKr\R                  " SS\SS9  SSKJrJr  SSKJ	r	  SS	K
JrJr  SS
K
Jr  SSKJr  SSKJrJrJrJrJrJr  SSKJr  SSKJr  SSKJrJr  SSKJrJ r   SSK!J"r"  \	RF                  r$\"" \$5      r%\	RL                  S   \	RL                  S   \	RL                  S   1r'\	RL                  S   r(\	RL                  S   r)\	RL                  S   r*\	RL                  S   r+\," S5      r-\," S5      r. " S S5      r/ " S S\ 5      r0 " S S\5      r1S r2S r3S  r4\Rj                  " S!5      r6S" r7 " S# S$\5      r8g)%z
Shim module between Bleach and html5lib. This makes it easier to upgrade the
html5lib library without having to change a lot of code.
    Nignorez"html5lib's sanitizer is deprecatedzbleach._vendor.html5lib)messagecategorymodule)
HTMLParsergetTreeWalker)	constants)
namespacesprefixes)_ReparseException)Filter)allowed_protocolsallowed_css_propertiesallowed_svg_propertiesattr_val_is_urisvg_attr_val_allows_refsvg_allow_local_href)HTMLInputStream)escapeHTMLSerializer)attributeMapHTMLTokenizer)TrieStartTagEndTagEmptyTag
Characters
ParseError)paabbraddressareaarticleasideaudiobbasebdibdo
blockquotebodybrbuttoncanvascaptioncitecodecolcolgroupdatadatalistdddeldetailsdfndialogdivdldtemembedfieldset
figcaptionfigurefooterformh1h2h3h4h5h6headheaderhgrouphrhtmliiframeimginputinskbdkeygenlabellegendlilinkmapmarkmenumetameternavnoscriptobjectoloptgroupoptionoutputpparampicturepreprogressqrprtrubyssampscriptsectionselectslotsmallsourcespanstrongstylesubsummarysuptabletbodytdtemplatetextareatfootththeadtimetitletrtrackuulvarvideowbr)!r!   r#   r$   r*   r8   r:   r6   r;   r<   r=   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rL   rM   rN   rY   mainr`   rc   rg   rj   rs   r~   r   c                   p    \ rS rSrSrS r\S 5       r\S 5       r\S 5       r	S r
SS jrS	 rS
 rS rSrg)InputStreamWithMemory   zWraps an HTMLInputStream to remember characters since last <

This wraps existing HTMLInputStream classes to keep track of the stream
since the last < which marked an open tag state.

c                     Xl         U R                   R                  U l        U R                   R                  U l        / U l        g N)_inner_streamresetposition_buffer)selfinner_streams     gC:\Users\julio\OneDrive\Documentos\Trabajo\Ideas Frescas\venv\Lib\site-packages\bleach/html5lib_shim.py__init__InputStreamWithMemory.__init__   s5    )''--
**33    c                 .    U R                   R                  $ r   )r   errorsr   s    r   r   InputStreamWithMemory.errors  s    !!(((r   c                 .    U R                   R                  $ r   )r   charEncodingr   s    r   r   "InputStreamWithMemory.charEncoding  s    !!...r   c                 .    U R                   R                  $ r   )r   changeEncodingr   s    r   r   $InputStreamWithMemory.changeEncoding
  s    !!000r   c                 ~    U R                   R                  5       nU(       a  U R                  R                  U5        U$ r   )r   charr   append)r   cs     r   r   InputStreamWithMemory.char  s0    ##%LL"r   c                     U R                   R                  XS9nU R                  R                  [	        U5      5        U$ )N)opposite)r   
charsUntilr   extendlist)r   
charactersr   charss       r   r    InputStreamWithMemory.charsUntil  s6    ""--j-LDK(r   c                     U R                   (       a  U R                   R                  S5        U R                  R                  U5      $ )N)r   popr   unget)r   r   s     r   r   InputStreamWithMemory.unget  s2    <<LLR !!''--r   c                 8    SR                  U R                  5      $ )zReturns the stream history since last '<'

Since the buffer starts at the last '<' as as seen by tagOpenState(),
we know that everything from that point to when this method is called
is the "tag" that is being tokenized.

 )joinr   r   s    r   get_tagInputStreamWithMemory.get_tag  s     wwt||$$r   c                     S/U l         g)zResets stream history to just '<'

This gets called by tagOpenState() which marks a '<' that denotes an
open tag. Any time we see that, we reset the buffer.

<N)r   r   s    r   	start_tagInputStreamWithMemory.start_tag)  s     ur   )r   r   r   r   NF)__name__
__module____qualname____firstlineno____doc__r   propertyr   r   r   r   r   r   r   r   __static_attributes__ r   r   r   r      sa     ) ) / / 1 1
.
%r   r   c                   d   ^  \ rS rSrSrS	U 4S jjrU 4S jrS
U 4S jjrU 4S jrU 4S jr	Sr
U =r$ )BleachHTMLTokenizeri3  z1Tokenizer that doesn't consume character entitiesc                 t   > [         TU ]  " S0 UD6  Xl        [        U R                  5      U l        S U l        g )Nr   )superr   consume_entitiesr   streamemitted_last_token)r   r   kwargs	__class__s      r   r   BleachHTMLTokenizer.__init__6  s5    "6" 0 ,DKK8 #'r   c              #   @  >#    S n[         TU ]  5        GH  nUGb   US   S:X  aP  US   [        ;   aC  UR                  S5      (       a-  [	        S US   R                  5        5       5      US'   S nUv   OUS   S:X  a}  U R                  R                  bf  US   R                  5       R                  5       U R                  R                  ;  a-  U R                  R                  5       US'   [        US'   S nUv   OUS   [        :X  a  Uv   UnO
Uv   Uv   S nGM  US   [        :X  a  UnGM  Uv   GM!     U(       a_  US   S:X  a$  [        U R                  R                  5       S.v   g US   S;   a$  [        U R                  R                  5       S.v   g Uv   g g 7f)	Nr4   z#invalid-character-in-attribute-nametypec              3   \   #    U  H"  u  pS U;  d  M  SU;  d  M  SU;  d  M  X4v   M$     g7f)"'r   Nr   ).0	attr_name
attr_values      r   	<genexpr>/BleachHTMLTokenizer.__iter__.<locals>.<genexpr>Q  sE      15J1Iy0 0 !$9 4	 0
 !$9 4 0/5Js   ,,,
,z!expected-closing-tag-but-got-charzeof-in-tag-namer   r4   )zduplicate-attributezeof-in-attribute-namez eof-in-attribute-value-no-quoteszexpected-end-of-tag-but-got-eof)r   __iter__TAG_TOKEN_TYPESgetr   itemsparsertagslowerstripr   r   TAG_TOKEN_TYPE_CHARACTERSTAG_TOKEN_TYPE_PARSEERROR)r   last_error_tokentokenr   s      r   r   BleachHTMLTokenizer.__iter__A  s    W%'E+$V,0UUf8		&)) %1 15:6]5H5H5J1 %E&M (,$K %V,0SS((4f++-335T[[=M=MM %)KK$7$7$9E&M$=E&M'+$K6]&?? +*',$ +*K'+$ V} 99#( KE (H '+<<
  9$++BUBUBWXX!&) .    9$++BUBUBWXX '&7 s   FFc                    > U R                   (       a  [        TU ]	  X5      $ U(       a  U R                  S   S   S==   S-  ss'   g U R                  R                  [        SS.5        g )Nr4   r      &r   )r   r   consumeEntitycurrentToken
tokenQueuer   r   )r   allowedCharfromAttributer   s      r   r   !BleachHTMLTokenizer.consumeEntity  s]       7(DD f%b)!,3, OO"",Es#STr   c                 T   > U R                   R                  5         [        TU ]  5       $ r   )r   r   r   tagOpenState)r   r   s    r   r    BleachHTMLTokenizer.tagOpenState  s#    
 	w#%%r   c                 j  > U R                   nU R                  R                  b  US   [        ;   a  US   R	                  5       U R                  R                  ;  a  U R                  R
                  (       a?  U R                  (       a+  US   [        :X  a  US   R	                  5       [        ;   a  SnOSnOU R                  R                  5       n[        US.nU=U l         U l        U R                  R                  U5        U R                  U l        g U R                   U l        [         TU ]E  5         g )Nr   name
r   r   )r   r   r   r   r   r   r   TAG_TOKEN_TYPE_STARTHTML_TAGS_BLOCK_LEVELr   r   r   r   r   	dataStatestater   emitCurrentToken)r   r   new_data	new_tokenr   s       r   r  $BleachHTMLTokenizer.emitCurrentToken  s    !! KK(f0f##%T[[-=-==
 {{  ++f)==f++-1FF
  $H  "H  ;;..0!:HMI:CCD 7OO""9-DJ"&"3"3 "r   )r   r   r   r  r   r   )NF)r   r   r   r   r   r   r   r   r   r  r   __classcell__r   s   @r   r   r   3  s'    ;	'b'HU$&*# *#r   r   c                   8   ^  \ rS rSrSrU 4S jr SS jrSrU =r$ )BleachHTMLParseri  z$Parser that uses BleachHTMLTokenizerc                 v   > Ub  [        S U 5       5      OSU l        X l        X0l        [        TU ]  " S0 UD6  g)a  
:arg tags: set of allowed tags--everything else is either stripped or
    escaped; if None, then this doesn't look at tags at all
:arg strip: whether to strip disallowed tags (True) or escape them (False);
    if tags=None, then this doesn't have any effect
:arg consume_entities: whether to consume entities (default behavior) or
    leave them as is when tokenizing (BleachHTMLTokenizer-added behavior)

Nc              3   @   #    U  H  oR                  5       v   M     g 7fr   )r   )r   tags     r   r   ,BleachHTMLParser.__init__.<locals>.<genexpr>  s     3dsyy{{ds   r   )	frozensetr   r   r   r   r   )r   r   r   r   r   r   s        r   r   BleachHTMLParser.__init__  s>     9=8HI3d34d 		 
 0"6"r   c                    X l         X0l        X@l        [        SXR                  U S.UD6U l        U R                  5          U R                  5         g ! [         a#    U R                  5         U R                  5          g f = f)N)r   r   r   r   )	innerHTMLMode	container	scriptingr   r   	tokenizerr   mainLoopReparseException)r   r   	innerHTMLr  r  r   s         r   _parseBleachHTMLParser._parse   sr     '"", 
,A,A$
RX
 	

	MMO 	JJLMMO	s   A *A?>A?)r   r  r  r  r   r   r  )Fr;   T)	r   r   r   r   r   r   r  r   r  r  s   @r   r	  r	    s    .#$ CG r   r	  c                     U S   S:X  aT  [        U 5      S:  a  gU S   S;   a  U SS Sp!OU SS Sp!US	:X  a  g[        X5      nSUs=:  a  S
:  a  O  g[        U5      $ g[        R	                  U S5      $ )a%  Convert an entity (minus the & and ; part) into what it represents

This handles numeric, hex, and text entities.

:arg value: the string (minus the ``&`` and ``;`` part) to convert

:returns: unicode character or None if it's an ambiguous ampersand that
    doesn't match a character entity

r   #   Nr   xX   
   r   i   )lenintchrENTITIESr   )valueint_as_stringr'   
code_points       r   convert_entityr)    s     Qx3u:>8z!"')R4 #()R4B-
z$H$  z?"<<t$$r   c                 l   SU ;  a  U $ / n[        U 5       H  nU(       d  M  UR                  S5      (       aX  [        U5      nUbJ  [        U5      nUb<  UR	                  U5        U[        U5      S-   S nU(       a  UR	                  U5        Mz  UR	                  U5        M     SR                  U5      $ )zConverts all found entities in the text

:arg text: the text to convert entities in

:returns: unicode text with converted entities

r   Nr  r   )next_possible_entity
startswithmatch_entityr)  r   r"  r   )textnew_textpartentity	converted	remainders         r   convert_entitiesr4  :  s     $H$T*??3!$'F!*62	 (OOI. $S[1_%6 7I 	2% +( 778r   c                    U S   S:w  a  [        S5      eU SS n [        U 5      n SnS[        R                  -   nU (       a  U S   S:X  a  SnU R	                  S5        U (       a  U S   S	;   a  S
nXR	                  S5      -  nOSnU (       a6  U S   U;  a-  U R	                  S5      nXC;  a  OX-  nU (       a  U S   U;  a  M-  U(       a  U (       a  U S   S:X  a  U$ gU (       aK  U S   U;  aB  U R	                  S5      nX-  n[
        R                  U5      (       d  gU (       a  U S   U;  a  MB  U(       a  U (       a  U S   S:X  a  U$ g)a^  Returns first entity in stream or None if no entity exists

Note: For Bleach purposes, entities must start with a "&" and end with a
";". This ignores ambiguous character entities that have no ";" at the end.

:arg stream: the character stream

:returns: the entity string without "&" or ";" if it's a valid character
    entity; ``None`` otherwise

r   r   zStream should begin with "&"r   Nr   z<&=;r  r  0123456789abcdefABCDEF
0123456789;)
ValueErrorr   string
whitespacer   ENTITIES_TRIEhas_keys_with_prefix)r   possible_entityend_charactersallowedr   s        r   r-  r-  ]  sL    ayC788ABZF&\FOf///N &)s"

1fQi:-.Gzz!},O"G .8

1A O	 .8 v&)s*:"" VAYn4JJqM11/BB  VAYn4 6fQi3&6r   z(&)c              #      #    [        [        R                  U 5      5       H#  u  pUS:X  a  Uv   M  US-  S:X  d  M  SU-   v   M%     g7f)zTakes a text and generates a list of possible entities

:arg text: the text to look at

:returns: generator where each part (except the first) starts with an
    "&"

r   r  r   N)	enumerateAMP_SPLIT_REsplit)r.  rP   r0  s      r   r+  r+    sD      \//566JUaZ*	 7s
   7A	A	c                   :   ^  \ rS rSrSrSrS rSU 4S jjrSrU =r	$ )BleachHTMLSerializeri  zSHTMLSerializer that undoes & -> &amp; in attributes and sets
escape_rcdata to True
Tc              #   :  #    UR                  SS5      n[        U5       Hv  nU(       d  M  UR                  S5      (       a@  [        U5      nUb2  [	        U5      b&  SU S3v   U[        U5      S-   S nU(       a  Uv   Mb  UR                  SS5      v   Mx     g7f)z,Escapes just bare & in HTML attribute valuesz&amp;r   Nr8  r  )replacer+  r,  r-  r)  r"  )r   stokenr0  r1  s       r   escape_base_amp$BleachHTMLSerializer.escape_base_amp  s      - )0Ds##%d+ %.*@*LfXQ-'  Fa 12D"
,,sG,,% 1s   BBc              #     >#    SnSn[         TU ]  X5       Hh  nU(       aB  US:X  a  SnO3U(       a$  US:w  a  U R                  U5       Sh  vN   SnM=  OUS:X  a  SnUv   ML  UR                  S5      (       a  SnUv   Mj     g N87f)zWrap HTMLSerializer.serialize and conver & to &amp; in attribute values

Note that this converts & to &amp; in attribute values where the & isn't
already part of an unambiguous character entity.

F>r   N=Tr   )r   	serializerJ  r,  )r   
treewalkerencodingin_tagafter_equalsrI  r   s         r   rO  BleachHTMLSerializer.serialize  s      g'
=FS="F!}#'#7#7#???', 	 % s]#'L$$S))!F' > @s   ABB9Br   r   )
r   r   r   r   r   escape_rcdatarJ  rO  r   r  r  s   @r   rF  rF    s     M-> r   rF  )9r   rer:  warningsfilterwarningsDeprecationWarningbleach._vendor.html5libr   r   r	   !bleach._vendor.html5lib.constantsr
   r   r   r  $bleach._vendor.html5lib.filters.baser   )bleach._vendor.html5lib.filters.sanitizerr   r   r   r   r   r   SanitizerFilter$bleach._vendor.html5lib._inputstreamr   "bleach._vendor.html5lib.serializerr   r   "bleach._vendor.html5lib._tokenizerr   r   bleach._vendor.html5lib._trier   entitiesr%  r<  
tokenTypesr   r   TAG_TOKEN_TYPE_ENDr   r   r  	HTML_TAGSr   r   r   r	  r)  r4  r-  compilerC  r+  rF  r   r   r   <module>rh     s  
 
   	  0$	   X $"$
 !++J7 ))(3 %00> %00> 
 qs	r ""$ N< <~v#- v#r)z )X%D F9x zz%  I> Ir   