ó Ž÷JUc@sŽddlmZmZddlmZddlZddlZddlZddddgZej dƒZ d efd „ƒYZ d „Z dS( iÿÿÿÿ(t HTMLParsertHTMLParseError(tname2codepointNtaltthreftsrcttitles\s+t MyHTMLParsercBs}eZd„Zd„Zd„Zd„Zd„Zd„Zd„Zd„Z d„Z d „Z d „Z d „Z d „ZRS( cCs5tj|ƒd|_t|_d|_d|_dS(Ntstarttagt(Rt__init__tlasttFalsetin_pretoutputtlast_tag(tself((sS/Users/gjtorikian/Development/commonmarker/ext/commonmarker/cmark/test/normalize.pyR s     cCsÚ|jdkp|jdk}|o3|j|jƒ}|r]|jdkr]|jdƒ}ntjd|ƒ}|r¾|j r¾|jdkr|jƒ}q¾|jdkr¾|jƒ}q¾n|j|7_d|_dS(NtendtagRtbrs t tdata( R t is_block_tagRtlstript whitespace_retsubR tstripR(RRt after_tagtafter_block_tag((sS/Users/gjtorikian/Development/commonmarker/ext/commonmarker/cmark/test/normalize.pyt handle_datascCsi|dkrt|_n$|j|ƒr<|jjƒ|_n|jd|d7_||_d|_dS(NtpresR(R R RRtrstripRR (Rttag((sS/Users/gjtorikian/Development/commonmarker/ext/commonmarker/cmark/test/normalize.pyt handle_endtag s   cCsæ|dkrt|_n|j|ƒr<|jjƒ|_n|jd|7_|rÁ|jƒx_|D]T\}}|jd|7_|dkrf|jd tj|dtƒd7_qfqfWn|jd7_||_ d|_ dS( NRttcomment(RR (RR((sS/Users/gjtorikian/Development/commonmarker/ext/commonmarker/cmark/test/normalize.pythandle_comment?scCs$|jd|d7_d|_dS(Ns((sS/Users/gjtorikian/Development/commonmarker/ext/commonmarker/cmark/test/normalize.pythandle_charrefRs  cCs©|dkr|jd7_n‡|dkr<|jd7_ni|dkrZ|jd7_nK|dkrx|jd7_n-|dkr–|j|7_n|j|7_dS( NR"s<Rs>R7s&R$s"(RR((RR>tfallback((sS/Users/gjtorikian/Development/commonmarker/ext/commonmarker/cmark/test/normalize.pyR<]s     c3Cs |d3kS(4Ntarticletheadertasidethgroupt blockquotethrtiframetbodytlitmaptbuttontobjecttcanvastoltcaptionRtcoltptcolgroupRtddtprogresstdivtsectiontdlttablettdtdtttbodytembedttextareatfieldsetttfoott figcaptiontthtfigurettheadtfooterttrtformtulth1th2th3th4th5th6tvideotscripttstyle(2RFsheaderRHRIRJshrRLRMRNsmapRPsobjectRRRSRTsoutputRURVRWspresddRYsdivssectionR\stableR^R_R`RaRbRcRdResthRgRhRistrRkRlRmRnRoRpRqRrRsRtRu((RR ((sS/Users/gjtorikian/Development/commonmarker/ext/commonmarker/cmark/test/normalize.pyRks(t__name__t __module__R RR!R.R/R1R3R4R6R?RDR<R(((sS/Users/gjtorikian/Development/commonmarker/ext/commonmarker/cmark/test/normalize.pyR s          cCsÊtjdƒ}y…tƒ}xdtj||ƒD]P}|jdƒd dkrh|j|jdƒ7_q.|j|jdƒƒq.W|jƒ|jSWn/tk rÅ}t j j d|j dƒ|SXdS(s  Return normalized form of HTML which ignores insignificant output differences: Multiple inner whitespaces are collapsed to a single space (except in pre tags): >>> normalize_html("

a \t b

") '

a b

' >>> normalize_html("

a \t\nb

") '

a b

' * Whitespace surrounding block-level tags is removed. >>> normalize_html("

a b

") '

a b

' >>> normalize_html("

a b

") '

a b

' >>> normalize_html("

a b

") '

a b

' >>> normalize_html("\n\t

\n\t\ta b\t\t

\n\t") '

a b

' >>> normalize_html("a b ") 'a b ' * Self-closing tags are converted to open tags. >>> normalize_html("
") '
' * Attributes are sorted and lowercased. >>> normalize_html('x') 'x' * References are converted to unicode, except that '<', '>', '&', and '"' are rendered using entities. >>> normalize_html("∀&><"") '\u2200&><"' s'(\|\<[^>]*\>|[^<]+)iiss   h