<p>a  \t b</p>")
    '<p>a b</p>'

    >>> normalize_html("<p>a  \t\nb</p>")
    '<p>a b</p>'

* Whitespace surrounding block-level tags is removed.

    >>> normalize_html("<p>a  b</p>")
    '<p>a b</p>'

    >>> normalize_html(" <p>a  b</p>")
    '<p>a b</p>'

    >>> normalize_html("<p>a  b</p> ")
    '<p>a b</p>'

    >>> normalize_html("\n\t<p>\n\t\ta  b\t\t</p>\n\t")
    '<p>a b</p>'

    >>> normalize_html("<i>a  b</i> ")
    '<i>a b</i> '

* Self-closing tags are converted to open tags.

    >>> normalize_html("<br />")
    '<br>'

* Attributes are sorted and lowercased.

    >>> normalize_html('<a title="bar" HREF="foo">x</a>')
    '<a href="foo" title="bar">x</a>'

* References are converted to unicode, except that '<', '>', '&', and
  '"' are rendered using entities.

    >>> normalize_html("&forall;&amp;&gt;&lt;&quot;")
    '\u2200&amp;&gt;&lt;&quot;'

z'(\|\<[^>]*\>|[^<]+)rNézs   i