module Rouge
  module Lexers
    # Regex-based lexer for HTML documents.
    #
    # Markup is tokenized by the states below; the bodies of <script> and
    # <style> elements are handed to the Javascript and CSS lexers.
    class HTML < RegexLexer
      tag 'html'
      filenames '*.htm', '*.html', '*.xhtml', '*.xslt'
      mimetypes 'text/html', 'application/xhtml+xml'

      # Guess whether +text+ is HTML.
      #
      # text - the source text under analysis; it must respond to
      #        +doctype?+ (supplied by the caller, not defined here).
      #
      # Returns 1 when the doctype mentions "html" or an opening <html tag
      # is present; otherwise falls through and returns nil.
      def self.analyze_text(text)
        return 1 if text.doctype?(/\bhtml\b/i)
        return 1 if text =~ /<\s*html\b/
      end

      # Top-level document content.
      state :root do
        # plain character data up to the next markup or entity start
        rule /[^<&]+/m, 'Text'
        rule /&\S*?;/, 'Name.Entity'

        # Every pattern here must consume at least one character: an empty
        # pattern matches everywhere and the lexer would never advance.
        rule /<!DOCTYPE .*?>/i, 'Comment.Preproc'
        rule /<!\[CDATA\[.*?\]\]>/m, 'Comment.Preproc'
        rule /<!--/, 'Comment', :comment
        rule /<\?.*?\?>/m, 'Comment.Preproc' # processing instructions

        # <script ...>: lex the attributes first (:tag is on top of the
        # stack), then the embedded script body once the tag is closed.
        # The lookahead keeps longer names such as <script-x> out.
        rule /<\s*script(?![\w:-])\s*/m do
          token 'Name.Tag'
          push :script_content
          push :tag
        end

        # <style ...>: same scheme as <script>.
        rule /<\s*style(?![\w:-])\s*/m do
          token 'Name.Tag'
          push :style_content
          push :tag
        end

        rule %r(<\s*[a-zA-Z0-9:]+), 'Name.Tag', :tag      # opening tags
        rule %r(<\s*/\s*[a-zA-Z0-9:]+\s*>), 'Name.Tag'    # closing tags
      end

      # Inside <!-- ... -->; entered from :root, left on the closing "-->".
      state :comment do
        rule /[^-]+/, 'Comment'
        rule /-->/, 'Comment', :pop!
        # a lone dash that is not part of the terminator
        rule /-/, 'Comment'
      end

      # Inside an opening tag, after the tag name: attributes up to ">" or "/>".
      state :tag do
        rule /\s+/m, 'Text'
        # attribute with a value: the value is lexed by :attr
        rule /[a-zA-Z0-9_:-]+\s*=/m, 'Name.Attribute', :attr
        # bare (valueless) attribute
        rule /[a-zA-Z0-9_:-]+/, 'Name.Attribute'
        rule %r(/?\s*>)m, 'Name.Tag', :pop!
      end

      # Immediately after "attr=": decide between quoted and unquoted value.
      state :attr do
        # TODO: are backslash escapes valid here?
        # NOTE(review): the HTML syntax appears to define no backslash
        # escapes in attribute values (character references are used
        # instead), so the first matching quote ends the value -- confirm.
        rule /"/ do
          token 'Literal.String'
          # replace :attr with the quoted-string state
          pop!; push :dq
        end

        rule /'/ do
          token 'Literal.String'
          pop!; push :sq
        end

        # unquoted value runs to the next whitespace or ">"
        rule /[^\s>]+/, 'Literal.String', :pop!
      end

      # Double-quoted attribute value.
      state :dq do
        rule /"/, 'Literal.String', :pop!
        rule /[^"]+/, 'Literal.String'
      end

      # Single-quoted attribute value.
      state :sq do
        rule /'/, 'Literal.String', :pop!
        rule /[^']+/, 'Literal.String'
      end

      # Body of a <script> element, up to the closing </script> tag.
      state :script_content do
        rule %r(<\s*/\s*script\s*>)m, 'Name.Tag', :pop!
        rule %r(.*?(?=<\s*/\s*script\s*>))m do
          delegate Javascript
        end
      end

      # Body of a <style> element, up to the closing </style> tag.
      state :style_content do
        rule %r(<\s*/\s*style\s*>)m, 'Name.Tag', :pop!
        # Lazy match: stop at the FIRST </style>. A greedy ".*" would run
        # to the last </style> in the document and hand any markup between
        # two <style> elements to the CSS lexer.
        rule %r(.*?(?=<\s*/\s*style\s*>))m do
          delegate CSS
        end
      end
    end
  end
end