%%{

  machine hpricot_common;

  #
  # HTML tokens
  # (a blatant rip from HTree)
  #
  # This machine only declares patterns and scanners. The named actions
  # (_tag, tag, _akey, akey, _aval, aval, aunq, new_attr, save_attr, newEle,
  # xmlver, xmlenc, xmlsd, sysid, pubid), the macros ELE / EBLK / TEXT_PASS
  # and the `curline` variable are not defined here; they must be supplied
  # by the file that includes this machine.
  #
  # NOTE(review): the literals containing '<' ... '>' (StartComment,
  # EndComment, StartCdata, EndCdata, EndTag, XmlDecl, DocType,
  # StartXmlProcIns) had been stripped from this file and were restored.
  # Verify them against the project's canonical grammar.
  #

  # Every '\n' consumed bumps the line counter.
  newline = '\n' @{curline += 1;} ;

  # Characters allowed in names, and a name proper (restricted first char).
  NameChar = [\-A-Za-z0-9._:?] ;
  Name = [A-Za-z_:] NameChar* ;

  # Delimiters of the block constructs handled by the sub-scanners below.
  StartComment = "<!--" ;
  EndComment = "-->" ;
  StartCdata = "<![CDATA[" ;
  EndCdata = "]]>" ;

  # A tag name: _tag runs on entering the name, tag on leaving it.
  NameCap = Name >_tag %tag;

  # An attribute key: _akey on entry, akey on exit.
  NameAttr = NameChar+ >_akey %akey ;

  # Bodies of single- and double-quoted attribute values (quotes excluded);
  # _aval on entry, aval on exit.
  Q1Char = [^'] ;
  Q1Attr = Q1Char* >_aval %aval ;
  Q2Char = [^"] ;
  Q2Attr = Q2Char* >_aval %aval ;

  # Unquoted attribute value: either a single whitespace character, or a run
  # that does not start with whitespace, an angle bracket or a quote and
  # contains no whitespace or angle brackets (aunq fires on leaving the run).
  UnqAttr = ( space >_aval | [^ \t\r\n<>"'] >_aval [^ \t\r\n<>]* %aunq ) ;

  # A bare attribute name with no "=value" part.
  Nmtoken = NameChar+ >_akey %akey ;

  # key = value, where an unquoted value must be followed by whitespace.
  Attr =
    NameAttr space* "=" space*
    ( '"' Q2Attr '"' | "'" Q1Attr "'" | UnqAttr space+ )
    space* ;

  # Last attribute before the closing bracket: the unquoted value is optional
  # and needs no trailing whitespace; a bare name is also accepted.
  AttrEnd =
    ( NameAttr space* "=" space* UnqAttr?
    | Nmtoken >new_attr %save_attr
    ) ;

  # One attribute inside the attribute list; new_attr on entry, save_attr on
  # exit.
  AttrSet =
    ( Attr >new_attr %save_attr
    | Nmtoken >new_attr space+ %save_attr
    ) ;

  # <name attrs...> or <name>
  StartTag =
      "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? ">"
    | "<" NameCap ">" ;

  # <name attrs.../> or <name/>
  EmptyTag =
      "<" NameCap space+ AttrSet* (AttrEnd >new_attr %save_attr)? "/>"
    | "<" NameCap "/>" ;

  # </name>
  EndTag = "</" NameCap space* ">" ;

  # Pieces of the XML declaration; each value is bracketed by _aval on entry
  # and a dedicated leaving action (xmlver / xmlenc / xmlsd).
  XmlVersionNum = [a-zA-Z0-9_.:\-]+ >_aval %xmlver ;
  XmlVersionInfo =
    space+ "version" space* "=" space*
    ( "'" XmlVersionNum "'" | '"' XmlVersionNum '"' ) ;

  XmlEncName = [A-Za-z] >_aval [A-Za-z0-9._\-]* %xmlenc ;
  XmlEncodingDecl =
    space+ "encoding" space* "=" space*
    ( "'" XmlEncName "'" | '"' XmlEncName '"' ) ;

  XmlYesNo = ("yes" | "no") >_aval %xmlsd ;
  XmlSDDecl =
    space+ "standalone" space* "=" space*
    ( "'" XmlYesNo "'" | '"' XmlYesNo '"' ) ;

  # <?xml version=... [encoding=...] [standalone=...] ?>
  # The '?' before '>' is optional, mirroring EndXmlProcIns.
  XmlDecl = "<?xml" XmlVersionInfo XmlEncodingDecl? XmlSDDecl? space* "?"? ">" ;

  # DOCTYPE identifiers; sysid / pubid fire on leaving the literal's body.
  SystemLiteral =
      '"' [^"]* >_aval %sysid '"'
    | "'" [^']* >_aval %sysid "'" ;

  PubidLiteral =
      '"' [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]* >_aval %pubid '"'
    | "'" [\t a-zA-Z0-9\-'()+,./:=?;!*\#@$_%]* >_aval %pubid "'" ;

  ExternalID =
    ( "SYSTEM" | "PUBLIC" space+ PubidLiteral ) (space+ SystemLiteral)? ;

  # <!DOCTYPE name [external id] [ "[" internal subset "]" ] >
  DocType =
    "<!DOCTYPE" space+ NameCap (space+ ExternalID)? space*
    ( "[" [^\]]* "]" space* )? ">" ;

  # Processing instruction delimiters. TEXT_PASS() runs on entering the
  # target name.
  StartXmlProcIns = "<?" Name >{ TEXT_PASS(); } space+ ;
  EndXmlProcIns = "?"? ">" ;

  # Inside <!-- ... -->: pass characters through until the terminator, then
  # close the block and return to main. The second EBLK argument (3) matches
  # the terminator's length -- presumably the number of trailing characters
  # to drop; confirm against the macro definition.
  html_comment := |*
    EndComment @{ EBLK(comment, 3); fgoto main; };
    any | newline { TEXT_PASS(); };
  *|;

  # Inside <![CDATA[ ... ]]>: same scheme as html_comment.
  html_cdata := |*
    EndCdata @{ EBLK(cdata, 3); fgoto main; };
    any | newline { TEXT_PASS(); };
  *|;

  # Inside <? ... ?>: same scheme; the terminator is counted as 2 characters.
  html_procins := |*
    EndXmlProcIns @{ EBLK(procins, 2); fgoto main; };
    any | newline { TEXT_PASS(); };
  *|;

  # Top-level scanner. Each markup pattern runs newEle on entry; complete
  # constructs are emitted via ELE(...), while comment / CDATA / processing
  # instruction openers hand control to the matching sub-scanner. Anything
  # else falls through to TEXT_PASS() one character at a time.
  main := |*
    XmlDecl >newEle { ELE(xmldecl); };
    DocType >newEle { ELE(doctype); };
    StartXmlProcIns >newEle { fgoto html_procins; };
    StartTag >newEle { ELE(stag); };
    EndTag >newEle { ELE(etag); };
    EmptyTag >newEle { ELE(emptytag); };
    StartComment >newEle { fgoto html_comment; };
    StartCdata >newEle { fgoto html_cdata; };
    any | newline { TEXT_PASS(); };
  *|;

}%%;