gumbo-parser/src/tokenizer_states.h in nokogumbo-2.0.0.pre.alpha vs gumbo-parser/src/tokenizer_states.h in nokogumbo-2.0.0
- old
+ new
@@ -12,76 +12,328 @@
// clutter the tokenizer code with lots of precise error messages.
// The ordering of this enum is also used to build the dispatch table for the
// tokenizer state machine, so if it is changed, be sure to update that too.
typedef enum {
+ // 12.2.5.1 Data state
+ // https://html.spec.whatwg.org/multipage/parsing.html#data-state
GUMBO_LEX_DATA,
- GUMBO_LEX_CHAR_REF_IN_DATA,
+
+ // 12.2.5.2 RCDATA state
+ // https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
GUMBO_LEX_RCDATA,
- GUMBO_LEX_CHAR_REF_IN_RCDATA,
+
+ // 12.2.5.3 RAWTEXT state
+ // https://html.spec.whatwg.org/multipage/parsing.html#rawtext-state
GUMBO_LEX_RAWTEXT,
- GUMBO_LEX_SCRIPT,
+
+ // 12.2.5.4 Script data state
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-state
+ GUMBO_LEX_SCRIPT_DATA,
+
+ // 12.2.5.5 PLAINTEXT state
+ // https://html.spec.whatwg.org/multipage/parsing.html#plaintext-state
GUMBO_LEX_PLAINTEXT,
+
+ // 12.2.5.6 Tag open state
+ // https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
GUMBO_LEX_TAG_OPEN,
+
+ // 12.2.5.7 End tag open state
+ // https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
GUMBO_LEX_END_TAG_OPEN,
+
+ // 12.2.5.8 Tag name state
+ // https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
GUMBO_LEX_TAG_NAME,
+
+ // 12.2.5.9 RCDATA less-than sign state
+ // https://html.spec.whatwg.org/multipage/parsing.html#rcdata-less-than-sign-state
GUMBO_LEX_RCDATA_LT,
+
+ // 12.2.5.10 RCDATA end tag open state
+ // https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-open-state
GUMBO_LEX_RCDATA_END_TAG_OPEN,
+
+ // 12.2.5.11 RCDATA end tag name state
+ // https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-name-state
GUMBO_LEX_RCDATA_END_TAG_NAME,
+
+ // 12.2.5.12 RAWTEXT less-than sign state
+ // https://html.spec.whatwg.org/multipage/parsing.html#rawtext-less-than-sign-state
GUMBO_LEX_RAWTEXT_LT,
+
+ // 12.2.5.13 RAWTEXT end tag open state
+ // https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-open-state
GUMBO_LEX_RAWTEXT_END_TAG_OPEN,
+
+ // 12.2.5.14 RAWTEXT end tag name state
+ // https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-name-state
GUMBO_LEX_RAWTEXT_END_TAG_NAME,
- GUMBO_LEX_SCRIPT_LT,
- GUMBO_LEX_SCRIPT_END_TAG_OPEN,
- GUMBO_LEX_SCRIPT_END_TAG_NAME,
- GUMBO_LEX_SCRIPT_ESCAPED_START,
- GUMBO_LEX_SCRIPT_ESCAPED_START_DASH,
- GUMBO_LEX_SCRIPT_ESCAPED,
- GUMBO_LEX_SCRIPT_ESCAPED_DASH,
- GUMBO_LEX_SCRIPT_ESCAPED_DASH_DASH,
- GUMBO_LEX_SCRIPT_ESCAPED_LT,
- GUMBO_LEX_SCRIPT_ESCAPED_END_TAG_OPEN,
- GUMBO_LEX_SCRIPT_ESCAPED_END_TAG_NAME,
- GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_START,
- GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED,
- GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_DASH,
- GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_DASH_DASH,
- GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_LT,
- GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_END,
+
+ // 12.2.5.15 Script data less-than sign state
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-less-than-sign-state
+ GUMBO_LEX_SCRIPT_DATA_LT,
+
+ // 12.2.5.16 Script data end tag open state
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-open-state
+ GUMBO_LEX_SCRIPT_DATA_END_TAG_OPEN,
+
+ // 12.2.5.17 Script data end tag name state
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-name-state
+ GUMBO_LEX_SCRIPT_DATA_END_TAG_NAME,
+
+ // 12.2.5.18 Script data escape start state
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-state
+ GUMBO_LEX_SCRIPT_DATA_ESCAPED_START,
+
+ // 12.2.5.19 Script data escape start dash state
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-dash-state
+ GUMBO_LEX_SCRIPT_DATA_ESCAPED_START_DASH,
+
+ // 12.2.5.20 Script data escaped state
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-state
+ GUMBO_LEX_SCRIPT_DATA_ESCAPED,
+
+ // 12.2.5.21 Script data escaped dash state
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-state
+ GUMBO_LEX_SCRIPT_DATA_ESCAPED_DASH,
+
+ // 12.2.5.22 Script data escaped dash dash state
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-dash-state
+ GUMBO_LEX_SCRIPT_DATA_ESCAPED_DASH_DASH,
+
+ // 12.2.5.23 Script data escaped less-than sign state
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-less-than-sign-state
+ GUMBO_LEX_SCRIPT_DATA_ESCAPED_LT,
+
+ // 12.2.5.24 Script data escaped end tag open state
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-open-state
+ GUMBO_LEX_SCRIPT_DATA_ESCAPED_END_TAG_OPEN,
+
+ // 12.2.5.25 Script data escaped end tag name state
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-name-state
+ GUMBO_LEX_SCRIPT_DATA_ESCAPED_END_TAG_NAME,
+
+ // 12.2.5.26 Script data double escape start state
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-start-state
+ GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED_START,
+
+ // 12.2.5.27 Script data double escaped state
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-state
+ GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED,
+
+ // 12.2.5.28 Script data double escaped dash state
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-state
+ GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED_DASH,
+
+ // 12.2.5.29 Script data double escaped dash dash state
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-dash-state
+ GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH,
+
+ // 12.2.5.30 Script data double escaped less-than sign state
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-less-than-sign-state
+ GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED_LT,
+
+ // 12.2.5.31 Script data double escape end state (XXX: spec bug with the
+ // name?)
+ // https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-end-state
+ GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED_END,
+
+ // 12.2.5.32 Before attribute name state
+ // https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
GUMBO_LEX_BEFORE_ATTR_NAME,
+
+ // 12.2.5.33 Attribute name state
+ // https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
GUMBO_LEX_ATTR_NAME,
+
+ // 12.2.5.34 After attribute name state
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-name-state
GUMBO_LEX_AFTER_ATTR_NAME,
+
+ // 12.2.5.35 Before attribute value state
+ // https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-value-state
GUMBO_LEX_BEFORE_ATTR_VALUE,
+
+ // 12.2.5.36 Attribute value (double-quoted) state
+ // https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(double-quoted)-state
GUMBO_LEX_ATTR_VALUE_DOUBLE_QUOTED,
+
+ // 12.2.5.37 Attribute value (single-quoted) state
+ // https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(single-quoted)-state
GUMBO_LEX_ATTR_VALUE_SINGLE_QUOTED,
+
+ // 12.2.5.38 Attribute value (unquoted) state
+ // https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(unquoted)-state
GUMBO_LEX_ATTR_VALUE_UNQUOTED,
- GUMBO_LEX_CHAR_REF_IN_ATTR_VALUE,
+
+ // 12.2.5.39 After attribute value (quoted) state
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-value-(quoted)-state
GUMBO_LEX_AFTER_ATTR_VALUE_QUOTED,
+
+ // 12.2.5.40 Self-closing start tag state
+ // https://html.spec.whatwg.org/multipage/parsing.html#self-closing-start-tag-state
GUMBO_LEX_SELF_CLOSING_START_TAG,
+
+ // 12.2.5.41 Bogus comment state
+ // https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
GUMBO_LEX_BOGUS_COMMENT,
- GUMBO_LEX_MARKUP_DECLARATION,
+
+ // 12.2.5.42 Markup declaration open state
+ // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
+ GUMBO_LEX_MARKUP_DECLARATION_OPEN,
+
+ // 12.2.5.43 Comment start state
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
GUMBO_LEX_COMMENT_START,
+
+ // 12.2.5.44 Comment start dash state
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-start-dash-state
GUMBO_LEX_COMMENT_START_DASH,
+
+ // 12.2.5.45 Comment state
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-state
GUMBO_LEX_COMMENT,
+
+ // 12.2.5.46 Comment less-than sign state
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-state
+ GUMBO_LEX_COMMENT_LT,
+
+ // 12.2.5.47 Comment less-than sign bang state
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-state
+ GUMBO_LEX_COMMENT_LT_BANG,
+
+ // 12.2.5.48 Comment less-than sign bang dash state
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-dash-state
+ GUMBO_LEX_COMMENT_LT_BANG_DASH,
+
+ // 12.2.5.49 Comment less-than sign bang dash dash state
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-dash-dash-state
+ GUMBO_LEX_COMMENT_LT_BANG_DASH_DASH,
+
+ // 12.2.5.50 Comment end dash state
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-end-dash-state
GUMBO_LEX_COMMENT_END_DASH,
+
+ // 12.2.5.51 Comment end state
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-end-state
GUMBO_LEX_COMMENT_END,
+
+ // 12.2.5.52 Comment end bang state
+ // https://html.spec.whatwg.org/multipage/parsing.html#comment-end-bang-state
GUMBO_LEX_COMMENT_END_BANG,
+
+ // 12.2.5.53 DOCTYPE state
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-state
GUMBO_LEX_DOCTYPE,
+
+ // 12.2.5.54 Before DOCTYPE name state
+ // https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-name-state
GUMBO_LEX_BEFORE_DOCTYPE_NAME,
+
+ // 12.2.5.55 DOCTYPE name state
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-name-state
GUMBO_LEX_DOCTYPE_NAME,
+
+ // 12.2.5.56 After DOCTYPE name state
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-name-state
GUMBO_LEX_AFTER_DOCTYPE_NAME,
+
+ // 12.2.5.57 After DOCTYPE public keyword state
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-keyword-state
GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_KEYWORD,
+
+ // 12.2.5.58 Before DOCTYPE public identifier state
+ // https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-public-identifier-state
GUMBO_LEX_BEFORE_DOCTYPE_PUBLIC_ID,
+
+ // 12.2.5.59 DOCTYPE public identifier (double-quoted) state
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(double-quoted)-state
GUMBO_LEX_DOCTYPE_PUBLIC_ID_DOUBLE_QUOTED,
+
+ // 12.2.5.60 DOCTYPE public identifier (single-quoted) state
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(single-quoted)-state
GUMBO_LEX_DOCTYPE_PUBLIC_ID_SINGLE_QUOTED,
+
+ // 12.2.5.61 After DOCTYPE public identifier state
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-identifier-state
GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_ID,
+
+ // 12.2.5.62 Between DOCTYPE public and system identifiers state
+ // https://html.spec.whatwg.org/multipage/parsing.html#between-doctype-public-and-system-identifiers-state
GUMBO_LEX_BETWEEN_DOCTYPE_PUBLIC_SYSTEM_ID,
+
+ // 12.2.5.63 After DOCTYPE system keyword state
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-keyword-state
GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_KEYWORD,
+
+ // 12.2.5.64 Before DOCTYPE system identifier state
+ // https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-system-identifier-state
GUMBO_LEX_BEFORE_DOCTYPE_SYSTEM_ID,
+
+ // 12.2.5.65 DOCTYPE system identifier (double-quoted) state
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(double-quoted)-state
GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED,
+
+ // 12.2.5.66 DOCTYPE system identifier (single-quoted) state
+ // https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(single-quoted)-state
GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED,
+
+ // 12.2.5.67 After DOCTYPE system identifier state
+ // https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-identifier-state
GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_ID,
+
+ // 12.2.5.68 Bogus DOCTYPE state
+ // https://html.spec.whatwg.org/multipage/parsing.html#bogus-doctype-state
GUMBO_LEX_BOGUS_DOCTYPE,
- GUMBO_LEX_CDATA
+
+ // 12.2.5.69 CDATA section state
+ // https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-state
+ GUMBO_LEX_CDATA_SECTION,
+
+ // 12.2.5.70 CDATA section bracket state
+ // https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-bracket-state
+ GUMBO_LEX_CDATA_SECTION_BRACKET,
+
+ // 12.2.5.71 CDATA section end state
+ // https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-end-state
+ GUMBO_LEX_CDATA_SECTION_END,
+
+ // 12.2.5.72 Character reference state
+ // https://html.spec.whatwg.org/multipage/parsing.html#character-reference-state
+ GUMBO_LEX_CHARACTER_REFERENCE,
+
+ // 12.2.5.73 Named character reference state
+ // https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
+ GUMBO_LEX_NAMED_CHARACTER_REFERENCE,
+
+ // 12.2.5.74 Ambiguous ampersand state
+ // https://html.spec.whatwg.org/multipage/parsing.html#ambiguous-ampersand-state
+ GUMBO_LEX_AMBIGUOUS_AMPERSAND,
+
+ // 12.2.5.75 Numeric character reference state
+ // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-state
+ GUMBO_LEX_NUMERIC_CHARACTER_REFERENCE,
+
+ // 12.2.5.76 Hexadecimal character reference start state
+ // https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-start-state
+ GUMBO_LEX_HEXADECIMAL_CHARACTER_REFERENCE_START,
+
+ // 12.2.5.77 Decimal character reference start state
+ // https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-start-state
+ GUMBO_LEX_DECIMAL_CHARACTER_REFERENCE_START,
+
+ // 12.2.5.78 Hexadecimal character reference state
+ // https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-state
+ GUMBO_LEX_HEXADECIMAL_CHARACTER_REFERENCE,
+
+ // 12.2.5.79 Decimal character reference state
+ // https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-state
+ GUMBO_LEX_DECIMAL_CHARACTER_REFERENCE,
+
+ // 12.2.5.80 Numeric character reference end state
+ // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
+ GUMBO_LEX_NUMERIC_CHARACTER_REFERENCE_END
} GumboTokenizerEnum;
#endif // GUMBO_TOKENIZER_STATES_H_