// situationally print newlines to make the generated html // easier to read #define PRETTY_PRINT 0 #include #include #include #include #include #include typedef struct StateMachine { size_t top; int cs; int act; const char * p; const char * pb; const char * pe; const char * eof; const char * ts; const char * te; const char * a1; const char * a2; const char * b1; const char * b2; bool f_inline; bool f_strip; bool list_mode; bool header_mode; GString * output; GArray * stack; GQueue * dstack; int list_nest; int d; int b; int quote; } StateMachine; static const size_t MAX_STACK_DEPTH = 512; static const int BLOCK_P = 1; static const int INLINE_SPOILER = 2; static const int BLOCK_SPOILER = 3; static const int BLOCK_QUOTE = 4; static const int BLOCK_EXPAND = 5; static const int BLOCK_NODTEXT = 6; static const int BLOCK_CODE = 7; static const int BLOCK_TD = 8; static const int INLINE_NODTEXT = 9; static const int INLINE_B = 10; static const int INLINE_I = 11; static const int INLINE_U = 12; static const int INLINE_S = 13; static const int INLINE_TN = 14; static const int BLOCK_TN = 15; static const int BLOCK_TABLE = 16; static const int BLOCK_THEAD = 17; static const int BLOCK_TBODY = 18; static const int BLOCK_TR = 19; static const int BLOCK_UL = 20; static const int BLOCK_LI = 21; static const int BLOCK_TH = 22; static const int BLOCK_H1 = 23; static const int BLOCK_H2 = 24; static const int BLOCK_H3 = 25; static const int BLOCK_H4 = 26; static const int BLOCK_H5 = 27; static const int BLOCK_H6 = 28; %%{ machine dtext; access sm->; variable p sm->p; variable pe sm->pe; variable eof sm->eof; variable top sm->top; variable ts sm->ts; variable te sm->te; variable act sm->act; variable stack ((int *)sm->stack->data); prepush { size_t len = sm->stack->len; if (len > MAX_STACK_DEPTH) { free_machine(sm); rb_raise(rb_eSyntaxError, "too many nested elements"); } if (sm->top >= len) { sm->stack = g_array_set_size(sm->stack, len + 16); } } action mark_a1 { sm->a1 = sm->p; } 
action mark_a2 { sm->a2 = sm->p; } action mark_b1 { sm->b1 = sm->p; } action mark_b2 { sm->b2 = sm->p; } newline = '\r\n' | '\n'; nonnewline = any - (newline | '\0' | '\r'); nonquote = ^'"'; nonbracket = ^']'; nonpipe = ^'|'; nonpipebracket = nonpipe & nonbracket; noncurly = ^'}'; mention = '@' graph+ >mark_a1 %mark_a2; url = 'http' 's'? '://' graph+; internal_url = '/' graph+; basic_textile_link = '"' nonquote+ >mark_a1 '"' >mark_a2 ':' (url | internal_url) >mark_b1 %mark_b2; bracketed_textile_link = '"' nonquote+ >mark_a1 '"' >mark_a2 ':[' (url | internal_url) >mark_b1 %mark_b2 :>> ']'; basic_wiki_link = '[[' nonpipebracket+ >mark_a1 %mark_a2 ']]'; aliased_wiki_link = '[[' nonpipebracket+ >mark_a1 %mark_a2 '|' nonbracket+ >mark_b1 %mark_b2 ']]'; post_link = '{{' noncurly+ >mark_a1 %mark_a2 '}}'; post_id = 'post #'i digit+ >mark_a1 %mark_a2; forum_post_id = 'forum #'i digit+ >mark_a1 %mark_a2; forum_topic_id = 'topic #'i digit+ >mark_a1 %mark_a2; forum_topic_paged_id = 'topic #'i digit+ >mark_a1 %mark_a2 '/p' digit+ >mark_b1 %mark_b2; comment_id = 'comment #'i digit+ >mark_a1 %mark_a2; pool_id = 'pool #'i digit+ >mark_a1 %mark_a2; user_id = 'user #'i digit+ >mark_a1 %mark_a2; artist_id = 'artist #'i digit+ >mark_a1 %mark_a2; github_issue_id = 'issue #'i digit+ >mark_a1 %mark_a2; pixiv_id = 'pixiv #'i digit+ >mark_a1 %mark_a2; pixiv_paged_id = 'pixiv #'i digit+ >mark_a1 %mark_a2 '/p' digit+ >mark_b1 %mark_b2; ws = ' ' | '\t'; nonperiod = graph - ('.' | '"'); header = 'h'i [123456] >mark_a1 %mark_a2 '.' ws*; header_with_id = 'h'i [123456] >mark_a1 %mark_a2 '#' nonperiod+ >mark_b1 %mark_b2 '.' 
ws*; aliased_expand = '[expand='i (nonbracket+ >mark_a1 %mark_a2) ']'; list_item = '*'+ >mark_a1 %mark_a2 ws+ nonnewline+ >mark_b1 %mark_b2; inline := |* post_id => { append(sm, true, "a1, sm->a2 - 1); append(sm, true, "\">"); append(sm, false, "post #"); append_segment(sm, false, sm->a1, sm->a2 - 1); append(sm, true, ""); }; forum_post_id => { append(sm, true, "a1, sm->a2 - 1); append(sm, true, "\">"); append(sm, false, "forum #"); append_segment(sm, false, sm->a1, sm->a2 - 1); append(sm, true, ""); }; forum_topic_id => { append(sm, true, "a1, sm->a2 - 1); append(sm, true, "\">"); append(sm, false, "topic #"); append_segment(sm, false, sm->a1, sm->a2 - 1); append(sm, true, ""); }; forum_topic_paged_id => { append(sm, true, "a1, sm->a2 - 1); append(sm, true, "?page="); append_segment(sm, true, sm->b1, sm->b2 - 1); append(sm, true, "\">"); append(sm, false, "topic #"); append_segment(sm, false, sm->a1, sm->a2 - 1); append(sm, false, "/p"); append_segment(sm, false, sm->b1, sm->b2 - 1); append(sm, true, ""); }; comment_id => { append(sm, true, "a1, sm->a2 - 1); append(sm, true, "\">"); append(sm, false, "comment #"); append_segment(sm, false, sm->a1, sm->a2 - 1); append(sm, true, ""); }; pool_id => { append(sm, true, "a1, sm->a2 - 1); append(sm, true, "\">"); append(sm, false, "pool #"); append_segment(sm, false, sm->a1, sm->a2 - 1); append(sm, true, ""); }; user_id => { append(sm, true, "a1, sm->a2 - 1); append(sm, true, "\">"); append(sm, false, "user #"); append_segment(sm, false, sm->a1, sm->a2 - 1); append(sm, true, ""); }; artist_id => { append(sm, true, "a1, sm->a2 - 1); append(sm, true, "\">"); append(sm, false, "artist #"); append_segment(sm, false, sm->a1, sm->a2 - 1); append(sm, true, ""); }; github_issue_id => { append(sm, true, "a1, sm->a2 - 1); append(sm, true, "\">"); append(sm, false, "issue #"); append_segment(sm, false, sm->a1, sm->a2 - 1); append(sm, true, ""); }; pixiv_id => { append(sm, true, "a1, sm->a2 - 1); append(sm, true, "\">"); append(sm, 
false, "pixiv #"); append_segment(sm, false, sm->a1, sm->a2 - 1); append(sm, true, ""); }; pixiv_paged_id => { append(sm, true, "a1, sm->a2 - 1); append(sm, true, "&page="); append_segment(sm, true, sm->b1, sm->b2 - 1); append(sm, true, "\">"); append(sm, false, "pixiv #"); append_segment(sm, false, sm->a1, sm->a2 - 1); append(sm, false, "/p"); append_segment(sm, false, sm->b1, sm->b2 - 1); append(sm, true, ""); }; post_link => { append(sm, true, "a1, sm->a2 - 1); append(sm, true, "\">"); append_segment_html_escaped(sm, sm->a1, sm->a2 - 1); append(sm, true, ""); }; basic_wiki_link => { GString * segment = g_string_new_len(sm->a1, sm->a2 - sm->a1); GString * lowercase_segment = NULL; underscore_string(segment->str, segment->len); if (g_utf8_validate(segment->str, -1, NULL)) { lowercase_segment = g_string_new(g_utf8_strdown(segment->str, -1)); } else { lowercase_segment = g_string_new(g_ascii_strdown(segment->str, -1)); } append(sm, true, "str, lowercase_segment->str + lowercase_segment->len - 1); append(sm, true, "\">"); append_segment_html_escaped(sm, sm->a1, sm->a2 - 1); append(sm, true, ""); g_string_free(lowercase_segment, TRUE); g_string_free(segment, TRUE); }; aliased_wiki_link => { GString * segment = g_string_new_len(sm->a1, sm->a2 - sm->a1); GString * lowercase_segment = NULL; underscore_string(segment->str, segment->len); if (g_utf8_validate(segment->str, -1, NULL)) { lowercase_segment = g_string_new(g_utf8_strdown(segment->str, -1)); } else { lowercase_segment = g_string_new(g_ascii_strdown(segment->str, -1)); } append(sm, true, "str, lowercase_segment->str + lowercase_segment->len - 1); append(sm, true, "\">"); append_segment_html_escaped(sm, sm->b1, sm->b2 - 1); append(sm, true, ""); g_string_free(lowercase_segment, TRUE); g_string_free(segment, TRUE); }; basic_textile_link => { if (is_boundary_c(fc)) { sm->d = 2; sm->b = true; } else { sm->d = 1; sm->b = false; } append(sm, true, "b1, sm->b2 - sm->d); append(sm, true, "\">"); link_content_sm = 
parse_helper(sm->a1, sm->a2 - sm->a1, false, true); append(sm, true, link_content_sm->output->str); free_machine(link_content_sm); link_content_sm = NULL; append(sm, true, ""); if (sm->b) { append_c_html_escaped(sm, fc); } }; bracketed_textile_link => { append(sm, true, "b1, sm->b2 - 1); append(sm, true, "\">"); append_segment_html_escaped(sm, sm->a1, sm->a2 - 1); append(sm, true, ""); }; url => { if (is_boundary_c(fc)) { sm->b = true; sm->d = 2; } else { sm->b = false; sm->d = 1; } append(sm, true, "ts, sm->te - sm->d); append(sm, true, "\">"); append_segment_html_escaped(sm, sm->ts, sm->te - sm->d); append(sm, true, ""); if (sm->b) { append_c_html_escaped(sm, fc); } }; # probably a tag. examples include @.@ and @_@ '@' graph '@' => { append_segment_html_escaped(sm, sm->ts, sm->te - 1); }; mention => { if (is_boundary_c(fc)) { sm->b = true; sm->d = 2; } else { sm->b = false; sm->d = 1; } append(sm, true, "a1, sm->a2 - sm->d); append(sm, true, "\">"); append_c(sm, '@'); append_segment_html_escaped(sm, sm->a1, sm->a2 - sm->d); append(sm, true, ""); if (sm->b) { append_c_html_escaped(sm, fc); } }; newline list_item => { g_debug("inline list"); if (dstack_check(sm, BLOCK_LI)) { g_debug(" rewind li"); dstack_rewind(sm); } else if (dstack_check(sm, BLOCK_P)) { g_debug(" rewind p"); dstack_rewind(sm); } else if (sm->header_mode) { g_debug(" rewind header"); dstack_rewind(sm); } g_debug(" next list"); fexec sm->ts + 1; fnext list; }; '[b]'i => { dstack_push(sm, &INLINE_B); append(sm, true, ""); }; '[/b]'i => { if (dstack_check(sm, INLINE_B)) { dstack_pop(sm); append(sm, true, ""); } else { append(sm, true, "[/b]"); } }; '[i]'i => { dstack_push(sm, &INLINE_I); append(sm, true, ""); }; '[/i]'i => { if (dstack_check(sm, INLINE_I)) { dstack_pop(sm); append(sm, true, ""); } else { append(sm, true, "[/i]"); } }; '[s]'i => { dstack_push(sm, &INLINE_S); append(sm, true, ""); }; '[/s]'i => { if (dstack_check(sm, INLINE_S)) { dstack_pop(sm); append(sm, true, ""); } else { 
append(sm, true, "[/s]"); } }; '[u]'i => { dstack_push(sm, &INLINE_U); append(sm, true, ""); }; '[/u]'i => { if (dstack_check(sm, INLINE_U)) { dstack_pop(sm); append(sm, true, ""); } else { append(sm, true, "[/u]"); } }; '[tn]'i => { dstack_push(sm, &INLINE_TN); append(sm, true, ""); }; '[/tn]'i => { dstack_close_before_block(sm); if (dstack_check(sm, BLOCK_TN)) { dstack_pop(sm); fret; } else if (dstack_check(sm, INLINE_TN)) { dstack_pop(sm); append(sm, true, ""); } else { append_block(sm, "[/tn]"); } }; # these are block level elements that should kick us out of the inline # scanner header => { dstack_rewind(sm); fexec sm->a1 - 1; fret; }; header_with_id => { dstack_rewind(sm); fexec sm->a1 - 1; fret; }; '[quote]'i => { g_debug("inline [quote]"); dstack_close_before_block(sm); fexec sm->ts; fret; }; '[/quote]'i space* => { g_debug("inline [/quote]"); dstack_close_before_block(sm); if (dstack_check(sm, BLOCK_LI)) { dstack_close_list(sm); } if (dstack_check(sm, BLOCK_QUOTE)) { dstack_rewind(sm); fret; } else { append_block(sm, "[/quote]"); } }; '[spoiler]'i => { g_debug("inline [spoiler]"); g_debug(" push "); dstack_push(sm, &INLINE_SPOILER); append(sm, true, ""); }; '[/spoiler]'i => { g_debug("inline [/spoiler]"); dstack_close_before_block(sm); if (dstack_check(sm, INLINE_SPOILER)) { g_debug(" pop dstack"); g_debug(" print "); dstack_pop(sm); append(sm, true, ""); } else if (dstack_check(sm, BLOCK_SPOILER)) { g_debug(" pop dstack"); g_debug(" print "); g_debug(" return"); dstack_pop(sm); append_block(sm, ""); fret; } else { append_block(sm, "[/spoiler]"); } }; '[expand]'i => { g_debug("inline [expand]"); dstack_rewind(sm); fexec(sm->p - 7); fret; }; '[/expand]'i => { dstack_close_before_block(sm); if (dstack_check(sm, BLOCK_EXPAND)) { append_block(sm, ""); dstack_pop(sm); fret; } else { append_block(sm, "[/expand]"); } }; '[nodtext]'i => { dstack_push(sm, &INLINE_NODTEXT); fcall nodtext; }; '[/th]'i => { if (dstack_check(sm, BLOCK_TH)) { dstack_pop(sm); 
append_block(sm, ""); fret; } else { append_block(sm, "[/th]"); } }; '[/td]'i => { if (dstack_check(sm, BLOCK_TD)) { dstack_pop(sm); append_block(sm, ""); fret; } else { append_block(sm, "[/td]"); } }; '\0' => { g_debug("inline 0"); g_debug(" return"); fhold; fret; }; newline{2,} => { g_debug("inline newline2"); g_debug(" return"); if (sm->list_mode) { dstack_close_list(sm); } fexec sm->ts; fret; }; newline => { g_debug("inline newline"); if (sm->header_mode) { sm->header_mode = false; dstack_rewind(sm); fret; } else { append(sm, true, "
"); } }; '\r' => { append_c(sm, ' '); }; any => { g_debug("inline char: %c", fc); append_c_html_escaped(sm, fc); }; *|; code := |* '[/code]'i => { if (dstack_check(sm, BLOCK_CODE)) { dstack_rewind(sm); } else { append(sm, true, "[/code]"); } fret; }; '\0' => { fhold; fret; }; any => { append_c_html_escaped(sm, fc); }; *|; nodtext := |* '[/nodtext]'i => { if (dstack_check(sm, BLOCK_NODTEXT)) { dstack_pop(sm); append_block(sm, "

"); fret; } else if (dstack_check(sm, INLINE_NODTEXT)) { dstack_pop(sm); fret; } else { append(sm, true, "[/nodtext]"); } }; '\0' => { fhold; fret; }; any => { append_c_html_escaped(sm, fc); }; *|; table := |* '[thead]'i => { dstack_push(sm, &BLOCK_THEAD); append_block(sm, ""); }; '[/thead]'i => { if (dstack_check(sm, BLOCK_THEAD)) { dstack_pop(sm); append_block(sm, ""); } else { append(sm, true, "[/thead]"); } }; '[tbody]'i => { dstack_push(sm, &BLOCK_TBODY); append_block(sm, ""); }; '[/tbody]'i => { if (dstack_check(sm, BLOCK_TBODY)) { dstack_pop(sm); append_block(sm, ""); } else { append(sm, true, "[/tbody]"); } }; '[th]'i => { dstack_push(sm, &BLOCK_TH); append_block(sm, ""); fcall inline; }; '[tr]'i => { dstack_push(sm, &BLOCK_TR); append_block(sm, ""); }; '[/tr]'i => { if (dstack_check(sm, BLOCK_TR)) { dstack_pop(sm); append_block(sm, ""); } else { append(sm, true, "[/tr]"); } }; '[td]'i => { dstack_push(sm, &BLOCK_TD); append_block(sm, ""); fcall inline; }; '[/table]'i => { if (dstack_check(sm, BLOCK_TABLE)) { dstack_pop(sm); append_block(sm, ""); fret; } else { append(sm, true, "[/table]"); } }; '\0' => { fhold; fret; }; any; *|; list := |* list_item => { int prev_nest = sm->list_nest; append_closing_p_if(sm); g_debug("list start"); sm->list_mode = true; sm->list_nest = sm->a2 - sm->a1; fexec sm->b1; if (sm->list_nest > prev_nest) { int i=0; for (i=prev_nest; ilist_nest; ++i) { g_debug(" dstack push ul"); g_debug(" print
    "); append_block(sm, "
      "); dstack_push(sm, &BLOCK_UL); } } else if (sm->list_nest < prev_nest) { int i=0; for (i=sm->list_nest; i"); dstack_pop(sm); append_block(sm, "
    "); } } } append_block(sm, "
  • "); dstack_push(sm, &BLOCK_LI); g_debug(" print
  • "); g_debug(" push li"); g_debug(" call inline"); fcall inline; }; # exit list (newline{2,} | '\0') => { dstack_close_list(sm); fexec sm->ts; fret; }; newline; any => { dstack_rewind(sm); fhold; fret; }; *|; main := |* header_with_id => { char header = *sm->a1; GString * id_name = g_string_new_len(sm->b1, sm->b2 - sm->b1); if (sm->f_inline) { header = '6'; } if (!sm->f_strip) { switch (header) { case '1': dstack_push(sm, &BLOCK_H1); append_block(sm, "

    str); append_block(sm, "\">"); break; case '2': dstack_push(sm, &BLOCK_H2); append_block(sm, "

    str); append_block(sm, "\">"); break; case '3': dstack_push(sm, &BLOCK_H3); append_block(sm, "

    str); append_block(sm, "\">"); break; case '4': dstack_push(sm, &BLOCK_H4); append_block(sm, "

    str); append_block(sm, "\">"); break; case '5': dstack_push(sm, &BLOCK_H5); append_block(sm, "

    str); append_block(sm, "\">"); break; case '6': dstack_push(sm, &BLOCK_H6); append_block(sm, "
    str); append_block(sm, "\">"); break; } }

    sm->header_mode = true;

    /* free_segment must be TRUE: with false, g_string_free() releases only the
       GString wrapper and returns the character buffer to the caller. The
       return value was discarded, so that buffer leaked for every header
       carrying an id. */
    g_string_free(id_name, TRUE);
    id_name = NULL;

    fcall inline;
  };

  header => {
    /* a1 was marked on the level digit ([123456]) by the `header` pattern. */
    char header = *sm->a1;

    /* In inline mode every header is handled as level 6. */
    if (sm->f_inline) {
      header = '6';
    }

    /* Nothing is pushed or emitted for the header when stripping markup. */
    if (!sm->f_strip) {
      switch (header) {
        case '1': dstack_push(sm, &BLOCK_H1); append_block(sm, "

    "); break; case '2': dstack_push(sm, &BLOCK_H2); append_block(sm, "

    "); break; case '3': dstack_push(sm, &BLOCK_H3); append_block(sm, "

    "); break; case '4': dstack_push(sm, &BLOCK_H4); append_block(sm, "

    "); break; case '5': dstack_push(sm, &BLOCK_H5); append_block(sm, "

    "); break; case '6': dstack_push(sm, &BLOCK_H6); append_block(sm, "
    "); break; } } sm->header_mode = true; fcall inline; }; '[quote]'i space* => { g_debug("block [quote]"); g_debug(" push quote"); g_debug(" print
    "); dstack_close_before_block(sm); dstack_push(sm, &BLOCK_QUOTE); append_block(sm, "
    "); }; '[spoiler]'i space* => { g_debug("block [spoiler]"); g_debug(" push spoiler"); g_debug(" print
    "); dstack_close_before_block(sm); dstack_push(sm, &BLOCK_SPOILER); append_block(sm, "
    "); }; '[/spoiler]'i => { g_debug("block [/spoiler]"); dstack_close_before_block(sm); if (dstack_check(sm, BLOCK_SPOILER)) { g_debug(" rewind"); dstack_rewind(sm); } }; '[code]'i space* => { g_debug("block [code]"); dstack_close_before_block(sm); dstack_push(sm, &BLOCK_CODE); append_block(sm, "
    ");
        fcall code;
      };
    
      '[expand]'i space* => {
        g_debug("block [expand]");
        dstack_close_before_block(sm);
        dstack_push(sm, &BLOCK_EXPAND);
        append_block(sm, "
    "); append_block(sm, "
    "); append_block(sm, "
    "); }; aliased_expand space* => { g_debug("block [expand=]"); dstack_close_before_block(sm); dstack_push(sm, &BLOCK_EXPAND); append_block(sm, "
    "); append(sm, true, ""); append_segment_html_escaped(sm, sm->a1, sm->a2 - 1); append(sm, true, ""); append_block(sm, "
    "); append_block(sm, "
    "); }; '[nodtext]'i space* => { g_debug("block [nodtext]"); dstack_close_before_block(sm); dstack_push(sm, &BLOCK_NODTEXT); dstack_push(sm, &BLOCK_P); append_block(sm, "

    "); fcall nodtext; }; '[table]'i => { dstack_close_before_block(sm); dstack_push(sm, &BLOCK_TABLE); append_block(sm, ""); fcall table; }; '[tn]'i => { dstack_push(sm, &BLOCK_TN); append_block(sm, "

    "); fcall inline; }; list_item => { g_debug("block list"); g_debug(" call list"); sm->list_nest = 0; sm->list_mode = true; append_closing_p_if(sm); fexec sm->ts; fcall list; }; '\0' => { g_debug("block 0"); g_debug(" close dstack"); dstack_close(sm); }; newline{2,} => { g_debug("block newline2"); if (sm->header_mode) { sm->header_mode = false; dstack_rewind(sm); } else if (sm->list_mode) { dstack_close_list(sm); } else { dstack_close_before_block(sm); } }; newline => { g_debug("block newline"); }; any => { g_debug("block char: %c", fc); fhold; if (g_queue_is_empty(sm->dstack) || dstack_check(sm, BLOCK_QUOTE) || dstack_check(sm, BLOCK_SPOILER) || dstack_check(sm, BLOCK_EXPAND)) { g_debug(" push p"); g_debug(" print

    "); dstack_push(sm, &BLOCK_P); append_block(sm, "

    "); } fcall inline; }; *|; }%% %% write data; static inline void underscore_string(char * str, size_t len) { for (size_t i=0; idstack, (gpointer)element); } static inline int * dstack_pop(StateMachine * sm) { return g_queue_pop_tail(sm->dstack); } static inline int * dstack_peek(StateMachine * sm) { return g_queue_peek_tail(sm->dstack); } static inline bool dstack_search(StateMachine * sm, const int * element) { return g_queue_find(sm->dstack, (gconstpointer)element); } static inline bool dstack_check(StateMachine * sm, int expected_element) { int * top = dstack_peek(sm); return top && *top == expected_element; } static inline bool dstack_check2(StateMachine * sm, int expected_element) { int * top2 = NULL; if (sm->dstack->length < 2) { return false; } top2 = g_queue_peek_nth(sm->dstack, sm->dstack->length - 2); return top2 && *top2 == expected_element; } static inline void append(StateMachine * sm, bool is_markup, const char * s) { if (!(is_markup && sm->f_strip)) { sm->output = g_string_append(sm->output, s); } } static inline void append_c(StateMachine * sm, char s) { sm->output = g_string_append_c(sm->output, s); } static inline void append_c_html_escaped(StateMachine * sm, char s) { switch (s) { case '<': sm->output = g_string_append(sm->output, "<"); break; case '>': sm->output = g_string_append(sm->output, ">"); break; case '&': sm->output = g_string_append(sm->output, "&"); break; case '"': sm->output = g_string_append(sm->output, """); break; default: sm->output = g_string_append_c(sm->output, s); break; } } static inline void append_segment(StateMachine * sm, bool is_markup, const char * a, const char * b) { if (!(is_markup && sm->f_strip)) { sm->output = g_string_append_len(sm->output, a, b - a + 1); } } static inline void append_segment_uri_escaped(StateMachine * sm, const char * a, const char * b) { if (sm->f_strip) { return; } char * segment1 = NULL; char * segment2 = NULL; GString * segment_string = g_string_new_len(a, b - a + 1); segment1 = 
g_uri_escape_string(segment_string->str, NULL, TRUE); segment2 = g_markup_escape_text(segment1, -1); sm->output = g_string_append(sm->output, segment2); g_string_free(segment_string, TRUE); g_free(segment1); g_free(segment2); } static inline void append_segment_html_escaped(StateMachine * sm, const char * a, const char * b) { gchar * segment = g_markup_escape_text(a, b - a + 1); sm->output = g_string_append(sm->output, segment); g_free(segment); } static inline void append_block(StateMachine * sm, const char * s) { if (sm->f_inline) { // sm->output = g_string_append_c(sm->output, ' '); } else if (sm->f_strip) { // do nothing } else { sm->output = g_string_append(sm->output, s); } } static void append_closing_p(StateMachine * sm) { size_t i = sm->output->len; if (i > 4 && !strncmp(sm->output->str + i - 4, "
    ", 4)) { sm->output = g_string_truncate(sm->output, sm->output->len - 4); } if (i > 3 && !strncmp(sm->output->str + i - 3, "

    ", 3)) { sm->output = g_string_truncate(sm->output, sm->output->len - 3); return; } append_block(sm, "

    "); } static void append_closing_p_if(StateMachine * sm) { if (!dstack_check(sm, BLOCK_P)) { return; } dstack_pop(sm); append_closing_p(sm); } static void dstack_rewind(StateMachine * sm) { int * element = dstack_pop(sm); if (element == NULL) { return; } if (*element == BLOCK_P) { append_closing_p(sm); } else if (*element == INLINE_SPOILER) { append(sm, true, ""); } else if (*element == BLOCK_SPOILER) { append_block(sm, ""); } else if (*element == BLOCK_QUOTE) { append_block(sm, ""); } else if (*element == BLOCK_EXPAND) { append_block(sm, ""); } else if (*element == BLOCK_NODTEXT) { append_closing_p(sm); } else if (*element == BLOCK_CODE) { append_block(sm, ""); } else if (*element == BLOCK_TD) { append_block(sm, ""); } else if (*element == INLINE_NODTEXT) { } else if (*element == INLINE_B) { append(sm, true, ""); } else if (*element == INLINE_I) { append(sm, true, ""); } else if (*element == INLINE_U) { append(sm, true, ""); } else if (*element == INLINE_S) { append(sm, true, ""); } else if (*element == INLINE_TN) { append(sm, true, ""); } else if (*element == BLOCK_TN) { append_closing_p(sm); } else if (*element == BLOCK_TABLE) { append_block(sm, "
    "); } else if (*element == BLOCK_THEAD) { append_block(sm, ""); } else if (*element == BLOCK_TBODY) { append_block(sm, ""); } else if (*element == BLOCK_TR) { append_block(sm, ""); } else if (*element == BLOCK_UL) { append_block(sm, "

"); } else if (*element == BLOCK_LI) { append_block(sm, ""); } else if (*element == BLOCK_H6) { append_block(sm, ""); } else if (*element == BLOCK_H5) { append_block(sm, ""); } else if (*element == BLOCK_H4) { append_block(sm, ""); } else if (*element == BLOCK_H3) { append_block(sm, ""); } else if (*element == BLOCK_H2) { append_block(sm, ""); } else if (*element == BLOCK_H1) { append_block(sm, ""); } } static void dstack_close_before_block(StateMachine * sm) { while (1) { if (dstack_check(sm, BLOCK_P)) { dstack_pop(sm); append_closing_p(sm); } else if (dstack_check(sm, BLOCK_LI) || dstack_check(sm, BLOCK_UL)) { dstack_rewind(sm); } else { return; } } } static void dstack_close(StateMachine * sm) { while (dstack_peek(sm) != NULL) { dstack_rewind(sm); } } static void dstack_close_list(StateMachine * sm) { while (dstack_check(sm, BLOCK_LI) || dstack_check(sm, BLOCK_UL)) { dstack_rewind(sm); } sm->list_mode = false; sm->list_nest = 0; } static inline bool is_boundary_c(char c) { switch (c) { case ':': case ';': case '.': case ',': case '!': case '?': case ')': case ']': case '<': case '>': return true; } return false; } static bool print_machine(StateMachine * sm) { printf("p=%c\n", *sm->p); return true; } static void init_machine(StateMachine * sm, const char * src, size_t len) { size_t output_length = 0; sm->p = src; sm->pb = sm->p; sm->pe = sm->p + len; sm->eof = sm->pe; sm->ts = NULL; sm->te = NULL; sm->cs = 0; sm->act = 0; sm->top = 0; output_length = len; if (output_length < (INT16_MAX / 2)) { output_length *= 2; } sm->output = g_string_sized_new(output_length); sm->a1 = NULL; sm->a2 = NULL; sm->b1 = NULL; sm->b2 = NULL; sm->f_inline = false; sm->f_strip = false; sm->stack = g_array_sized_new(FALSE, TRUE, sizeof(int), 16); sm->dstack = g_queue_new(); sm->list_nest = 0; sm->list_mode = false; sm->header_mode = false; sm->d = 0; sm->b = 0; sm->quote = 0; } static void free_machine(StateMachine * sm) { g_string_free(sm->output, TRUE); g_array_free(sm->stack, 
FALSE); g_queue_free(sm->dstack); g_free(sm); } static StateMachine * parse_helper(const char * src, size_t len, bool f_strip, bool f_inline) { StateMachine * sm = NULL; StateMachine * link_content_sm = NULL; sm = (StateMachine *)g_malloc0(sizeof(StateMachine)); init_machine(sm, src, len); sm->f_strip = f_strip; sm->f_inline = f_inline; %% write init; %% write exec; dstack_close(sm); return sm; } static VALUE parse(int argc, VALUE * argv, VALUE self) { VALUE input; VALUE input0; VALUE options; VALUE opt_inline; VALUE opt_strip; VALUE ret; rb_encoding * encoding = NULL; StateMachine * sm = NULL; bool f_strip = false; bool f_inline = false; g_debug("start\n"); if (argc == 0) { rb_raise(rb_eArgError, "wrong number of arguments (0 for 1)"); } input = argv[0]; if (NIL_P(input)) { return Qnil; } input0 = rb_str_dup(input); input0 = rb_str_cat(input0, "\0", 1); if (argc > 1) { options = argv[1]; if (!NIL_P(options)) { opt_strip = rb_hash_aref(options, ID2SYM(rb_intern("strip"))); if (RTEST(opt_strip)) { f_strip = true; } opt_inline = rb_hash_aref(options, ID2SYM(rb_intern("inline"))); if (RTEST(opt_inline)) { f_inline = true; } } } sm = parse_helper(RSTRING_PTR(input0), RSTRING_LEN(input0), f_strip, f_inline); encoding = rb_enc_find("utf-8"); ret = rb_enc_str_new(sm->output->str, sm->output->len, encoding); free_machine(sm); return ret; } void Init_dtext() { VALUE mDTextRagel = rb_define_module("DTextRagel"); rb_define_singleton_method(mDTextRagel, "parse", parse, -1); }