// situationally print newlines to make the generated html
// easier to read
#define PRETTY_PRINT 0
#include
#include
#include
#include
#include
#include
// All state for one parse run. The Ragel machine below is configured with
// `access sm->` / `variable ... sm->...`, so the scanner's own bookkeeping
// lives here next to the output buffer and the open-element stack.
typedef struct StateMachine {
// Ragel call-stack depth (bound via `variable top`).
size_t top;
// Ragel current state (reached through `access sm->`).
int cs;
// Ragel scanner action id (bound via `variable act`).
int act;
// Current read position in the input.
const char * p;
// Start of the input buffer (set once in init_machine).
const char * pb;
// One past the end of the input.
const char * pe;
// End-of-file marker; init_machine sets it equal to pe.
const char * eof;
// Start / end of the token currently being matched by a scanner.
const char * ts;
const char * te;
// Capture marks recorded by the mark_a1/mark_a2/mark_b1/mark_b2 actions.
const char * a1;
const char * a2;
const char * b1;
const char * b2;
// Option flags copied from the :inline and :strip options in parse().
bool f_inline;
bool f_strip;
// Set while a list is being emitted.
bool list_mode;
// Set by the main scanner after it has matched a header token.
bool header_mode;
// Generated output.
GString * output;
// Backing store for the Ragel call stack (array of int).
GArray * stack;
// Stack of open elements; holds pointers to the element-type constants.
GQueue * dstack;
// Nesting depth of the list item most recently processed.
int list_nest;
// Scratch values used by the url/mention/textile-link actions: d is the
// number of trailing characters to leave out of the link, b records whether
// a trailing boundary character has to be re-emitted after the link.
int d;
int b;
} StateMachine;
// Upper bound checked in the Ragel prepush hook before growing the call stack.
static const size_t MAX_STACK_DEPTH = 512;
// Element-type tags stored on sm->dstack. They are objects rather than enum
// constants because dstack_push() is handed their address (e.g. &INLINE_B) and
// dstack_check() compares the pointed-to value.
static const int BLOCK_P = 1;
static const int INLINE_SPOILER = 2;
static const int BLOCK_SPOILER = 3;
static const int BLOCK_QUOTE = 4;
static const int BLOCK_EXPAND = 5;
static const int BLOCK_NODTEXT = 6;
static const int BLOCK_CODE = 7;
static const int BLOCK_TD = 8;
static const int INLINE_NODTEXT = 9;
static const int INLINE_B = 10;
static const int INLINE_I = 11;
static const int INLINE_U = 12;
static const int INLINE_S = 13;
static const int INLINE_TN = 14;
static const int BLOCK_TN = 15;
static const int BLOCK_TABLE = 16;
static const int BLOCK_THEAD = 17;
static const int BLOCK_TBODY = 18;
static const int BLOCK_TR = 19;
static const int BLOCK_UL = 20;
static const int BLOCK_LI = 21;
static const int BLOCK_TH = 22;
static const int BLOCK_H1 = 23;
static const int BLOCK_H2 = 24;
static const int BLOCK_H3 = 25;
static const int BLOCK_H4 = 26;
static const int BLOCK_H5 = 27;
static const int BLOCK_H6 = 28;
// Ragel specification of the "dtext" machine. Every Ragel variable is
// redirected into the StateMachine struct so the generated code is driven
// entirely through the `sm` pointer.
%%{
machine dtext;
access sm->;
variable p sm->p;
variable pe sm->pe;
variable eof sm->eof;
variable top sm->top;
variable ts sm->ts;
variable te sm->te;
variable act sm->act;
variable stack ((int *)sm->stack->data);
# Hook executed before a state is pushed on the call stack (fcall):
# reject excessive nesting, then make sure there is room for the new entry.
prepush {
size_t len = sm->stack->len;
// The machine is released before raising so that the error path does not
// leave it allocated.
if (len > MAX_STACK_DEPTH) {
free_machine(sm);
rb_raise(rb_eSyntaxError, "too many nested elements");
}
// Grow the backing array in steps of 16 entries once top reaches its length.
if (sm->top >= len) {
sm->stack = g_array_set_size(sm->stack, len + 16);
}
}
# Capture marks: remember the current position so that actions can later
# slice the matched text as [a1, a2) and [b1, b2).
action mark_a1 {
sm->a1 = sm->p;
}
action mark_a2 {
sm->a2 = sm->p;
}
action mark_b1 {
sm->b1 = sm->p;
}
action mark_b2 {
sm->b2 = sm->p;
}
# ---- character classes ----
newline = '\r\n' | '\n';
nonnewline = any - (newline | '\0' | '\r');
nonquote = ^'"';
nonbracket = ^']';
nonpipe = ^'|';
nonpipebracket = nonpipe & nonbracket;
noncurly = ^'}';
# ---- links and mentions; a1..a2 (and b1..b2) delimit the captured parts ----
mention = '@' graph+ >mark_a1 %mark_a2;
url = 'http' 's'? '://' graph+;
internal_url = '/' graph+;
# "title":url -- a marks the title, b marks the url
basic_textile_link = '"' nonquote+ >mark_a1 '"' >mark_a2 ':' (url | internal_url) >mark_b1 %mark_b2;
# "title":[url]
bracketed_textile_link = '"' nonquote+ >mark_a1 '"' >mark_a2 ':[' (url | internal_url) >mark_b1 %mark_b2 :>> ']';
# [[page]] and [[page|alias]]
basic_wiki_link = '[[' nonpipebracket+ >mark_a1 %mark_a2 ']]';
aliased_wiki_link = '[[' nonpipebracket+ >mark_a1 %mark_a2 '|' nonbracket+ >mark_b1 %mark_b2 ']]';
# {{text}}
post_link = '{{' noncurly+ >mark_a1 %mark_a2 '}}';
# ---- "<kind> #<digits>" references (case-insensitive prefix); a marks the
# digits, and the paged variants additionally mark the page number with b ----
post_id = 'post #'i digit+ >mark_a1 %mark_a2;
forum_post_id = 'forum #'i digit+ >mark_a1 %mark_a2;
forum_topic_id = 'topic #'i digit+ >mark_a1 %mark_a2;
forum_topic_paged_id = 'topic #'i digit+ >mark_a1 %mark_a2 '/p' digit+ >mark_b1 %mark_b2;
comment_id = 'comment #'i digit+ >mark_a1 %mark_a2;
pool_id = 'pool #'i digit+ >mark_a1 %mark_a2;
user_id = 'user #'i digit+ >mark_a1 %mark_a2;
artist_id = 'artist #'i digit+ >mark_a1 %mark_a2;
github_issue_id = 'issue #'i digit+ >mark_a1 %mark_a2;
pixiv_id = 'pixiv #'i digit+ >mark_a1 %mark_a2;
pixiv_paged_id = 'pixiv #'i digit+ >mark_a1 %mark_a2 '/p' digit+ >mark_b1 %mark_b2;
# ---- block-level openers ----
ws = ' ' | '\t';
# h1. .. h6. -- a marks the level digit
header = 'h'i [123456] >mark_a1 %mark_a2 '.' ws*;
# [expand=title] -- a marks the title
aliased_expand = '[expand='i (nonbracket+ >mark_a1 %mark_a2) ']';
# one or more '*' (a marks the stars, i.e. the nesting depth) followed by
# the item text (b)
list_item = '*'+ >mark_a1 %mark_a2 ws+ nonnewline+ >mark_b1 %mark_b2;
# Inline scanner: handles references, links, mentions and inline tags, and
# returns to the calling scanner (fret) when it meets a construct that has to
# be handled at block level.
# NOTE(review): many string literals in this scanner look truncated or empty
# (for example the unterminated `"a1, sm->a2 - 1);` arguments); the markup
# they carried appears to have been stripped from this copy -- confirm
# against the upstream file before building.
inline := |*
# ---- "<kind> #<id>" references: link followed by the visible label ----
post_id => {
append(sm, true, "a1, sm->a2 - 1);
append(sm, true, "\">");
append(sm, false, "post #");
append_segment(sm, false, sm->a1, sm->a2 - 1);
append(sm, true, "");
};
forum_post_id => {
append(sm, true, "a1, sm->a2 - 1);
append(sm, true, "\">");
append(sm, false, "forum #");
append_segment(sm, false, sm->a1, sm->a2 - 1);
append(sm, true, "");
};
forum_topic_id => {
append(sm, true, "a1, sm->a2 - 1);
append(sm, true, "\">");
append(sm, false, "topic #");
append_segment(sm, false, sm->a1, sm->a2 - 1);
append(sm, true, "");
};
forum_topic_paged_id => {
append(sm, true, "a1, sm->a2 - 1);
append(sm, true, "?page=");
append_segment(sm, true, sm->b1, sm->b2 - 1);
append(sm, true, "\">");
append(sm, false, "topic #");
append_segment(sm, false, sm->a1, sm->a2 - 1);
append(sm, false, "/p");
append_segment(sm, false, sm->b1, sm->b2 - 1);
append(sm, true, "");
};
comment_id => {
append(sm, true, "a1, sm->a2 - 1);
append(sm, true, "\">");
append(sm, false, "comment #");
append_segment(sm, false, sm->a1, sm->a2 - 1);
append(sm, true, "");
};
pool_id => {
append(sm, true, "a1, sm->a2 - 1);
append(sm, true, "\">");
append(sm, false, "pool #");
append_segment(sm, false, sm->a1, sm->a2 - 1);
append(sm, true, "");
};
user_id => {
append(sm, true, "a1, sm->a2 - 1);
append(sm, true, "\">");
append(sm, false, "user #");
append_segment(sm, false, sm->a1, sm->a2 - 1);
append(sm, true, "");
};
artist_id => {
append(sm, true, "a1, sm->a2 - 1);
append(sm, true, "\">");
append(sm, false, "artist #");
append_segment(sm, false, sm->a1, sm->a2 - 1);
append(sm, true, "");
};
github_issue_id => {
append(sm, true, "a1, sm->a2 - 1);
append(sm, true, "\">");
append(sm, false, "issue #");
append_segment(sm, false, sm->a1, sm->a2 - 1);
append(sm, true, "");
};
pixiv_id => {
append(sm, true, "a1, sm->a2 - 1);
append(sm, true, "\">");
append(sm, false, "pixiv #");
append_segment(sm, false, sm->a1, sm->a2 - 1);
append(sm, true, "");
};
pixiv_paged_id => {
append(sm, true, "a1, sm->a2 - 1);
append(sm, true, "&page=");
append_segment(sm, true, sm->b1, sm->b2 - 1);
append(sm, true, "\">");
append(sm, false, "pixiv #");
append_segment(sm, false, sm->a1, sm->a2 - 1);
append(sm, false, "/p");
append_segment(sm, false, sm->b1, sm->b2 - 1);
append(sm, true, "");
};
post_link => {
append(sm, true, "a1, sm->a2 - 1);
append(sm, true, "\">");
append_segment_html_escaped(sm, sm->a1, sm->a2 - 1);
append(sm, true, "");
};
# ---- wiki links: the target is the underscored, lower-cased page name ----
basic_wiki_link => {
GString * segment = g_string_new_len(sm->a1, sm->a2 - sm->a1);
GString * lowercase_segment = NULL;
underscore_string(segment->str, segment->len);
// Use the UTF-8 aware lowering only when the text is valid UTF-8.
if (g_utf8_validate(segment->str, -1, NULL)) {
lowercase_segment = g_string_new(g_utf8_strdown(segment->str, -1));
} else {
lowercase_segment = g_string_new(g_ascii_strdown(segment->str, -1));
}
append(sm, true, "str, lowercase_segment->str + lowercase_segment->len - 1);
append(sm, true, "\">");
append_segment_html_escaped(sm, sm->a1, sm->a2 - 1);
append(sm, true, "");
g_string_free(lowercase_segment, TRUE);
g_string_free(segment, TRUE);
};
aliased_wiki_link => {
GString * segment = g_string_new_len(sm->a1, sm->a2 - sm->a1);
GString * lowercase_segment = NULL;
underscore_string(segment->str, segment->len);
if (g_utf8_validate(segment->str, -1, NULL)) {
lowercase_segment = g_string_new(g_utf8_strdown(segment->str, -1));
} else {
lowercase_segment = g_string_new(g_ascii_strdown(segment->str, -1));
}
append(sm, true, "str, lowercase_segment->str + lowercase_segment->len - 1);
append(sm, true, "\">");
// The visible text is the alias (b), not the page name (a).
append_segment_html_escaped(sm, sm->b1, sm->b2 - 1);
append(sm, true, "");
g_string_free(lowercase_segment, TRUE);
g_string_free(segment, TRUE);
};
# ---- textile links, bare urls and mentions ----
# When the last matched character is a boundary character (see
# is_boundary_c) it is left out of the link (d = 2) and re-emitted as plain
# text afterwards (b = true).
basic_textile_link => {
if (is_boundary_c(fc)) {
sm->d = 2;
sm->b = true;
} else {
sm->d = 1;
sm->b = false;
}
append(sm, true, "b1, sm->b2 - sm->d);
append(sm, true, "\">");
append_segment_html_escaped(sm, sm->a1, sm->a2 - 1);
append(sm, true, "");
if (sm->b) {
append_c_html_escaped(sm, fc);
}
};
bracketed_textile_link => {
append(sm, true, "b1, sm->b2 - 1);
append(sm, true, "\">");
append_segment_html_escaped(sm, sm->a1, sm->a2 - 1);
append(sm, true, "");
};
url => {
if (is_boundary_c(fc)) {
sm->b = true;
sm->d = 2;
} else {
sm->b = false;
sm->d = 1;
}
append(sm, true, "ts, sm->te - sm->d);
append(sm, true, "\">");
append_segment_html_escaped(sm, sm->ts, sm->te - sm->d);
append(sm, true, "");
if (sm->b) {
append_c_html_escaped(sm, fc);
}
};
# probably a tag. examples include @.@ and @_@
'@' graph '@' => {
append_segment_html_escaped(sm, sm->ts, sm->te - 1);
};
mention => {
if (is_boundary_c(fc)) {
sm->b = true;
sm->d = 2;
} else {
sm->b = false;
sm->d = 1;
}
append(sm, true, "a1, sm->a2 - sm->d);
append(sm, true, "\">");
append_c(sm, '@');
append_segment_html_escaped(sm, sm->a1, sm->a2 - sm->d);
append(sm, true, "");
if (sm->b) {
append_c_html_escaped(sm, fc);
}
};
# A list item directly after a newline: close the innermost open element if
# it is a list item or a paragraph (or if a header is in progress), then
# re-scan from just after the newline in the list scanner.
newline list_item => {
g_debug("inline list");
if (dstack_check(sm, BLOCK_LI)) {
g_debug(" rewind li");
dstack_rewind(sm);
} else if (dstack_check(sm, BLOCK_P)) {
g_debug(" rewind p");
dstack_rewind(sm);
} else if (sm->header_mode) {
g_debug(" rewind header");
dstack_rewind(sm);
}
g_debug(" next list");
fexec sm->ts + 1;
fnext list;
};
# ---- simple inline tags: push on open; on close, pop when the tag is the
# innermost open element, otherwise emit the closing tag text literally ----
'[b]'i => {
dstack_push(sm, &INLINE_B);
append(sm, true, "");
};
'[/b]'i => {
if (dstack_check(sm, INLINE_B)) {
dstack_pop(sm);
append(sm, true, "");
} else {
append(sm, true, "[/b]");
}
};
'[i]'i => {
dstack_push(sm, &INLINE_I);
append(sm, true, "");
};
'[/i]'i => {
if (dstack_check(sm, INLINE_I)) {
dstack_pop(sm);
append(sm, true, "");
} else {
append(sm, true, "[/i]");
}
};
'[s]'i => {
dstack_push(sm, &INLINE_S);
append(sm, true, "");
};
'[/s]'i => {
if (dstack_check(sm, INLINE_S)) {
dstack_pop(sm);
append(sm, true, "");
} else {
append(sm, true, "[/s]");
}
};
'[u]'i => {
dstack_push(sm, &INLINE_U);
append(sm, true, "");
};
'[/u]'i => {
if (dstack_check(sm, INLINE_U)) {
dstack_pop(sm);
append(sm, true, "");
} else {
append(sm, true, "[/u]");
}
};
'[tn]'i => {
dstack_push(sm, &INLINE_TN);
append(sm, true, "");
};
# [/tn] may close either a block-level or an inline [tn].
'[/tn]'i => {
dstack_close_before_block(sm);
if (dstack_check(sm, BLOCK_TN)) {
dstack_pop(sm);
fret;
} else if (dstack_check(sm, INLINE_TN)) {
dstack_pop(sm);
append(sm, true, "");
} else {
append_block(sm, "[/tn]");
}
};
# these are block level elements that should kick us out of the inline
# scanner
header => {
dstack_rewind(sm);
// Step back to the 'h' so the caller sees the whole header token again.
fexec sm->a1 - 1;
fret;
};
'[quote]'i => {
g_debug("inline [quote]");
dstack_close_before_block(sm);
fexec sm->ts;
fret;
};
'[/quote]'i space* => {
g_debug("inline [/quote]");
dstack_close_before_block(sm);
if (dstack_check(sm, BLOCK_LI)) {
dstack_close_list(sm);
}
if (dstack_check(sm, BLOCK_QUOTE)) {
dstack_rewind(sm);
fret;
} else {
append_block(sm, "[/quote]");
}
};
'[spoiler]'i => {
g_debug("inline [spoiler]");
g_debug(" push ");
dstack_push(sm, &INLINE_SPOILER);
append(sm, true, "");
};
'[/spoiler]'i => {
g_debug("inline [/spoiler]");
dstack_close_before_block(sm);
if (dstack_check(sm, INLINE_SPOILER)) {
g_debug(" pop dstack");
g_debug(" print ");
dstack_pop(sm);
append(sm, true, "");
} else if (dstack_check(sm, BLOCK_SPOILER)) {
g_debug(" pop dstack");
g_debug(" print ");
g_debug(" return");
dstack_pop(sm);
append_block(sm, "");
fret;
} else {
append_block(sm, "[/spoiler]");
}
};
'[expand]'i => {
g_debug("inline [expand]");
dstack_rewind(sm);
// Move the read position back by 7 characters before returning to the
// caller.
fexec(sm->p - 7);
fret;
};
'[/expand]'i => {
dstack_close_before_block(sm);
if (dstack_check(sm, BLOCK_EXPAND)) {
append_block(sm, "");
dstack_pop(sm);
fret;
} else {
append_block(sm, "[/expand]");
}
};
'[nodtext]'i => {
dstack_push(sm, &INLINE_NODTEXT);
fcall nodtext;
};
# Closing a table cell ends the inline run that the table scanner started.
'[/th]'i => {
if (dstack_check(sm, BLOCK_TH)) {
dstack_pop(sm);
append_block(sm, "");
fret;
} else {
append_block(sm, "[/th]");
}
};
'[/td]'i => {
if (dstack_check(sm, BLOCK_TD)) {
dstack_pop(sm);
append_block(sm, "");
fret;
} else {
append_block(sm, "[/td]");
}
};
# The NUL byte appended by parse() marks the end of input: leave it for
# the caller to see.
'\0' => {
g_debug("inline 0");
g_debug(" return");
fhold;
fret;
};
# Two or more newlines end the inline run; the caller re-scans them.
newline{2,} => {
g_debug("inline newline2");
g_debug(" return");
if (sm->list_mode) {
dstack_close_list(sm);
}
fexec sm->ts;
fret;
};
# A single newline ends a header; otherwise it is emitted as markup.
newline => {
g_debug("inline newline");
if (sm->header_mode) {
sm->header_mode = false;
dstack_rewind(sm);
fret;
} else {
append(sm, true, "
");
}
};
# A carriage return that is not part of a newline becomes a space.
'\r' => {
append_c(sm, ' ');
};
any => {
g_debug("inline char: %c", fc);
append_c_html_escaped(sm, fc);
};
*|;
# Scanner for the body of [code]: every character is emitted HTML-escaped and
# no other markup is interpreted until [/code] or the terminating NUL.
code := |*
'[/code]'i => {
if (dstack_check(sm, BLOCK_CODE)) {
dstack_rewind(sm);
} else {
append(sm, true, "[/code]");
}
// Return to the caller in both cases.
fret;
};
'\0' => {
// Leave the NUL for the calling scanner.
fhold;
fret;
};
any => {
append_c_html_escaped(sm, fc);
};
*|;
# Scanner for the body of [nodtext]: text is emitted HTML-escaped and no other
# markup is interpreted. It is entered from both the main scanner
# (BLOCK_NODTEXT) and the inline scanner (INLINE_NODTEXT).
nodtext := |*
'[/nodtext]'i => {
if (dstack_check(sm, BLOCK_NODTEXT)) {
dstack_pop(sm);
append_block(sm, "
");
fret;
} else if (dstack_check(sm, INLINE_NODTEXT)) {
dstack_pop(sm);
fret;
} else {
// Not the innermost open element: emit the tag text and keep scanning.
append(sm, true, "[/nodtext]");
}
};
'\0' => {
// Leave the NUL for the calling scanner.
fhold;
fret;
};
any => {
append_c_html_escaped(sm, fc);
};
*|;
# Scanner for the body of [table]. Structural tags push/pop the matching
# element; cell tags ([th], [td]) hand over to the inline scanner, which
# returns here when it sees the closing cell tag. Any other character is
# discarded.
# NOTE(review): the markup literals passed to append_block() below look
# stripped in this copy -- confirm against the upstream file.
table := |*
'[thead]'i => {
dstack_push(sm, &BLOCK_THEAD);
append_block(sm, "");
};
'[/thead]'i => {
if (dstack_check(sm, BLOCK_THEAD)) {
dstack_pop(sm);
append_block(sm, "");
} else {
append(sm, true, "[/thead]");
}
};
'[tbody]'i => {
dstack_push(sm, &BLOCK_TBODY);
append_block(sm, "");
};
'[/tbody]'i => {
if (dstack_check(sm, BLOCK_TBODY)) {
dstack_pop(sm);
append_block(sm, "");
} else {
append(sm, true, "[/tbody]");
}
};
'[th]'i => {
dstack_push(sm, &BLOCK_TH);
append_block(sm, "");
fcall inline;
};
'[tr]'i => {
dstack_push(sm, &BLOCK_TR);
append_block(sm, " | ");
};
'[/tr]'i => {
if (dstack_check(sm, BLOCK_TR)) {
dstack_pop(sm);
append_block(sm, "
");
} else {
append(sm, true, "[/tr]");
}
};
'[td]'i => {
dstack_push(sm, &BLOCK_TD);
append_block(sm, "");
fcall inline;
};
'[/table]'i => {
if (dstack_check(sm, BLOCK_TABLE)) {
dstack_pop(sm);
append_block(sm, "");
fret;
} else {
append(sm, true, "[/table]");
}
};
'\0' => {
// Leave the NUL for the calling scanner.
fhold;
fret;
};
# Everything else between table tags is dropped.
any;
*|;
# List scanner: for every list item, adjusts the nesting depth relative to the
# previous item, opens the item and hands its text to the inline scanner.
# NOTE(review): the two `for` loops in the list_item action are damaged in
# this copy (`ilist_nest`, `i");`, unbalanced braces) and the markup literals
# look stripped -- restore them from the upstream file before building.
list := |*
list_item => {
int prev_nest = sm->list_nest;
append_closing_p_if(sm);
g_debug("list start");
sm->list_mode = true;
// The number of leading '*' characters is the nesting depth.
sm->list_nest = sm->a2 - sm->a1;
// Continue scanning at the start of the item text.
fexec sm->b1;
if (sm->list_nest > prev_nest) {
int i=0;
for (i=prev_nest; ilist_nest; ++i) {
g_debug(" dstack push ul");
g_debug(" print ");
append_block(sm, "");
dstack_push(sm, &BLOCK_UL);
}
} else if (sm->list_nest < prev_nest) {
int i=0;
for (i=sm->list_nest; i");
dstack_pop(sm);
append_block(sm, " ");
}
}
}
append_block(sm, "- ");
dstack_push(sm, &BLOCK_LI);
g_debug(" print
- ");
g_debug(" push li");
g_debug(" call inline");
fcall inline;
};
# exit list
(newline{2,} | '\0') => {
dstack_close_list(sm);
// Let the caller re-scan the terminator.
fexec sm->ts;
fret;
};
# A single newline between items is skipped.
newline;
# Anything else ends the list scanner: close the innermost element and
# return without consuming the character.
any => {
dstack_rewind(sm);
fhold;
fret;
};
*|;
# Top-level (block) scanner: recognises block-level openers, pushes the
# matching element on the dstack and delegates content to the inline, code,
# nodtext, table and list scanners.
# NOTE(review): the markup literals passed to append_block()/append() below
# look stripped in this copy -- confirm against the upstream file.
main := |*
header => {
// a1 points at the header level digit.
char header = *sm->a1;
// In inline mode every header is treated as level 6.
if (sm->f_inline) {
header = '6';
}
if (!sm->f_strip) {
switch (header) {
case '1':
dstack_push(sm, &BLOCK_H1);
append_block(sm, "
");
break;
case '2':
dstack_push(sm, &BLOCK_H2);
append_block(sm, "");
break;
case '3':
dstack_push(sm, &BLOCK_H3);
append_block(sm, "");
break;
case '4':
dstack_push(sm, &BLOCK_H4);
append_block(sm, "");
break;
case '5':
dstack_push(sm, &BLOCK_H5);
append_block(sm, "");
break;
case '6':
dstack_push(sm, &BLOCK_H6);
append_block(sm, "");
break;
}
}
// The inline scanner ends the header at the next newline.
sm->header_mode = true;
fcall inline;
};
'[quote]'i space* => {
g_debug("block [quote]");
g_debug(" push quote");
g_debug(" print ");
dstack_close_before_block(sm);
dstack_push(sm, &BLOCK_QUOTE);
append_block(sm, "");
};
'[spoiler]'i space* => {
g_debug("block [spoiler]");
g_debug(" push spoiler");
g_debug(" print ");
dstack_close_before_block(sm);
dstack_push(sm, &BLOCK_SPOILER);
append_block(sm, " ");
};
'[/spoiler]'i => {
g_debug("block [/spoiler]");
dstack_close_before_block(sm);
// An unmatched closing tag produces no output at block level.
if (dstack_check(sm, BLOCK_SPOILER)) {
g_debug(" rewind");
dstack_rewind(sm);
}
};
'[code]'i space* => {
g_debug("block [code]");
dstack_close_before_block(sm);
dstack_push(sm, &BLOCK_CODE);
append_block(sm, " ");
fcall code;
};
'[expand]'i space* => {
g_debug("block [expand]");
dstack_close_before_block(sm);
dstack_push(sm, &BLOCK_EXPAND);
append_block(sm, "");
append_block(sm, " ");
append_block(sm, " ");
};
aliased_expand space* => {
g_debug("block [expand=]");
dstack_close_before_block(sm);
dstack_push(sm, &BLOCK_EXPAND);
append_block(sm, " ");
append(sm, true, "");
// a1..a2 is the title given after "expand=".
append_segment_html_escaped(sm, sm->a1, sm->a2 - 1);
append(sm, true, "");
append_block(sm, " ");
append_block(sm, " ");
};
'[nodtext]'i space* => {
g_debug("block [nodtext]");
dstack_close_before_block(sm);
dstack_push(sm, &BLOCK_NODTEXT);
dstack_push(sm, &BLOCK_P);
append_block(sm, " ");
fcall nodtext;
};
'[table]'i => {
dstack_close_before_block(sm);
dstack_push(sm, &BLOCK_TABLE);
append_block(sm, " ");
fcall table;
};
'[tn]'i => {
dstack_push(sm, &BLOCK_TN);
append_block(sm, "");
fcall inline;
};
list_item => {
g_debug("block list");
g_debug(" call list");
sm->list_nest = 0;
sm->list_mode = true;
append_closing_p_if(sm);
// Rewind so the list scanner sees the whole item, including the stars.
fexec sm->ts;
fcall list;
};
# End of input (the NUL appended by parse()): close everything still open.
'\0' => {
g_debug("block 0");
g_debug(" close dstack");
dstack_close(sm);
};
newline{2,} => {
g_debug("block newline2");
if (sm->header_mode) {
sm->header_mode = false;
dstack_rewind(sm);
} else if (sm->list_mode) {
dstack_close_list(sm);
} else {
dstack_close_before_block(sm);
}
};
newline => {
g_debug("block newline");
};
# Any other character starts inline content. A paragraph is opened first
# when nothing is open or the innermost element is a quote, spoiler or
# expand block.
any => {
g_debug("block char: %c", fc);
// Do not consume the character; the inline scanner handles it.
fhold;
if (g_queue_is_empty(sm->dstack) || dstack_check(sm, BLOCK_QUOTE) || dstack_check(sm, BLOCK_SPOILER) || dstack_check(sm, BLOCK_EXPAND)) {
g_debug(" push p");
g_debug(" print ");
dstack_push(sm, &BLOCK_P);
append_block(sm, " ");
}
fcall inline;
};
*|;
}%%
%% write data;
static inline void underscore_string(char * str, size_t len) {
for (size_t i=0; idstack, (gpointer)element);
}
// Remove the innermost open element from the dstack and hand it back to the
// caller (whatever g_queue_pop_tail() yields, including NULL).
static inline int * dstack_pop(StateMachine * sm) {
  int * innermost = g_queue_pop_tail(sm->dstack);
  return innermost;
}
// Look at the innermost open element without removing it from the dstack.
static inline int * dstack_peek(StateMachine * sm) {
  int * innermost = g_queue_peek_tail(sm->dstack);
  return innermost;
}
// Report whether `element` (compared as a pointer by g_queue_find) is present
// anywhere on the dstack.
static inline bool dstack_search(StateMachine * sm, const int * element) {
  GList * hit = g_queue_find(sm->dstack, (gconstpointer)element);
  return hit != NULL;
}
// True when the dstack is non-empty and its innermost element has the value
// `expected_element`.
static inline bool dstack_check(StateMachine * sm, int expected_element) {
  const int * innermost = dstack_peek(sm);
  if (innermost == NULL) {
    return false;
  }
  return *innermost == expected_element;
}
// True when the dstack holds at least two elements and the second-innermost
// one has the value `expected_element`.
static inline bool dstack_check2(StateMachine * sm, int expected_element) {
  if (sm->dstack->length >= 2) {
    const int * second = g_queue_peek_nth(sm->dstack, sm->dstack->length - 2);
    return second != NULL && *second == expected_element;
  }
  return false;
}
// Append the NUL-terminated string `s` to the output. Markup (is_markup) is
// dropped when the strip option is active; plain text is always written.
static inline void append(StateMachine * sm, bool is_markup, const char * s) {
  if (is_markup && sm->f_strip) {
    return;
  }
  sm->output = g_string_append(sm->output, s);
}
// Append the single character `s` to the output, unescaped and regardless of
// the strip option.
static inline void append_c(StateMachine * sm, char s) {
  GString * grown = g_string_append_c(sm->output, s);
  sm->output = grown;
}
// Append the character `s` to the output, replacing the HTML-significant
// characters <, >, & and " with their entity references; every other
// character is copied unchanged.
//
// The entity literals had been decoded back to the raw characters in this
// copy (leaving the '"' case as an unterminated string), so nothing was being
// escaped; they are restored here.
static inline void append_c_html_escaped(StateMachine * sm, char s) {
  switch (s) {
    case '<':
      sm->output = g_string_append(sm->output, "&lt;");
      break;
    case '>':
      sm->output = g_string_append(sm->output, "&gt;");
      break;
    case '&':
      sm->output = g_string_append(sm->output, "&amp;");
      break;
    case '"':
      sm->output = g_string_append(sm->output, "&quot;");
      break;
    default:
      sm->output = g_string_append_c(sm->output, s);
      break;
  }
}
// Append the bytes from `a` through `b` (both inclusive) to the output without
// escaping. Markup segments are dropped when the strip option is active.
static inline void append_segment(StateMachine * sm, bool is_markup, const char * a, const char * b) {
  if (is_markup && sm->f_strip) {
    return;
  }
  sm->output = g_string_append_len(sm->output, a, b - a + 1);
}
// Append the bytes from `a` through `b` (both inclusive) to the output after
// URI-escaping them and then markup-escaping the result. Does nothing when
// the strip option is active.
static inline void append_segment_uri_escaped(StateMachine * sm, const char * a, const char * b) {
  if (sm->f_strip) {
    return;
  }
  // g_uri_escape_string() needs a NUL-terminated string, so copy the slice.
  GString * slice = g_string_new_len(a, b - a + 1);
  char * uri_escaped = g_uri_escape_string(slice->str, NULL, TRUE);
  char * markup_escaped = g_markup_escape_text(uri_escaped, -1);

  sm->output = g_string_append(sm->output, markup_escaped);

  g_free(markup_escaped);
  g_free(uri_escaped);
  g_string_free(slice, TRUE);
}
// Append the bytes from `a` through `b` (both inclusive) to the output after
// passing them through g_markup_escape_text(). Not affected by the strip
// option.
static inline void append_segment_html_escaped(StateMachine * sm, const char * a, const char * b) {
  gchar * escaped = g_markup_escape_text(a, b - a + 1);
  GString * grown = g_string_append(sm->output, escaped);
  sm->output = grown;
  g_free(escaped);
}
// Append block-level markup `s`. In inline mode a single space is written in
// its place; otherwise, in strip mode nothing is written; otherwise `s` is
// written as given. The inline option takes precedence over strip.
static inline void append_block(StateMachine * sm, const char * s) {
  if (sm->f_inline) {
    sm->output = g_string_append_c(sm->output, ' ');
    return;
  }
  if (!sm->f_strip) {
    sm->output = g_string_append(sm->output, s);
  }
}
// Close the current paragraph in the output.
//
// A trailing line break is removed first. If the output then ends with the
// paragraph's opening tag (the paragraph is empty), that tag is removed
// instead of emitting a closing tag.
//
// Two defects are fixed relative to the previous version:
//  * the compared literals had been stripped (a 4-byte compare against " "
//    and a 3-byte compare against ""), so neither check could match;
//  * the second check used the length captured before the first truncation,
//    so after removing a line break it inspected stale bytes past the end of
//    the string.
//
// NOTE(review): "<br>", "<p>" and "</p>" are reconstructed from the compare
// lengths (4 and 3) and the function name -- confirm against the upstream
// file.
static void append_closing_p(StateMachine * sm) {
  size_t len = sm->output->len;

  if (len > 4 && !strncmp(sm->output->str + len - 4, "<br>", 4)) {
    sm->output = g_string_truncate(sm->output, len - 4);
    // Re-read the length so the next check looks at the new tail.
    len = sm->output->len;
  }

  if (len > 3 && !strncmp(sm->output->str + len - 3, "<p>", 3)) {
    sm->output = g_string_truncate(sm->output, len - 3);
    return;
  }

  append_block(sm, "</p>");
}
// Close the paragraph only when a paragraph is the innermost open element;
// otherwise leave the dstack and the output untouched.
static void append_closing_p_if(StateMachine * sm) {
  if (dstack_check(sm, BLOCK_P)) {
    dstack_pop(sm);
    append_closing_p(sm);
  }
}
// Pop the innermost open element and emit whatever closes it. Does nothing
// when the dstack is empty.
//
// Inline elements are closed through append(sm, true, ...), so their closing
// markup is suppressed by the strip option; block elements go through
// append_block(), which also honours the inline option. BLOCK_P,
// BLOCK_NODTEXT and BLOCK_TN are closed as paragraphs via append_closing_p().
//
// NOTE(review): every closing-markup literal below is empty (or a lone
// space) in this copy; they look stripped rather than intentional -- restore
// them from the upstream file before building.
static void dstack_rewind(StateMachine * sm) {
int * element = dstack_pop(sm);
if (element == NULL) {
return;
}
if (*element == BLOCK_P) {
append_closing_p(sm);
} else if (*element == INLINE_SPOILER) {
append(sm, true, "");
} else if (*element == BLOCK_SPOILER) {
append_block(sm, "");
} else if (*element == BLOCK_QUOTE) {
append_block(sm, "");
} else if (*element == BLOCK_EXPAND) {
append_block(sm, "");
} else if (*element == BLOCK_NODTEXT) {
append_closing_p(sm);
} else if (*element == BLOCK_CODE) {
append_block(sm, "");
} else if (*element == BLOCK_TD) {
append_block(sm, "");
} else if (*element == INLINE_NODTEXT) {
// Inline nodtext produces no closing output.
} else if (*element == INLINE_B) {
append(sm, true, "");
} else if (*element == INLINE_I) {
append(sm, true, "");
} else if (*element == INLINE_U) {
append(sm, true, "");
} else if (*element == INLINE_S) {
append(sm, true, "");
} else if (*element == INLINE_TN) {
append(sm, true, "");
} else if (*element == BLOCK_TN) {
append_closing_p(sm);
} else if (*element == BLOCK_TABLE) {
append_block(sm, " ");
} else if (*element == BLOCK_THEAD) {
append_block(sm, "");
} else if (*element == BLOCK_TBODY) {
append_block(sm, "");
} else if (*element == BLOCK_TR) {
append_block(sm, "");
} else if (*element == BLOCK_UL) {
append_block(sm, "");
} else if (*element == BLOCK_LI) {
append_block(sm, "");
} else if (*element == BLOCK_H6) {
append_block(sm, "");
} else if (*element == BLOCK_H5) {
append_block(sm, "");
} else if (*element == BLOCK_H4) {
append_block(sm, "");
} else if (*element == BLOCK_H3) {
append_block(sm, "");
} else if (*element == BLOCK_H2) {
append_block(sm, "");
} else if (*element == BLOCK_H1) {
append_block(sm, "");
}
}
// Unwind the dstack while its innermost element is a paragraph, a list item
// or a list, closing each one in turn. Stops at the first element of any
// other kind (or when the dstack is empty).
static void dstack_close_before_block(StateMachine * sm) {
  for (;;) {
    if (dstack_check(sm, BLOCK_P)) {
      dstack_pop(sm);
      append_closing_p(sm);
      continue;
    }
    if (dstack_check(sm, BLOCK_LI) || dstack_check(sm, BLOCK_UL)) {
      dstack_rewind(sm);
      continue;
    }
    break;
  }
}
// Close every element that is still open, innermost first.
static void dstack_close(StateMachine * sm) {
  for (int * open = dstack_peek(sm); open != NULL; open = dstack_peek(sm)) {
    dstack_rewind(sm);
  }
}
// Close the list items and lists at the top of the dstack, then reset the
// list bookkeeping (mode flag and nesting depth).
static void dstack_close_list(StateMachine * sm) {
  for (;;) {
    bool in_list = dstack_check(sm, BLOCK_LI) || dstack_check(sm, BLOCK_UL);
    if (!in_list) {
      break;
    }
    dstack_rewind(sm);
  }
  sm->list_nest = 0;
  sm->list_mode = false;
}
// True when `c` is one of the characters  : ; . , ! ? ) ] < >
// The link and mention actions use this to keep such a character out of the
// generated link when it is the last character matched.
static inline bool is_boundary_c(char c) {
  static const char boundary_chars[] = {
    ':', ';', '.', ',', '!', '?', ')', ']', '<', '>'
  };
  for (size_t k = 0; k < sizeof boundary_chars; ++k) {
    if (boundary_chars[k] == c) {
      return true;
    }
  }
  return false;
}
// Debug helper: print the character at the current read position to stdout.
// Always returns true.
static bool print_machine(StateMachine * sm) {
  const char current = *sm->p;
  printf("p=%c\n", current);
  return true;
}
// Prepare `sm` for scanning the Ruby string `input`: point the cursor fields
// at the string's bytes, reset the scanner and option state, and allocate the
// output buffer, the Ragel call stack and the open-element stack.
static void init_machine(StateMachine * sm, VALUE input) {
  const char * text = RSTRING_PTR(input);
  long input_length = RSTRING_LEN(input);

  // Input cursor: [pb, pe) is the whole input and eof coincides with pe.
  sm->p = text;
  sm->pb = text;
  sm->pe = text + input_length;
  sm->eof = sm->pe;

  // Scanner bookkeeping and capture marks start out cleared.
  sm->ts = NULL;
  sm->te = NULL;
  sm->cs = 0;
  sm->act = 0;
  sm->top = 0;
  sm->a1 = NULL;
  sm->a2 = NULL;
  sm->b1 = NULL;
  sm->b2 = NULL;

  // Pre-size the output: twice the input for inputs below INT16_MAX / 2
  // bytes, otherwise the input length itself.
  size_t output_length = input_length;
  if (output_length < (INT16_MAX / 2)) {
    output_length *= 2;
  }
  sm->output = g_string_sized_new(output_length);

  // Call stack of ints (zero-filled on growth) with room for 16 entries
  // reserved, and an empty open-element stack.
  sm->stack = g_array_sized_new(FALSE, TRUE, sizeof(int), 16);
  sm->dstack = g_queue_new();

  // Options and mode flags default to off.
  sm->f_inline = false;
  sm->f_strip = false;
  sm->list_mode = false;
  sm->header_mode = false;
  sm->list_nest = 0;
  sm->d = 0;
  sm->b = 0;
}
// Release everything owned by the state machine, then the machine itself.
// `sm` must not be used after this call.
//
// The call stack array is freed together with its element buffer
// (free_segment = TRUE). Passing FALSE makes g_array_free() return the buffer
// to the caller instead of freeing it, which leaked the buffer on every
// parse.
static void free_machine(StateMachine * sm) {
  g_string_free(sm->output, TRUE);
  g_array_free(sm->stack, TRUE);
  // The queue only holds pointers to file-level constants, so there are no
  // elements to free.
  g_queue_free(sm->dstack);
  g_free(sm);
}
// DTextRagel.parse(input, options = nil)
//
// Runs the dtext machine over `input` and returns the generated output as a
// new UTF-8 encoded Ruby string. Returns nil when `input` is nil.
//
// Options (read with rb_hash_aref, so `options` is expected to be a Hash):
//   :strip  - when truthy, markup is left out of the output
//   :inline - when truthy, block markup is replaced by spaces and headers are
//             treated as level 6
//
// Raises ArgumentError when called without arguments. The machine itself
// raises SyntaxError when elements are nested too deeply.
static VALUE parse(int argc, VALUE * argv, VALUE self) {
  VALUE input;
  VALUE input0;
  VALUE options;
  VALUE opt_inline;
  VALUE opt_strip;
  VALUE ret;
  rb_encoding * encoding = NULL;
  StateMachine * sm = NULL;

  g_debug("start\n");

  if (argc == 0) {
    rb_raise(rb_eArgError, "wrong number of arguments (0 for 1)");
  }

  input = argv[0];
  if (NIL_P(input)) {
    return Qnil;
  }

  // Work on a private copy with a NUL byte appended: the scanners use '\0'
  // as their end-of-input token.
  input0 = rb_str_dup(input);
  sm = (StateMachine *)g_malloc0(sizeof(StateMachine));
  input0 = rb_str_cat(input0, "\0", 1);
  init_machine(sm, input0);

  if (argc > 1) {
    options = argv[1];
    if (!NIL_P(options)) {
      opt_strip = rb_hash_aref(options, ID2SYM(rb_intern("strip")));
      if (RTEST(opt_strip)) {
        sm->f_strip = true;
      }
      opt_inline = rb_hash_aref(options, ID2SYM(rb_intern("inline")));
      if (RTEST(opt_inline)) {
        sm->f_inline = true;
      }
    }
  }

  %% write init;
  %% write exec;

  // sm->p / sm->pe point into input0's buffer, but input0 itself is not
  // referenced after init_machine(). Without this guard the compiler may drop
  // it from the stack, letting the garbage collector reclaim the string while
  // the option lookups above or the scan are still running.
  RB_GC_GUARD(input0);

  dstack_close(sm);

  encoding = rb_enc_find("utf-8");
  ret = rb_enc_str_new(sm->output->str, sm->output->len, encoding);
  free_machine(sm);
  return ret;
}
// Extension entry point: defines the DTextRagel module and exposes parse() as
// its singleton method "parse", accepting a variable number of arguments.
void Init_dtext() {
  VALUE dtext_module = rb_define_module("DTextRagel");
  rb_define_singleton_method(dtext_module, "parse", parse, -1);
}
|