/*
 * Copyright (c) 2015, Vicent Marti
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "buffer.h"
#include "html.h"

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>

#if defined(_WIN32)
#define snprintf _snprintf
#endif

// Quote state carried across callbacks: each flag is non-zero while the
// corresponding kind of quote has been opened and not yet closed.
struct smartypants_data {
	int in_squote;
	int in_dquote;
};

// Every callback receives the output buffer, the shared quote state, the
// input byte preceding the trigger (0 at the start of input), a pointer to
// the trigger byte and the number of bytes available from that pointer.
// It returns how many bytes it consumed in addition to the trigger byte.
static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);
static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt,
	uint8_t previous_char, const uint8_t *text, size_t size);

// Callback table, indexed by the action codes stored in smartypants_cb_chars.
// Slot 0 is never called: action 0 means "plain byte, copy through".
static size_t (*smartypants_cb_ptrs[])
	(struct buf *, struct smartypants_data *, uint8_t, const uint8_t *, size_t) =
{
	NULL,					/* 0 */
	smartypants_cb__dash,			/* 1 */
	smartypants_cb__parens,			/* 2 */
	smartypants_cb__squote,			/* 3 */
	smartypants_cb__dquote,			/* 4 */
	smartypants_cb__amp,			/* 5 */
	smartypants_cb__period,			/* 6 */
	smartypants_cb__number,			/* 7 */
	smartypants_cb__ltag,			/* 8 */
	smartypants_cb__backtick,		/* 9 */
	smartypants_cb__escape,			/* 10 */
};

// Maps each possible input byte to an action code (an index into
// smartypants_cb_ptrs). Bytes not listed are zero-initialised, i.e. action 0.
// The character constants are the ASCII positions the table has always used.
static const uint8_t smartypants_cb_chars[256] = {
	['-']  = 1,
	['(']  = 2,
	['\''] = 3,
	['"']  = 4,
	['&']  = 5,
	['.']  = 6,
	['1']  = 7,
	['3']  = 7,
	['<']  = 8,
	['`']  = 9,
	['\\'] = 10,
};

// Non-zero when 'c' can delimit a word: NUL (used for "no character"),
// whitespace or punctuation.
static inline int
word_boundary(uint8_t c)
{
	return c == 0 || isspace(c) || ispunct(c);
}

// Same as word_boundary(), except that '/' is not a boundary, so the
// characters around a fraction such as 1/2 are judged correctly.
static inline int
fraction_boundary(uint8_t c)
{
	return c == 0 || isspace(c) || (c != '/' && ispunct(c));
}

// If 'text' begins with any spelling of a single quote (a literal "'" or one
// of the entities "&#39;", "&#x27;", "&apos;"), returns the length in bytes of
// that spelling. Otherwise, returns zero.
static size_t
squote_len(const uint8_t *text, size_t size)
{
	static const char *const single_quote_list[] = {
		"'", "&#39;", "&#x27;", "&apos;", NULL
	};
	const char *const *p;

	for (p = single_quote_list; *p; ++p) {
		size_t len = strlen(*p);
		if (size >= len && memcmp(text, *p, len) == 0)
			return len;
	}

	return 0;
}

// Converts " or ' at very beginning or end of a word to left or right quote.
//
// 'quote' is 's' or 'd' and selects the single or double entity; '*is_open'
// is the matching open/closed flag. A quote is only opened after a word
// boundary and only closed before one. On success the entity (&lsquo;,
// &rsquo;, &ldquo; or &rdquo;) is appended to 'ob', '*is_open' is toggled and
// 1 is returned; otherwise nothing is written and 0 is returned.
static int
smartypants_quotes(struct buf *ob, uint8_t previous_char, uint8_t next_char, uint8_t quote, int *is_open)
{
	char ent[8];	/* "&" + l/r + s/d + "quo;" + NUL == 8 bytes */

	if (*is_open && !word_boundary(next_char))
		return 0;

	if (!(*is_open) && !word_boundary(previous_char))
		return 0;

	snprintf(ent, sizeof(ent), "&%c%cquo;", (*is_open) ? 'r' : 'l', quote);
	*is_open = !(*is_open);
	bufputs(ob, ent);
	return 1;
}

// Converts a single quote to a left or right single quote (or an apostrophe);
// the quote may have been spelled in different forms, e.g. ' or &#39; or
// &apos;.
//
// 'squote_text' points to the original spelling of the quote and
// 'squote_size' is its length; it is copied verbatim when no rule applies.
// 'text' points at the LAST byte of that spelling (the "'" itself, or the
// ';' of an entity) and 'size' counts the bytes available from 'text'.
// Returns the number of bytes consumed after text[0].
static size_t
smartypants_squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size, const uint8_t *squote_text, size_t squote_size)
{
	if (size >= 2) {
		uint8_t t1 = tolower(text[1]);
		size_t next_squote_len = squote_len(text + 1, size - 1);

		// convert '' to &ldquo; or &rdquo;
		if (next_squote_len > 0) {
			uint8_t next_char = (size > 1 + next_squote_len) ? text[1 + next_squote_len] : 0;
			if (smartypants_quotes(ob, previous_char, next_char, 'd', &smrt->in_dquote))
				return next_squote_len;
		}

		// size >= 2 here, so text[1] is always readable
		if (smartypants_quotes(ob, previous_char, text[1], 's', &smrt->in_squote))
			return 0;

		// trailing single quotes: students', tryin'
		if (word_boundary(t1)) {
			BUFPUTSL(ob, "&rsquo;");
			return 0;
		}

		// Tom's, isn't, I'm, I'd
		// (size == 2 means the letter is the last byte; text[2] must not be read)
		if ((t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') &&
			(size == 2 || word_boundary(text[2]))) {
			BUFPUTSL(ob, "&rsquo;");
			return 0;
		}

		// you're, you'll, you've
		// (size == 3 means the two letters end the input; text[3] must not be read)
		if (size >= 3) {
			uint8_t t2 = tolower(text[2]);

			if (((t1 == 'r' && t2 == 'e') ||
				(t1 == 'l' && t2 == 'l') ||
				(t1 == 'v' && t2 == 'e')) &&
				(size == 3 || word_boundary(text[3]))) {
				BUFPUTSL(ob, "&rsquo;");
				return 0;
			}
		}
	}

	// No rule matched: emit the quote exactly as it was written.
	bufput(ob, squote_text, squote_size);
	return 0;
}

// Converts ' to left or right single quote.
static size_t smartypants_cb__squote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) { return smartypants_squote(ob, smrt, previous_char, text, size, text, 1); } // Converts (c), (r), (tm) static size_t smartypants_cb__parens(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) { if (size >= 3) { uint8_t t1 = tolower(text[1]); uint8_t t2 = tolower(text[2]); if (t1 == 'c' && t2 == ')') { BUFPUTSL(ob, "©"); return 2; } if (t1 == 'r' && t2 == ')') { BUFPUTSL(ob, "®"); return 2; } if (size >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')') { BUFPUTSL(ob, "™"); return 3; } } bufputc(ob, text[0]); return 0; } // Converts "--" to em-dash, etc. static size_t smartypants_cb__dash(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) { if (size >= 3 && text[1] == '-' && text[2] == '-') { BUFPUTSL(ob, "—"); return 2; } if (size >= 2 && text[1] == '-') { BUFPUTSL(ob, "–"); return 1; } bufputc(ob, text[0]); return 0; } // Converts " etc. static size_t smartypants_cb__amp(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) { if (size >= 6 && memcmp(text, """, 6) == 0) { if (smartypants_quotes(ob, previous_char, size >= 7 ? text[6] : 0, 'd', &smrt->in_dquote)) return 5; } int len = squote_len(text, size); if (len > 0) { return (len-1) + smartypants_squote(ob, smrt, previous_char, text+(len-1), size-(len-1), text, len); } if (size >= 4 && memcmp(text, "�", 4) == 0) return 3; bufputc(ob, '&'); return 0; } // Converts "..." to ellipsis static size_t smartypants_cb__period(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) { if (size >= 3 && text[1] == '.' && text[2] == '.') { BUFPUTSL(ob, "…"); return 2; } if (size >= 5 && text[1] == ' ' && text[2] == '.' 
&& text[3] == ' ' && text[4] == '.') { BUFPUTSL(ob, "…"); return 4; } bufputc(ob, text[0]); return 0; } // Converts `` to opening double quote static size_t smartypants_cb__backtick(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) { if (size >= 2 && text[1] == '`') { if (smartypants_quotes(ob, previous_char, size >= 3 ? text[2] : 0, 'd', &smrt->in_dquote)) return 1; } bufputc(ob, text[0]); return 0; } // Converts 1/2, 1/4, 3/4 static size_t smartypants_cb__number(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) { if (fraction_boundary(previous_char) && size >= 3) { if (text[0] == '1' && text[1] == '/' && text[2] == '2') { if (size == 3 || fraction_boundary(text[3])) { BUFPUTSL(ob, "½"); return 2; } } if (text[0] == '1' && text[1] == '/' && text[2] == '4') { if (size == 3 || fraction_boundary(text[3]) || (size >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h')) { BUFPUTSL(ob, "¼"); return 2; } } if (text[0] == '3' && text[1] == '/' && text[2] == '4') { if (size == 3 || fraction_boundary(text[3]) || (size >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's')) { BUFPUTSL(ob, "¾"); return 2; } } } bufputc(ob, text[0]); return 0; } // Converts " to left or right double quote static size_t smartypants_cb__dquote(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) { if (!smartypants_quotes(ob, previous_char, size > 0 ? 
text[1] : 0, 'd', &smrt->in_dquote)) BUFPUTSL(ob, """); return 0; } static size_t smartypants_cb__ltag(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) { static const char *skip_tags[] = { "pre", "code", "var", "samp", "kbd", "math", "script", "style" }; static const size_t skip_tags_count = 8; size_t next_to_closing_a = 0; size_t tag, i = 0; while (i < size && text[i] != '>') i++; for (tag = 0; tag < skip_tags_count; ++tag) { if (sdhtml_is_tag(text, size, skip_tags[tag]) == HTML_TAG_OPEN) break; } if (tag < skip_tags_count) { for (;;) { while (i < size && text[i] != '<') i++; if (i == size) break; if (sdhtml_is_tag(text + i, size - i, skip_tags[tag]) == HTML_TAG_CLOSE) break; i++; } while (i < size && text[i] != '>') i++; } if (sdhtml_is_tag(text, size, "a") == HTML_TAG_CLOSE) { while (i < size && text[i] != '>') i++; next_to_closing_a = 1; } bufput(ob, text, i + 1); // Pretty tricky: since people may refer to something or someone // with a link but use the possessive form right after it, we need // to check whether a single quote is next to a closing " tag. if (next_to_closing_a && strncmp("'", text+(i+1), 5) == 0) { bufput(ob, "’", 7); i += 5; } return i; } static size_t smartypants_cb__escape(struct buf *ob, struct smartypants_data *smrt, uint8_t previous_char, const uint8_t *text, size_t size) { if (size < 2) return 0; switch (text[1]) { case '\\': case '"': case '\'': case '.': case '-': case '`': bufputc(ob, text[1]); return 1; default: bufputc(ob, '\\'); return 0; } } #if 0 static struct { uint8_t c0; const uint8_t *pattern; const uint8_t *entity; int skip; } smartypants_subs[] = { { '\'', "'s>", "’", 0 }, { '\'', "'t>", "’", 0 }, { '\'', "'re>", "’", 0 }, { '\'', "'ll>", "’", 0 }, { '\'', "'ve>", "’", 0 }, { '\'', "'m>", "’", 0 }, { '\'', "'d>", "’", 0 }, { '-', "--", "—", 1 }, { '-', "<->", "–", 0 }, { '.', "...", "…", 2 }, { '.', ". . 
.", "…", 4 }, { '(', "(c)", "©", 2 }, { '(', "(r)", "®", 2 }, { '(', "(tm)", "™", 3 }, { '3', "<3/4>", "¾", 2 }, { '3', "<3/4ths>", "¾", 2 }, { '1', "<1/2>", "½", 2 }, { '1', "<1/4>", "¼", 2 }, { '1', "<1/4th>", "¼", 2 }, { '&', "�", 0, 3 }, }; #endif void sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size) { size_t i; struct smartypants_data smrt = {0, 0}; if (!text) return; bufgrow(ob, size); for (i = 0; i < size; ++i) { size_t org; uint8_t action = 0; org = i; while (i < size && (action = smartypants_cb_chars[text[i]]) == 0) i++; if (i > org) bufput(ob, text + org, i - org); if (i < size) { i += smartypants_cb_ptrs[(int)action] (ob, &smrt, i ? text[i - 1] : 0, text + i, size - i); } } }