ext/rinku/autolink.c in zendesk-rinku-1.7.2.1 vs ext/rinku/autolink.c in zendesk-rinku-1.7.3

- old
+ new

@@ -36,10 +36,13 @@ #if defined(_WIN32) #define strncasecmp _strnicmp #endif int +is_unicode_space(uint8_t *data, size_t offset); + +int sd_autolink_issafe(const uint8_t *link, size_t link_len) { static const size_t valid_uris_count = 5; static const char *valid_uris[] = { "/", "http://", "https://", "ftp://", "mailto:" @@ -76,11 +79,11 @@ link_end--; else if (data[link_end - 1] == ';') { size_t new_end = link_end - 2; - while (new_end > 0 && isalpha(data[new_end])) + while (new_end > 0 && (isalnum(data[new_end]) || data[new_end] == '#')) new_end--; if (new_end < link_end - 2 && data[new_end] == '&') link_end = new_end; else @@ -142,20 +145,46 @@ return link_end; } static size_t +autolink_delim_iter(uint8_t *data, size_t link_end, size_t offset, size_t size) +{ + size_t next_link_end; + int iterations = 0; + link_end = autolink_delim(data, link_end, offset, size); + + while(link_end != 0) { + next_link_end = autolink_delim(data, link_end, offset, size); + if (next_link_end == link_end || iterations > 5) { + break; + } + link_end = next_link_end; + iterations++; + } + + return link_end; +} + + +static size_t check_domain(uint8_t *data, size_t size, int allow_short) { size_t i, np = 0; - if (!isalnum(data[0])) - return 0; + if (data[0] == '.' || data[0] == '-') return 0; - for (i = 1; i < size - 1; ++i) { - if (data[i] == '.') np++; - else if (!isalnum(data[i]) && data[i] != '-') break; + for (i = 0; i < size - 1; i++) { + if (data[i] == '.') { + np++; + } else if ((data[i] >= 'a' && data[i] <= 'z') + || (data[i] >= 'A' && data[i] <= 'Z') + || (data[i] >= '0' && data[i] <= '9') + || (data[i] == '-')) { + } else { + break; + } } if (allow_short) { /* We don't need a valid domain in the strict sense (with * least one dot; so just make sure it's composed of valid @@ -186,21 +215,28 @@ if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0) return 0; link_end = check_domain(data, size, 0); - if (link_end == 0) + if (link_end == 0) { return 0; + } else if (data[link_end] > 127) { + if (!is_unicode_space(data, link_end)) { + /* we prevent linking a domain that contains unicode + * in case of sneaky lookalike attacks */ + return 0; + } + } else { + while (link_end < size && !is_unicode_space(data, link_end)) { + link_end++; + } + } - while (link_end < size && !isspace(data[link_end])) - link_end++; + link_end = autolink_delim_iter(data, link_end, max_rewind, size); - link_end = autolink_delim(data, link_end, max_rewind, size); + if (link_end == 0) { return 0; } - if (link_end == 0) - return 0; - bufput(link, data, link_end); *rewind_p = 0; return (int)link_end; } @@ -247,11 +283,11 @@ } if (link_end < 2 || nb != 1 || np == 0) return 0; - link_end = autolink_delim(data, link_end, max_rewind, size); + link_end = autolink_delim_iter(data, link_end, max_rewind, size); if (link_end == 0) return 0; bufput(link, data - rewind, link_end + rewind); @@ -289,18 +325,61 @@ if (domain_len == 0) return 0; link_end += domain_len; - while (link_end < size && !isspace(data[link_end])) - link_end++; - link_end = autolink_delim(data, link_end, max_rewind, size); + if (data[link_end] > 127) { + if (!is_unicode_space(data, link_end)) { return 0; } + } else { + while (link_end < size && !is_unicode_space(data, link_end)) { + link_end++; + } + } + link_end = autolink_delim_iter(data, link_end, max_rewind, size); + if (link_end == 0) return 0; bufput(link, data - rewind, link_end + rewind); *rewind_p = rewind; return link_end; +} + +int +is_unicode_space(uint8_t *data, size_t offset) { + + if(isspace(data[offset])) { + return 1; + } + + /* Unicode Whitespace list from https://en.wikipedia.org/wiki/Whitespace_character#Unicode */ + if (data[offset] == 0xE2) { + if (data[offset+1] == 0x80) { + if (data[offset+2] >= 0x80 && data[offset+2] <= 0x8C) { + return 1; + } else if (data[offset+2] == 0xA8 || data[offset+2] == 0xA9 || data[offset+2] == 0xAF) { + return 1; + } + } else if (data[offset+1] == 0x81) { + if (data[offset+2] == 0x9F) { + return 1; + } + } + } else if (data[offset] == 0xE3) { + if (data[offset+1] == 0x80) { + if (data[offset+2] == 0x80) { + return 1; + } + } + } else if (data[offset] == 0xEF) { + if (data[offset+1] == 0xBB) { + if (data[offset+2] == 0xBF) { + return 1; + } + } + } + + return 0; }