ext/rinku/autolink.c in zendesk-rinku-1.7.2.1 vs ext/rinku/autolink.c in zendesk-rinku-1.7.3
- old
+ new
@@ -36,10 +36,13 @@
#if defined(_WIN32)
#define strncasecmp _strnicmp
#endif
int
+is_unicode_space(uint8_t *data, size_t offset);
+
+int
sd_autolink_issafe(const uint8_t *link, size_t link_len)
{
static const size_t valid_uris_count = 5;
static const char *valid_uris[] = {
"/", "http://", "https://", "ftp://", "mailto:"
@@ -76,11 +79,11 @@
link_end--;
else if (data[link_end - 1] == ';') {
size_t new_end = link_end - 2;
- while (new_end > 0 && isalpha(data[new_end]))
+ while (new_end > 0 && (isalnum(data[new_end]) || data[new_end] == '#'))
new_end--;
if (new_end < link_end - 2 && data[new_end] == '&')
link_end = new_end;
else
@@ -142,20 +145,46 @@
return link_end;
}
static size_t
autolink_delim_iter(uint8_t *data, size_t link_end, size_t offset, size_t size)
{
	/*
	 * Apply autolink_delim() repeatedly until the end offset it reports
	 * stops changing, the link collapses to length 0, or the pass cap
	 * below is exceeded. All arguments are forwarded unchanged to
	 * autolink_delim(); only link_end is updated between passes.
	 *
	 * Returns the last accepted link end (0 when a pass returned 0).
	 */
	size_t trimmed;
	int pass;

	link_end = autolink_delim(data, link_end, offset, size);

	for (pass = 0; link_end != 0; pass++) {
		trimmed = autolink_delim(data, link_end, offset, size);

		/* Stop on a fixed point, or once the pass counter exceeds 5;
		 * in the latter case the freshly computed value is discarded. */
		if (trimmed == link_end || pass > 5)
			break;

		link_end = trimmed;
	}

	return link_end;
}
+
+
+static size_t
check_domain(uint8_t *data, size_t size, int allow_short)
{
size_t i, np = 0;
- if (!isalnum(data[0]))
- return 0;
+ if (data[0] == '.' || data[0] == '-') return 0;
- for (i = 1; i < size - 1; ++i) {
- if (data[i] == '.') np++;
- else if (!isalnum(data[i]) && data[i] != '-') break;
+ for (i = 0; i < size - 1; i++) {
+ if (data[i] == '.') {
+ np++;
+ } else if ((data[i] >= 'a' && data[i] <= 'z')
+ || (data[i] >= 'A' && data[i] <= 'Z')
+ || (data[i] >= '0' && data[i] <= '9')
+ || (data[i] == '-')) {
+ } else {
+ break;
+ }
}
if (allow_short) {
/* We don't need a valid domain in the strict sense (with
* least one dot; so just make sure it's composed of valid
@@ -186,21 +215,28 @@
if (size < 4 || memcmp(data, "www.", strlen("www.")) != 0)
return 0;
link_end = check_domain(data, size, 0);
- if (link_end == 0)
+ if (link_end == 0) {
return 0;
+ } else if (data[link_end] > 127) {
+ if (!is_unicode_space(data, link_end)) {
+ /* we prevent linking a domain that contains unicode
+ * in case of sneaky lookalike attacks */
+ return 0;
+ }
+ } else {
+ while (link_end < size && !is_unicode_space(data, link_end)) {
+ link_end++;
+ }
+ }
- while (link_end < size && !isspace(data[link_end]))
- link_end++;
+ link_end = autolink_delim_iter(data, link_end, max_rewind, size);
- link_end = autolink_delim(data, link_end, max_rewind, size);
+ if (link_end == 0) { return 0; }
- if (link_end == 0)
- return 0;
-
bufput(link, data, link_end);
*rewind_p = 0;
return (int)link_end;
}
@@ -247,11 +283,11 @@
}
if (link_end < 2 || nb != 1 || np == 0)
return 0;
- link_end = autolink_delim(data, link_end, max_rewind, size);
+ link_end = autolink_delim_iter(data, link_end, max_rewind, size);
if (link_end == 0)
return 0;
bufput(link, data - rewind, link_end + rewind);
@@ -289,18 +325,61 @@
if (domain_len == 0)
return 0;
link_end += domain_len;
- while (link_end < size && !isspace(data[link_end]))
- link_end++;
- link_end = autolink_delim(data, link_end, max_rewind, size);
+ if (data[link_end] > 127) {
+ if (!is_unicode_space(data, link_end)) { return 0; }
+ } else {
+ while (link_end < size && !is_unicode_space(data, link_end)) {
+ link_end++;
+ }
+ }
+ link_end = autolink_delim_iter(data, link_end, max_rewind, size);
+
if (link_end == 0)
return 0;
bufput(link, data - rewind, link_end + rewind);
*rewind_p = rewind;
return link_end;
+}
+
/*
 * Report whether the bytes starting at data[offset] begin a whitespace
 * character: either a single byte accepted by isspace(), or one of a
 * fixed set of three-byte UTF-8 sequences.
 *
 * Returns 1 on a match, 0 otherwise.
 *
 * NOTE(review): no buffer length is passed in; when the lead byte is
 * 0xE2, 0xE3 or 0xEF this reads data[offset + 1] and possibly
 * data[offset + 2]. Callers presumably guarantee those bytes are
 * readable -- confirm.
 */
int
is_unicode_space(uint8_t *data, size_t offset)
{
	const uint8_t *seq = data + offset;

	if (isspace(seq[0]))
		return 1;

	/* Unicode Whitespace list from https://en.wikipedia.org/wiki/Whitespace_character#Unicode */
	switch (seq[0]) {
	case 0xE2:
		if (seq[1] == 0x80) {
			uint8_t last = seq[2];

			/* E2 80 80..8C encode U+2000..U+200C; E2 80 A8, A9 and AF
			 * encode U+2028, U+2029 and U+202F. */
			return (last >= 0x80 && last <= 0x8C)
				|| last == 0xA8 || last == 0xA9 || last == 0xAF;
		}
		/* E2 81 9F encodes U+205F. */
		return seq[1] == 0x81 && seq[2] == 0x9F;

	case 0xE3:
		/* E3 80 80 encodes U+3000. */
		return seq[1] == 0x80 && seq[2] == 0x80;

	case 0xEF:
		/* EF BB BF encodes U+FEFF. */
		return seq[1] == 0xBB && seq[2] == 0xBF;

	default:
		return 0;
	}
}