ext/rinku/rinku.c in rinku-1.7.3 vs ext/rinku/rinku.c in rinku-2.0.0

- old
+ new

@@ -1,7 +1,7 @@ /* - * Copyright (c) 2011, Vicent Marti + * Copyright (c) 2016, GitHub, Inc * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -11,84 +11,59 @@ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#define RSTRING_NOT_MODIFIED - +#include <string.h> +#include <stdlib.h> #include <stdio.h> -#include "ruby.h" +#include <assert.h> -#define RUBY_EXPORT __attribute__ ((visibility ("default"))) - -#ifdef HAVE_RUBY_ENCODING_H -#include <ruby/encoding.h> -#else -#define rb_enc_copy(dst, src) -#endif - +#include "rinku.h" #include "autolink.h" #include "buffer.h" +#include "utf8.h" -#include <string.h> -#include <stdlib.h> -#include <stdio.h> -#include <ctype.h> - -static VALUE rb_mRinku; - typedef enum { HTML_TAG_NONE = 0, HTML_TAG_OPEN, HTML_TAG_CLOSE, } html_tag; typedef enum { - AUTOLINK_URLS = (1 << 0), - AUTOLINK_EMAILS = (1 << 1), - AUTOLINK_ALL = AUTOLINK_URLS|AUTOLINK_EMAILS -} autolink_mode; - -typedef size_t (*autolink_parse_cb)( - size_t *rewind, struct buf *, uint8_t *, size_t, size_t, unsigned int); - -typedef enum { AUTOLINK_ACTION_NONE = 0, AUTOLINK_ACTION_WWW, AUTOLINK_ACTION_EMAIL, AUTOLINK_ACTION_URL, AUTOLINK_ACTION_SKIP_TAG } autolink_action; +typedef bool (*autolink_parse_cb)( + struct autolink_pos *, const uint8_t *, size_t, size_t, unsigned int); + static autolink_parse_cb g_callbacks[] = { NULL, - sd_autolink__www, /* 1 */ - sd_autolink__email,/* 2 */ - sd_autolink__url, /* 3 */ + autolink__www, /* 1 */ + autolink__email,/* 2 */ + autolink__url, /* 3 */ }; static const char *g_hrefs[] = { NULL, "<a href=\"http://", "<a href=\"mailto:", "<a href=\"", }; -static void -autolink__print(struct buf *ob, const struct buf *link, void *payload) -{ - bufput(ob, link->data, link->size); -} - /* * Rinku assumes valid HTML encoding for all input, but there's still * the case where a link can contain a double quote `"` that allows XSS. * * We need to properly escape the character we use for the `href` attribute * declaration */ -static void print_link(struct buf *ob, const char *link, size_t size) +static void print_link(struct buf *ob, const uint8_t *link, size_t size) { size_t i = 0, org; while (i < size) { org = i; @@ -133,11 +108,11 @@ } if (i == tag_size) return HTML_TAG_NONE; - if (isspace(tag_data[i]) || tag_data[i] == '>') + if (rinku_isspace(tag_data[i]) || tag_data[i] == '>') return closed ? HTML_TAG_CLOSE : HTML_TAG_OPEN; return HTML_TAG_NONE; } @@ -176,11 +151,10 @@ while (i < size && text[i] != '>') i++; } -// bufput(ob, text, i + 1); return i; } int rinku_autolink( @@ -189,24 +163,20 @@ size_t size, autolink_mode mode, unsigned int flags, const char *link_attr, const char **skip_tags, - void (*link_text_cb)(struct buf *ob, const struct buf *link, void *payload), + void (*link_text_cb)(struct buf *, const uint8_t *, size_t, void *), void *payload) { - size_t i, end, last_link_found = 0; - struct buf *link = bufnew(16); - char active_chars[256]; - void (*link_url_cb)(struct buf *, const struct buf *, void *); + size_t i, end; + char active_chars[256] = {0}; int link_count = 0; if (!text || size == 0) return 0; - memset(active_chars, 0x0, sizeof(active_chars)); - active_chars['<'] = AUTOLINK_ACTION_SKIP_TAG; if (mode & AUTOLINK_EMAILS) active_chars['@'] = AUTOLINK_ACTION_EMAIL; @@ -214,24 +184,22 @@ active_chars['w'] = AUTOLINK_ACTION_WWW; active_chars['W'] = AUTOLINK_ACTION_WWW; active_chars[':'] = AUTOLINK_ACTION_URL; } - if (link_text_cb == NULL) - link_text_cb = &autolink__print; - if (link_attr != NULL) { - while (isspace(*link_attr)) + while (rinku_isspace(*link_attr)) link_attr++; } bufgrow(ob, size); i = end = 0; while (i < size) { - size_t rewind, link_end; + struct autolink_pos link; + bool link_found; char action = 0; while (end < size && (action = active_chars[text[end]]) == 0) end++; @@ -242,230 +210,44 @@ } if (action == AUTOLINK_ACTION_SKIP_TAG) { end += autolink__skip_tag(ob, text + end, size - end, skip_tags); - continue; } - link->size = 0; + link_found = g_callbacks[(int)action]( + &link, text, end, size, flags); - link_end = g_callbacks[(int)action]( - &rewind, link, (uint8_t *)text + end, - end - last_link_found, - size - end, flags); + if (link_found && link.start >= i) { + const uint8_t *link_str = text + link.start; + const size_t link_len = link.end - link.start; - /* print the link */ - if (link_end > 0) { - bufput(ob, text + i, end - i - rewind); - + bufput(ob, text + i, link.start - i); bufputs(ob, g_hrefs[(int)action]); - print_link(ob, link->data, link->size); + print_link(ob, link_str, link_len); if (link_attr) { BUFPUTSL(ob, "\" "); bufputs(ob, link_attr); bufputc(ob, '>'); } else { BUFPUTSL(ob, "\">"); } - link_text_cb(ob, link, payload); + if (link_text_cb) { + link_text_cb(ob, link_str, link_len, payload); + } else { + bufput(ob, link_str, link_len); + } + BUFPUTSL(ob, "</a>"); link_count++; - i = end + link_end; - last_link_found = end = i; + end = i = link.end; } else { end = end + 1; } } - bufrelease(link); return link_count; } - - -/** - * Ruby code - */ -static void -autolink_callback(struct buf *link_text, const struct buf *link, void *block) -{ - VALUE rb_link, rb_link_text; - rb_link = rb_str_new(link->data, link->size); - rb_link_text = rb_funcall((VALUE)block, rb_intern("call"), 1, rb_link); - Check_Type(rb_link_text, T_STRING); - bufput(link_text, RSTRING_PTR(rb_link_text), RSTRING_LEN(rb_link_text)); -} - -const char **rinku_load_tags(VALUE rb_skip) -{ - const char **skip_tags; - size_t i, count; - - Check_Type(rb_skip, T_ARRAY); - - count = RARRAY_LEN(rb_skip); - skip_tags = xmalloc(sizeof(void *) * (count + 1)); - - for (i = 0; i < count; ++i) { - VALUE tag = rb_ary_entry(rb_skip, i); - Check_Type(tag, T_STRING); - skip_tags[i] = StringValueCStr(tag); - } - - skip_tags[count] = NULL; - return skip_tags; -} - -/* - * Document-method: auto_link - * - * call-seq: - * auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0) - * auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0) { |link_text| ... } - * - * Parses a block of text looking for "safe" urls or email addresses, - * and turns them into HTML links with the given attributes. - * - * NOTE: The block of text may or may not be HTML; if the text is HTML, - * Rinku will skip the relevant tags to prevent double-linking and linking - * inside `pre` blocks by default. - * - * NOTE: If the input text is HTML, it's expected to be already escaped. - * Rinku will perform no escaping. - * - * NOTE: Currently the follow protocols are considered safe and are the - * only ones that will be autolinked. - * - * http:// https:// ftp:// mailto:// - * - * Email addresses are also autolinked by default. URLs without a protocol - * specifier but starting with 'www.' will also be autolinked, defaulting to - * the 'http://' protocol. - * - * - `text` is a string in plain text or HTML markup. If the string is formatted in - * HTML, Rinku is smart enough to skip the links that are already enclosed in `<a>` - * tags.` - * - * - `mode` is a symbol, either `:all`, `:urls` or `:email_addresses`, - * which specifies which kind of links will be auto-linked. - * - * - `link_attr` is a string containing the link attributes for each link that - * will be generated. These attributes are not sanitized and will be include as-is - * in each generated link, e.g. - * - * ~~~~~ruby - * auto_link('http://www.pokemon.com', :all, 'target="_blank"') - * # => '<a href="http://www.pokemon.com" target="_blank">http://www.pokemon.com</a>' - * ~~~~~ - * - * This string can be autogenerated from a hash using the Rails `tag_options` helper. - * - * - `skip_tags` is a list of strings with the names of HTML tags that will be skipped - * when autolinking. If `nil`, this defaults to the value of the global `Rinku.skip_tags`, - * which is initially `["a", "pre", "code", "kbd", "script"]`. - * - * - `flag` is an optional boolean value specifying whether to recognize - * 'http://foo' as a valid domain, or require at least one '.'. It defaults to false. - * - * - `&block` is an optional block argument. If a block is passed, it will - * be yielded for each found link in the text, and its return value will be used instead - * of the name of the link. E.g. - * - * ~~~~~ruby - * auto_link('Check it out at http://www.pokemon.com') do |url| - * "THE POKEMAN WEBSITEZ" - * end - * # => 'Check it out at <a href="http://www.pokemon.com">THE POKEMAN WEBSITEZ</a>' - * ~~~~~~ - */ -static VALUE -rb_rinku_autolink(int argc, VALUE *argv, VALUE self) -{ - static const char *SKIP_TAGS[] = {"a", "pre", "code", "kbd", "script", NULL}; - - VALUE result, rb_text, rb_mode, rb_html, rb_skip, rb_flags, rb_block; - struct buf *output_buf; - int link_mode, count; - unsigned int link_flags = 0; - const char *link_attr = NULL; - const char **skip_tags = NULL; - ID mode_sym; - - rb_scan_args(argc, argv, "14&", &rb_text, &rb_mode, - &rb_html, &rb_skip, &rb_flags, &rb_block); - - Check_Type(rb_text, T_STRING); - - if (!NIL_P(rb_mode)) { - Check_Type(rb_mode, T_SYMBOL); - mode_sym = SYM2ID(rb_mode); - } else { - mode_sym = rb_intern("all"); - } - - if (!NIL_P(rb_html)) { - Check_Type(rb_html, T_STRING); - link_attr = RSTRING_PTR(rb_html); - } - - if (NIL_P(rb_skip)) - rb_skip = rb_iv_get(self, "@skip_tags"); - - if (NIL_P(rb_skip)) { - skip_tags = SKIP_TAGS; - } else { - skip_tags = rinku_load_tags(rb_skip); - } - - if (!NIL_P(rb_flags)) { - Check_Type(rb_flags, T_FIXNUM); - link_flags = FIX2INT(rb_flags); - } - - output_buf = bufnew(32); - - if (mode_sym == rb_intern("all")) - link_mode = AUTOLINK_ALL; - else if (mode_sym == rb_intern("email_addresses")) - link_mode = AUTOLINK_EMAILS; - else if (mode_sym == rb_intern("urls")) - link_mode = AUTOLINK_URLS; - else - rb_raise(rb_eTypeError, - "Invalid linking mode (possible values are :all, :urls, :email_addresses)"); - - count = rinku_autolink( - output_buf, - RSTRING_PTR(rb_text), - RSTRING_LEN(rb_text), - link_mode, - link_flags, - link_attr, - skip_tags, - RTEST(rb_block) ? &autolink_callback : NULL, - (void*)rb_block); - - if (count == 0) - result = rb_text; - else { - result = rb_str_new(output_buf->data, output_buf->size); - rb_enc_copy(result, rb_text); - } - - if (skip_tags != SKIP_TAGS) - xfree(skip_tags); - - bufrelease(output_buf); - return result; -} - -void RUBY_EXPORT Init_rinku() -{ - rb_mRinku = rb_define_module("Rinku"); - rb_define_method(rb_mRinku, "auto_link", rb_rinku_autolink, -1); - rb_define_const(rb_mRinku, "AUTOLINK_SHORT_DOMAINS", INT2FIX(SD_AUTOLINK_SHORT_DOMAINS)); -} -