ext/rinku/rinku.c in rinku-1.7.3 vs ext/rinku/rinku.c in rinku-2.0.0
- old
+ new
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 2011, Vicent Marti
+ * Copyright (c) 2016, GitHub, Inc
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
@@ -11,84 +11,59 @@
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
-#define RSTRING_NOT_MODIFIED
-
+#include <string.h>
+#include <stdlib.h>
#include <stdio.h>
-#include "ruby.h"
+#include <assert.h>
-#define RUBY_EXPORT __attribute__ ((visibility ("default")))
-
-#ifdef HAVE_RUBY_ENCODING_H
-#include <ruby/encoding.h>
-#else
-#define rb_enc_copy(dst, src)
-#endif
-
+#include "rinku.h"
#include "autolink.h"
#include "buffer.h"
+#include "utf8.h"
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-
-static VALUE rb_mRinku;
-
typedef enum {
HTML_TAG_NONE = 0,
HTML_TAG_OPEN,
HTML_TAG_CLOSE,
} html_tag;
typedef enum {
- AUTOLINK_URLS = (1 << 0),
- AUTOLINK_EMAILS = (1 << 1),
- AUTOLINK_ALL = AUTOLINK_URLS|AUTOLINK_EMAILS
-} autolink_mode;
-
-typedef size_t (*autolink_parse_cb)(
- size_t *rewind, struct buf *, uint8_t *, size_t, size_t, unsigned int);
-
-typedef enum {
AUTOLINK_ACTION_NONE = 0,
AUTOLINK_ACTION_WWW,
AUTOLINK_ACTION_EMAIL,
AUTOLINK_ACTION_URL,
AUTOLINK_ACTION_SKIP_TAG
} autolink_action;
+typedef bool (*autolink_parse_cb)(
+ struct autolink_pos *, const uint8_t *, size_t, size_t, unsigned int);
+
static autolink_parse_cb g_callbacks[] = {
NULL,
- sd_autolink__www, /* 1 */
- sd_autolink__email,/* 2 */
- sd_autolink__url, /* 3 */
+ autolink__www, /* 1 */
+ autolink__email,/* 2 */
+ autolink__url, /* 3 */
};
static const char *g_hrefs[] = {
NULL,
"<a href=\"http://",
"<a href=\"mailto:",
"<a href=\"",
};
-static void
-autolink__print(struct buf *ob, const struct buf *link, void *payload)
-{
- bufput(ob, link->data, link->size);
-}
-
/*
* Rinku assumes valid HTML encoding for all input, but there's still
* the case where a link can contain a double quote `"` that allows XSS.
*
* We need to properly escape the character we use for the `href` attribute
* declaration
*/
-static void print_link(struct buf *ob, const char *link, size_t size)
+static void print_link(struct buf *ob, const uint8_t *link, size_t size)
{
size_t i = 0, org;
while (i < size) {
org = i;
@@ -133,11 +108,11 @@
}
if (i == tag_size)
return HTML_TAG_NONE;
- if (isspace(tag_data[i]) || tag_data[i] == '>')
+ if (rinku_isspace(tag_data[i]) || tag_data[i] == '>')
return closed ? HTML_TAG_CLOSE : HTML_TAG_OPEN;
return HTML_TAG_NONE;
}
@@ -176,11 +151,10 @@
while (i < size && text[i] != '>')
i++;
}
-// bufput(ob, text, i + 1);
return i;
}
int
rinku_autolink(
@@ -189,24 +163,20 @@
size_t size,
autolink_mode mode,
unsigned int flags,
const char *link_attr,
const char **skip_tags,
- void (*link_text_cb)(struct buf *ob, const struct buf *link, void *payload),
+ void (*link_text_cb)(struct buf *, const uint8_t *, size_t, void *),
void *payload)
{
- size_t i, end, last_link_found = 0;
- struct buf *link = bufnew(16);
- char active_chars[256];
- void (*link_url_cb)(struct buf *, const struct buf *, void *);
+ size_t i, end;
+ char active_chars[256] = {0};
int link_count = 0;
if (!text || size == 0)
return 0;
- memset(active_chars, 0x0, sizeof(active_chars));
-
active_chars['<'] = AUTOLINK_ACTION_SKIP_TAG;
if (mode & AUTOLINK_EMAILS)
active_chars['@'] = AUTOLINK_ACTION_EMAIL;
@@ -214,24 +184,22 @@
active_chars['w'] = AUTOLINK_ACTION_WWW;
active_chars['W'] = AUTOLINK_ACTION_WWW;
active_chars[':'] = AUTOLINK_ACTION_URL;
}
- if (link_text_cb == NULL)
- link_text_cb = &autolink__print;
-
if (link_attr != NULL) {
- while (isspace(*link_attr))
+ while (rinku_isspace(*link_attr))
link_attr++;
}
bufgrow(ob, size);
i = end = 0;
while (i < size) {
- size_t rewind, link_end;
+ struct autolink_pos link;
+ bool link_found;
char action = 0;
while (end < size && (action = active_chars[text[end]]) == 0)
end++;
@@ -242,230 +210,44 @@
}
if (action == AUTOLINK_ACTION_SKIP_TAG) {
end += autolink__skip_tag(ob,
text + end, size - end, skip_tags);
-
continue;
}
- link->size = 0;
+ link_found = g_callbacks[(int)action](
+ &link, text, end, size, flags);
- link_end = g_callbacks[(int)action](
- &rewind, link, (uint8_t *)text + end,
- end - last_link_found,
- size - end, flags);
+ if (link_found && link.start >= i) {
+ const uint8_t *link_str = text + link.start;
+ const size_t link_len = link.end - link.start;
- /* print the link */
- if (link_end > 0) {
- bufput(ob, text + i, end - i - rewind);
-
+ bufput(ob, text + i, link.start - i);
bufputs(ob, g_hrefs[(int)action]);
- print_link(ob, link->data, link->size);
+ print_link(ob, link_str, link_len);
if (link_attr) {
BUFPUTSL(ob, "\" ");
bufputs(ob, link_attr);
bufputc(ob, '>');
} else {
BUFPUTSL(ob, "\">");
}
- link_text_cb(ob, link, payload);
+ if (link_text_cb) {
+ link_text_cb(ob, link_str, link_len, payload);
+ } else {
+ bufput(ob, link_str, link_len);
+ }
+
BUFPUTSL(ob, "</a>");
link_count++;
- i = end + link_end;
- last_link_found = end = i;
+ end = i = link.end;
} else {
end = end + 1;
}
}
- bufrelease(link);
return link_count;
}
-
-
-/**
- * Ruby code
- */
-static void
-autolink_callback(struct buf *link_text, const struct buf *link, void *block)
-{
- VALUE rb_link, rb_link_text;
- rb_link = rb_str_new(link->data, link->size);
- rb_link_text = rb_funcall((VALUE)block, rb_intern("call"), 1, rb_link);
- Check_Type(rb_link_text, T_STRING);
- bufput(link_text, RSTRING_PTR(rb_link_text), RSTRING_LEN(rb_link_text));
-}
-
-const char **rinku_load_tags(VALUE rb_skip)
-{
- const char **skip_tags;
- size_t i, count;
-
- Check_Type(rb_skip, T_ARRAY);
-
- count = RARRAY_LEN(rb_skip);
- skip_tags = xmalloc(sizeof(void *) * (count + 1));
-
- for (i = 0; i < count; ++i) {
- VALUE tag = rb_ary_entry(rb_skip, i);
- Check_Type(tag, T_STRING);
- skip_tags[i] = StringValueCStr(tag);
- }
-
- skip_tags[count] = NULL;
- return skip_tags;
-}
-
-/*
- * Document-method: auto_link
- *
- * call-seq:
- * auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0)
- * auto_link(text, mode=:all, link_attr=nil, skip_tags=nil, flags=0) { |link_text| ... }
- *
- * Parses a block of text looking for "safe" urls or email addresses,
- * and turns them into HTML links with the given attributes.
- *
- * NOTE: The block of text may or may not be HTML; if the text is HTML,
- * Rinku will skip the relevant tags to prevent double-linking and linking
- * inside `pre` blocks by default.
- *
- * NOTE: If the input text is HTML, it's expected to be already escaped.
- * Rinku will perform no escaping.
- *
- * NOTE: Currently the follow protocols are considered safe and are the
- * only ones that will be autolinked.
- *
- * http:// https:// ftp:// mailto://
- *
- * Email addresses are also autolinked by default. URLs without a protocol
- * specifier but starting with 'www.' will also be autolinked, defaulting to
- * the 'http://' protocol.
- *
- * - `text` is a string in plain text or HTML markup. If the string is formatted in
- * HTML, Rinku is smart enough to skip the links that are already enclosed in `<a>`
- * tags.`
- *
- * - `mode` is a symbol, either `:all`, `:urls` or `:email_addresses`,
- * which specifies which kind of links will be auto-linked.
- *
- * - `link_attr` is a string containing the link attributes for each link that
- * will be generated. These attributes are not sanitized and will be include as-is
- * in each generated link, e.g.
- *
- * ~~~~~ruby
- * auto_link('http://www.pokemon.com', :all, 'target="_blank"')
- * # => '<a href="http://www.pokemon.com" target="_blank">http://www.pokemon.com</a>'
- * ~~~~~
- *
- * This string can be autogenerated from a hash using the Rails `tag_options` helper.
- *
- * - `skip_tags` is a list of strings with the names of HTML tags that will be skipped
- * when autolinking. If `nil`, this defaults to the value of the global `Rinku.skip_tags`,
- * which is initially `["a", "pre", "code", "kbd", "script"]`.
- *
- * - `flag` is an optional boolean value specifying whether to recognize
- * 'http://foo' as a valid domain, or require at least one '.'. It defaults to false.
- *
- * - `&block` is an optional block argument. If a block is passed, it will
- * be yielded for each found link in the text, and its return value will be used instead
- * of the name of the link. E.g.
- *
- * ~~~~~ruby
- * auto_link('Check it out at http://www.pokemon.com') do |url|
- * "THE POKEMAN WEBSITEZ"
- * end
- * # => 'Check it out at <a href="http://www.pokemon.com">THE POKEMAN WEBSITEZ</a>'
- * ~~~~~~
- */
-static VALUE
-rb_rinku_autolink(int argc, VALUE *argv, VALUE self)
-{
- static const char *SKIP_TAGS[] = {"a", "pre", "code", "kbd", "script", NULL};
-
- VALUE result, rb_text, rb_mode, rb_html, rb_skip, rb_flags, rb_block;
- struct buf *output_buf;
- int link_mode, count;
- unsigned int link_flags = 0;
- const char *link_attr = NULL;
- const char **skip_tags = NULL;
- ID mode_sym;
-
- rb_scan_args(argc, argv, "14&", &rb_text, &rb_mode,
- &rb_html, &rb_skip, &rb_flags, &rb_block);
-
- Check_Type(rb_text, T_STRING);
-
- if (!NIL_P(rb_mode)) {
- Check_Type(rb_mode, T_SYMBOL);
- mode_sym = SYM2ID(rb_mode);
- } else {
- mode_sym = rb_intern("all");
- }
-
- if (!NIL_P(rb_html)) {
- Check_Type(rb_html, T_STRING);
- link_attr = RSTRING_PTR(rb_html);
- }
-
- if (NIL_P(rb_skip))
- rb_skip = rb_iv_get(self, "@skip_tags");
-
- if (NIL_P(rb_skip)) {
- skip_tags = SKIP_TAGS;
- } else {
- skip_tags = rinku_load_tags(rb_skip);
- }
-
- if (!NIL_P(rb_flags)) {
- Check_Type(rb_flags, T_FIXNUM);
- link_flags = FIX2INT(rb_flags);
- }
-
- output_buf = bufnew(32);
-
- if (mode_sym == rb_intern("all"))
- link_mode = AUTOLINK_ALL;
- else if (mode_sym == rb_intern("email_addresses"))
- link_mode = AUTOLINK_EMAILS;
- else if (mode_sym == rb_intern("urls"))
- link_mode = AUTOLINK_URLS;
- else
- rb_raise(rb_eTypeError,
- "Invalid linking mode (possible values are :all, :urls, :email_addresses)");
-
- count = rinku_autolink(
- output_buf,
- RSTRING_PTR(rb_text),
- RSTRING_LEN(rb_text),
- link_mode,
- link_flags,
- link_attr,
- skip_tags,
- RTEST(rb_block) ? &autolink_callback : NULL,
- (void*)rb_block);
-
- if (count == 0)
- result = rb_text;
- else {
- result = rb_str_new(output_buf->data, output_buf->size);
- rb_enc_copy(result, rb_text);
- }
-
- if (skip_tags != SKIP_TAGS)
- xfree(skip_tags);
-
- bufrelease(output_buf);
- return result;
-}
-
-void RUBY_EXPORT Init_rinku()
-{
- rb_mRinku = rb_define_module("Rinku");
- rb_define_method(rb_mRinku, "auto_link", rb_rinku_autolink, -1);
- rb_define_const(rb_mRinku, "AUTOLINK_SHORT_DOMAINS", INT2FIX(SD_AUTOLINK_SHORT_DOMAINS));
-}
-