ext/re2/re2.cc in re2-0.3.0 vs ext/re2/re2.cc in re2-0.4.0

- old
+ new

@@ -1,32 +1,41 @@ /* * re2 (http://github.com/mudge/re2) * Ruby bindings to re2, an "efficient, principled regular expression library" * - * Copyright (c) 2010, Paul Mucur (http://mucur.name) + * Copyright (c) 2010-2012, Paul Mucur (http://mudge.name) * Released under the BSD Licence, please see LICENSE.txt */ #include <re2/re2.h> #include <string> +#include <sstream> using namespace std; extern "C" { #include <ruby.h> #define BOOL2RUBY(v) (v ? Qtrue : Qfalse) #define UNUSED(x) ((void)x) - #if !defined(RSTRING_LEN) - # define RSTRING_LEN(x) (RSTRING(x)->len) + #ifndef RSTRING_LEN + #define RSTRING_LEN(x) (RSTRING(x)->len) #endif - #if !defined(RSTRING_PTR) - # define RSTRING_PTR(x) (RSTRING(x)->ptr) + #ifndef RSTRING_PTR + #define RSTRING_PTR(x) (RSTRING(x)->ptr) #endif + #ifdef HAVE_ENDPOS_ARGUMENT + #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \ + (pattern->Match(text, startpos, endpos, anchor, match, nmatch)) + #else + #define match(pattern, text, startpos, endpos, anchor, match, nmatch) \ + (pattern->Match(text, startpos, anchor, match, nmatch)) + #endif + typedef struct { RE2 *pattern; } re2_pattern; typedef struct { @@ -75,11 +84,11 @@ /* * Returns a frozen copy of the string passed into +match+. * * @return [String] a frozen copy of the passed string. * @example - * m = RE2('(\d+)').match("bob 123") + * m = RE2::Regexp.new('(\d+)').match("bob 123") * m.string #=> "bob 123" */ static VALUE re2_matchdata_string(VALUE self) { @@ -92,11 +101,11 @@ /* * Returns the number of elements in the match array (including nils). * * @return [Fixnum] the number of elements * @example - * m = RE2('(\d+)').match("bob 123") + * m = RE2::Regexp.new('(\d+)').match("bob 123") * m.size #=> 2 * m.length #=> 2 */ static VALUE re2_matchdata_size(VALUE self) @@ -110,11 +119,11 @@ /* * Returns the {RE2::Regexp} used in the match. * * @return [RE2::Regexp] the regexp used in the match * @example - * m = RE2('(\d+)').match("bob 123") + * m = RE2::Regexp.new('(\d+)').match("bob 123") * m.regexp #=> #<RE2::Regexp /(\d+)/> */ static VALUE re2_matchdata_regexp(VALUE self) { @@ -133,30 +142,33 @@ /* * Returns the array of matches. * * @return [Array<String, nil>] the array of matches * @example - * m = RE2('(\d+)').match("bob 123") + * m = RE2::Regexp.new('(\d+)').match("bob 123") * m.to_a #=> ["123", "123"] */ static VALUE re2_matchdata_to_a(VALUE self) { int i; re2_matchdata *m; re2::StringPiece match; + VALUE array; Data_Get_Struct(self, re2_matchdata, m); - VALUE array = rb_ary_new2(m->number_of_matches); + + array = rb_ary_new2(m->number_of_matches); for (i = 0; i < m->number_of_matches; i++) { if (m->matches[i].empty()) { rb_ary_push(array, Qnil); } else { match = m->matches[i]; rb_ary_push(array, rb_str_new(match.data(), match.size())); } } + return array; } static VALUE re2_matchdata_nth_match(int nth, VALUE self) @@ -205,39 +217,39 @@ * Access a particular match by index. * * @param [Fixnum] index the index of the match to fetch * @return [String, nil] the specified match * @example - * m = RE2('(\d+)').match("bob 123") + * m = RE2::Regexp.new('(\d+)').match("bob 123") * m[0] #=> "123" * * @overload [](start, length) * Access a range of matches by starting index and length. * * @param [Fixnum] start the index from which to start * @param [Fixnum] length the number of elements to fetch * @return [Array<String, nil>] the specified matches * @example - * m = RE2('(\d+)').match("bob 123") + * m = RE2::Regexp.new('(\d+)').match("bob 123") * m[0, 1] #=> ["123"] * * @overload [](range) * Access a range of matches by index. * * @param [Range] range the range of match indexes to fetch * @return [Array<String, nil>] the specified matches * @example - * m = RE2('(\d+)').match("bob 123") + * m = RE2::Regexp.new('(\d+)').match("bob 123") * m[0..1] #=> "[123", "123"] * * @overload [](name) * Access a particular match by name. * * @param [String, Symbol] name the name of the match to fetch * @return [String, nil] the specific match * @example - * m = RE2('(?P<number>\d+)').match("bob 123") + * m = RE2::Regexp.new('(?P<number>\d+)').match("bob 123") * m["number"] #=> "123" * m[:number] #=> "123" */ static VALUE re2_matchdata_aref(int argc, VALUE *argv, VALUE self) @@ -270,46 +282,47 @@ /* * Returns a printable version of the match. * * @return [String] a printable version of the match * @example - * m = RE2('(\d+)').match("bob 123") + * m = RE2::Regexp.new('(\d+)').match("bob 123") * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">" */ static VALUE re2_matchdata_inspect(VALUE self) { int i; re2_matchdata *m; VALUE match, result; + ostringstream output; Data_Get_Struct(self, re2_matchdata, m); result = rb_str_new("#<RE2::MatchData", 16); + output << "#<RE2::MatchData"; + for (i = 0; i < m->number_of_matches; i++) { - rb_str_cat(result, " ", 1); + output << " "; if (i > 0) { - char buf[sizeof(i)*3+1]; - snprintf(buf, sizeof(buf), "%d", i); - rb_str_cat2(result, buf); - rb_str_cat(result, ":", 1); + output << i << ":"; } match = re2_matchdata_nth_match(i, self); if (match == Qnil) { - rb_str_cat(result, "nil", 3); + output << "nil"; } else { - rb_str_cat(result, "\"", 1); - rb_str_cat(result, RSTRING_PTR(match), RSTRING_LEN(match)); - rb_str_cat(result, "\"", 1); + output << "\"" << StringValuePtr(match) << "\""; } } - rb_str_cat(result, ">", 1); + output << ">"; + + result = rb_str_new(output.str().data(), output.str().length()); + return result; } /* * Returns a new RE2 object with a compiled version of @@ -457,16 +470,19 @@ */ static VALUE re2_regexp_inspect(VALUE self) { re2_pattern *p; - VALUE result = rb_str_new("#<RE2::Regexp /", 15); + VALUE result; + ostringstream output; Data_Get_Struct(self, re2_pattern, p); - rb_str_cat(result, p->pattern->pattern().data(), p->pattern->pattern().size()); - rb_str_cat(result, "/>", 2); + output << "#<RE2::Regexp /" << p->pattern->pattern() << "/>"; + + result = rb_str_new(output.str().data(), output.str().length()); + return result; } /* * Returns a string version of the regular expression +re2+. @@ -704,34 +720,42 @@ return BOOL2RUBY(p->pattern->options().one_line()); } /* * If the RE2 could not be created properly, returns an - * error string. + * error string otherwise returns nil. * - * @return [String] the error string + * @return [String, nil] the error string or nil */ static VALUE re2_regexp_error(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); - return rb_str_new(p->pattern->error().data(), p->pattern->error().size()); + if (p->pattern->ok()) { + return Qnil; + } else { + return rb_str_new(p->pattern->error().data(), p->pattern->error().size()); + } } /* * If the RE2 could not be created properly, returns - * the offending portion of the regexp. + * the offending portion of the regexp otherwise returns nil. * - * @return [String] the offending portion of the regexp + * @return [String, nil] the offending portion of the regexp or nil */ static VALUE re2_regexp_error_arg(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); - return rb_str_new(p->pattern->error_arg().data(), p->pattern->error_arg().size()); + if (p->pattern->ok()) { + return Qnil; + } else { + return rb_str_new(p->pattern->error_arg().data(), p->pattern->error_arg().size()); + } } /* * Returns the program size, a very approximate measure * of a regexp's "cost". Larger numbers are more expensive @@ -858,11 +882,11 @@ * @param [String] text the text to search * @return [RE2::MatchData] the matches * @raise [NoMemoryError] if there was not enough memory to allocate the matches * @example * r = RE2::Regexp.new('w(o)(o)') - * r.match('woo) #=> #<RE2::MatchData "woo" 1:"o" 2:"o"> + * r.match('woo') #=> #<RE2::MatchData "woo" 1:"o" 2:"o"> * * @overload match(text, 0) * Returns either true or false indicating whether a * successful match was made. * @@ -905,15 +929,11 @@ } else { n = p->pattern->NumberOfCapturingGroups(); } if (n == 0) { -#if defined(HAVE_ENDPOS_ARGUMENT) - matched = p->pattern->Match(StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, 0, 0); -#else - matched = p->pattern->Match(StringValuePtr(text), 0, RE2::UNANCHORED, 0, 0); -#endif + matched = match(p->pattern, StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, 0, 0); return BOOL2RUBY(matched); } else { /* Because match returns the whole match as well. */ n += 1; @@ -929,15 +949,11 @@ rb_raise(rb_eNoMemError, "not enough memory to allocate StringPieces for matches"); } m->number_of_matches = n; -#if defined(HAVE_ENDPOS_ARGUMENT) - matched = p->pattern->Match(StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, m->matches, n); -#else - matched = p->pattern->Match(StringValuePtr(text), 0, RE2::UNANCHORED, m->matches, n); -#endif + matched = match(p->pattern, StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, m->matches, n); if (matched) { return matchdata; } else { return Qnil; @@ -966,16 +982,17 @@ * +rewrite+ <i>in place</i>. * * @param [String] str the string to modify * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced * @param [String] rewrite the string to replace with + * @return [String] the resulting string * @example - * RE2::Replace("hello there", "hello", "howdy") #=> "howdy there" + * RE2.Replace("hello there", "hello", "howdy") #=> "howdy there" * re2 = RE2.new("hel+o") - * RE2::Replace("hello there", re2, "yo") #=> "yo there" + * RE2.Replace("hello there", re2, "yo") #=> "yo there" * text = "Good morning" - * RE2::Replace(text, "morn", "even") #=> "Good evening" + * RE2.Replace(text, "morn", "even") #=> "Good evening" * text #=> "Good evening" */ static VALUE re2_Replace(VALUE self, VALUE str, VALUE pattern, VALUE rewrite) { @@ -1015,15 +1032,16 @@ * +rewrite+ <i>in place</i>. * * @param [String] str the string to modify * @param [String, RE2::Regexp] pattern a regexp matching text to be replaced * @param [String] rewrite the string to replace with + * @return [String] the resulting string * @example - * RE2::GlobalReplace("hello there", "e", "i") #=> "hillo thiri" + * RE2.GlobalReplace("hello there", "e", "i") #=> "hillo thiri" * re2 = RE2.new("oo?") - * RE2::GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps" + * RE2.GlobalReplace("whoops-doops", re2, "e") #=> "wheps-deps" * text = "Good morning" - * RE2::GlobalReplace(text, "o", "ee") #=> "Geeeed meerning" + * RE2.GlobalReplace(text, "o", "ee") #=> "Geeeed meerning" * text #=> "Geeeed meerning" */ static VALUE re2_GlobalReplace(VALUE self, VALUE str, VALUE pattern, VALUE rewrite) {