ext/re2/re2.cc in re2-0.0.3 vs ext/re2/re2.cc in re2-0.0.4

- old
+ new

@@ -17,58 +17,261 @@ #if !defined(RSTRING_LEN) # define RSTRING_LEN(x) (RSTRING(x)->len) #endif - struct re2_pattern { + typedef struct { RE2 *pattern; - }; + } re2_pattern; - VALUE re2_cRE2; + typedef struct { + re2::StringPiece *matches; + int number_of_matches; + VALUE regexp, string; + } re2_matchdata; + VALUE re2_mRE2, re2_cRegexp, re2_cMatchData; + /* Symbols used in RE2 options. */ static ID id_utf8, id_posix_syntax, id_longest_match, id_log_errors, id_max_mem, id_literal, id_never_nl, id_case_sensitive, id_perl_classes, id_word_boundary, id_one_line; + void re2_matchdata_mark(re2_matchdata* self) + { + rb_gc_mark(self->regexp); + rb_gc_mark(self->string); + } + + void re2_matchdata_free(re2_matchdata* self) + { + if (self->matches) { + delete[] self->matches; + } + free(self); + } + void - re2_free(re2_pattern* self) + re2_regexp_free(re2_pattern* self) { if (self->pattern) { delete self->pattern; } free(self); } static VALUE - re2_allocate(VALUE klass) + re2_matchdata_allocate(VALUE klass) { + re2_matchdata *m; + return Data_Make_Struct(klass, re2_matchdata, re2_matchdata_mark, re2_matchdata_free, m); + } + + /* + * call-seq: + * match.string -> string + * + * Returns a frozen copy of the string passed into +match+. + * + * m = RE2('(\d+)').match("bob 123") + * m.string #=> "bob 123" + */ + static VALUE + re2_matchdata_string(VALUE self) + { + re2_matchdata *m; + Data_Get_Struct(self, re2_matchdata, m); + + return m->string; + } + + /* + * call-seq: + * match.size -> integer + * match.length -> integer + * + * Returns the number of elements in the match array (including nils). + * + * m = RE2('(\d+)').match("bob 123") + * m.length #=> 2 + * m.size #=> 2 + */ + static VALUE + re2_matchdata_size(VALUE self) + { + re2_matchdata *m; + Data_Get_Struct(self, re2_matchdata, m); + + return INT2FIX(m->number_of_matches); + } + + /* + * call-seq: + * match.regexp -> RE2::Regexp + * + * Return the RE2::Regexp used in the match. + * + * m = RE2('(\d+)').match("bob 123") + * m.regexp #=> #<RE2::Regexp /(\d+)/> + */ + static VALUE + re2_matchdata_regexp(VALUE self) + { + re2_matchdata *m; + Data_Get_Struct(self, re2_matchdata, m); + return m->regexp; + } + + static VALUE + re2_regexp_allocate(VALUE klass) + { re2_pattern *p; - return Data_Make_Struct(klass, re2_pattern, 0, re2_free, p); + return Data_Make_Struct(klass, re2_pattern, 0, re2_regexp_free, p); } /* * call-seq: + * match.to_a -> array + * + * Returns the array of matches. + * + * m = RE2('(\d+)').match("bob 123") + * m.to_a #=> ["123", "123"] + */ + static VALUE + re2_matchdata_to_a(VALUE self) + { + int i; + re2_matchdata *m; + Data_Get_Struct(self, re2_matchdata, m); + VALUE array = rb_ary_new2(m->number_of_matches); + for (i = 0; i < m->number_of_matches; i++) { + if (m->matches[i].empty()) { + rb_ary_store(array, i, Qnil); + } else { + rb_ary_store(array, i, rb_str_new2(m->matches[i].as_string().c_str())); + } + } + return array; + } + + static VALUE + re2_matchdata_nth_match(int nth, VALUE self) + { + re2_matchdata *m; + Data_Get_Struct(self, re2_matchdata, m); + + if (nth >= m->number_of_matches || m->matches[nth].empty()) { + return Qnil; + } else { + return rb_str_new2(m->matches[nth].as_string().c_str()); + } + } + + /* + * call-seq: + * match[i] -> string + * match[start, length] -> array + * match[range] -> array + * + * Access the match data as an array. + * + * m = RE2('(\d+)').match("bob 123") + * m[0] #=> "123" + * m[0, 1] #=> ["123"] + * m[0...1] #=> ["123"] + * m[0..1] #=> ["123", "123"] + */ + static VALUE + re2_matchdata_aref(int argc, VALUE *argv, VALUE self) + { + VALUE idx, rest; + rb_scan_args(argc, argv, "11", &idx, &rest); + + if (!NIL_P(rest) || !FIXNUM_P(idx) || FIX2INT(idx) < 0) { + return rb_ary_aref(argc, argv, re2_matchdata_to_a(self)); + } else { + return re2_matchdata_nth_match(FIX2INT(idx), self); + } + } + + /* + * call-seq: + * match.to_s -> string + * + * Returns the entire matched string. + */ + static VALUE + re2_matchdata_to_s(VALUE self) + { + return re2_matchdata_nth_match(0, self); + } + + /* + * call-seq: + * match.inspect -> string + * + * Returns a printable version of the match. + * + * m = RE2('(\d+)').match("bob 123") + * m.inspect #=> "#<RE2::MatchData \"123\" 1:\"123\">" + */ + static VALUE + re2_matchdata_inspect(VALUE self) + { + int i; + re2_matchdata *m; + VALUE match, result; + + Data_Get_Struct(self, re2_matchdata, m); + + result = rb_str_buf_new2("#<RE2::MatchData"); + + for (i = 0; i < m->number_of_matches; i++) { + rb_str_buf_cat2(result, " "); + + if (i > 0) { + char buf[sizeof(i)*3+1]; + snprintf(buf, sizeof(buf), "%d", i); + rb_str_buf_cat2(result, buf); + rb_str_buf_cat2(result, ":"); + } + + match = re2_matchdata_nth_match(i, self); + + if (match == Qnil) { + rb_str_buf_cat2(result, "nil"); + } else { + rb_str_buf_append(result, rb_str_inspect(match)); + } + } + rb_str_buf_cat2(result, ">"); + + return result; + } + + /* + * call-seq: * RE2(pattern) -> re2 * RE2(pattern, options) -> re2 * * Returns a new RE2 object with a compiled version of * +pattern+ stored inside. Equivalent to +RE2.new+. */ static VALUE re2_re2(int argc, VALUE *argv, VALUE self) { UNUSED(self); - return rb_class_new_instance(argc, argv, re2_cRE2); + return rb_class_new_instance(argc, argv, re2_cRegexp); } /* * call-seq: - * RE2.new(pattern) -> re2 - * RE2.new(pattern, options) -> re2 - * RE2.compile(pattern) -> re2 - * RE2.compile(pattern, options) -> re2 + * RE2::Regexp.new(pattern) -> re2 + * RE2::Regexp.new(pattern, options) -> re2 + * RE2::Regexp.compile(pattern) -> re2 + * RE2::Regexp.compile(pattern, options) -> re2 * * Returns a new RE2 object with a compiled version of * +pattern+ stored inside. * * Options can be a hash with the following keys: @@ -105,88 +308,83 @@ * * :one_line - ^ and $ only match beginning and end of text * when in posix_syntax mode (default false) */ static VALUE - re2_initialize(int argc, VALUE *argv, VALUE self) + re2_regexp_initialize(int argc, VALUE *argv, VALUE self) { VALUE pattern, options, utf8, posix_syntax, longest_match, log_errors, max_mem, literal, never_nl, case_sensitive, perl_classes, word_boundary, one_line; re2_pattern *p; - RE2::Options *re2_options; rb_scan_args(argc, argv, "11", &pattern, &options); Data_Get_Struct(self, re2_pattern, p); if (RTEST(options)) { if (TYPE(options) != T_HASH) { rb_raise(rb_eArgError, "options should be a hash"); } - re2_options = new (std::nothrow) RE2::Options(); + RE2::Options re2_options; - if (re2_options == 0) { - rb_raise(rb_eNoMemError, "not enough memory to allocate RE2::Options"); - } - utf8 = rb_hash_aref(options, ID2SYM(id_utf8)); if (!NIL_P(utf8)) { - re2_options->set_utf8(RTEST(utf8)); + re2_options.set_utf8(RTEST(utf8)); } posix_syntax = rb_hash_aref(options, ID2SYM(id_posix_syntax)); if (!NIL_P(posix_syntax)) { - re2_options->set_posix_syntax(RTEST(posix_syntax)); + re2_options.set_posix_syntax(RTEST(posix_syntax)); } longest_match = rb_hash_aref(options, ID2SYM(id_longest_match)); if (!NIL_P(longest_match)) { - re2_options->set_longest_match(RTEST(longest_match)); + re2_options.set_longest_match(RTEST(longest_match)); } log_errors = rb_hash_aref(options, ID2SYM(id_log_errors)); if (!NIL_P(log_errors)) { - re2_options->set_log_errors(RTEST(log_errors)); + re2_options.set_log_errors(RTEST(log_errors)); } max_mem = rb_hash_aref(options, ID2SYM(id_max_mem)); if (!NIL_P(max_mem)) { - re2_options->set_max_mem(NUM2INT(max_mem)); + re2_options.set_max_mem(NUM2INT(max_mem)); } literal = rb_hash_aref(options, ID2SYM(id_literal)); if (!NIL_P(literal)) { - re2_options->set_literal(RTEST(literal)); + re2_options.set_literal(RTEST(literal)); } never_nl = rb_hash_aref(options, ID2SYM(id_never_nl)); if (!NIL_P(never_nl)) { - re2_options->set_never_nl(RTEST(never_nl)); + re2_options.set_never_nl(RTEST(never_nl)); } case_sensitive = rb_hash_aref(options, ID2SYM(id_case_sensitive)); if (!NIL_P(case_sensitive)) { - re2_options->set_case_sensitive(RTEST(case_sensitive)); + re2_options.set_case_sensitive(RTEST(case_sensitive)); } perl_classes = rb_hash_aref(options, ID2SYM(id_perl_classes)); if (!NIL_P(perl_classes)) { - re2_options->set_perl_classes(RTEST(perl_classes)); + re2_options.set_perl_classes(RTEST(perl_classes)); } word_boundary = rb_hash_aref(options, ID2SYM(id_word_boundary)); if (!NIL_P(word_boundary)) { - re2_options->set_word_boundary(RTEST(word_boundary)); + re2_options.set_word_boundary(RTEST(word_boundary)); } one_line = rb_hash_aref(options, ID2SYM(id_one_line)); if (!NIL_P(one_line)) { - re2_options->set_one_line(RTEST(one_line)); + re2_options.set_one_line(RTEST(one_line)); } - p->pattern = new (std::nothrow) RE2(StringValuePtr(pattern), *re2_options); + p->pattern = new (std::nothrow) RE2(StringValuePtr(pattern), re2_options); } else { p->pattern = new (std::nothrow) RE2(StringValuePtr(pattern)); } if (p->pattern == 0) { @@ -198,26 +396,24 @@ /* * call-seq: * re2.inspect -> string * - * Returns a printable version of the regular expression +re2+, - * surrounded by forward slashes. + * Returns a printable version of the regular expression +re2+. * - * re2 = RE2.new("woo?") - * re2.inspect #=> "/woo?/" + * re2 = RE2::Regexp.new("woo?") + * re2.inspect #=> "#<RE2::Regexp /woo?/>" */ static VALUE - re2_inspect(VALUE self) + re2_regexp_inspect(VALUE self) { - VALUE result = rb_str_buf_new(2); re2_pattern *p; + VALUE result = rb_str_buf_new2("#<RE2::Regexp /"); - rb_str_buf_cat2(result, "/"); Data_Get_Struct(self, re2_pattern, p); rb_str_buf_cat2(result, p->pattern->pattern().c_str()); - rb_str_buf_cat2(result, "/"); + rb_str_buf_cat2(result, "/>"); return result; } /* @@ -228,15 +424,15 @@ * re2.source -> string * re2.inspect -> string * * Returns a string version of the regular expression +re2+. * - * re2 = RE2.new("woo?") + * re2 = RE2::Regexp.new("woo?") * re2.to_s #=> "woo?" */ static VALUE - re2_to_s(VALUE self) + re2_regexp_to_s(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return rb_str_new2(p->pattern->pattern().c_str()); } @@ -246,15 +442,15 @@ * re2.ok? -> true or false * * Returns whether or not the regular expression +re2+ * was compiled successfully or not. * - * re2 = RE2.new("woo?") + * re2 = RE2::Regexp.new("woo?") * re2.ok? #=> true */ static VALUE - re2_ok(VALUE self) + re2_regexp_ok(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->ok()); } @@ -264,15 +460,15 @@ * re2.utf8? -> true or false * * Returns whether or not the regular expression +re2+ * was compiled with the utf8 option set to true. * - * re2 = RE2.new("woo?", :utf8 => true) + * re2 = RE2::Regexp.new("woo?", :utf8 => true) * re2.utf8? #=> true */ static VALUE - re2_utf8(VALUE self) + re2_regexp_utf8(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().utf8()); } @@ -282,15 +478,15 @@ * re2.posix_syntax? -> true or false * * Returns whether or not the regular expression +re2+ * was compiled with the posix_syntax option set to true. * - * re2 = RE2.new("woo?", :posix_syntax => true) + * re2 = RE2::Regexp.new("woo?", :posix_syntax => true) * re2.posix_syntax? #=> true */ static VALUE - re2_posix_syntax(VALUE self) + re2_regexp_posix_syntax(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().posix_syntax()); } @@ -300,15 +496,15 @@ * re2.longest_match? -> true or false * * Returns whether or not the regular expression +re2+ * was compiled with the longest_match option set to true. * - * re2 = RE2.new("woo?", :longest_match => true) + * re2 = RE2::Regexp.new("woo?", :longest_match => true) * re2.longest_match? #=> true */ static VALUE - re2_longest_match(VALUE self) + re2_regexp_longest_match(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().longest_match()); } @@ -318,15 +514,15 @@ * re2.log_errors? -> true or false * * Returns whether or not the regular expression +re2+ * was compiled with the log_errors option set to true. * - * re2 = RE2.new("woo?", :log_errors => true) + * re2 = RE2::Regexp.new("woo?", :log_errors => true) * re2.log_errors? #=> true */ static VALUE - re2_log_errors(VALUE self) + re2_regexp_log_errors(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().log_errors()); } @@ -336,15 +532,15 @@ * re2.max_mem -> int * * Returns the max_mem setting for the regular expression * +re2+. * - * re2 = RE2.new("woo?", :max_mem => 1024) + * re2 = RE2::Regexp.new("woo?", :max_mem => 1024) * re2.max_mem #=> 1024 */ static VALUE - re2_max_mem(VALUE self) + re2_regexp_max_mem(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return INT2FIX(p->pattern->options().max_mem()); } @@ -354,15 +550,15 @@ * re2.literal? -> true or false * * Returns whether or not the regular expression +re2+ * was compiled with the literal option set to true. * - * re2 = RE2.new("woo?", :literal => true) + * re2 = RE2::Regexp.new("woo?", :literal => true) * re2.literal? #=> true */ static VALUE - re2_literal(VALUE self) + re2_regexp_literal(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().literal()); } @@ -372,15 +568,15 @@ * re2.never_nl? -> true or false * * Returns whether or not the regular expression +re2+ * was compiled with the never_nl option set to true. * - * re2 = RE2.new("woo?", :never_nl => true) + * re2 = RE2::Regexp.new("woo?", :never_nl => true) * re2.never_nl? #=> true */ static VALUE - re2_never_nl(VALUE self) + re2_regexp_never_nl(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().never_nl()); } @@ -390,15 +586,15 @@ * re2.case_sensitive? -> true or false * * Returns whether or not the regular expression +re2+ * was compiled with the case_sensitive option set to true. * - * re2 = RE2.new("woo?", :case_sensitive => true) + * re2 = RE2::Regexp.new("woo?", :case_sensitive => true) * re2.case_sensitive? #=> true */ static VALUE - re2_case_sensitive(VALUE self) + re2_regexp_case_sensitive(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().case_sensitive()); } @@ -409,31 +605,31 @@ * re2.casefold? -> true or false * * Returns whether or not the regular expression +re2+ * was compiled with the case_sensitive option set to false. * - * re2 = RE2.new("woo?", :case_sensitive => true) + * re2 = RE2::Regexp.new("woo?", :case_sensitive => true) * re2.case_insensitive? #=> false */ static VALUE - re2_case_insensitive(VALUE self) + re2_regexp_case_insensitive(VALUE self) { - return BOOL2RUBY(re2_case_sensitive(self) != Qtrue); + return BOOL2RUBY(re2_regexp_case_sensitive(self) != Qtrue); } /* * call-seq: * re2.perl_classes? -> true or false * * Returns whether or not the regular expression +re2+ * was compiled with the perl_classes option set to true. * - * re2 = RE2.new("woo?", :perl_classes => true) + * re2 = RE2::Regexp.new("woo?", :perl_classes => true) * re2.perl_classes? #=> true */ static VALUE - re2_perl_classes(VALUE self) + re2_regexp_perl_classes(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().perl_classes()); } @@ -443,15 +639,15 @@ * re2.word_boundary? -> true or false * * Returns whether or not the regular expression +re2+ * was compiled with the word_boundary option set to true. * - * re2 = RE2.new("woo?", :word_boundary => true) + * re2 = RE2::Regexp.new("woo?", :word_boundary => true) * re2.word_boundary? #=> true */ static VALUE - re2_word_boundary(VALUE self) + re2_regexp_word_boundary(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().word_boundary()); } @@ -461,15 +657,15 @@ * re2.one_line? -> true or false * * Returns whether or not the regular expression +re2+ * was compiled with the one_line option set to true. * - * re2 = RE2.new("woo?", :one_line => true) + * re2 = RE2::Regexp.new("woo?", :one_line => true) * re2.one_line? #=> true */ static VALUE - re2_one_line(VALUE self) + re2_regexp_one_line(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return BOOL2RUBY(p->pattern->options().one_line()); } @@ -480,11 +676,11 @@ * * If the RE2 could not be created properly, returns an * error string. */ static VALUE - re2_error(VALUE self) + re2_regexp_error(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return rb_str_new2(p->pattern->error().c_str()); } @@ -495,11 +691,11 @@ * * If the RE2 could not be created properly, returns * the offending portion of the regexp. */ static VALUE - re2_error_arg(VALUE self) + re2_regexp_error_arg(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return rb_str_new2(p->pattern->error_arg().c_str()); } @@ -511,11 +707,11 @@ * Returns the program size, a very approximate measure * of a regexp's "cost". Larger numbers are more expensive * than smaller numbers. */ static VALUE - re2_program_size(VALUE self) + re2_regexp_program_size(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return INT2FIX(p->pattern->ProgramSize()); } @@ -526,11 +722,11 @@ * * Returns a hash of the options currently set for * +re2+. */ static VALUE - re2_options(VALUE self) + re2_regexp_options(VALUE self) { VALUE options; re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); @@ -582,11 +778,11 @@ * Returns the number of capturing subpatterns, or -1 if the regexp * wasn't valid on construction. The overall match ($0) does not * count: if the regexp is "(a)(b)", returns 2. */ static VALUE - re2_number_of_capturing_groups(VALUE self) + re2_regexp_number_of_capturing_groups(VALUE self) { re2_pattern *p; Data_Get_Struct(self, re2_pattern, p); return INT2FIX(p->pattern->NumberOfCapturingGroups()); @@ -604,24 +800,24 @@ * simple true or false will be returned to indicate a successful * match. If the second argument is any integer greater than 0, * that number of matches will be returned (padded with nils if * there are insufficient matches). * - * r = RE2.new('w(o)(o)') + * r = RE2::Regexp.new('w(o)(o)') * r.match('woo') #=> ["woo", "o", "o"] * r.match('woo', 0) #=> true * r.match('bob', 0) #=> false * r.match('woo', 1) #=> ["woo", "o"] */ static VALUE - re2_match(int argc, VALUE *argv, VALUE self) + re2_regexp_match(int argc, VALUE *argv, VALUE self) { int n; bool matched; re2_pattern *p; - VALUE text, number_of_matches, matches; - re2::StringPiece *string_matches, *text_as_string_piece; + re2_matchdata *m; + VALUE text, number_of_matches, matchdata; rb_scan_args(argc, argv, "11", &text, &number_of_matches); Data_Get_Struct(self, re2_pattern, p); @@ -629,56 +825,37 @@ n = NUM2INT(number_of_matches); } else { n = p->pattern->NumberOfCapturingGroups(); } - text_as_string_piece = new (std::nothrow) re2::StringPiece(StringValuePtr(text)); + re2::StringPiece text_as_string_piece(StringValuePtr(text)); - if (text_as_string_piece == 0) { - rb_raise(rb_eNoMemError, "not enough memory to allocate StringPiece for text"); - } - if (n == 0) { - matched = p->pattern->Match(*text_as_string_piece, 0, RE2::UNANCHORED, 0, 0); - - delete text_as_string_piece; - + matched = p->pattern->Match(text_as_string_piece, 0, RE2::UNANCHORED, 0, 0); return BOOL2RUBY(matched); - } else { /* Because match returns the whole match as well. */ n += 1; - string_matches = new (std::nothrow) re2::StringPiece[n]; + matchdata = rb_class_new_instance(0, 0, re2_cMatchData); + Data_Get_Struct(matchdata, re2_matchdata, m); + m->matches = new (std::nothrow) re2::StringPiece[n]; + m->regexp = self; + m->string = rb_str_dup_frozen(text); - if (string_matches == 0) { - rb_raise(rb_eNoMemError, "not enough memory to allocate array of StringPieces for matches"); + if (m->matches == 0) { + rb_raise(rb_eNoMemError, "not enough memory to allocate StringPieces for matches"); } - matched = p->pattern->Match(*text_as_string_piece, 0, RE2::UNANCHORED, string_matches, n); + m->number_of_matches = n; - delete text_as_string_piece; + matched = p->pattern->Match(text_as_string_piece, 0, RE2::UNANCHORED, m->matches, n); if (matched) { - matches = rb_ary_new(); - - for (int i = 0; i < n; i++) { - if (!string_matches[i].empty()) { - rb_ary_push(matches, rb_str_new2(string_matches[i].as_string().c_str())); - } else { - rb_ary_push(matches, Qnil); - } - } - - delete[] string_matches; - - return matches; + return matchdata; } else { - - delete[] string_matches; - return Qnil; } } } @@ -689,266 +866,21 @@ * * Returns true or false to indicate a successful match. * Equivalent to +re2.match(text, 0)+. */ static VALUE - re2_match_query(VALUE self, VALUE text) + re2_regexp_match_query(VALUE self, VALUE text) { VALUE argv[2]; argv[0] = text; argv[1] = INT2FIX(0); - return re2_match(2, argv, self); + return re2_regexp_match(2, argv, self); } /* * call-seq: - * re2 !~ text -> true or false - * - * Returns true or false to indicate an unsuccessful match. - * Equivalent to +!re2.match(text, 0)+. - */ - static VALUE - re2_bang_tilde(VALUE self, VALUE text) - { - return BOOL2RUBY(re2_match_query(self, text) != Qtrue); - } - - /* - * call-seq: - * RE2::FullMatch(text, re) -> true or false - * - * Returns whether or not a full match for +re2+ was - * found in text. - * - * RE2::FullMatch("woo", "wo+") #=> true - * RE2::FullMatch("woo", "a") #=> false - * re2 = RE2.new("woo") - * RE2::FullMatch("woo", re2) #=> true - */ - static VALUE - re2_FullMatch(VALUE self, VALUE text, VALUE re) - { - UNUSED(self); - bool result; - re2_pattern *p; - - if (rb_obj_is_kind_of(re, re2_cRE2)) { - Data_Get_Struct(re, re2_pattern, p); - result = RE2::FullMatch(StringValuePtr(text), *p->pattern); - } else { - result = RE2::FullMatch(StringValuePtr(text), StringValuePtr(re)); - } - - return BOOL2RUBY(result); - } - - /* - * call-seq: - * RE2::FullMatchN(text, re) -> array of matches - * - * Returns an array of successful matches as defined in - * +re+ for +text+. - * - * RE2::FullMatchN("woo", "w(oo)") #=> ["oo"] - */ - static VALUE - re2_FullMatchN(VALUE self, VALUE text, VALUE re) - { - UNUSED(self); - int n; - bool matched, re2_given; - re2_pattern *p; - VALUE matches; - RE2 *compiled_pattern; - RE2::Arg *argv; - const RE2::Arg **args; - std::string *string_matches; - - re2_given = rb_obj_is_kind_of(re, re2_cRE2); - - if (re2_given) { - Data_Get_Struct(re, re2_pattern, p); - compiled_pattern = p->pattern; - } else { - compiled_pattern = new (std::nothrow) RE2(StringValuePtr(re)); - - if (compiled_pattern == 0) { - rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object for pattern"); - } - } - - n = compiled_pattern->NumberOfCapturingGroups(); - - argv = new (std::nothrow) RE2::Arg[n]; - args = new (std::nothrow) const RE2::Arg*[n]; - string_matches = new (std::nothrow) std::string[n]; - - if (argv == 0) { - rb_raise(rb_eNoMemError, "not enough memory to allocate array of RE2::Args"); - } - - if (args == 0) { - rb_raise(rb_eNoMemError, "not enough memory to allocate array of pointers to RE2::Args"); - } - - if (string_matches == 0) { - rb_raise(rb_eNoMemError, "not enough memory to allocate array of strings for matches"); - } - - for (int i = 0; i < n; i++) { - args[i] = &argv[i]; - argv[i] = &string_matches[i]; - } - - matched = RE2::FullMatchN(StringValuePtr(text), *compiled_pattern, args, n); - - if (!re2_given) { - delete compiled_pattern; - } - - delete[] argv; - delete[] args; - - if (matched) { - matches = rb_ary_new(); - - for (int i = 0; i < n; i++) { - if (!string_matches[i].empty()) { - rb_ary_push(matches, rb_str_new2(string_matches[i].c_str())); - } else { - rb_ary_push(matches, Qnil); - } - } - - delete[] string_matches; - - return matches; - } else { - delete[] string_matches; - return Qnil; - } - } - - /* - * call-seq: - * RE2::PartialMatchN(text, re) -> array of matches - * - * Returns an array of successful matches as defined in - * +re+ for +text+. - * - * RE2::PartialMatchN("woo", "w(oo)") #=> ["oo"] - */ - static VALUE - re2_PartialMatchN(VALUE self, VALUE text, VALUE re) - { - UNUSED(self); - int n; - bool matched, re2_given; - re2_pattern *p; - VALUE matches; - RE2 *compiled_pattern; - RE2::Arg *argv; - const RE2::Arg **args; - std::string *string_matches; - - re2_given = rb_obj_is_kind_of(re, re2_cRE2); - - if (re2_given) { - Data_Get_Struct(re, re2_pattern, p); - compiled_pattern = p->pattern; - } else { - compiled_pattern = new (std::nothrow) RE2(StringValuePtr(re)); - - if (compiled_pattern == 0) { - rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object for pattern"); - } - } - - n = compiled_pattern->NumberOfCapturingGroups(); - - argv = new (std::nothrow) RE2::Arg[n]; - args = new (std::nothrow) const RE2::Arg*[n]; - string_matches = new (std::nothrow) std::string[n]; - - if (argv == 0) { - rb_raise(rb_eNoMemError, "not enough memory to allocate array of RE2::Args"); - } - - if (args == 0) { - rb_raise(rb_eNoMemError, "not enough memory to allocate array of pointers to RE2::Args"); - } - - if (string_matches == 0) { - rb_raise(rb_eNoMemError, "not enough memory to allocate array of strings for matches"); - } - - for (int i = 0; i < n; i++) { - args[i] = &argv[i]; - argv[i] = &string_matches[i]; - } - - matched = RE2::PartialMatchN(StringValuePtr(text), *compiled_pattern, args, n); - - if (!re2_given) { - delete compiled_pattern; - } - - delete[] argv; - delete[] args; - - if (matched) { - matches = rb_ary_new(); - - for (int i = 0; i < n; i++) { - if (!string_matches[i].empty()) { - rb_ary_push(matches, rb_str_new2(string_matches[i].c_str())); - } else { - rb_ary_push(matches, Qnil); - } - } - - delete[] string_matches; - - return matches; - } else { - delete[] string_matches; - return Qnil; - } - } - - /* - * call-seq: - * RE2::PartialMatch(text, re) -> true or false - * - * Returns whether or not a partial match for +re2+ was - * found in text. - * - * RE2::PartialMatch("woo", "o+") #=> true - * RE2::PartialMatch("woo", "a") #=> false - * re2 = RE2.new("oo?") - * RE2::PartialMatch("woo", re2) #=> true - */ - static VALUE - re2_PartialMatch(VALUE self, VALUE text, VALUE re) - { - UNUSED(self); - bool result; - re2_pattern *p; - - if (rb_obj_is_kind_of(re, re2_cRE2)) { - Data_Get_Struct(re, re2_pattern, p); - result = RE2::PartialMatch(StringValuePtr(text), *p->pattern); - } else { - result = RE2::PartialMatch(StringValuePtr(text), StringValuePtr(re)); - } - - return BOOL2RUBY(result); - } - - /* - * call-seq: * RE2::Replace(str, pattern, rewrite) -> str * * Replaces the first occurrence +pattern+ in +str+ with * +rewrite+ <i>in place</i>. * @@ -969,11 +901,11 @@ // Convert all the inputs to be pumped into RE2::Replace. std::string str_as_string(StringValuePtr(str)); re2::StringPiece rewrite_as_string_piece(StringValuePtr(rewrite)); // Do the replacement. - if (rb_obj_is_kind_of(pattern, re2_cRE2)) { + if (rb_obj_is_kind_of(pattern, re2_cRegexp)) { Data_Get_Struct(pattern, re2_pattern, p); RE2::Replace(&str_as_string, *p->pattern, rewrite_as_string_piece); } else { RE2::Replace(&str_as_string, StringValuePtr(pattern), rewrite_as_string_piece); } @@ -1011,11 +943,11 @@ std::string str_as_string(StringValuePtr(str)); re2::StringPiece rewrite_as_string_piece(StringValuePtr(rewrite)); VALUE repl; // Do the replacement. - if (rb_obj_is_kind_of(pattern, re2_cRE2)) { + if (rb_obj_is_kind_of(pattern, re2_cRegexp)) { Data_Get_Struct(pattern, re2_pattern, p); RE2::GlobalReplace(&str_as_string, *p->pattern, rewrite_as_string_piece); } else { RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern), rewrite_as_string_piece); } @@ -1030,12 +962,12 @@ } /* * call-seq: * RE2::QuoteMeta(str) -> str - * RE2.escape(str) -> str - * RE2.quote(str) -> str + * RE2::Regexp.escape(str) -> str + * RE2::Regexp.quote(str) -> str * * Returns a version of str with all potentially meaningful regexp * characters escaped. The returned string, used as a regular * expression, will exactly match the original string. * @@ -1050,51 +982,62 @@ } void Init_re2() { - re2_cRE2 = rb_define_class("RE2", rb_cObject); - rb_define_alloc_func(re2_cRE2, (VALUE (*)(VALUE))re2_allocate); - rb_define_method(re2_cRE2, "initialize", (VALUE (*)(...))re2_initialize, -1); - rb_define_method(re2_cRE2, "ok?", (VALUE (*)(...))re2_ok, 0); - rb_define_method(re2_cRE2, "error", (VALUE (*)(...))re2_error, 0); - rb_define_method(re2_cRE2, "error_arg", (VALUE (*)(...))re2_error_arg, 0); - rb_define_method(re2_cRE2, "program_size", (VALUE (*)(...))re2_program_size, 0); - rb_define_method(re2_cRE2, "options", (VALUE (*)(...))re2_options, 0); - rb_define_method(re2_cRE2, "number_of_capturing_groups", (VALUE (*)(...))re2_number_of_capturing_groups, 0); - rb_define_method(re2_cRE2, "match", (VALUE (*)(...))re2_match, -1); - rb_define_method(re2_cRE2, "match?", (VALUE (*)(...))re2_match_query, 1); - rb_define_method(re2_cRE2, "=~", (VALUE (*)(...))re2_match_query, 1); - rb_define_method(re2_cRE2, "===", (VALUE (*)(...))re2_match_query, 1); - rb_define_method(re2_cRE2, "!~", (VALUE (*)(...))re2_bang_tilde, 1); - rb_define_method(re2_cRE2, "to_s", (VALUE (*)(...))re2_to_s, 0); - rb_define_method(re2_cRE2, "to_str", (VALUE (*)(...))re2_to_s, 0); - rb_define_method(re2_cRE2, "pattern", (VALUE (*)(...))re2_to_s, 0); - rb_define_method(re2_cRE2, "source", (VALUE (*)(...))re2_to_s, 0); - rb_define_method(re2_cRE2, "inspect", (VALUE (*)(...))re2_inspect, 0); - rb_define_method(re2_cRE2, "utf8?", (VALUE (*)(...))re2_utf8, 0); - rb_define_method(re2_cRE2, "posix_syntax?", (VALUE (*)(...))re2_posix_syntax, 0); - rb_define_method(re2_cRE2, "longest_match?", (VALUE (*)(...))re2_longest_match, 0); - rb_define_method(re2_cRE2, "log_errors?", (VALUE (*)(...))re2_log_errors, 0); - rb_define_method(re2_cRE2, "max_mem", (VALUE (*)(...))re2_max_mem, 0); - rb_define_method(re2_cRE2, "literal?", (VALUE (*)(...))re2_literal, 0); - rb_define_method(re2_cRE2, "never_nl?", (VALUE (*)(...))re2_never_nl, 0); - rb_define_method(re2_cRE2, "case_sensitive?", (VALUE (*)(...))re2_case_sensitive, 0); - rb_define_method(re2_cRE2, "case_insensitive?", (VALUE (*)(...))re2_case_insensitive, 0); - rb_define_method(re2_cRE2, "casefold?", (VALUE (*)(...))re2_case_insensitive, 0); - rb_define_method(re2_cRE2, "perl_classes?", (VALUE (*)(...))re2_perl_classes, 0); - rb_define_method(re2_cRE2, "word_boundary?", (VALUE (*)(...))re2_word_boundary, 0); - rb_define_method(re2_cRE2, "one_line?", (VALUE (*)(...))re2_one_line, 0); - rb_define_singleton_method(re2_cRE2, "FullMatch", (VALUE (*)(...))re2_FullMatch, 2); - rb_define_singleton_method(re2_cRE2, "FullMatchN", (VALUE (*)(...))re2_FullMatchN, 2); - rb_define_singleton_method(re2_cRE2, "PartialMatch", (VALUE (*)(...))re2_PartialMatch, 2); - rb_define_singleton_method(re2_cRE2, "PartialMatchN", (VALUE (*)(...))re2_PartialMatchN, 2); - rb_define_singleton_method(re2_cRE2, "Replace", (VALUE (*)(...))re2_Replace, 3); - rb_define_singleton_method(re2_cRE2, "GlobalReplace", (VALUE (*)(...))re2_GlobalReplace, 3); - rb_define_singleton_method(re2_cRE2, "QuoteMeta", (VALUE (*)(...))re2_QuoteMeta, 1); - rb_define_singleton_method(re2_cRE2, "escape", (VALUE (*)(...))re2_QuoteMeta, 1); - rb_define_singleton_method(re2_cRE2, "quote", (VALUE (*)(...))re2_QuoteMeta, 1); - rb_define_singleton_method(re2_cRE2, "compile", (VALUE (*)(...))rb_class_new_instance, -1); + re2_mRE2 = rb_define_module("RE2"); + re2_cRegexp = rb_define_class_under(re2_mRE2, "Regexp", rb_cObject); + re2_cMatchData = rb_define_class_under(re2_mRE2, "MatchData", rb_cObject); + + rb_define_alloc_func(re2_cRegexp, (VALUE (*)(VALUE))re2_regexp_allocate); + rb_define_alloc_func(re2_cMatchData, (VALUE (*)(VALUE))re2_matchdata_allocate); + + rb_define_method(re2_cMatchData, "string", (VALUE (*)(...))re2_matchdata_string, 0); + rb_define_method(re2_cMatchData, "regexp", (VALUE (*)(...))re2_matchdata_regexp, 0); + rb_define_method(re2_cMatchData, "to_a", (VALUE (*)(...))re2_matchdata_to_a, 0); + rb_define_method(re2_cMatchData, "size", (VALUE (*)(...))re2_matchdata_size, 0); + rb_define_method(re2_cMatchData, "length", (VALUE (*)(...))re2_matchdata_size, 0); + rb_define_method(re2_cMatchData, "[]", (VALUE (*)(...))re2_matchdata_aref, -1); + rb_define_method(re2_cMatchData, "to_s", (VALUE (*)(...))re2_matchdata_to_s, 0); + rb_define_method(re2_cMatchData, "inspect", (VALUE (*)(...))re2_matchdata_inspect, 0); + + rb_define_method(re2_cRegexp, "initialize", (VALUE (*)(...))re2_regexp_initialize, -1); + rb_define_method(re2_cRegexp, "ok?", (VALUE (*)(...))re2_regexp_ok, 0); + rb_define_method(re2_cRegexp, "error", (VALUE (*)(...))re2_regexp_error, 0); + rb_define_method(re2_cRegexp, "error_arg", (VALUE (*)(...))re2_regexp_error_arg, 0); + rb_define_method(re2_cRegexp, "program_size", (VALUE (*)(...))re2_regexp_program_size, 0); + rb_define_method(re2_cRegexp, "options", (VALUE (*)(...))re2_regexp_options, 0); + rb_define_method(re2_cRegexp, "number_of_capturing_groups", (VALUE (*)(...))re2_regexp_number_of_capturing_groups, 0); + rb_define_method(re2_cRegexp, "match", (VALUE (*)(...))re2_regexp_match, -1); + rb_define_method(re2_cRegexp, "match?", (VALUE (*)(...))re2_regexp_match_query, 1); + rb_define_method(re2_cRegexp, "=~", (VALUE (*)(...))re2_regexp_match_query, 1); + rb_define_method(re2_cRegexp, "===", (VALUE (*)(...))re2_regexp_match_query, 1); + rb_define_method(re2_cRegexp, "to_s", (VALUE (*)(...))re2_regexp_to_s, 0); + rb_define_method(re2_cRegexp, "to_str", (VALUE (*)(...))re2_regexp_to_s, 0); + rb_define_method(re2_cRegexp, "pattern", (VALUE (*)(...))re2_regexp_to_s, 0); + rb_define_method(re2_cRegexp, "source", (VALUE (*)(...))re2_regexp_to_s, 0); + rb_define_method(re2_cRegexp, "inspect", (VALUE (*)(...))re2_regexp_inspect, 0); + rb_define_method(re2_cRegexp, "utf8?", (VALUE (*)(...))re2_regexp_utf8, 0); + rb_define_method(re2_cRegexp, "posix_syntax?", (VALUE (*)(...))re2_regexp_posix_syntax, 0); + rb_define_method(re2_cRegexp, "longest_match?", (VALUE (*)(...))re2_regexp_longest_match, 0); + rb_define_method(re2_cRegexp, "log_errors?", (VALUE (*)(...))re2_regexp_log_errors, 0); + rb_define_method(re2_cRegexp, "max_mem", (VALUE (*)(...))re2_regexp_max_mem, 0); + rb_define_method(re2_cRegexp, "literal?", (VALUE (*)(...))re2_regexp_literal, 0); + rb_define_method(re2_cRegexp, "never_nl?", (VALUE (*)(...))re2_regexp_never_nl, 0); + rb_define_method(re2_cRegexp, "case_sensitive?", (VALUE (*)(...))re2_regexp_case_sensitive, 0); + rb_define_method(re2_cRegexp, "case_insensitive?", (VALUE (*)(...))re2_regexp_case_insensitive, 0); + rb_define_method(re2_cRegexp, "casefold?", (VALUE (*)(...))re2_regexp_case_insensitive, 0); + rb_define_method(re2_cRegexp, "perl_classes?", (VALUE (*)(...))re2_regexp_perl_classes, 0); + rb_define_method(re2_cRegexp, "word_boundary?", (VALUE (*)(...))re2_regexp_word_boundary, 0); + rb_define_method(re2_cRegexp, "one_line?", (VALUE (*)(...))re2_regexp_one_line, 0); + + rb_define_module_function(re2_mRE2, "Replace", (VALUE (*)(...))re2_Replace, 3); + rb_define_module_function(re2_mRE2, "GlobalReplace", (VALUE (*)(...))re2_GlobalReplace, 3); + rb_define_module_function(re2_mRE2, "QuoteMeta", (VALUE (*)(...))re2_QuoteMeta, 1); + rb_define_singleton_method(re2_cRegexp, "escape", (VALUE (*)(...))re2_QuoteMeta, 1); + rb_define_singleton_method(re2_cRegexp, "quote", (VALUE (*)(...))re2_QuoteMeta, 1); + rb_define_singleton_method(re2_cRegexp, "compile", (VALUE (*)(...))rb_class_new_instance, -1); + rb_define_global_function("RE2", (VALUE (*)(...))re2_re2, -1); /* Create the symbols used in options. */ id_utf8 = rb_intern("utf8"); id_posix_syntax = rb_intern("posix_syntax");