ext/re2/re2.cc in re2-0.2.0 vs ext/re2/re2.cc in re2-0.3.0
- old
+ new
@@ -5,10 +5,12 @@
* Copyright (c) 2010, Paul Mucur (http://mucur.name)
* Released under the BSD Licence, please see LICENSE.txt
*/
#include <re2/re2.h>
+#include <string>
+using namespace std;
extern "C" {
#include <ruby.h>
@@ -28,11 +30,11 @@
} re2_pattern;
typedef struct {
re2::StringPiece *matches;
int number_of_matches;
- VALUE regexp, string;
+ VALUE regexp, text;
} re2_matchdata;
VALUE re2_mRE2, re2_cRegexp, re2_cMatchData;
/* Symbols used in RE2 options. */
@@ -41,11 +43,11 @@
id_perl_classes, id_word_boundary, id_one_line;
void re2_matchdata_mark(re2_matchdata* self)
{
rb_gc_mark(self->regexp);
- rb_gc_mark(self->string);
+ rb_gc_mark(self->text);
}
void re2_matchdata_free(re2_matchdata* self)
{
if (self->matches) {
@@ -82,11 +84,11 @@
re2_matchdata_string(VALUE self)
{
re2_matchdata *m;
Data_Get_Struct(self, re2_matchdata, m);
- return m->string;
+ return m->text;
}
/*
* Returns the number of elements in the match array (including nils).
*
@@ -139,37 +141,65 @@
static VALUE
re2_matchdata_to_a(VALUE self)
{
int i;
re2_matchdata *m;
+ re2::StringPiece match;
+
Data_Get_Struct(self, re2_matchdata, m);
VALUE array = rb_ary_new2(m->number_of_matches);
for (i = 0; i < m->number_of_matches; i++) {
if (m->matches[i].empty()) {
rb_ary_push(array, Qnil);
} else {
- rb_ary_push(array, rb_str_new(m->matches[i].data(), m->matches[i].size()));
+ match = m->matches[i];
+ rb_ary_push(array, rb_str_new(match.data(), match.size()));
}
}
return array;
}
static VALUE
re2_matchdata_nth_match(int nth, VALUE self)
{
re2_matchdata *m;
+ re2::StringPiece match;
+
Data_Get_Struct(self, re2_matchdata, m);
+ match = m->matches[nth];
- if (nth >= m->number_of_matches || m->matches[nth].empty()) {
+ if (nth >= m->number_of_matches || match.empty()) {
return Qnil;
} else {
- return rb_str_new(m->matches[nth].data(), m->matches[nth].size());
+ return rb_str_new(match.data(), match.size());
}
}
+ static VALUE
+ re2_matchdata_named_match(const char* name, VALUE self)
+ {
+ int idx;
+ re2_matchdata *m;
+ re2_pattern *p;
+ map<string, int> groups;
+ string name_as_string(name);
+
+ Data_Get_Struct(self, re2_matchdata, m);
+ Data_Get_Struct(m->regexp, re2_pattern, p);
+
+ groups = p->pattern->NamedCapturingGroups();
+
+ if (groups.count(name_as_string) == 1) {
+ idx = groups[name_as_string];
+ return re2_matchdata_nth_match(idx, self);
+ } else {
+ return Qnil;
+ }
+ }
+
/*
- * Retrieve zero, one or more matches by index.
+ * Retrieve zero, one or more matches by index or name.
*
* @return [Array<String, nil>, String, Boolean]
*
* @overload [](index)
* Access a particular match by index.
@@ -196,18 +226,32 @@
* @param [Range] range the range of match indexes to fetch
* @return [Array<String, nil>] the specified matches
* @example
* m = RE2('(\d+)').match("bob 123")
* m[0..1] #=> "[123", "123"]
+ *
+ * @overload [](name)
+ * Access a particular match by name.
+ *
+ * @param [String, Symbol] name the name of the match to fetch
+ * @return [String, nil] the specific match
+ * @example
+ * m = RE2('(?P<number>\d+)').match("bob 123")
+ * m["number"] #=> "123"
+ * m[:number] #=> "123"
*/
static VALUE
re2_matchdata_aref(int argc, VALUE *argv, VALUE self)
{
VALUE idx, rest;
rb_scan_args(argc, argv, "11", &idx, &rest);
- if (!NIL_P(rest) || !FIXNUM_P(idx) || FIX2INT(idx) < 0) {
+ if (TYPE(idx) == T_STRING) {
+ return re2_matchdata_named_match(StringValuePtr(idx), self);
+ } else if (TYPE(idx) == T_SYMBOL) {
+ return re2_matchdata_named_match(rb_id2name(SYM2ID(idx)), self);
+ } else if (!NIL_P(rest) || !FIXNUM_P(idx) || FIX2INT(idx) < 0) {
return rb_ary_aref(argc, argv, re2_matchdata_to_a(self));
} else {
return re2_matchdata_nth_match(FIX2INT(idx), self);
}
}
@@ -389,13 +433,13 @@
one_line = rb_hash_aref(options, ID2SYM(id_one_line));
if (!NIL_P(one_line)) {
re2_options.set_one_line(RTEST(one_line));
}
- p->pattern = new (std::nothrow) RE2(StringValuePtr(pattern), re2_options);
+ p->pattern = new (nothrow) RE2(StringValuePtr(pattern), re2_options);
} else {
- p->pattern = new (std::nothrow) RE2(StringValuePtr(pattern));
+ p->pattern = new (nothrow) RE2(StringValuePtr(pattern));
}
if (p->pattern == 0) {
rb_raise(rb_eNoMemError, "not enough memory to allocate RE2 object");
}
@@ -772,10 +816,36 @@
Data_Get_Struct(self, re2_pattern, p);
return INT2FIX(p->pattern->NumberOfCapturingGroups());
}
/*
+ * Returns a hash of names to capturing indices of groups.
+ *
+ * @return [Hash] a hash of names to capturing indices
+ */
+ static VALUE
+ re2_regexp_named_capturing_groups(VALUE self)
+ {
+ VALUE capturing_groups;
+ re2_pattern *p;
+ map<string, int> groups;
+ map<string, int>::iterator iterator;
+
+ Data_Get_Struct(self, re2_pattern, p);
+ groups = p->pattern->NamedCapturingGroups();
+ capturing_groups = rb_hash_new();
+
+ for (iterator = groups.begin(); iterator != groups.end(); iterator++) {
+ rb_hash_aset(capturing_groups,
+ rb_str_new(iterator->first.data(), iterator->first.size()),
+ INT2FIX(iterator->second));
+ }
+
+ return capturing_groups;
+ }
+
+ /*
* Match the pattern against the given +text+ and return either
* a boolean (if no submatches are required) or a {RE2::MatchData}
* instance.
*
* @return [Boolean, RE2::MatchData]
@@ -834,34 +904,40 @@
n = NUM2INT(number_of_matches);
} else {
n = p->pattern->NumberOfCapturingGroups();
}
- re2::StringPiece text_as_string_piece(StringValuePtr(text));
-
if (n == 0) {
- matched = p->pattern->Match(text_as_string_piece, 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
+#if defined(HAVE_ENDPOS_ARGUMENT)
+ matched = p->pattern->Match(StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, 0, 0);
+#else
+ matched = p->pattern->Match(StringValuePtr(text), 0, RE2::UNANCHORED, 0, 0);
+#endif
return BOOL2RUBY(matched);
} else {
/* Because match returns the whole match as well. */
n += 1;
matchdata = rb_class_new_instance(0, 0, re2_cMatchData);
Data_Get_Struct(matchdata, re2_matchdata, m);
- m->matches = new (std::nothrow) re2::StringPiece[n];
+ m->matches = new (nothrow) re2::StringPiece[n];
m->regexp = self;
- m->string = rb_str_dup(text);
- rb_str_freeze(m->string);
+ m->text = rb_str_dup(text);
+ rb_str_freeze(m->text);
if (m->matches == 0) {
rb_raise(rb_eNoMemError, "not enough memory to allocate StringPieces for matches");
}
m->number_of_matches = n;
- matched = p->pattern->Match(text_as_string_piece, 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, m->matches, n);
+#if defined(HAVE_ENDPOS_ARGUMENT)
+ matched = p->pattern->Match(StringValuePtr(text), 0, (int)RSTRING_LEN(text), RE2::UNANCHORED, m->matches, n);
+#else
+ matched = p->pattern->Match(StringValuePtr(text), 0, RE2::UNANCHORED, m->matches, n);
+#endif
if (matched) {
return matchdata;
} else {
return Qnil;
@@ -910,23 +986,22 @@
UNUSED(self);
VALUE repl;
re2_pattern *p;
/* Convert all the inputs to be pumped into RE2::Replace. */
- std::string str_as_string(StringValuePtr(str));
- re2::StringPiece rewrite_as_string_piece(StringValuePtr(rewrite));
+ string str_as_string(StringValuePtr(str));
/* Do the replacement. */
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
Data_Get_Struct(pattern, re2_pattern, p);
- RE2::Replace(&str_as_string, *p->pattern, rewrite_as_string_piece);
+ RE2::Replace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
} else {
- RE2::Replace(&str_as_string, StringValuePtr(pattern), rewrite_as_string_piece);
+ RE2::Replace(&str_as_string, StringValuePtr(pattern), StringValuePtr(rewrite));
}
/* Save the replacement as a VALUE. */
- repl = rb_str_new(str_as_string.c_str(), str_as_string.length());
+ repl = rb_str_new(str_as_string.data(), str_as_string.size());
/* Replace the original string with the replacement. */
if (RSTRING_LEN(str) != RSTRING_LEN(repl)) {
rb_str_resize(str, RSTRING_LEN(repl));
}
@@ -959,24 +1034,23 @@
UNUSED(self);
/* Convert all the inputs to be pumped into RE2::GlobalReplace. */
re2_pattern *p;
- std::string str_as_string(StringValuePtr(str));
- re2::StringPiece rewrite_as_string_piece(StringValuePtr(rewrite));
+ string str_as_string(StringValuePtr(str));
VALUE repl;
/* Do the replacement. */
if (rb_obj_is_kind_of(pattern, re2_cRegexp)) {
Data_Get_Struct(pattern, re2_pattern, p);
- RE2::GlobalReplace(&str_as_string, *p->pattern, rewrite_as_string_piece);
+ RE2::GlobalReplace(&str_as_string, *p->pattern, StringValuePtr(rewrite));
} else {
- RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern), rewrite_as_string_piece);
+ RE2::GlobalReplace(&str_as_string, StringValuePtr(pattern), StringValuePtr(rewrite));
}
/* Save the replacement as a VALUE. */
- repl = rb_str_new(str_as_string.c_str(), str_as_string.length());
+ repl = rb_str_new(str_as_string.data(), str_as_string.size());
/* Replace the original string with the replacement. */
if (RSTRING_LEN(str) != RSTRING_LEN(repl)) {
rb_str_resize(str, RSTRING_LEN(repl));
}
@@ -997,12 +1071,11 @@
*/
static VALUE
re2_QuoteMeta(VALUE self, VALUE unquoted)
{
UNUSED(self);
- re2::StringPiece unquoted_as_string_piece(StringValuePtr(unquoted));
- std::string quoted_string = RE2::QuoteMeta(unquoted_as_string_piece);
+ string quoted_string = RE2::QuoteMeta(StringValuePtr(unquoted));
return rb_str_new(quoted_string.data(), quoted_string.size());
}
void
Init_re2()
@@ -1028,9 +1101,10 @@
rb_define_method(re2_cRegexp, "error", RUBY_METHOD_FUNC(re2_regexp_error), 0);
rb_define_method(re2_cRegexp, "error_arg", RUBY_METHOD_FUNC(re2_regexp_error_arg), 0);
rb_define_method(re2_cRegexp, "program_size", RUBY_METHOD_FUNC(re2_regexp_program_size), 0);
rb_define_method(re2_cRegexp, "options", RUBY_METHOD_FUNC(re2_regexp_options), 0);
rb_define_method(re2_cRegexp, "number_of_capturing_groups", RUBY_METHOD_FUNC(re2_regexp_number_of_capturing_groups), 0);
+ rb_define_method(re2_cRegexp, "named_capturing_groups", RUBY_METHOD_FUNC(re2_regexp_named_capturing_groups), 0);
rb_define_method(re2_cRegexp, "match", RUBY_METHOD_FUNC(re2_regexp_match), -1);
rb_define_method(re2_cRegexp, "match?", RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
rb_define_method(re2_cRegexp, "=~", RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
rb_define_method(re2_cRegexp, "===", RUBY_METHOD_FUNC(re2_regexp_match_query), 1);
rb_define_method(re2_cRegexp, "to_s", RUBY_METHOD_FUNC(re2_regexp_to_s), 0);