#include "rb_oniguruma.h"
#include "rb_oniguruma_match.h"
#include "rb_oniguruma_struct_args.h"
/*
 * Compatibility shim: when building against Oniguruma 5 or newer and no
 * enc_len macro is available yet, define it in terms of ONIGENC_MBC_ENC_LEN.
 */
#if ONIGURUMA_VERSION_MAJOR >= 5
# ifndef enc_len
# define enc_len(enc, byte) ONIGENC_MBC_ENC_LEN(enc, byte)
# endif
#endif
/*
 * Decode the character of the Ruby string `rep` that starts at byte offset
 * `pos`, using encoding `enc`.
 *
 *   cp  - lvalue that receives the decoded code point
 *   cpl - lvalue that receives the byte length reported by enc_len
 *
 * The end pointer handed to ONIGENC_MBC_TO_CODE is one past the last byte of
 * `rep` (exclusive end), so a multibyte character sitting at the very end of
 * the string is decoded from all of its bytes.
 */
#define og_oniguruma_oregexp_get_code_point(cp, cpl, enc, rep, pos) do { \
    (cp) = ONIGENC_MBC_TO_CODE((enc), OG_STRING_PTR(rep) + (pos), \
                               OG_STRING_PTR(rep) + RSTRING_LEN(rep)); \
    (cpl) = enc_len((enc), (OG_STRING_PTR(rep) + (pos))); \
} while(0)
/*
 * Raise RuntimeError ("string modified") unless the Ruby string `s` still
 * uses buffer `p` and still has length `len`.
 */
static inline void
og_oniguruma_string_modification_check(VALUE s, char *p, long len)
{
    int same_buffer = (RSTRING_PTR(s) == p);
    int same_length = (RSTRING_LEN(s) == len);

    if (same_buffer && same_length)
        return;

    rb_raise(rb_eRuntimeError, "string modified");
}
#pragma mark Class Methods
/*
 * Document-method: escape
 *
 * call-seq:
 *   ORegexp.escape(str) => a_str
 *   ORegexp.quote(str)  => a_str
 *
 * Escapes any characters that would have special meaning in a regular
 * expression. Returns a new escaped string, or self if no characters are
 * escaped. For any string,
 * <code>ORegexp.escape(str) =~ str</code> will be true.
 *
 *   ORegexp.escape('\\*?{}.')   #=> \\\\\*\?\{\}\.
 *
 * The work is delegated to Ruby's own Regexp.escape.
 */
static VALUE
og_oniguruma_oregexp_escape(int argc, VALUE *argv, VALUE self)
{
    ID escape_id = rb_intern("escape");

    return rb_funcall3(rb_cRegexp, escape_id, argc, argv);
}
/*
 * Document-method: last_match
 *
 * call-seq:
 *   ORegexp.last_match           => matchdata
 *   ORegexp.last_match(fixnum)   => str
 *
 * The first form returns the <code>MatchData</code> object generated by the
 * last successful pattern match. The second form returns the nth field in
 * this <code>MatchData</code> object.
 *
 *   ORegexp.new( 'c(.)t' ) =~ 'cat'   #=> 0
 *   ORegexp.last_match                #=> #<MatchData>
 *   ORegexp.last_match(0)             #=> "cat"
 *   ORegexp.last_match(1)             #=> "a"
 *   ORegexp.last_match(2)             #=> nil
 */
static VALUE
og_oniguruma_oregexp_last_match(int argc, VALUE *argv, VALUE self)
{
    VALUE index, last_match;

    rb_scan_args(argc, argv, "01", &index);
    last_match = rb_cv_get(self, "@@last_match");

    /* No index given: hand back the stored match object itself. */
    if (NIL_P(index))
        return last_match;

    /* Otherwise forward the index to the stored object's #[] method. */
    return rb_funcall3(last_match, rb_intern("[]"), 1, &index);
}
/* Constructor Methods */
/*
 * Free callback registered with Data_Wrap_Struct: releases the compiled
 * Oniguruma regex and then the og_ORegexp wrapper itself.
 */
static void
og_oniguruma_oregexp_free(void *arg)
{
    og_ORegexp *wrapper = arg;

    onig_free(wrapper->reg);
    free(wrapper);
}
/*
 * Document-method: initialize
 *
 * call-seq:
 *   ORegexp.new( pattern, options_hash )
 *   ORegexp.new( pattern, option_str, encoding_str=nil, syntax_str=nil)
 *
 * Constructs a new regular expression from <i>pattern</i>, which is a
 * <code>String</code>. The second parameter may be a <code>Hash</code>
 * of the form:
 *
 *   { :options => option_value, :encoding => encoding_value, :syntax => syntax_value }
 *
 * Where <code>option_value</code> is a bitwise <code>OR</code> of
 * <code>Oniguruma::OPTION_XXX</code> constants; <code>encoding_value</code>
 * is one of the <code>Oniguruma::ENCODING_XXX</code> constants; and
 * <code>syntax_value</code> is one of the <code>Oniguruma::SYNTAX_XXX</code>
 * constants.
 *
 *   r1 = ORegexp.new('^a-z+:\\s+\w+')                           #=> /^a-z+:\s+\w+/
 *   r2 = ORegexp.new('cat', :options => OPTION_IGNORECASE )     #=> /cat/i
 *   r3 = ORegexp.new('dog', :options => OPTION_EXTEND )         #=> /dog/x
 *
 *   # Accept Java syntax on SJIS encoding:
 *   r4 = ORegexp.new('ape', :syntax => SYNTAX_JAVA, :encoding => ENCODING_SJIS) #=> /ape/
 *
 * The second form uses string shortcuts to set options, encoding and syntax:
 *
 *   r = ORegexp.new('cat', 'i', 'utf8', 'java')
 */
/*
 * Allocator for ORegexp instances.
 *
 * Allocates an og_ORegexp wrapper with no compiled regex yet (reg == NULL)
 * and wraps it in a Ruby object of class `klass`, registering
 * og_oniguruma_oregexp_free as the free callback. Raises NoMemoryError (via
 * rb_memerror) when the wrapper cannot be allocated instead of dereferencing
 * a NULL pointer.
 */
static VALUE
og_oniguruma_oregexp_alloc(VALUE klass)
{
    og_ORegexp *oregexp = malloc(sizeof *oregexp);

    if (oregexp == NULL)
        rb_memerror();

    oregexp->reg = NULL;
    return Data_Wrap_Struct(klass, 0, og_oniguruma_oregexp_free, oregexp);
}
/* Instance Methods */
/*
 * Compile `regex` with the options, encoding and syntax stored in self's
 * @options, @encoding and @syntax instance variables, and store the result
 * in the wrapped og_ORegexp.
 *
 * The pattern is compiled into a temporary first; only on success is a
 * previously compiled regex released and replaced, so re-running this on an
 * already initialized object neither leaks the old regex nor loses it when
 * the new pattern is invalid.
 *
 * Returns ONIG_NORMAL on success. Raises ArgumentError carrying the
 * Oniguruma error message when compilation fails.
 */
static int
og_oniguruma_oregexp_compile(VALUE self, VALUE regex)
{
    int result;
    og_ORegexp *oregexp;
    OnigRegex compiled = NULL;
    OnigErrorInfo error_info;
    UChar error_string[ONIG_MAX_ERROR_MESSAGE_LEN];

    Data_Get_Struct(self, og_ORegexp, oregexp);
    StringValue(regex);

    result = onig_new(&compiled, /* Regexp Object */
        OG_STRING_PTR(regex), OG_STRING_PTR(regex) + RSTRING_LEN(regex),
        og_oniguruma_extract_option(rb_iv_get(self, "@options")),
        og_oniguruma_extract_encoding(rb_iv_get(self, "@encoding")),
        og_oniguruma_extract_syntax(rb_iv_get(self, "@syntax")),
        &error_info);

    if (result != ONIG_NORMAL) {
        onig_error_code_to_str(error_string, result, &error_info);
        /* error_string is UChar[]; %s expects a char pointer. */
        rb_raise(rb_eArgError, "Oniguruma Error: %s", (char *)error_string);
    }

    /* Swap in the new regex, releasing any earlier compilation. */
    if (oregexp->reg != NULL)
        onig_free(oregexp->reg);
    oregexp->reg = compiled;

    return result;
}
/*
 * Read :encoding, :options and :syntax from `hash` and store them in self's
 * @encoding, @options and @syntax instance variables.
 *
 * Missing (nil) entries fall back to the module constants ENCODING_ASCII,
 * OPTION_DEFAULT and SYNTAX_DEFAULT respectively.
 *
 * Raises TypeError when `hash` is not a Hash, rather than handing an
 * arbitrary object to rb_hash_aref.
 */
static void
og_oniguruma_oregexp_options_parse(VALUE self, VALUE hash)
{
    VALUE options, encoding, syntax, og_mOniguruma;

    Check_Type(hash, T_HASH);

    og_mOniguruma = rb_const_get(rb_cObject, rb_intern(OG_M_ONIGURUMA));

    encoding = rb_hash_aref(hash, ID2SYM(rb_intern("encoding")));
    options = rb_hash_aref(hash, ID2SYM(rb_intern("options")));
    syntax = rb_hash_aref(hash, ID2SYM(rb_intern("syntax")));

    if (NIL_P(encoding))
        encoding = rb_const_get(og_mOniguruma, rb_intern("ENCODING_ASCII"));
    if (NIL_P(options))
        options = rb_const_get(og_mOniguruma, rb_intern("OPTION_DEFAULT"));
    if (NIL_P(syntax))
        syntax = rb_const_get(og_mOniguruma, rb_intern("SYNTAX_DEFAULT"));

    rb_iv_set(self, "@encoding", encoding);
    rb_iv_set(self, "@options", options);
    rb_iv_set(self, "@syntax", syntax);
}
/*
 * Shared tail of ORegexp#initialize: stores the pattern, parses the options
 * hash and compiles the regex. Returns self.
 *
 * The pattern is duplicated before it is stored in @pattern, so later
 * changes to the caller's string cannot drift away from the compiled regex,
 * and freezing @pattern (as #source does) does not freeze the caller's
 * object.
 */
static VALUE
og_oniguruma_oregexp_initialize_real(VALUE self, VALUE re, VALUE options)
{
    VALUE pattern = rb_str_dup(StringValue(re)); /* Take a copy */

    rb_iv_set(self, "@pattern", pattern);
    og_oniguruma_oregexp_options_parse(self, options);
    og_oniguruma_oregexp_compile(self, pattern);
    return self;
}
/*
 * Upper-case the ASCII letters 'a'..'z' of the NUL-terminated string `str`
 * in place. All other bytes are left untouched.
 */
static void
og_oniguruma_oregexp_upper(char *str)
{
    char *cursor;

    for (cursor = str; *cursor != '\0'; ++cursor) {
        if (*cursor >= 'a' && *cursor <= 'z')
            *cursor = (char)(*cursor - ('a' - 'A'));
    }
}
/*
 * Resolve a shortcut string such as "utf8" or "java" to the module constant
 * <prefix><UPPERCASED NAME> (for example ENCODING_UTF8).
 *
 *   mod        - module holding the constants
 *   prefix     - constant name prefix, e.g. "ENCODING_"
 *   prefix_len - length of prefix without its terminating NUL
 *   name       - Ruby value naming the constant; converted with StringValue,
 *                so non-string values raise TypeError
 *
 * Returns the constant's value, or Qundef when no such constant is defined.
 * The scratch buffer is sized from the same expression used to bound
 * snprintf and is released before any constant lookup that could raise.
 */
static VALUE
og_oniguruma_oregexp_shortcut_const(VALUE mod, const char *prefix,
                                    size_t prefix_len, VALUE name)
{
    size_t size;
    char *const_name;
    ID const_id;

    StringValue(name);

    size = prefix_len + (size_t)RSTRING_LEN(name) + 1;
    const_name = malloc(size);
    if (const_name == NULL)
        rb_memerror();

    snprintf(const_name, size, "%s%s", prefix, RSTRING_PTR(name));
    og_oniguruma_oregexp_upper(const_name);
    const_id = rb_intern(const_name);
    free(const_name);

    if (!rb_const_defined(mod, const_id))
        return Qundef;
    return rb_const_get(mod, const_id);
}

/*
 * ORegexp#initialize (see the Document-method: initialize comment).
 *
 * Accepts either (pattern, options_hash) or
 * (pattern, option_str, encoding_str = nil, syntax_str = nil).
 * In the string form each character of option_str is looked up in
 * OPT_SHORTCUTS and OR-ed into the options value; the encoding and syntax
 * strings are mapped to the ENCODING_xxx / SYNTAX_xxx constants and are
 * ignored when no such constant exists.
 */
static VALUE
og_oniguruma_oregexp_initialize(int argc, VALUE *argv, VALUE self)
{
    long i, opts;
    char *byte;
    VALUE re, args, options, shortcuts,
          cut, opt, enc, syn, value, og_mOniguruma;

    og_mOniguruma = rb_const_get(rb_cObject, rb_intern(OG_M_ONIGURUMA));
    rb_scan_args(argc, argv, "1*", &re, &args);

    if (TYPE(rb_ary_entry(args, 0)) == T_STRING) {
        options = rb_hash_new();
        shortcuts = rb_const_get(og_mOniguruma, rb_intern("OPT_SHORTCUTS"));
        opt = rb_ary_shift(args);
        enc = rb_ary_shift(args);
        syn = rb_ary_shift(args);

        /* opt is known to be a String here (checked above). */
        opts = 0;
        for (i = 0, byte = RSTRING_PTR(opt); i < RSTRING_LEN(opt); i++, byte++) {
            cut = rb_hash_aref(shortcuts, rb_str_new(byte, 1));
            if (!NIL_P(cut))
                opts = opts | FIX2INT(cut);
        }
        rb_hash_aset(options, ID2SYM(rb_intern("options")), INT2FIX(opts));

        if (!NIL_P(enc)) {
            value = og_oniguruma_oregexp_shortcut_const(og_mOniguruma,
                        "ENCODING_", sizeof("ENCODING_") - 1, enc);
            if (value != Qundef)
                rb_hash_aset(options, ID2SYM(rb_intern("encoding")), value);
        }

        if (!NIL_P(syn)) {
            value = og_oniguruma_oregexp_shortcut_const(og_mOniguruma,
                        "SYNTAX_", sizeof("SYNTAX_") - 1, syn);
            if (value != Qundef)
                rb_hash_aset(options, ID2SYM(rb_intern("syntax")), value);
        }
    } else {
        if (!NIL_P(rb_ary_entry(args, 0)))
            options = rb_ary_shift(args);
        else
            options = rb_hash_new();
    }

    return og_oniguruma_oregexp_initialize_real(self, re, options);
}
/*
 * Build a match object for `string` from `region`, remember it in the
 * class variable @@last_match of self's class, and return it.
 *
 * When the regex defines named groups, a hash filled in by
 * og_oniguruma_name_callback is stored in the match object's
 * @named_captures instance variable.
 */
static VALUE
og_oniguruma_oregexp_do_match(VALUE self, OnigRegion *region, VALUE string)
{
    og_ORegexp *oregexp;
    og_CallbackPacket names;
    VALUE match_data;

    Data_Get_Struct(self, og_ORegexp, oregexp);

    match_data = og_oniguruma_match_initialize(region, string);
    rb_cv_set(CLASS_OF(self), "@@last_match", match_data);

    /* Nothing more to do for patterns without named groups. */
    if (onig_number_of_names(oregexp->reg) <= 0)
        return match_data;

    names.region = region;
    names.hash = rb_hash_new();
    onig_foreach_name(oregexp->reg, &og_oniguruma_name_callback, &names);
    rb_iv_set(match_data, "@named_captures", names.hash);

    return match_data;
}
/*
 * Document-method: match
 *
 * call-seq:
 *   rxp.match(str)               => matchdata or nil
 *   rxp.match(str, begin, end)   => matchdata or nil
 *
 * Returns a <code>MatchData</code> object describing the match, or
 * <code>nil</code> if there was no match. This is equivalent to retrieving
 * the value of the special variable <code>$~</code> following a normal match.
 *
 *   ORegexp.new('(.)(.)(.)').match("abc")[2]   #=> "b"
 *
 * The second form allows to perform the match in a region defined by
 * <code>begin</code> and <code>end</code> while still taking into account
 * look-behinds and look-forwards.
 *
 *   ORegexp.new('1*2*').match('11221122').offset       => [4,8]
 *   ORegexp.new('(?<=2)1*2*').match('11221122').offset => [4,8]
 *
 * Compare with:
 *
 *   ORegexp.new('(?<=2)1*2*').match('11221122'[4..-1]) => nil
 *
 * <code>str</code> is converted to a String before its length is read, and
 * <code>begin</code>/<code>end</code> are converted to integers (TypeError
 * otherwise). A negative offset, or a <code>begin</code> past the end of the
 * string, yields <code>nil</code>; an <code>end</code> past the end of the
 * string is clamped to the string length. Raises ArgumentError when
 * Oniguruma reports a search error.
 */
static VALUE
og_oniguruma_oregexp_match(int argc, VALUE *argv, VALUE self)
{
    int result;
    long length, start, stop;
    OnigRegion *region;
    og_ORegexp *oregexp;
    UChar error_string[ONIG_MAX_ERROR_MESSAGE_LEN];
    VALUE string, begin, end, match;

    rb_scan_args(argc, argv, "12", &string, &begin, &end);

    /* Coerce first: the length must only be read from a real String. */
    StringValue(string);
    length = RSTRING_LEN(string);
    start = NIL_P(begin) ? 0 : NUM2LONG(begin);
    stop = NIL_P(end) ? length : NUM2LONG(end);

    rb_backref_set(Qnil);

    /* Never hand Oniguruma pointers outside the subject buffer. */
    if (start < 0 || start > length || stop < 0)
        return Qnil;
    if (stop > length)
        stop = length;

    Data_Get_Struct(self, og_ORegexp, oregexp);
    region = onig_region_new();
    result = onig_search(oregexp->reg,
        OG_STRING_PTR(string), OG_STRING_PTR(string) + length,
        OG_STRING_PTR(string) + start, OG_STRING_PTR(string) + stop,
        region, ONIG_OPTION_NONE);

    if (result >= 0) {
        match = og_oniguruma_oregexp_do_match(self, region, string);
        onig_region_free(region, 1);
        rb_backref_set(match);
        rb_match_busy(match);
        return match;
    }

    onig_region_free(region, 1);
    if (result != ONIG_MISMATCH) {
        onig_error_code_to_str(error_string, result);
        /* error_string is UChar[]; %s expects a char pointer. */
        rb_raise(rb_eArgError, OG_M_ONIGURUMA " Error: %s", (char *)error_string);
    }
    return Qnil;
}
/*
 * Expand the replacement template `replacement` for the match described by
 * `region` (offsets into `str`) and append the result to `buffer`.
 *
 * Characters other than a backslash are copied verbatim. After a backslash:
 *   - up to two decimal digits select a numbered group;
 *   - \\ appends a backslash, \& the matched text, \` the text before the
 *     match, \' the text after the match, \+ the highest-numbered group
 *     that participated in the match;
 *   - \<name> appends the named group looked up via
 *     onig_name_to_backref_number;
 *   - anything else is copied through together with the backslash.
 *
 * Always returns Qnil; the output is delivered through `buffer`.
 */
static VALUE
og_oniguruma_oregexp_do_replacement(VALUE self, VALUE buffer, VALUE str, VALUE replacement, OnigRegion *region)
{
const UChar *subj; long subj_len;
og_ORegexp *oregexp;
int position = 0, digits, group, named_group_pos,
named_group_begin, named_group_end,
code_point_len, previous_code_point_len;
OnigCodePoint code_point;
OnigEncoding encoding;
subj = OG_STRING_PTR(str); subj_len = RSTRING_LEN(str);
Data_Get_Struct(self, og_ORegexp, oregexp);
encoding = onig_get_encoding(oregexp->reg);
/* Walk the template one character at a time; `position` is a byte offset. */
while (position < RSTRING_LEN(replacement))
{
og_oniguruma_oregexp_get_code_point(code_point, code_point_len, encoding, replacement, position);
/* Guard against a zero length, which would stall the loop. */
if (code_point_len == 0) {
rb_warn("Anomally: length of char '%d' is 0", code_point);
code_point_len = 1;
}
position += code_point_len;
/* Ordinary character: copy it through unchanged. */
if (code_point != 0x5c) { /* 0x5c is a backslash \ */
rb_str_buf_cat(buffer, RSTRING_PTR(replacement) + position - code_point_len, code_point_len);
continue;
}
/* A backslash at the very end of the template is copied literally. */
if (position >= RSTRING_LEN(replacement)) {
rb_str_buf_cat(buffer, RSTRING_PTR(replacement) + (position - code_point_len), code_point_len);
break;
}
/* Collect up to two digits following the backslash as a group number. */
digits = group = 0;
while(1) /* n + 1/2 times loop. break out manually. */
{
if (position >= RSTRING_LEN(replacement)) break;
og_oniguruma_oregexp_get_code_point(code_point, code_point_len, encoding, replacement, position);
if (!ONIGENC_IS_CODE_DIGIT(encoding, code_point)) break;
group = group * 10 + (code_point - '0');
position += code_point_len;
digits++;
if (digits >= 2) break; /* limit 99 groups */
}
if (digits == 0) {
/*
 * NOTE(review): code_point_len was last set by the digit loop above, i.e.
 * it is the length of the character AFTER the backslash, not of the
 * backslash itself. The '<' and default branches use
 * previous_code_point_len to step back to the backslash; for a multibyte
 * character this looks like it would start the copy too early - confirm.
 */
previous_code_point_len = code_point_len;
og_oniguruma_oregexp_get_code_point(code_point, code_point_len, encoding, replacement, position);
switch (code_point)
{
case '\\': // \ literal, just cat and keep going
rb_str_buf_cat(buffer, RSTRING_PTR(replacement) + position, code_point_len);
position += code_point_len;
break;
case '&': // matched substring
rb_str_buf_cat(buffer, (char*)(subj + region->beg[0]), region->end[0] - region->beg[0]);
position += code_point_len;
break;
case '`': // prematch
rb_str_buf_cat(buffer, (char*)subj, region->beg[0]);
position += code_point_len;
break;
case '\'': // postmatch
rb_str_buf_cat(buffer, (char*)(subj + region->end[0]), subj_len - region->end[0]);
position += code_point_len;
break;
case '+': // last matched
position += code_point_len;
/* Scan groups from the highest number down; group 0 is not considered. */
for (group = region->num_regs - 1; group > 0; group--)
{
if (region->beg[group] != -1) {
rb_str_buf_cat(buffer, (char*)(subj + region->beg[group]),
region->end[group] - region->beg[group]);
break;
}
}
break;
//case 'k': // Oniguruma style named group reference
// Eat the 'k' code point and let it fall through to the '<'
//og_oniguruma_oregexp_get_code_point(code_point, code_point_len, encoding, replacement, position);
//position += code_point_len;
case '<': // Oniguruma Gem named group reference (for compatibility)
/* The name runs over word characters up to the closing '>'. */
named_group_end = named_group_begin = named_group_pos = position + code_point_len;
while (named_group_pos < RSTRING_LEN(replacement))
{
og_oniguruma_oregexp_get_code_point(code_point, code_point_len, encoding, replacement, named_group_pos);
named_group_pos += code_point_len;
if (code_point == '>') break;
if (ONIGENC_IS_CODE_WORD(encoding, code_point))
named_group_end += code_point_len;
else break;
}
if (code_point != '>' || named_group_end == named_group_begin) { // place backslash and '<'
rb_str_buf_cat(buffer, RSTRING_PTR(replacement) + (position - previous_code_point_len),
previous_code_point_len + code_point_len);
position += code_point_len;
} else { // lookup for group and subst for that value
group = onig_name_to_backref_number(oregexp->reg,
OG_STRING_PTR(replacement) + named_group_begin,
OG_STRING_PTR(replacement) + named_group_end, region);
if (group >= 0)
rb_str_buf_cat(buffer, (char*)(subj + region->beg[group]),
region->end[group] - region->beg[group]);
position = named_group_pos;
}
break;
default:
/* Unknown escape: emit the backslash and the following character as-is. */
rb_str_buf_cat(buffer, RSTRING_PTR(replacement) + (position - previous_code_point_len),
previous_code_point_len + code_point_len);
position += code_point_len;
break;
} /* switch (code_point) */
} else {
/* Numbered group: appended only if it exists and took part in the match. */
if (group < region->num_regs && region->beg[group] >= 0)
rb_str_buf_cat(buffer, (char*)(subj + region->beg[group]),
region->end[group] - region->beg[group]);
} /* if (digits == 0) */
} /* while (position < RSTRING_LEN(replacement)) */
return Qnil;
}
/*
 * Worker behind sub, sub!, gsub and gsub!; invoked through rb_ensure by
 * og_oniguruma_oregexp_do_substitution_safe.
 *
 * `args` carries self, the Ruby argument list, the global / update_self
 * flags and the OnigRegion to search with. With a block the Ruby arguments
 * are (str); without one they are (str, replacement) and replacement must
 * be a String.
 *
 * Returns nil (update_self) or a duplicate of str (otherwise) when nothing
 * matches. On a match it returns str after replacing its contents
 * (update_self) or the newly built string.
 */
static VALUE
og_oniguruma_oregexp_do_substitution(og_SubstitutionArgs *args)
{
int tainted_replacement = 0;
VALUE str, replacement, block;
og_ORegexp *oregexp;
long begin = 0, end = 0, last_end = 0, multibyte_diff = 0;
UChar *subj; int subj_len;
VALUE buffer, block_match, block_result;
OnigEncoding encoding;
/* Parse the arguments */
if (rb_block_given_p()) {
rb_scan_args(args->argc, args->argv, "1&", &str, &block);
} else {
rb_scan_args(args->argc, args->argv, "2", &str, &replacement);
Check_Type(replacement, T_STRING);
if (OBJ_TAINTED(replacement))
tainted_replacement = 1;
}
// Ensure str is a string
StringValue(str);
Data_Get_Struct(args->self, og_ORegexp, oregexp);
/* Cache the subject buffer; the block branch below re-validates it after each yield. */
subj = OG_STRING_PTR(str); subj_len = RSTRING_LEN(str);
begin = onig_search(oregexp->reg,
subj, subj + subj_len,
subj, subj + subj_len,
args->region, ONIG_OPTION_NONE);
/* No match at all: nothing to substitute. */
if (begin < 0) {
if (args->update_self)
return Qnil;
return rb_str_dup(str);
}
buffer = rb_str_buf_new(subj_len);
encoding = onig_get_encoding(oregexp->reg);
do {
last_end = end;
begin = args->region->beg[0];
end = args->region->end[0];
/* Copy the unmatched text between the previous match and this one. */
rb_str_buf_cat(buffer, (char*)(subj + last_end), begin - last_end);
if (rb_block_given_p()) {
/* yielding to a block */
block_match = og_oniguruma_oregexp_do_match(args->self, args->region, str);
rb_backref_set(block_match);
rb_match_busy(block_match);
block_result = rb_yield(block_match);
/* Raises if the block changed str's buffer or length. */
og_oniguruma_string_modification_check(str, (char*)subj, subj_len);
replacement = rb_obj_as_string(block_result);
rb_str_append(buffer, replacement);
} else {
og_oniguruma_oregexp_do_replacement(args->self, buffer, str, replacement, args->region);
}
/* sub / sub! stop after the first match. */
if (!args->global) break;
/* Finish this match so we can do the next one */
if (begin == end) {
/* Empty match: copy one character and step past it so the search advances. */
if (subj_len <= end) break;
multibyte_diff = enc_len(encoding, (subj + end));
rb_str_buf_cat(buffer, (char*)(subj + end), multibyte_diff);
end += multibyte_diff;
}
begin = onig_search(oregexp->reg,
subj, subj + subj_len,
subj + end, subj + subj_len,
args->region, ONIG_OPTION_NONE);
} while (begin >= 0);
/* Append whatever follows the last match. */
rb_str_buf_cat(buffer, (char*)(subj + end), subj_len - end);
/* Propagate taint from the replacement (when given) and from str. */
if (tainted_replacement)
OBJ_INFECT(buffer, replacement);
OBJ_INFECT(buffer, str);
if (args->update_self) {
rb_funcall(str, rb_intern("replace"), 1, buffer);
return str;
}
return buffer;
}
/*
 * Ensure-callback used with rb_ensure: frees `region` (including the region
 * structure itself) and returns Qnil.
 */
static VALUE
og_oniguruma_oregexp_do_cleanup(OnigRegion *region)
{
    const int free_self = 1;

    onig_region_free(region, free_self);
    return Qnil;
}
/*
 * Run og_oniguruma_oregexp_do_substitution with a freshly allocated
 * OnigRegion, using rb_ensure so that og_oniguruma_oregexp_do_cleanup frees
 * the region however the substitution ends.
 *
 *   global      - non-zero to replace every match, zero for the first only
 *   update_self - non-zero for the in-place (bang) variants
 */
static VALUE
og_oniguruma_oregexp_do_substitution_safe(VALUE self,
    int argc, VALUE *argv, int global, int update_self)
{
    og_SubstitutionArgs call_args;
    OnigRegion *scratch = onig_region_new();

    og_SubstitutionArgs_set(&call_args, self, argc, argv, global, update_self, scratch);

    return rb_ensure(og_oniguruma_oregexp_do_substitution, (VALUE)&call_args,
                     og_oniguruma_oregexp_do_cleanup, (VALUE)scratch);
}
/*
 * Document-method: gsub
 *
 * call-seq:
 *   rxp.gsub(str, replacement)
 *   rxp.gsub(str) {|match_data| ... }
 *
 * Returns a copy of _str_ with _all_ occurrences of _rxp_ pattern
 * replaced with either _replacement_ or the value of the block.
 *
 * If a string is used as the replacement, the sequences \1, \2,
 * and so on may be used to interpolate successive groups in the match.
 *
 * In the block form, the current MatchData object is passed in as a
 * parameter. The value returned by the block will be substituted for
 * the match on each call.
 */
static VALUE
og_oniguruma_oregexp_gsub(int argc, VALUE *argv, VALUE self)
{
    const int global = 1;
    const int update_self = 0;

    return og_oniguruma_oregexp_do_substitution_safe(self, argc, argv, global, update_self);
}
/*
 * Document-method: gsub!
 *
 * call-seq:
 *   rxp.gsub!(str, replacement)
 *   rxp.gsub!(str) {|match_data| ... }
 *
 * Performs the substitutions of ORegexp#gsub in place, returning
 * _str_, or _nil_ if no substitutions were performed.
 */
static VALUE
og_oniguruma_oregexp_gsub_bang(int argc, VALUE *argv, VALUE self)
{
    const int global = 1;
    const int update_self = 1;

    return og_oniguruma_oregexp_do_substitution_safe(self, argc, argv, global, update_self);
}
/*
 * Document-method: sub
 *
 * call-seq:
 *   rxp.sub(str, replacement)
 *   rxp.sub(str) {|match_data| ... }
 *
 * Returns a copy of _str_ with the _first_ occurrence of _rxp_ pattern
 * replaced with either _replacement_ or the value of the block.
 *
 * If a string is used as the replacement, the sequences \1, \2,
 * and so on may be used to interpolate successive groups in the match.
 *
 * In the block form, the current MatchData object is passed in as a
 * parameter. The value returned by the block will be substituted for
 * the match on each call.
 */
static VALUE
og_oniguruma_oregexp_sub(int argc, VALUE *argv, VALUE self)
{
    const int global = 0;
    const int update_self = 0;

    return og_oniguruma_oregexp_do_substitution_safe(self, argc, argv, global, update_self);
}
/*
 * Document-method: sub!
 *
 * call-seq:
 *   oregexp.sub!(str, replacement)
 *   oregexp.sub!(str) {|match_data| ... }
 *
 * Performs the substitutions of ORegexp#sub in place, returning
 * _str_, or _nil_ if no substitutions were performed.
 */
static VALUE
og_oniguruma_oregexp_sub_bang(int argc, VALUE *argv, VALUE self)
{
    const int global = 0;
    const int update_self = 1;

    return og_oniguruma_oregexp_do_substitution_safe(self, argc, argv, global, update_self);
}
/*
 * Worker behind ORegexp#scan; invoked through rb_ensure by
 * og_oniguruma_oregexp_scan.
 *
 * Repeatedly searches args->str, building a match object for every hit,
 * collecting it in an array and yielding it when a block is given. After an
 * empty match the search position is advanced by one character so the loop
 * makes progress.
 *
 * Returns the array of match objects, or Qnil when there is no match at all.
 */
static VALUE
og_oniguruma_oregexp_do_scan(og_ScanArgs *args)
{
    og_ORegexp *oregexp;
    OnigEncoding encoding;
    VALUE str, results, current;
    long match_start = 0, match_end = 0;

    Data_Get_Struct(args->self, og_ORegexp, oregexp);
    str = StringValue(args->str);

    match_start = onig_search(oregexp->reg,
        OG_STRING_PTR(str), OG_STRING_PTR(str) + RSTRING_LEN(str),
        OG_STRING_PTR(str), OG_STRING_PTR(str) + RSTRING_LEN(str),
        args->region, ONIG_OPTION_NONE);
    if (match_start < 0)
        return Qnil;

    results = rb_ary_new();
    encoding = onig_get_encoding(oregexp->reg);

    while (match_start >= 0) {
        current = og_oniguruma_oregexp_do_match(args->self, args->region, str);
        match_end = args->region->end[0];
        rb_ary_push(results, current);
        if (rb_block_given_p())
            rb_yield(current);

        /* Empty match: stop at end of string, otherwise skip one character. */
        if (match_end == match_start) {
            if (RSTRING_LEN(str) <= match_end)
                break;
            match_end += enc_len(encoding, OG_STRING_PTR(str) + match_end);
        }

        match_start = onig_search(oregexp->reg,
            OG_STRING_PTR(str), OG_STRING_PTR(str) + RSTRING_LEN(str),
            OG_STRING_PTR(str) + match_end, OG_STRING_PTR(str) + RSTRING_LEN(str),
            args->region, ONIG_OPTION_NONE);
    }

    return results;
}
/*
 * Document-method: scan
 *
 * call-seq:
 *   rxp.scan(str)                      # => [matchdata1, matchdata2,...] or nil
 *   rxp.scan(str) {|match_data| ... }  # => [matchdata1, matchdata2,...] or nil
 *
 * Both forms iterate through _str_, matching the pattern. For each match,
 * a MatchData object is generated, passed to the block (if one is given),
 * and added to the resulting array of MatchData objects.
 *
 * If _str_ does not match pattern, _nil_ is returned.
 */
static VALUE
og_oniguruma_oregexp_scan(VALUE self, VALUE str)
{
    og_ScanArgs call_args;
    OnigRegion *scratch = onig_region_new();

    og_ScanArgs_set(&call_args, self, str, scratch);

    /* rb_ensure guarantees the region is freed however the scan ends. */
    return rb_ensure(og_oniguruma_oregexp_do_scan, (VALUE)&call_args,
                     og_oniguruma_oregexp_do_cleanup, (VALUE)scratch);
}
/*
 * Document-method: casefold?
 *
 * call-seq:
 *   rxp.casefold?   => true or false
 *
 * Returns the value of the case-insensitive flag, i.e. whether the
 * OPTION_IGNORECASE bit is set in the regexp's options.
 */
static VALUE
og_oniguruma_oregexp_casefold(VALUE self)
{
    VALUE og_mOniguruma = rb_const_get(rb_cObject, rb_intern(OG_M_ONIGURUMA));
    int options = FIX2INT(rb_iv_get(self, "@options"));
    int ignore_case = FIX2INT(rb_const_get(og_mOniguruma, rb_intern("OPTION_IGNORECASE")));

    return ((options & ignore_case) > 0) ? Qtrue : Qfalse;
}
/*
 * Document-method: ==
 *
 * call-seq:
 *   rxp == other_rxp      => true or false
 *   rxp.eql?(other_rxp)   => true or false
 *
 * Equality---Two regexps are equal if their patterns are identical, they have
 * the same character set code, and their <code>#casefold?</code> values are
 * the same.
 *
 * Comparing with an object that does not carry a String @pattern and Fixnum
 * @encoding / @options (for example an Integer or nil) returns false.
 */
static VALUE
og_oniguruma_oregexp_operator_equality(VALUE self, VALUE rhs)
{
    VALUE pattern, encoding, rhs_pattern, rhs_encoding, rhs_options;

    /*
     * Validate the right-hand side before touching it: without this, nil
     * instance variables would reach rb_str_cmp / FIX2INT below.
     */
    rhs_pattern = rb_iv_get(rhs, "@pattern");
    rhs_encoding = rb_iv_get(rhs, "@encoding");
    rhs_options = rb_iv_get(rhs, "@options");
    if (TYPE(rhs_pattern) != T_STRING ||
        !FIXNUM_P(rhs_encoding) ||
        !FIXNUM_P(rhs_options))
        return Qfalse;

    encoding = rb_iv_get(self, "@encoding");
    pattern = rb_iv_get(self, "@pattern");

    if (rb_str_cmp(pattern, rhs_pattern) == 0 &&
        FIX2INT(encoding) == FIX2INT(rhs_encoding) &&
        og_oniguruma_oregexp_casefold(self) == og_oniguruma_oregexp_casefold(rhs))
        return Qtrue;
    return Qfalse;
}
/*
 * Document-method: ===
 *
 * call-seq:
 *   rxp === str   => true or false
 *
 * Case Equality---Synonym for <code>ORegexp#=~</code> used in case
 * statements. Values that cannot be converted to a String yield false.
 *
 *   a = "HELLO"
 *   case a
 *   when ORegexp.new('^[a-z]*$'); print "Lower case\n"
 *   when ORegexp.new('^[A-Z]*$'); print "Upper case\n"
 *   else;                         print "Mixed case\n"
 *   end
 *
 * produces:
 *
 *   Upper case
 */
static VALUE
og_oniguruma_oregexp_operator_identical(VALUE self, VALUE str)
{
    VALUE subject = str;
    VALUE result;

    if (TYPE(subject) != T_STRING) {
        subject = rb_check_string_type(subject);
        if (NIL_P(subject))
            return Qfalse;
    }

    StringValue(subject);
    result = og_oniguruma_oregexp_match(1, &subject, self);

    return NIL_P(result) ? Qfalse : Qtrue;
}
/*
 * Document-method: =~
 *
 * call-seq:
 *   rxp =~ string  => int or nil
 *
 * Matches <code>rxp</code> against <code>string</code>, returning the offset
 * of the start of the match or <code>nil</code> if the match failed. Sets
 * <code>$~</code> to the corresponding <code>MatchData</code> or
 * <code>nil</code>.
 *
 *   ORegexp.new( 'SIT' ) =~ "insensitive"                                 #=> nil
 *   ORegexp.new( 'SIT', :options => OPTION_IGNORECASE ) =~ "insensitive"  #=> 5
 */
static VALUE
og_oniguruma_oregexp_operator_match(VALUE self, VALUE str)
{
    VALUE found = og_oniguruma_oregexp_match(1, &str, self);

    if (NIL_P(found))
        return Qnil;

    /* Start offset of the whole match (group 0). */
    return INT2FIX(RMATCH(found)->regs->beg[0]);
}
/*
 * Document-method: kcode
 *
 * call-seq:
 *   rxp.kcode   => int
 *
 * Returns the character set code for the regexp (the value stored in
 * @encoding).
 */
static VALUE
og_oniguruma_oregexp_kcode(VALUE self)
{
    VALUE encoding = rb_iv_get(self, "@encoding");

    return encoding;
}
/*
 * Document-method: options
 *
 * call-seq:
 *   rxp.options   => fixnum
 *
 * Returns the set of bits corresponding to the options used when creating
 * this ORegexp (see <code>ORegexp::new</code> for details). Note that
 * additional bits may be set in the returned options: these are used
 * internally by the regular expression code. These extra bits are ignored
 * if the options are passed to <code>ORegexp::new</code>.
 *
 *   Oniguruma::OPTION_IGNORECASE   #=> 1
 *   Oniguruma::OPTION_EXTEND       #=> 2
 *   Oniguruma::OPTION_MULTILINE    #=> 4
 *
 *   Regexp.new(r.source, :options => Oniguruma::OPTION_EXTEND )   #=> 2
 */
static VALUE
og_oniguruma_oregexp_options(VALUE self)
{
    VALUE options = rb_iv_get(self, "@options");

    return options;
}
/*
 * Document-method: source
 *
 * call-seq:
 *   rxp.source   => str
 *
 * Returns the original string of the pattern. The returned string is
 * frozen.
 *
 *   ORegexp.new( 'ab+c', 'ix' ).source   #=> "ab+c"
 */
static VALUE
og_oniguruma_oregexp_source(VALUE self)
{
    VALUE source = rb_iv_get(self, "@pattern");

    OBJ_FREEZE(source);
    return source;
}
/*
 * Document-method: to_s
 *
 * call-seq:
 *   rxp.to_s   => str
 *
 * Returns a string containing the regular expression and its options (using
 * the <code>(?xxx:yyy)</code> notation). This string can be fed back in to
 * <code>Regexp::new</code> to a regular expression with the same semantics
 * as the original. (However, <code>Regexp#==</code> may not return true when
 * comparing the two, as the source of the regular expression itself may
 * differ, as the example shows). <code>Regexp#inspect</code> produces a
 * generally more readable version of <i>rxp</i>.
 *
 *   r1 = ORegexp.new( 'ab+c', :options => OPTION_IGNORECASE | OPTION_EXTEND ) #=> /ab+c/ix
 *   s1 = r1.to_s                                                              #=> "(?ix-m:ab+c)"
 *   r2 = ORegexp.new(s1)                                                      #=> /(?ix-m:ab+c)/
 *   r1 == r2                                                                  #=> false
 *   r1.source                                                                 #=> "ab+c"
 *   r2.source                                                                 #=> "(?ix-m:ab+c)"
 */
static VALUE
og_oniguruma_oregexp_to_s(VALUE self)
{
    VALUE og_mOniguruma, result;
    int options, ignorecase, multiline, extend;

    og_mOniguruma = rb_const_get(rb_cObject, rb_intern(OG_M_ONIGURUMA));
    options = FIX2INT(og_oniguruma_oregexp_options(self));

    /* Mask each flag once; the masked values drive both halves below. */
    ignorecase = options & FIX2INT(rb_const_get(og_mOniguruma, rb_intern("OPTION_IGNORECASE")));
    multiline = options & FIX2INT(rb_const_get(og_mOniguruma, rb_intern("OPTION_MULTILINE")));
    extend = options & FIX2INT(rb_const_get(og_mOniguruma, rb_intern("OPTION_EXTEND")));

    /* Enabled flags first ... */
    result = rb_str_new2("(?");
    if (ignorecase > 0)
        rb_str_cat(result, "i", 1);
    if (multiline > 0)
        rb_str_cat(result, "m", 1);
    if (extend > 0)
        rb_str_cat(result, "x", 1);

    /* ... then, unless all three are on, a '-' followed by the disabled ones. */
    if (!(ignorecase > 0 && multiline > 0 && extend > 0)) {
        rb_str_cat(result, "-", 1);
        if (ignorecase == 0)
            rb_str_cat(result, "i", 1);
        if (multiline == 0)
            rb_str_cat(result, "m", 1);
        if (extend == 0)
            rb_str_cat(result, "x", 1);
    }

    rb_str_cat(result, ":", 1);
    rb_str_concat(result, rb_iv_get(self, "@pattern"));
    return rb_str_cat(result, ")", 1);
}
/*
 * Document-method: inspect
 *
 * call-seq:
 *   rxp.inspect   => string
 *
 * Returns a readable version of <i>rxp</i>: the pattern between slashes,
 * followed by the letters of the enabled options (i, m, x).
 *
 *   ORegexp.new( 'cat', :options => OPTION_MULTILINE | OPTION_IGNORECASE ).inspect  => /cat/im
 *   ORegexp.new( 'cat', :options => OPTION_MULTILINE | OPTION_IGNORECASE ).to_s     => (?im-x:cat)
 */
static VALUE
og_oniguruma_oregexp_inspect(VALUE self)
{
    VALUE og_mOniguruma, result;
    int options, ignorecase, multiline, extend;

    og_mOniguruma = rb_const_get(rb_cObject, rb_intern(OG_M_ONIGURUMA));
    options = FIX2INT(og_oniguruma_oregexp_options(self));

    result = rb_str_new2("/");
    rb_str_concat(result, rb_iv_get(self, "@pattern"));
    rb_str_cat(result, "/", 1);

    ignorecase = options & FIX2INT(rb_const_get(og_mOniguruma, rb_intern("OPTION_IGNORECASE")));
    multiline = options & FIX2INT(rb_const_get(og_mOniguruma, rb_intern("OPTION_MULTILINE")));
    extend = options & FIX2INT(rb_const_get(og_mOniguruma, rb_intern("OPTION_EXTEND")));

    if (ignorecase > 0)
        rb_str_cat(result, "i", 1);
    if (multiline > 0)
        rb_str_cat(result, "m", 1);
    if (extend > 0)
        rb_str_cat(result, "x", 1);

    return result;
}
/*
 * Define the ORegexp class.
 *
 * Creates a class called `name` under module `mod` (superclass Object),
 * installs the allocator, and registers the class methods, instance
 * methods and aliases implemented in this file.
 */
void
og_oniguruma_oregexp(VALUE mod, const char* name)
{
VALUE og_cOniguruma_ORegexp, og_cOniguruma_ORegexp_Singleton;
og_cOniguruma_ORegexp = rb_define_class_under(mod, name, rb_cObject);
rb_define_alloc_func(og_cOniguruma_ORegexp, og_oniguruma_oregexp_alloc);
/* Now add the methods to the class */
rb_define_singleton_method(og_cOniguruma_ORegexp, "escape", og_oniguruma_oregexp_escape, -1);
rb_define_singleton_method(og_cOniguruma_ORegexp, "last_match", og_oniguruma_oregexp_last_match, -1);
/* Define Instance Methods */
rb_define_method(og_cOniguruma_ORegexp, "initialize", og_oniguruma_oregexp_initialize, -1);
rb_define_method(og_cOniguruma_ORegexp, "match", og_oniguruma_oregexp_match, -1);
rb_define_method(og_cOniguruma_ORegexp, "=~", og_oniguruma_oregexp_operator_match, 1);
rb_define_method(og_cOniguruma_ORegexp, "==", og_oniguruma_oregexp_operator_equality, 1);
rb_define_method(og_cOniguruma_ORegexp, "===", og_oniguruma_oregexp_operator_identical, 1);
rb_define_method(og_cOniguruma_ORegexp, "sub", og_oniguruma_oregexp_sub, -1);
rb_define_method(og_cOniguruma_ORegexp, "sub!", og_oniguruma_oregexp_sub_bang, -1);
rb_define_method(og_cOniguruma_ORegexp, "gsub", og_oniguruma_oregexp_gsub, -1);
rb_define_method(og_cOniguruma_ORegexp, "gsub!", og_oniguruma_oregexp_gsub_bang, -1);
rb_define_method(og_cOniguruma_ORegexp, "scan", og_oniguruma_oregexp_scan, 1);
rb_define_method(og_cOniguruma_ORegexp, "casefold?", og_oniguruma_oregexp_casefold, 0);
rb_define_method(og_cOniguruma_ORegexp, "kcode", og_oniguruma_oregexp_kcode, 0);
rb_define_method(og_cOniguruma_ORegexp, "options", og_oniguruma_oregexp_options, 0);
rb_define_method(og_cOniguruma_ORegexp, "source", og_oniguruma_oregexp_source, 0);
rb_define_method(og_cOniguruma_ORegexp, "inspect", og_oniguruma_oregexp_inspect, 0);
rb_define_method(og_cOniguruma_ORegexp, "to_s", og_oniguruma_oregexp_to_s, 0);
/* Define Aliases */
/* Instance method aliases */
rb_define_alias(og_cOniguruma_ORegexp, "eql?", "==");
rb_define_alias(og_cOniguruma_ORegexp, "match_all", "scan");
/* Class method aliases */
/* Aliases on the singleton class act as aliases for the class methods. */
og_cOniguruma_ORegexp_Singleton = rb_singleton_class(og_cOniguruma_ORegexp);
rb_define_alias(og_cOniguruma_ORegexp_Singleton, "compile", "new");
rb_define_alias(og_cOniguruma_ORegexp_Singleton, "quote", "escape");
}