lib/arugula/parts.rb in arugula-0.3.0 vs lib/arugula/parts.rb in arugula-0.4.0

- old
+ new

@@ -19,11 +19,11 @@
     def initialize(literal)
       @literal = literal
     end

     def to_s
-      literal.gsub('\\', '\\\\')
+      literal.gsub('\\', '\\\\').gsub(/[.]/) { |m| "\\#{m}" }
     end

     def match(str, index, _match_data)
       length = literal.size
       matches = str[index, length] == literal
@@ -59,12 +59,12 @@
       @parts = []
     end

     def match(str, index, match_data)
       parts.each do |part|
-        match, match_index = part.match(str, index, match_data)
-        return true, match_index if match
+        matches, match_index = part.match(str, index, match_data)
+        return true, match_index if matches
       end
       [false, index]
     end
   end
 end
@@ -81,11 +81,18 @@
   end

   class CharacterClassPart < Part
     include MatchAny
     def to_s
-      "[#{parts.join}]"
+      parts_string = parts.map do |part|
+        next part unless part.class.type == :literal
+        lit = part.literal
+        lit = '\\]' if lit == ']'
+        lit = '\\[' if lit == '['
+        lit
+      end.join
+      "[#{parts_string}]"
     end
   end

   class RangePart < Part
     def initialize(start, final)
@@ -106,27 +113,31 @@
     MATCHERS = {
       A: ->(_str, index) { index == 0 },
       d: ->(str, index) { ('0'..'9').member?(str[index]) },
       s: ->(str, index) { [' ', "\t"].include?(str[index]) },
       S: ->(str, index) { ![' ', "\t"].include?(str[index]) },
+      z: ->(str, index) { index == str.size },
+      Z: ->(str, index) { str[index..-1] == "\n" || index == str.size },
     }.freeze

     OFFSETS = begin
       offsets = {
-        A: 0,
+        A: ->(_str, _index) { 0 },
+        Z: ->(_str, _index) { 0 },
+        z: ->(_str, _index) { 0 },
       }
-      offsets.default = 1
+      offsets.default = ->(_str, _index) { 1 }
       offsets.freeze
     end

     def initialize(metachar)
       @metachar = metachar.to_sym
     end

     def match(str, index, _match_data)
       matches = MATCHERS[@metachar][str, index]
-      [matches, index + (matches ? OFFSETS[@metachar] : 0)]
+      [matches, index + (matches ? OFFSETS[@metachar][str, index] : 0)]
     end

     def to_s
       "\\#{@metachar}"
     end
@@ -179,46 +190,107 @@
     def initialize(wrapped)
       @wrapped = wrapped
     end
   end

-  class StarPart < Part
+  class NotPart < Part
     include Wrapping
+
     def to_s
-      "#{wrapped}*"
+      @wrapped.to_s.dup.insert(1, '^')
     end

     def match(str, index, match_data)
+      matches, end_index = wrapped.match(str, index, match_data)
+      [!matches, matches ? index : end_index + 1]
+    end
+  end
+
+  module MatchNTimes
+    include Wrapping
+    def initialize(*args, times: 1..1)
+      @times = times
+      super(*args)
+    end
+
+    def match(str, index, match_data)
+      match_count = 0
+      end_index = index
+
       loop do
         matches, index = wrapped.match(str, index, match_data)
-        return true, index unless matches
+        if matches
+          end_index = index
+          match_count += 1
+        end
+        break if !matches || match_count > @times.end
       end
+
+      matches = @times.member?(match_count)
+      [matches, matches ? end_index : index]
     end
   end

+  class StarPart < Part
+    include MatchNTimes
+    def initialize(*args)
+      super(*args, times: 0..Float::INFINITY)
+    end
+
+    def to_s
+      "#{wrapped}*"
+    end
+  end
+
   class PlusPart < Part
-    include Wrapping
+    include MatchNTimes
+    def initialize(*args)
+      super(*args, times: 1..Float::INFINITY)
+    end
+
     def to_s
       "#{wrapped}+"
     end
+  end

-    def match(str, index, match_data)
-      has_matched = false
-      loop do
-        matches, index = wrapped.match(str, index, match_data)
-        has_matched = true if matches
-        return has_matched, index unless matches
-      end
+  class QuestionPart < Part
+    include MatchNTimes
+    def initialize(*args)
+      super(*args, times: 0..1)
     end
+
+    def to_s
+      "#{wrapped}?"
+    end
   end

+  class QuantifierPart < Part
+    include MatchNTimes
+    def initialize(before, after, *args)
+      super(*args, times: before..after)
+    end
+
+    def to_s
+      before = @times.begin
+      after = @times.end
+      quantifier_part = '{'.dup
+      quantifier_part << before.to_s unless before == 0
+      quantifier_part << ',' unless before == after
+      quantifier_part << after.to_s unless before == after ||
+                                           after == Float::INFINITY
+      quantifier_part << '}'
+      "#{wrapped}#{quantifier_part}"
+    end
+  end
+
   class DotPart < Part
     def to_s
       '.'
     end

     def match(str, index, _match_data)
-      matches = index < str.size
+      char = str[index]
+      matches = char && char != "\n"
       [matches, index + (matches ? 1 : 0)]
     end
   end
 end