lib/grok-pure.rb in jls-grok-0.10.12 vs lib/grok-pure.rb in jls-grok-0.10.13
- old
+ new
@@ -1,26 +1,27 @@
require "rubygems"
require "logger"
-require "grok/pure/discovery"
require "cabin"
+require "grok/pure/discovery"
+require "grok/pure/match"
# TODO(sissel): Check if 'grok' c-ext has been loaded and abort?
class Grok
class PatternError < StandardError; end
# The pattern input
attr_accessor :pattern
-
+
# The fully-expanded pattern (in regex form)
attr_accessor :expanded_pattern
# The logger
attr_accessor :logger
# The dictionary of pattern names to pattern expressions
attr_accessor :patterns
-
+
PATTERN_RE = \
/%\{ # match '%{' not prefixed with '\'
(?<name> # match the pattern name
(?<pattern>[A-z0-9]+)
(?::(?<subname>[@\[\]A-z0-9_:.-]+))?
@@ -48,10 +49,12 @@
def initialize
@patterns = {}
@logger = Cabin::Channel.new
@logger.subscribe(Logger.new(STDOUT))
@logger.level = :warn
+ # Captures Lambda which is generated at Grok compile time and called at match time
+ @captures_func = nil
# TODO(sissel): Throw exception if we aren't using Ruby 1.9.2 or newer.
end # def initialize
public
@@ -64,30 +67,29 @@
public
def add_patterns_from_file(path)
file = File.new(path, "r")
file.each do |line|
# Skip comments
- next if line =~ /^\s*#/
+ next if line =~ /^\s*#/
# File format is: NAME ' '+ PATTERN '\n'
name, pattern = line.gsub(/^\s*/, "").split(/\s+/, 2)
#p name => pattern
# If the line is malformed, skip it.
next if pattern.nil?
# Trim newline and add the pattern.
add_pattern(name, pattern.chomp)
end
return nil
+ ensure
+ file.close
end # def add_patterns_from_file
public
- def compile(pattern)
- @capture_map = {}
-
+ def compile(pattern, named_captures_only=false)
iterations_left = 10000
@pattern = pattern
@expanded_pattern = pattern.clone
- index = 0
# Replace any instances of '%{FOO}' with that pattern.
loop do
if iterations_left == 0
raise PatternError, "Deep recursion pattern compilation of #{pattern.inspect} - expanded: #{@expanded_pattern.inspect}"
@@ -99,65 +101,91 @@
if m["definition"]
add_pattern(m["pattern"], m["definition"])
end
if @patterns.include?(m["pattern"])
- # create a named capture index that we can push later as the named
- # pattern. We do this because ruby regexp can't capture something
- # by the same name twice.
regex = @patterns[m["pattern"]]
- #puts "patterns[#{m["pattern"]}] => #{regex}"
+ name = m["name"]
- capture = "a#{index}" # named captures have to start with letters?
- #capture = "%04d" % "#{index}" # named captures have to start with letters?
- replacement_pattern = "(?<#{capture}>#{regex})"
- @capture_map[capture] = m["name"]
+ if named_captures_only && name.index(":").nil?
+ # this has no semantic (pattern:foo) so we don't need to capture
+ replacement_pattern = "(?:#{regex})"
+ else
+ replacement_pattern = "(?<#{name}>#{regex})"
+ end
- #puts "Before: #{@expanded_pattern}"
- #puts "m[0]: #{m[0]}"
- #puts "replacement_pattern => #{replacement_pattern}"
- #puts "Proposed: #{@expanded_pattern.sub(m[0], replacement_pattern)}"
-
# Ruby's String#sub() has a bug (or misfeature) that causes it to do bad
# things to backslashes in string replacements, so let's work around it
# See this gist for more details: https://gist.github.com/1491437
# This hack should resolve LOGSTASH-226.
@expanded_pattern.sub!(m[0]) { |s| replacement_pattern }
-
- #puts "After: #{@expanded_pattern}"
- #puts "m[0]: #{m[0]}"
- #puts "replacement_pattern => #{replacement_pattern}"
- index += 1
+ @logger.debug? and @logger.debug("replacement_pattern => #{replacement_pattern}")
else
raise PatternError, "pattern #{m[0]} not defined"
end
end
- #@logger.debug("Finished expanding", :string => @expanded_pattern)
- #puts "Expanded: #{@expanded_pattern}"
- @regexp = Regexp.new(@expanded_pattern)
- @logger.debug("Grok compiled OK", :pattern => pattern,
- :expanded_pattern => @expanded_pattern)
- end # def compile
+ @regexp = Regexp.new(@expanded_pattern, Regexp::MULTILINE)
+ @logger.debug? and @logger.debug("Grok compiled OK", :pattern => pattern,
+ :expanded_pattern => @expanded_pattern)
+ @captures_func = compile_captures_func(@regexp)
+ end
+
+ private
+ # compiles the captures lambda so runtime match can be optimized
+ def compile_captures_func(re)
+ re_match = ["lambda do |match, &block|"]
+ re.named_captures.each do |name, indices|
+ pattern, name, coerce = name.split(":")
+ indices.each do |index|
+ coerce = case coerce
+ when "int"; ".to_i"
+ when "float"; ".to_f"
+ else; ""
+ end
+ name = pattern if name.nil?
+ re_match << " block.call(#{name.inspect}, match[#{index}]#{coerce})"
+ end
+ end
+ re_match << "end"
+ return eval(re_match.join("\n"))
+ end # def compile_captures_func
+
public
def match(text)
match = @regexp.match(text)
-
if match
grokmatch = Grok::Match.new
grokmatch.subject = text
- grokmatch.start, grokmatch.end = match.offset(0)
grokmatch.grok = self
grokmatch.match = match
- @logger.debug("Regexp match object", :names => match.names, :captures => match.captures)
+ @logger.debug? and @logger.debug("Regexp match object", :names => match.names,
+ :captures => match.captures)
return grokmatch
else
return false
end
end # def match
+ # Optimized match and capture instead of calling them separately
+ def match_and_capture(text)
+ match = @regexp.match(text)
+ if match
+ @logger.debug? and @logger.debug("Regexp match object", :names => match.names,
+ :captures => match.captures)
+ @captures_func.call(match) { |k,v| yield k,v }
+ return true
+ else
+ return false
+ end
+ end # def match_and_capture
+
+ def capture(match, block)
+ @captures_func.call(match) { |k,v| block.call k,v }
+ end # def capture
+
public
def discover(input)
init_discover if @discover == nil
return @discover.discover(input)
@@ -168,13 +196,6 @@
require "grok/pure/discovery"
@discover = Grok::Discovery.new(self)
@discover.logger = @logger
end # def init_discover
- public
- def capture_name(id)
- return @capture_map[id]
- end # def capture_name
end # Grok
-
-require "grok/pure/match"
-require "grok/pure/pile"