lib/log_analysis.rb in hadoop-rubydsl-0.0.4 vs lib/log_analysis.rb in hadoop-rubydsl-0.0.5
- old
+ new
@@ -7,10 +7,12 @@
# NOTE(review): PASS is just nil; where it is used is outside this excerpt — confirm intent
PASS = nil
# presumably the method names that are forwarded to the model — TODO confirm against the base mapper
MODEL_METHODS = [:column, :value]
# controller
class LogAnalysisMapper < HadoopDsl::BaseMapper
+ @@reg_cache = {}
+
# Builds a LogAnalysisMapperModel from key and value, then passes it
# together with script to the base mapper's initializer.
def initialize(script, key, value)
  model = LogAnalysisMapperModel.new(key, value)
  super(script, model)
end
# model methods
@@ -25,13 +27,19 @@
# Splits the current value on sep and hands the pieces to the model,
# assigning each piece to the value of the column it is paired with.
#
# sep - separator passed straight to value.split
def separate(sep)
  fields = value.split(sep)
  @model.create_or_replace_columns_with(fields) do |column, field|
    column.value = field
  end
end
- def pattern(re)
# Matches the current value against a regular expression and stores the
# capture groups as column values on the model.
#
# reg_str - pattern source handed to Regexp.new; the compiled Regexp is kept
#           in @@reg_cache, keyed by reg_str, so later calls with the same
#           argument reuse it instead of recompiling.
#
# When the value does not match, throws :each_line (non-local exit).
# NOTE(review): the matching `catch :each_line` is not in this method —
# presumably it wraps per-line processing; confirm in the caller.
def pattern(reg_str)
  # fetch the compiled RE from the cache, compiling and storing it on a miss
  re = (@@reg_cache[reg_str] ||= Regexp.new(reg_str))

  md = re.match(value)
  throw :each_line unless md # non-local exit

  @model.create_or_replace_columns_with(md.captures) { |column, capture| column.value = capture }
end
# column names given as Strings are converted to Symbols
def column_name(*names)