Sha256: 026f8b981be6a5629e2242a7671af7f1a10252cc27a9979ba32ef00e78635cb4
Contents?: true
Size: 1.69 KB
Versions: 4
Compression:
Stored size: 1.69 KB
Contents
#!/usr/bin/env ruby
$: << File.dirname(__FILE__)+'/../lib'
require 'wukong'

module ApacheLogParser
  # Streams apache-style access log lines and yields one record per line:
  # either a fully split :logline record or an :unparseable record that
  # carries the raw line.
  class Mapper < Wukong::Streamer::LineStreamer

    # Splits a request string of the form "METHOD path PROTOCOL/version".
    #
    # Returns a four-element array [req, method, path, protocol]:
    # * when the request matches, req is blanked ('') and the three
    #   captured parts are filled in;
    # * otherwise the original req is returned with three empty strings.
    def parse_request(req)
      m = %r{\A(\w+) (.*) (\w+/[\w\.]+)\z}.match(req)
      if m
        [''] + m.captures
      else
        [req, '', '', '']
      end
    end

    # regular expression to match on apache-style log lines
    # IP addr - - [07/Jun/2008:20:37:11 +0000] 400 "GET /faq" + gaJsHost + "google-analytics.com/ga.js HTTP/1.1" 173 "-" "-" "-"
    LOG_RE = %r{\A(\d+\.\d+\.\d+\.\d+) ([^\s]+) ([^\s]+) \[(\d\d/\w+/\d+):(\d\d:\d\d:\d\d)([^\]]*)\] (\d+) "([^\"]*(?:\" \+ gaJsHost \+ \"[^\"]*)?)" (\d+) "([^\"]*)" "([^\"]*)" "([^\"]*)"\z}

    # Parses one log line.
    #
    # Yields [:logline, method, path, protocol, ip, j1, j2, req_date, resp,
    # req, j3, ref, ua, j4] when the line matches LOG_RE and its timestamp
    # can be parsed, and [:unparseable, line] otherwise.
    def process(line)
      # String#chomp returns a new string; the result must be kept, or a
      # trailing newline would stop the \z-anchored LOG_RE from matching.
      line = line.chomp
      m = LOG_RE.match(line)
      unless m
        yield [:unparseable, line]
        return
      end

      ip, j1, j2, datepart, timepart, tzpart, resp, req, j3, ref, ua, j4 = m.captures
      begin
        req_date = DateTime.parse("#{datepart} #{timepart} #{tzpart}").to_flat
      rescue ArgumentError
        # LOG_RE only checks the shape of the date (e.g. it accepts
        # "99/Foo/2008"); treat an impossible date like any other bad line
        # instead of aborting the stream.
        yield [:unparseable, line]
        return
      end
      req, method, path, protocol = parse_request(req)
      yield [:logline, method, path, protocol, ip, j1, j2, req_date, resp, req, j3, ref, ua, j4]
    end
  end

  # Empty streamer subclass; it is not passed to Script below (the reducer
  # argument there is nil).
  class Reducer < Wukong::Streamer::LineStreamer
  end

  # Execute the script
  class Script < Wukong::Script
    # Command string returned for the reduce step.
    def reduce_command
      "/usr/bin/uniq"
    end

    # Inherited default options with :sort_fields overridden to 8.
    def default_options
      super.merge :sort_fields => 8 # , :reduce_tasks => 0
    end
  end

  Script.new(Mapper,nil).run
end

# 55.55.155.55 - - [04/Feb/2008:11:37:52 +0000] 301 "GET /robots.txt HTTP/1.1" 185 "-" "WebAlta Crawler/2.0 (http://www.webalta.net/ru/about_webmaster.html) (Windows; U; Windows NT 5.1; ru-RU)" "-"
Version data entries
4 entries across 4 versions & 2 rubygems
Version | Path |
---|---|
mrflip-wukong-0.1.0 | examples/apache_log_parser.rb |
wukong-1.4.0 | examples/apache_log_parser.rb |
wukong-0.1.4 | examples/apache_log_parser.rb |
wukong-0.1.1 | examples/apache_log_parser.rb |