require 'date'
require 'cgi'
require 'stringio'

# Parses a standard web server log file stream and returns a hash with
# key/values for each line. Includes the Enumerable interface.
#
# Each parsed line yields a Hash holding the server attributes (:ip,
# :timestamp, :request, :status) plus one symbol-keyed entry for every
# query parameter found in the request. A parameter that occurs more than
# once is collected into an Array of its values.
#
# NOTE: query parameters share the same Hash as the server attributes, so a
# parameter literally named "ip", "timestamp", "request" or "status" is
# merged with the server value into an Array instead of replacing it.
class LogParser
  include Enumerable

  # Raised for every line that cannot be parsed.
  class ParseError < StandardError; end

  # Captures, in order: client ip, timestamp, request line, status code.
  LOG_FORMAT = /([^ ]*) [^ ]* [^ ]* \[([^\]]*)\] "([^"]*)" ([^ ]*)/
  LOG_DATE_FORMAT = "%d/%b/%Y:%H:%M:%S %z".freeze
  # Keys and values stop at "&" and at a space, so the trailing protocol
  # token of the request line (e.g. " HTTP/1.1") never leaks into the last
  # value and a valueless flag ("?flag&a=1") is not glued onto the next key.
  LOG_KEY_VALUE_FORMAT = /[?&]([^=& ]+)=([^& ]+)/
  SERVER_ATTRIBUTES = [:ip, :timestamp, :request, :status].freeze

  # Support both strings and streams as input.
  #
  # input - a String holding the log contents, or any object responding to
  #         #gets (File, StringIO, ...).
  def initialize(input)
    input = StringIO.new(input) if input.is_a?(String)
    @stream = input
  end

  # Enumerable interface. Yields one Hash per non-blank log line.
  #
  # Returns an Enumerator when called without a block.
  # Raises ParseError for a line that cannot be parsed.
  def each
    return enum_for(:each) unless block_given?

    while (observation = get_next_observation)
      yield observation
    end
  end

  # Reads lines from the stream until one yields an observation.
  #
  # Blank lines are skipped rather than returned as nil, so nil always means
  # that the stream is exhausted (otherwise iteration would stop at the
  # first blank line and silently drop everything after it).
  #
  # Returns the parsed Hash, or nil at end of stream.
  # Raises ParseError for a line that cannot be parsed.
  def get_next_observation
    while (line = @stream.gets)
      observation = LogParser.parse_line(line)
      return observation if observation
    end
    nil
  end

  # Parse one log line and return a hash with all attributes.
  #
  # line - the raw log line (String).
  #
  # Returns nil for a blank line, otherwise a Hash with :ip, :timestamp
  # (DateTime), :request, :status and one entry per query parameter.
  # Raises ParseError if the line cannot be parsed for any reason.
  def self.parse_line(line)
    return nil if line.strip.empty?

    match = LOG_FORMAT.match(line)
    raise ParseError, "Unknown parsing error" unless match

    ip, timestamp, request, status = match.captures
    result = {
      ip: ip,
      timestamp: parse_timestamp(timestamp),
      request: request,
      status: status
    }

    # Extract key/values pairs from the query part of the request.
    request.scan(LOG_KEY_VALUE_FORMAT) do |raw_key, raw_value|
      key, value = decode_pair(raw_key, raw_value)
      store_parameter(result, key, value)
    end

    result
  rescue ParseError
    raise
  rescue
    # Anything unexpected is reported through the single documented error type.
    raise ParseError, "Unknown parsing error"
  end

  # Converts the bracketed timestamp text of a log line into a DateTime.
  def self.parse_timestamp(text)
    DateTime.strptime(text, LOG_DATE_FORMAT)
  rescue ArgumentError
    raise ParseError, "Error while parsing timestamp"
  end

  # URL-decodes one raw key/value pair; the key is returned as a Symbol.
  def self.decode_pair(raw_key, raw_value)
    [CGI.unescape(raw_key).to_sym, CGI.unescape(raw_value)]
  rescue Encoding::CompatibilityError
    raise ParseError, "Error while parsing query parameters"
  end

  # Stores value under key, collecting repeated keys into an Array.
  def self.store_parameter(result, key, value)
    if !result.key?(key)
      result[key] = value
    elsif result[key].is_a?(Array)
      result[key] << value
    else
      result[key] = [result[key], value]
    end
  end

  private_class_method :parse_timestamp, :decode_pair, :store_parameter
end