# ----------------------------------------------------------------------------- # # Sawmill stream parser utility # # ----------------------------------------------------------------------------- # Copyright 2009 Daniel Azuma # # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # * Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # * Neither the name of the copyright holder, nor the names of any other # contributors to this software, may be used to endorse or promote products # derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # ----------------------------------------------------------------------------- ; module Sawmill # A logfile parser that parses log entries from a logfile and sends them # to an entry processor. class Parser # :stopdoc: LINE_REGEXP = /^\[\s*([[:graph:]]+)\s+(\d{4})-(\d{2})-(\d{2})(T|\s)(\d{2}):(\d{2}):(\d{2})(.(\d{1,6}))?Z?\s?([+-]\d{4})?\s+([[:graph:]]+)(\s+([[:graph:]]+))?\s+([\^$.=])\]\s(.*)$/ DIRECTIVE_REGEXP = /^#\s+sawmill_format:\s+(\w+)=(.*)$/ ATTRIBUTE_REGEXP = /^([[:graph:]]+)\s([=+\/-])\s/ SUPPORTS_ENCODING = defined?(::Encoding) ENCODING_OPTS = {:invalid => :replace, :undef => :replace} # :startdoc: # Create a new parser that reads from the given stream. # # You should provide a processor to receive the data from the logfile. # The processor may be either an entry processor or a record processor. # You may also pass nil for the processor. In this case, the generated # log entries will not be sent to a processor but will still be returned # by the parse_one_entry method. # # Recognized options include: # # [:levels] # Sawmill::LevelGroup to use to parse log levels. # If not specified, Sawmill::STANDARD_LEVELS is used by default. # [:emit_incomplete_records_at_eof] # If set to true, causes any incomplete log records to be emitted # in their incomplete state when EOF is reached. # [:encoding] # Overrides the IO encoding. (Ruby 1.9 only). If specified, lines # read from the stream are assumed to be in this encoding. If not # specified, the IO's default encoding is honored. # Note that the encoding may also be modified by the stream itself, # if an appropriate parser directive is encountered. # [:internal_encoding] # Transcodes strings as they are read. (Ruby 1.9 only). If specified, # lines are transcoded into this encoding after they are read from # the stream. If not specified, no post-transcoding is done. def initialize(io_, processor_, opts_={}) @io = io_ @processor = nil if processor_.respond_to?(:record) && processor_.respond_to?(:extra_entry) @processor = RecordBuilder.new(processor_) elsif processor_.respond_to?(:begin_record) && processor_.respond_to?(:end_record) @processor = processor_ end @levels = opts_[:levels] || STANDARD_LEVELS @emit_incomplete_records_at_eof = opts_[:emit_incomplete_records_at_eof] @current_record_id = nil if SUPPORTS_ENCODING @encoding = opts_[:encoding] @encoding = ::Encoding.find(@encoding) if @encoding && !@encoding.kind_of?(::Encoding) @internal_encoding = opts_[:internal_encoding] @internal_encoding = ::Encoding.find(@internal_encoding) if @internal_encoding && !@internal_encoding.kind_of?(::Encoding) end end # Parse one log entry from the stream and emit it to the processor. # Also returns the log entry. # Returns nil if EOF has been reached. def parse_one_entry str_ = _get_next_line entry_ = nil if str_ match_ = LINE_REGEXP.match(str_) if match_ level_ = @levels.get(match_[1]) timestamp_ = ::Time.utc(match_[2].to_i, match_[3].to_i, match_[4].to_i, match_[6].to_i, match_[7].to_i, match_[8].to_i, match_[10].to_s.ljust(6, '0').to_i) offset_ = match_[11].to_i if offset_ != 0 neg_ = offset_ < 0 offset_ = -offset_ if neg_ secs_ = offset_ / 100 * 3600 + offset_ % 100 * 60 if neg_ timestamp_ += secs_ else timestamp_ -= secs_ end end progname_ = match_[12] record_id_ = match_[14] || @current_record_id type_code_ = match_[15] str_ = match_[16] if str_ =~ /(\\+)$/ count_ = $1.length str_ = $` + "\\"*(count_/2) while count_ % 2 == 1 str2_ = _get_next_line if str2_ && str2_ =~ /(\\*)\n?$/ count_ = $1.length str_ << "\n" << $` << "\\"*(count_/2) else break end end end case type_code_ when '^' if str_ =~ /^BEGIN\s/ @current_record_id = $' entry_ = Entry::BeginRecord.new(level_, timestamp_, progname_, @current_record_id) @processor.begin_record(entry_) if @processor end when '$' if str_ =~ /^END\s/ @current_record_id = $' entry_ = Entry::EndRecord.new(level_, timestamp_, progname_, @current_record_id) @current_record_id = nil @processor.end_record(entry_) if @processor end when '=' if str_ =~ ATTRIBUTE_REGEXP key_ = $1 opcode_ = $2 value_ = $' operation_ = opcode_ == '+' ? :append : :set entry_ = Entry::Attribute.new(level_, timestamp_, progname_, record_id_, key_, value_, operation_) @processor.attribute(entry_) if @processor end end unless entry_ entry_ = Entry::Message.new(level_, timestamp_, progname_, record_id_, str_) @processor.message(entry_) if @processor end else if str_ =~ DIRECTIVE_REGEXP _set_parser_directive($1, $2) end entry_ = Entry::UnknownData.new(str_.chomp) @processor.unknown_data(entry_) if @processor.respond_to?(:unknown_data) end else if @emit_incomplete_records_at_eof && @processor.respond_to?(:emit_incomplete_records) @processor.emit_incomplete_records end end entry_ end # Parse the rest of the stream until EOF is reached, and emit the log # entries to the processor. def parse_all while parse_one_entry; end end private def _get_next_line # :nodoc: str_ = @io.gets if str_ && SUPPORTS_ENCODING str_.force_encoding(@encoding) if @encoding str_.encode!(@internal_encoding, ENCODING_OPTS) if @internal_encoding end str_ end def _set_parser_directive(key_, value_) # :nodoc: case key_ when 'encoding' if SUPPORTS_ENCODING encoding_ = ::Encoding.find(value_) rescue nil @encoding = encoding_ if encoding_ end end end end end