lib/sawmill/parser.rb in sawmill-0.0.4 vs lib/sawmill/parser.rb in sawmill-0.0.5

- old
+ new

@@ -44,10 +44,12 @@ # :stopdoc: LINE_REGEXP = /^\[\s*([[:graph:]]+)\s+(\d{4})-(\d{2})-(\d{2})(T|\s)(\d{2}):(\d{2}):(\d{2})(.(\d{1,6}))?Z?\s?([+-]\d{4})?\s+([[:graph:]]+)(\s+([[:graph:]]+))?\s+([\^$.=])\]\s(.*)$/ DIRECTIVE_REGEXP = /^#\s+sawmill_format:\s+(\w+)=(.*)$/ ATTRIBUTE_REGEXP = /^([[:graph:]]+)\s([=+\/-])\s/ + SUPPORTS_ENCODING = defined?(::Encoding) + ENCODING_OPTS = {:invalid => :replace, :undef => :replace} # :startdoc: # Create a new parser that reads from the given stream. # @@ -63,10 +65,20 @@ # Sawmill::LevelGroup to use to parse log levels. # If not specified, Sawmill::STANDARD_LEVELS is used by default. # <tt>:emit_incomplete_records_at_eof</tt> # If set to true, causes any incomplete log records to be emitted # in their incomplete state when EOF is reached. + # <tt>:encoding</tt> + # Overrides the IO encoding. (Ruby 1.9 only). If specified, lines + # read from the stream are assumed to be in this encoding. If not + # specified, the IO's default encoding is honored. + # Note that the encoding may also be modified by the stream itself, + # if an appropriate parser directive is encountered. + # <tt>:internal_encoding</tt> + # Transcodes strings as they are read. (Ruby 1.9 only). If specified, + # lines are transcoded into this encoding after they are read from + # the stream. If not specified, no post-transcoding is done. def initialize(io_, processor_, opts_={}) @io = io_ @processor = nil if processor_.respond_to?(:record) && processor_.respond_to?(:extra_entry) @@ -75,15 +87,14 @@ @processor = processor_ end @levels = opts_[:levels] || STANDARD_LEVELS @emit_incomplete_records_at_eof = opts_[:emit_incomplete_records_at_eof] @current_record_id = nil - @parser_directives = {} - @encoding = opts_[:encoding] - @internal_encoding = opts_[:internal_encoding] - if defined?(::Encoding) + if SUPPORTS_ENCODING + @encoding = opts_[:encoding] @encoding = ::Encoding.find(@encoding) if @encoding && !@encoding.kind_of?(::Encoding) + @internal_encoding = opts_[:internal_encoding] @internal_encoding = ::Encoding.find(@internal_encoding) if @internal_encoding && !@internal_encoding.kind_of?(::Encoding) end end @@ -156,11 +167,11 @@ entry_ = Entry::Message.new(level_, timestamp_, progname_, record_id_, str_) @processor.message(entry_) if @processor end else if str_ =~ DIRECTIVE_REGEXP - @parser_directives[$1] = $2 + _set_parser_directive($1, $2) end entry_ = Entry::UnknownData.new(str_.chomp) @processor.unknown_data(entry_) if @processor.respond_to?(:unknown_data) end else @@ -183,14 +194,25 @@ private def _get_next_line # :nodoc: str_ = @io.gets - if str_ + if str_ && SUPPORTS_ENCODING str_.force_encoding(@encoding) if @encoding - str_.encode!(@internal_encoding) if @internal_encoding + str_.encode!(@internal_encoding, ENCODING_OPTS) if @internal_encoding end str_ + end + + + def _set_parser_directive(key_, value_) # :nodoc: + case key_ + when 'encoding' + if SUPPORTS_ENCODING + encoding_ = ::Encoding.find(value_) rescue nil + @encoding = encoding_ if encoding_ + end + end end end