lib/sawmill/parser.rb in sawmill-0.0.4 vs lib/sawmill/parser.rb in sawmill-0.0.5
- old
+ new
@@ -44,10 +44,12 @@
# :stopdoc:
LINE_REGEXP = /^\[\s*([[:graph:]]+)\s+(\d{4})-(\d{2})-(\d{2})(T|\s)(\d{2}):(\d{2}):(\d{2})(.(\d{1,6}))?Z?\s?([+-]\d{4})?\s+([[:graph:]]+)(\s+([[:graph:]]+))?\s+([\^$.=])\]\s(.*)$/
DIRECTIVE_REGEXP = /^#\s+sawmill_format:\s+(\w+)=(.*)$/
ATTRIBUTE_REGEXP = /^([[:graph:]]+)\s([=+\/-])\s/
+ SUPPORTS_ENCODING = defined?(::Encoding)
+ ENCODING_OPTS = {:invalid => :replace, :undef => :replace}
# :startdoc:
# Create a new parser that reads from the given stream.
#
@@ -63,10 +65,20 @@
# Sawmill::LevelGroup to use to parse log levels.
# If not specified, Sawmill::STANDARD_LEVELS is used by default.
# <tt>:emit_incomplete_records_at_eof</tt>
# If set to true, causes any incomplete log records to be emitted
# in their incomplete state when EOF is reached.
+ # <tt>:encoding</tt>
+ # Overrides the IO encoding (Ruby 1.9 or later). If specified, lines
+ # read from the stream are assumed to be in this encoding. If not
+ # specified, the IO's default encoding is honored.
+ # Note that the encoding may also be modified by the stream itself,
+ # if an appropriate parser directive is encountered.
+ # <tt>:internal_encoding</tt>
+ # Transcodes strings as they are read (Ruby 1.9 or later). If specified,
+ # lines are transcoded into this encoding after they are read from
+ # the stream. If not specified, no post-transcoding is done.
def initialize(io_, processor_, opts_={})
@io = io_
@processor = nil
if processor_.respond_to?(:record) && processor_.respond_to?(:extra_entry)
@@ -75,15 +87,14 @@
@processor = processor_
end
@levels = opts_[:levels] || STANDARD_LEVELS
@emit_incomplete_records_at_eof = opts_[:emit_incomplete_records_at_eof]
@current_record_id = nil
- @parser_directives = {}
- @encoding = opts_[:encoding]
- @internal_encoding = opts_[:internal_encoding]
- if defined?(::Encoding)
+ if SUPPORTS_ENCODING
+ @encoding = opts_[:encoding]
@encoding = ::Encoding.find(@encoding) if @encoding && !@encoding.kind_of?(::Encoding)
+ @internal_encoding = opts_[:internal_encoding]
@internal_encoding = ::Encoding.find(@internal_encoding) if @internal_encoding && !@internal_encoding.kind_of?(::Encoding)
end
end
@@ -156,11 +167,11 @@
entry_ = Entry::Message.new(level_, timestamp_, progname_, record_id_, str_)
@processor.message(entry_) if @processor
end
else
if str_ =~ DIRECTIVE_REGEXP
- @parser_directives[$1] = $2
+ _set_parser_directive($1, $2)
end
entry_ = Entry::UnknownData.new(str_.chomp)
@processor.unknown_data(entry_) if @processor.respond_to?(:unknown_data)
end
else
@@ -183,14 +194,25 @@
private
def _get_next_line # :nodoc:
str_ = @io.gets
- if str_
+ if str_ && SUPPORTS_ENCODING
str_.force_encoding(@encoding) if @encoding
- str_.encode!(@internal_encoding) if @internal_encoding
+ str_.encode!(@internal_encoding, ENCODING_OPTS) if @internal_encoding
end
str_
+ end
+
+
+ def _set_parser_directive(key_, value_) # :nodoc:
+ case key_
+ when 'encoding'
+ if SUPPORTS_ENCODING
+ encoding_ = ::Encoding.find(value_) rescue nil
+ @encoding = encoding_ if encoding_
+ end
+ end
end
end