Sha256: 2bbdd54329214c5db1d0c50137cdfd196052300ec073eb68d798d4696e4bc3e4

Contents?: true

Size: 1.42 KB

Versions: 1

Compression:

Stored size: 1.42 KB

Contents

# encoding: utf-8
require "logstash/namespace"
require "logstash/util"

# Converts incoming strings to UTF-8, given the charset the data is declared
# to be in.
#
# The +logger+ accessor is optional. When it is set, it receives
# +warn(message, data_hash)+ whenever data declared as UTF-8 turns out not to
# be valid UTF-8.
class LogStash::Util::Charset
  attr_accessor :logger

  # charset - name of the encoding the input is declared to use
  #           (compared against the literal "UTF-8" in #convert).
  def initialize(charset)
    @charset = charset
  end

  # Tag +data+ with the configured charset and produce a UTF-8 string.
  #
  # data - the String to convert. Note that force_encoding changes the
  #        encoding tag of this object in place.
  #
  # Returns:
  # * +data+ itself when the charset is "UTF-8" and the bytes are valid;
  # * an escaped rendering (String#inspect without the surrounding quotes)
  #   when the charset is "UTF-8" but the bytes are not valid UTF-8;
  # * a new String transcoded to UTF-8 for any other charset, with invalid or
  #   undefined sequences replaced instead of raising.
  def convert(data)
    data.force_encoding(@charset)

    if @charset == "UTF-8"
      return data if data.valid_encoding?

      # Some users don't know the charset of their logs or just don't know
      # they can set the charset setting. Turn the unknown bytes into
      # somewhat-readable escape codes; the [1..-2] trims the quotes ruby
      # puts around the inspected value.
      escaped = data.inspect[1..-2]

      # The logger is optional, so don't let a missing one abort conversion.
      if @logger
        @logger.warn("Received an event that has a different character encoding than you configured.", :text => escaped, :expected_charset => @charset)
      end
      return escaped
    end

    # The user has declared the character encoding of this data is
    # something other than UTF-8. Let's convert it (as cleanly as possible)
    # into UTF-8 so we can use it with JSON, etc.
    data.encode("UTF-8", :invalid => :replace, :undef => :replace)
  end # def convert
end # class LogStash::Util::Charset

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
logstash-lib-1.3.2 lib/logstash/util/charset.rb