# encoding: utf-8
require "logstash/filters/base"
require "logstash/namespace"
require "logstash/timestamp"
require "logstash-filter-date_jars"

# The date filter is used for parsing dates from fields, and then using that
# date or timestamp as the logstash timestamp for the event.
#
# For example, syslog events usually have timestamps like this:
# [source,ruby]
#     "Apr 17 09:32:01"
#
# You would use the date format `MMM dd HH:mm:ss` to parse this.
#
# The date filter is especially important for sorting events and for
# backfilling old data. If you don't get the date correct in your
# event, then searching for them later will likely sort out of order.
#
# In the absence of this filter, logstash will choose a timestamp based on the
# first time it sees the event (at input time), if the timestamp is not already
# set in the event. For example, with file input, the timestamp is set to the
# time of each read.
class LogStash::Filters::Date < LogStash::Filters::Base
  config_name "date"

  # Specify a time zone canonical ID to be used for date parsing.
  # The valid IDs are listed on the http://joda-time.sourceforge.net/timezones.html[Joda.org available time zones page].
  # This is useful in case the time zone cannot be extracted from the value,
  # and is not the platform default.
  # If this is not specified the platform default will be used.
  # Canonical ID is good as it takes care of daylight saving time for you
  # For example, `America/Los_Angeles` or `Europe/Paris` are valid IDs.
  # This field can be dynamic and include parts of the event using the `%{field}` syntax
  config :timezone, :validate => :string

  # Specify a locale to be used for date parsing using either IETF-BCP47 or POSIX language tag.
  # Simple examples are `en`,`en-US` for BCP47 or `en_US` for POSIX.
  #
  # The locale is mostly necessary to be set for parsing month names (pattern with `MMM`) and
  # weekday names (pattern with `EEE`).
  #
  # If not specified, the platform default will be used but for non-english platform default
  # an english parser will also be used as a fallback mechanism.
  config :locale, :validate => :string

  # An array with field name first, and format patterns following, `[ field,
  # formats... ]`
  #
  # If your time field has multiple possible formats, you can do this:
  # [source,ruby]
  #     match => [ "logdate", "MMM dd yyyy HH:mm:ss",
  #               "MMM  d yyyy HH:mm:ss", "ISO8601" ]
  #
  # The above will match a syslog (rfc3164) or `iso8601` timestamp.
  #
  # There are a few special exceptions. The following format literals exist
  # to help you save time and ensure correctness of date parsing.
  #
  # * `ISO8601` - should parse any valid ISO8601 timestamp, such as
  #   `2011-04-19T03:44:01.103Z`
  # * `UNIX` - will parse *float or int* value expressing unix time in seconds since epoch like 1326149001.132 as well as 1326149001
  # * `UNIX_MS` - will parse **int** value expressing unix time in milliseconds since epoch like 1366125117000
  # * `TAI64N` - will parse tai64n time values
  #
  # For example, if you have a field `logdate`, with a value that looks like
  # `Aug 13 2010 00:03:44`, you would use this configuration:
  # [source,ruby]
  #     filter {
  #       date {
  #         match => [ "logdate", "MMM dd yyyy HH:mm:ss" ]
  #       }
  #     }
  #
  # If your field is nested in your structure, you can use the nested
  # syntax `[foo][bar]` to match its value. For more information, please refer to
  # <<logstash-config-field-references>>
  #
  # *More details on the syntax*
  #
  # The syntax used for parsing date and time text uses letters to indicate the
  # kind of time value (month, minute, etc), and a repetition of letters to
  # indicate the form of that value (2-digit month, full month name, etc).
  #
  # Here's what you can use to parse dates and times:
  #
  # [horizontal]
  # y:: year
  #   yyyy::: full year number. Example: `2015`.
  #   yy::: two-digit year. Example: `15` for the year 2015.
  #
  # M:: month of the year
  #   M::: minimal-digit month. Example: `1` for January and `12` for December.
  #   MM::: two-digit month. zero-padded if needed. Example: `01` for January  and `12` for December
  #   MMM::: abbreviated month text. Example: `Jan` for January. Note: The language used depends on your locale. See the `locale` setting for how to change the language.
  #   MMMM::: full month text, Example: `January`. Note: The language used depends on your locale.
  #
  # d:: day of the month
  #   d::: minimal-digit day. Example: `1` for the 1st of the month.
  #   dd::: two-digit day, zero-padded if needed. Example: `01` for the 1st of the month.
  #
  # H:: hour of the day (24-hour clock)
  #   H::: minimal-digit hour. Example: `0` for midnight.
  #   HH::: two-digit hour, zero-padded if needed. Example: `00` for midnight.
  #
  # m:: minutes of the hour (60 minutes per hour)
  #   m::: minimal-digit minutes. Example: `0`.
  #   mm::: two-digit minutes, zero-padded if needed. Example: `00`.
  #
  # s:: seconds of the minute (60 seconds per minute)
  #   s::: minimal-digit seconds. Example: `0`.
  #   ss::: two-digit seconds, zero-padded if needed. Example: `00`.
  #
  # S:: fraction of a second
  #   *Maximum precision is milliseconds (`SSS`). Beyond that, zeroes are appended.*
  #   S::: tenths of a second. Example:  `0` for a subsecond value `012`
  #   SS::: hundredths of a second. Example:  `01` for a subsecond value `01`
  #   SSS::: thousandths of a second. Example:  `012` for a subsecond value `012`
  #
  # Z:: time zone offset or identity
  #   Z::: Timezone offset structured as HHmm (hour and minutes offset from Zulu/UTC). Example: `-0700`.
  #   ZZ::: Timezone offset structured as HH:mm (colon in between hour and minute offsets). Example: `-07:00`.
  #   ZZZ::: Timezone identity. Example: `America/Los_Angeles`. Note: Valid IDs are listed on the http://joda-time.sourceforge.net/timezones.html[Joda.org available time zones page].
  #
  # z:: time zone names. *Time zone names ('z') cannot be parsed.*
  #
  # w:: week of the year
  #   w::: minimal-digit week. Example: `1`.
  #   ww::: two-digit week, zero-padded if needed. Example: `01`.
  #
  # D:: day of the year
  #
  # e:: day of the week (number)
  #
  # E:: day of the week (text)
  #   E, EE, EEE::: Abbreviated day of the week. Example:  `Mon`, `Tue`, `Wed`, `Thu`, `Fri`, `Sat`, `Sun`. Note: The actual language of this will depend on your locale.
  #   EEEE::: The full text day of the week. Example: `Monday`, `Tuesday`, ... Note: The actual language of this will depend on your locale.
  #
  # For non-formatting syntax, you'll need to put single-quote characters around the value. For example, if you were parsing ISO8601 time, "2015-01-01T01:12:23" that little "T" isn't a valid time format, and you want to say "literally, a T", your format would be this: "yyyy-MM-dd'T'HH:mm:ss"
  #
  # Other less common date units, such as era (G), century \(C), am/pm (a), and # more, can be learned about on the
  # http://www.joda.org/joda-time/key_format.html[joda-time documentation].
  config :match, :validate => :array, :default => []

  # Store the matching timestamp into the given target field.  If not provided,
  # default to updating the `@timestamp` field of the event.
  config :target, :validate => :string, :default => LogStash::Event::TIMESTAMP

  # Append values to the `tags` field when there has been no
  # successful match
  config :tag_on_failure, :validate => :array, :default => ["_dateparsefailure"]

  def register
    # nothing
  end

  def initialize(config = {})
    super
    if @match.length < 2
      raise LogStash::ConfigurationError, I18n.t("logstash.agent.configuration.invalid_plugin_register",
        :plugin => "filter", :type => "date",
        :error => "The match setting should contains first a field name and at least one date format, current value is #{@match}")
    end
    if @locale
      if @locale.include? '_'
        @logger.warn("Date filter now use BCP47 format for locale, replacing underscore with dash")
        @locale.gsub!('_','-')
      end
      locale = java.util.Locale.forLanguageTag(@locale)
    end

    source = @match.first

    @datefilter = org.logstash.filters.DateFilter.new(source, @target, @tag_on_failure) do |event|
      filter_matched(event)
    end

    @match[1..-1].map do |format|
      @datefilter.accept_filter_config(format, @locale, @timezone)

      # Offer a fallback parser such that if the default system Locale is non-english and that no locale is set,
      # we should try to parse english if the first local parsing fails.:w
      if !@locale && "en" != java.util.Locale.getDefault().getLanguage() && (format.include?("MMM") || format.include?("E"))
        @datefilter.accept_filter_config(format, "en-US", @timezone)
      end
    end

  end # def initialize

  def multi_filter(events)
    @datefilter.receive(events)
  end

  def filter(event)
    multi_filter([event]).first
  end
end