# encoding: utf-8
require "logstash/filters/base"
require "logstash/namespace"
require "logstash/timestamp"

# The date filter is used for parsing dates from fields, and then using that
# date or timestamp as the logstash timestamp for the event.
#
# For example, syslog events usually have timestamps like this:
# [source,ruby]
#     "Apr 17 09:32:01"
#
# You would use the date format `MMM dd HH:mm:ss` to parse this.
#
# The date filter is especially important for sorting events and for
# backfilling old data. If you don't get the date correct in your
# event, then searching for them later will likely sort out of order.
#
# In the absence of this filter, logstash will choose a timestamp based on the
# first time it sees the event (at input time), if the timestamp is not already
# set in the event. For example, with file input, the timestamp is set to the
# time of each read.
class LogStash::Filters::Date < LogStash::Filters::Base
  if RUBY_ENGINE == "jruby"
    JavaException = java.lang.Exception
    UTC = org.joda.time.DateTimeZone.forID("UTC")

    java_import org.joda.time.LocalDateTime
    class LocalDateTime
      # Expose the toDateTime(DateTimeZone) overload under an unambiguous name.
      java_alias :to_datetime_with_tz, :toDateTime, [Java::org.joda.time.DateTimeZone]
    end
  end

  config_name "date"

  # Specify a time zone canonical ID to be used for date parsing.
  # The valid IDs are listed on the http://joda-time.sourceforge.net/timezones.html[Joda.org available time zones page].
  # This is useful in case the time zone cannot be extracted from the value,
  # and is not the platform default.
  # If this is not specified the platform default will be used.
  # Canonical ID is good as it takes care of daylight saving time for you
  # For example, `America/Los_Angeles` or `Europe/Paris` are valid IDs.
  # This field can be dynamic and include parts of the event using the `%{field}` syntax
  config :timezone, :validate => :string

  # Specify a locale to be used for date parsing using either IETF-BCP47 or POSIX language tag.
  # Simple examples are `en`,`en-US` for BCP47 or `en_US` for POSIX.
  #
  # The locale is mostly necessary to be set for parsing month names (pattern with `MMM`) and
  # weekday names (pattern with `EEE`).
  #
  # If not specified, the platform default will be used but for non-english platform default
  # an english parser will also be used as a fallback mechanism.
  config :locale, :validate => :string

  # An array with field name first, and format patterns following, `[ field,
  # formats... ]`
  #
  # If your time field has multiple possible formats, you can do this:
  # [source,ruby]
  #     match => [ "logdate", "MMM dd yyyy HH:mm:ss",
  #               "MMM d yyyy HH:mm:ss", "ISO8601" ]
  #
  # The above will match a syslog (rfc3164) or `iso8601` timestamp.
  #
  # There are a few special exceptions. The following format literals exist
  # to help you save time and ensure correctness of date parsing.
  #
  # * `ISO8601` - should parse any valid ISO8601 timestamp, such as
  #   `2011-04-19T03:44:01.103Z`
  # * `UNIX` - will parse *float or int* value expressing unix time in seconds since epoch like 1326149001.132 as well as 1326149001
  # * `UNIX_MS` - will parse **int** value expressing unix time in milliseconds since epoch like 1366125117000
  # * `TAI64N` - will parse tai64n time values
  #
  # For example, if you have a field `logdate`, with a value that looks like
  # `Aug 13 2010 00:03:44`, you would use this configuration:
  # [source,ruby]
  #     filter {
  #       date {
  #         match => [ "logdate", "MMM dd yyyy HH:mm:ss" ]
  #       }
  #     }
  #
  # If your field is nested in your structure, you can use the nested
  # syntax `[foo][bar]` to match its value. For more information, please refer to
  # the Logstash documentation on field references.
  #
  # *More details on the syntax*
  #
  # The syntax used for parsing date and time text uses letters to indicate the
  # kind of time value (month, minute, etc), and a repetition of letters to
  # indicate the form of that value (2-digit month, full month name, etc).
  #
  # Here's what you can use to parse dates and times:
  #
  # [horizontal]
  # y:: year
  #   yyyy::: full year number. Example: `2015`.
  #   yy::: two-digit year. Example: `15` for the year 2015.
  #
  # M:: month of the year
  #   M::: minimal-digit month. Example: `1` for January and `12` for December.
  #   MM::: two-digit month. zero-padded if needed. Example: `01` for January and `12` for December
  #   MMM::: abbreviated month text. Example: `Jan` for January. Note: The language used depends on your locale. See the `locale` setting for how to change the language.
  #   MMMM::: full month text, Example: `January`. Note: The language used depends on your locale.
  #
  # d:: day of the month
  #   d::: minimal-digit day. Example: `1` for the 1st of the month.
  #   dd::: two-digit day, zero-padded if needed. Example: `01` for the 1st of the month.
  #
  # H:: hour of the day (24-hour clock)
  #   H::: minimal-digit hour. Example: `0` for midnight.
  #   HH::: two-digit hour, zero-padded if needed. Example: `00` for midnight.
  #
  # m:: minutes of the hour (60 minutes per hour)
  #   m::: minimal-digit minutes. Example: `0`.
  #   mm::: two-digit minutes, zero-padded if needed. Example: `00`.
  #
  # s:: seconds of the minute (60 seconds per minute)
  #   s::: minimal-digit seconds. Example: `0`.
  #   ss::: two-digit seconds, zero-padded if needed. Example: `00`.
  #
  # S:: fraction of a second
  #   *Maximum precision is milliseconds (`SSS`). Beyond that, zeroes are appended.*
  #   S::: tenths of a second. Example: `0` for a subsecond value `012`
  #   SS::: hundredths of a second. Example: `01` for a subsecond value `012`
  #   SSS::: thousandths of a second. Example: `012` for a subsecond value `012`
  #
  # Z:: time zone offset or identity
  #   Z::: Timezone offset structured as HHmm (hour and minutes offset from Zulu/UTC). Example: `-0700`.
  #   ZZ::: Timezone offset structured as HH:mm (colon in between hour and minute offsets). Example: `-07:00`.
  #   ZZZ::: Timezone identity. Example: `America/Los_Angeles`. Note: Valid IDs are listed on the http://joda-time.sourceforge.net/timezones.html[Joda.org available time zones page].
  #
  # z:: time zone names. *Time zone names ('z') cannot be parsed.*
  #
  # w:: week of the year
  #   w::: minimal-digit week. Example: `1`.
  #   ww::: two-digit week, zero-padded if needed. Example: `01`.
  #
  # D:: day of the year
  #
  # e:: day of the week (number)
  #
  # E:: day of the week (text)
  #   E, EE, EEE::: Abbreviated day of the week. Example: `Mon`, `Tue`, `Wed`, `Thu`, `Fri`, `Sat`, `Sun`. Note: The actual language of this will depend on your locale.
  #   EEEE::: The full text day of the week. Example: `Monday`, `Tuesday`, ... Note: The actual language of this will depend on your locale.
  #
  # For non-formatting syntax, you'll need to put single-quote characters around the value. For example, if you were parsing ISO8601 time, "2015-01-01T01:12:23" that little "T" isn't a valid time format, and you want to say "literally, a T", your format would be this: "yyyy-MM-dd'T'HH:mm:ss"
  #
  # Other less common date units, such as era (G), century \(C), am/pm (a), and
  # more, can be learned about on the
  # http://www.joda.org/joda-time/key_format.html[joda-time documentation].
  config :match, :validate => :array, :default => []

  # Store the matching timestamp into the given target field. If not provided,
  # default to updating the `@timestamp` field of the event.
  config :target, :validate => :string, :default => LogStash::Event::TIMESTAMP

  # Append values to the `tags` field when there has been no
  # successful match
  config :tag_on_failure, :validate => :array, :default => ["_dateparsefailure"]

  # LOGSTASH-34
  DATEPATTERNS = %w{ y d H m s S }

  # Creates the filter and the per-field parser registry.
  #
  # @parsers maps a field name to an array of
  # `{ :parser => [lambda, ...], :format => "<pattern>" }` entries; missing
  # keys are initialised to a fresh empty array on first access.
  def initialize(config = {})
    super

    @parsers = Hash.new { |h,k| h[k] = [] }
  end # def initialize

  # Validates the `match` setting, resolves the optional locale and builds the
  # parsers for the configured field.
  #
  # Raises LogStash::ConfigurationError when `match` holds fewer than two
  # entries (a field name plus at least one format) or, via setupMatcher, when
  # a format pattern is rejected by Joda.
  def register
    require "java"
    if @match.length < 2
      raise LogStash::ConfigurationError, I18n.t("logstash.agent.configuration.invalid_plugin_register",
        :plugin => "filter", :type => "date",
        :error => "The match setting should contains first a field name and at least one date format, current value is #{@match}")
    end

    locale = nil
    if @locale
      if @locale.include? '_'
        @logger.warn("Date filter now use BCP47 format for locale, replacing underscore with dash")
        @locale.gsub!('_','-')
      end
      locale = java.util.Locale.forLanguageTag(@locale)
    end

    # The timezone is "dynamic" when it contains a `%{...}` reference that has
    # to be resolved against each event.
    @sprintf_timezone = @timezone && !@timezone.index("%{").nil?

    # Split the field name from the formats without mutating the configured
    # array, so the plugin's configuration still reflects what the user wrote.
    field, *formats = @config["match"]
    setupMatcher(field, locale, formats)
  end

  # Parses `date` with `joda_parser` and returns epoch milliseconds.
  #
  # When the format carries a year the value is parsed directly. Otherwise the
  # year is guessed from the current time: the current year is used, except
  # that a December date seen in January is assigned to the previous year and
  # a January date seen in December to the next year.
  #
  # joda_parser         - Joda DateTimeFormatter used for parsing
  # date                - the value to parse
  # format_has_year     - truthy when the pattern contains a year token
  # format_has_timezone - truthy when the pattern contains a `Z` token
  def parseWithJodaParser(joda_parser, date, format_has_year, format_has_timezone)
    return joda_parser.parseMillis(date) if format_has_year

    now = Time.now
    now_month = now.month
    if (format_has_timezone)
      result = joda_parser.parseDateTime(date)
    else
      # Parse date in UTC, Timezone correction later
      result = joda_parser.withZone(UTC).parseLocalDateTime(date)
    end

    event_month = result.getMonthOfYear

    # Year guessing around the new-year boundary; every other combination of
    # months (including "same month") uses the current year.
    if (event_month == 12 && now_month == 1)
      guessed_year = now.year-1
    elsif (event_month == 1 && now_month == 12)
      guessed_year = now.year+1
    else
      guessed_year = now.year
    end
    result = result.with_year(guessed_year)

    if (format_has_timezone)
      return result.get_millis
    else
      #Timezone correction
      return result.to_datetime_with_tz(joda_parser.getZone()).get_millis
    end
  end

  # Builds the parser lambdas for every format in `value` and appends them to
  # @parsers[field]. Also records the number of formats in the `:formats`
  # gauge metric.
  #
  # Each lambda returns epoch milliseconds or raises when the value cannot be
  # parsed. Lambdas take one argument (the value), except the one built for a
  # dynamic (`%{...}`) timezone, which takes the value and a zone ID.
  #
  # field  - name of the event field to parse
  # locale - java.util.Locale to parse with, or nil for the platform default
  # value  - array of format patterns / special format names
  #
  # Raises LogStash::ConfigurationError when Joda rejects a pattern.
  def setupMatcher(field, locale, value)
    metric.gauge(:formats, value.length)
    value.each do |format|
      parsers = []
      case format
        when "ISO8601"
          iso_parser = org.joda.time.format.ISODateTimeFormat.dateTimeParser
          if @timezone && !@sprintf_timezone
            iso_parser = iso_parser.withZone(org.joda.time.DateTimeZone.forID(@timezone))
          else
            iso_parser = iso_parser.withOffsetParsed
          end
          parsers << lambda { |date| iso_parser.parseMillis(date) }
          #Fall back solution of almost ISO8601 date-time
          almostISOparsers = [
            org.joda.time.format.DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSSZ").getParser(),
            org.joda.time.format.DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS").getParser(),
            org.joda.time.format.DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss,SSSZ").getParser(),
            org.joda.time.format.DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss,SSS").getParser()
          ].to_java(org.joda.time.format.DateTimeParser)
          joda_parser = org.joda.time.format.DateTimeFormatterBuilder.new.append( nil, almostISOparsers ).toFormatter()
          if @timezone && !@sprintf_timezone
            joda_parser = joda_parser.withZone(org.joda.time.DateTimeZone.forID(@timezone))
          else
            joda_parser = joda_parser.withOffsetParsed
          end
          parsers << lambda { |date| joda_parser.parseMillis(date) }
        when "UNIX" # unix epoch
          parsers << lambda do |date|
            raise "Invalid UNIX epoch value '#{date}'" unless /^\d+(?:\.\d+)?$/ === date || date.is_a?(Numeric)
            (date.to_f * 1000).to_i
          end
        when "UNIX_MS" # unix epoch in ms
          parsers << lambda do |date|
            raise "Invalid UNIX epoch value '#{date}'" unless /^\d+$/ === date || date.is_a?(Numeric)
            date.to_i
          end
        when "TAI64N" # TAI64 with nanoseconds, -10000 accounts for leap seconds
          parsers << lambda do |date|
            # Skip leading "@" if it is present (common in tai64n times)
            date = date[1..-1] if date[0, 1] == "@"
            # Hex digits 1..15 are scaled to milliseconds (x1000); hex digits
            # 16..23 are scaled down by 1,000,000 and added as milliseconds.
            return (date[1..15].hex * 1000 - 10000)+(date[16..23].hex/1000000)
          end
        else
          begin
            format_has_year = format.match(/y|Y/)
            format_has_timezone = format.match(/Z/)
            joda_parser = org.joda.time.format.DateTimeFormat.forPattern(format)
            if @timezone && !@sprintf_timezone
              joda_parser = joda_parser.withZone(org.joda.time.DateTimeZone.forID(@timezone))
            else
              joda_parser = joda_parser.withOffsetParsed
            end
            if locale
              joda_parser = joda_parser.withLocale(locale)
            end
            if @sprintf_timezone
              # Two-argument parser: the zone ID is resolved per event by #filter.
              parsers << lambda { |date , tz|
                return parseWithJodaParser(joda_parser.withZone(org.joda.time.DateTimeZone.forID(tz)), date, format_has_year, format_has_timezone)
              }
            end
            parsers << lambda do |date|
              return parseWithJodaParser(joda_parser, date, format_has_year, format_has_timezone)
            end

            #Include a fallback parser to english when default locale is non-english
            if !locale &&
              "en" != java.util.Locale.getDefault().getLanguage() &&
              (format.include?("MMM") || format.include?("E"))
              en_joda_parser = joda_parser.withLocale(java.util.Locale.forLanguageTag('en-US'))
              parsers << lambda { |date| parseWithJodaParser(en_joda_parser, date, format_has_year, format_has_timezone) }
            end
          rescue JavaException => e
            raise LogStash::ConfigurationError, I18n.t("logstash.agent.configuration.invalid_plugin_register",
              :plugin => "filter", :type => "date",
              :error => "#{e.message} for pattern '#{format}'")
          end
      end

      @logger.debug("Adding type with date config", :type => @type,
                    :field => field, :format => format)
      @parsers[field] << {
        :parser => parsers,
        :format => format
      }
    end
  end

  # Parses the configured field of `event` and stores the resulting timestamp
  # in the `target` field.
  #
  # Every value of the field (a scalar is treated as a one-element list, nil
  # values are skipped) is tried against each configured parser in order until
  # one succeeds. On success the `:matches` metric is incremented and
  # filter_matched is called; on failure a warning is logged, the `:failures`
  # metric is incremented and the `tag_on_failure` tags are added.
  #
  # Returns the event.
  def filter(event)
    @logger.debug? && @logger.debug("Date filter: received event", :type => event.get("type"))

    @parsers.each do |field, fieldparsers|
      @logger.debug? && @logger.debug("Date filter looking for field",
                                      :type => event.get("type"), :field => field)
      next unless event.include?(field)

      fieldvalues = event.get(field)
      fieldvalues = [fieldvalues] if !fieldvalues.is_a?(Array)
      fieldvalues.each do |value|
        next if value.nil?
        begin
          epochmillis = nil
          success = false
          last_exception = RuntimeError.new "Unknown"
          fieldparsers.each do |parserconfig|
            parserconfig[:parser].each do |parser|
              begin
                # Only parsers built for a dynamic timezone accept the resolved
                # zone ID as a second argument. All other parsers (ISO8601,
                # UNIX, UNIX_MS, TAI64N and the fallbacks) are one-argument
                # lambdas; calling them with two arguments would raise
                # ArgumentError and make them unusable with a dynamic timezone.
                if @sprintf_timezone && parser.arity == 2
                  epochmillis = parser.call(value, event.sprintf(@timezone))
                else
                  epochmillis = parser.call(value)
                end
                success = true
                break # success
              rescue StandardError, JavaException => e
                last_exception = e
              end
            end # parserconfig[:parser].each
            break if success
          end # fieldparsers.each

          raise last_exception unless success

          # Convert the epoch milliseconds to a Timestamp (seconds, microseconds)
          event.set(@target, LogStash::Timestamp.at(epochmillis / 1000, (epochmillis % 1000) * 1000))

          @logger.debug? && @logger.debug("Date parsing done", :value => value, :timestamp => event.get(@target))
          metric.increment(:matches)
          filter_matched(event)
        rescue StandardError, JavaException => e
          @logger.warn("Failed parsing date from field", :field => field,
                       :value => value, :exception => e.message,
                       :config_parsers => fieldparsers.collect {|x| x[:format]}.join(','),
                       :config_locale => @locale ? @locale : "default="+java.util.Locale.getDefault().toString()
                       )
          # Tag this event if we can't parse it. We can use this later to
          # reparse+reindex logs if we improve the patterns given.
          metric.increment(:failures)
          @tag_on_failure.each do |tag|
            event.tag(tag)
          end
        end
      end
    end

    return event
  end
end