lib/fluent/plugin/parser_xml.rb in fluent-plugin-xml-parser-0.0.8 vs lib/fluent/plugin/parser_xml.rb in fluent-plugin-xml-parser-1.0.0
- old
+ new
@@ -1,127 +1,124 @@
-require 'fluent/parser'
+require 'fluent/plugin/parser'
require 'rexml/document'
-module Fluent
- class TextParser
- class XmlParser < Parser
- # Register this parser as "xml"
- Plugin.register_parser("xml", self)
- # How to specify the target attributes and values
- # The followings are an example description for Libelium SmartCity sensor data.
- #
- # time_xpath '["cap:alert/cap:info/cap:onset", "text"]'
- # attr_xpaths '[[null, "description"], ["cap:alert/cap:info/cap:parameter/cap:valueName", "text"]]'
- # value_xpaths '[["cap:alert/cap:info/cap:description", "text"], ["cap:alert/cap:info/cap:parameter/cap:value", "text"]]'
- #
- # attr_xpaths indicates attribute name of the target value. Each array with two strings
- # means xpath of the attribute name and the attribute of the XML element (name, text etc).
- # XPath can be omitted as 'null' and specify your own attribute name as the second
- # parameter.
- #
- # value_xpaths indicates the target value to be extracted. Each array with two strings
- # means xpath of the target value and the attribute of the XML element (name, text etc).
- # XPath can be omitted as 'null' and specify your own value as the second parameter.
- #
- # You can check your own XML data structure by using irb or pry
- #
- # require 'rexml/document'
- # doc = REXML::Document.new(open("test.xml"))
- # doc.elements['cap:alert/cap:info'].children
- #
- config_param :time_xpath, :string, :default => nil
- config_param :time_key, :string, :default => nil
- config_param :time_format, :string, :default => nil # time_format is configurable
- config_param :attr_xpaths, :string, :default => '[]'
- config_param :value_xpaths, :string, :default => '[]'
- # This method is called after config_params have read configuration parameters
- def configure(conf)
- super
+module Fluent::Plugin
+ class XmlParser < Parser
+ # Register this parser as "xml"
+ Fluent::Plugin.register_parser("xml", self)
- if conf['time_xpath'].nil?
- @time_xpath = nil
- else
- @time_xpath = json_parse(conf['time_xpath'])
- end
- @time_key = conf['time_key']
- @time_format = conf['time_format']
- @time_parser = TimeParser.new(@time_format)
- @attr_xpaths = json_parse(conf['attr_xpaths'])
- @value_xpaths = json_parse(conf['value_xpaths'])
- # TimeParser class is already given. It takes a single argument as the time format
- # to parse the time string with.
+ # How to specify the target attributes and values
+ # The followings are an example description for Libelium SmartCity sensor data.
+ #
+ # time_xpath '["cap:alert/cap:info/cap:onset", "text"]'
+ # attr_xpaths '[[null, "description"], ["cap:alert/cap:info/cap:parameter/cap:valueName", "text"]]'
+ # value_xpaths '[["cap:alert/cap:info/cap:description", "text"], ["cap:alert/cap:info/cap:parameter/cap:value", "text"]]'
+ #
+ # attr_xpaths indicates attribute name of the target value. Each array with two strings
+ # means xpath of the attribute name and the attribute of the XML element (name, text etc).
+ # XPath can be omitted as 'null' and specify your own attribute name as the second
+ # parameter.
+ #
+ # value_xpaths indicates the target value to be extracted. Each array with two strings
+ # means xpath of the target value and the attribute of the XML element (name, text etc).
+ # XPath can be omitted as 'null' and specify your own value as the second parameter.
+ #
+ # You can check your own XML data structure by using irb or pry
+ #
+ # require 'rexml/document'
+ # doc = REXML::Document.new(open("test.xml"))
+ # doc.elements['cap:alert/cap:info'].children
+ #
+ config_param :time_xpath, :string, :default => nil
+ config_param :time_key, :string, :default => nil
+ config_param :time_format, :string, :default => nil # time_format is configurable
+ config_param :attr_xpaths, :string, :default => '[]'
+ config_param :value_xpaths, :string, :default => '[]'
+ # This method is called after config_params have read configuration parameters
+ def configure(conf)
+ super
+
+ if conf['time_xpath'].nil?
+ @time_xpath = nil
+ else
+ @time_xpath = json_parse(@time_xpath)
end
+ @time_parser = Fluent::TimeParser.new(@time_format)
+ @attr_xpaths = json_parse(@attr_xpaths)
+ @value_xpaths = json_parse(@value_xpaths)
+ # TimeParser class is already given. It takes a single argument as the time format
+ # to parse the time string with.
+ end
- # This is the main method. The input "text" is the unit of data to be parsed.
- # If this is the in_tail plugin, it would be a line. If this is for in_syslog,
- # it is a single syslog message.
- def parse(text)
- begin
- doc = REXML::Document.new(text)
- $log.debug doc
- # parse time field
- if @time_xpath.nil?
- @time = Fluent::Engine.now
- else
- @time = @time_parser.parse(doc.elements[@time_xpath[0]].method(@time_xpath[1]).call)
- end
- record = {}
- if !@time_key.nil?
- record = {@time_key => format_time(@time)}
- end
- attrs = @attr_xpaths.map do |attr_xpath|
- if attr_xpath[0].nil? # when null is specified
- attr_xpath[1] # second parameter is used as the attribute name
- else # otherwise, the target attribute name is extracted from XML
- el = doc.elements[attr_xpath[0]]
- unless el.nil? and attr_xpath.size > 2
- el.method(attr_xpath[1]).call
- else # unless it's not in the XML and we have a third parameter
- attr_xpath[2] # then the third parameter is used as the target value
- end
+ # This is the main method. The input "text" is the unit of data to be parsed.
+ # If this is the in_tail plugin, it would be a line. If this is for in_syslog,
+ # it is a single syslog message.
+ def parse(text)
+ begin
+ doc = REXML::Document.new(text)
+ $log.debug doc
+ # parse time field
+ if @time_xpath.nil?
+ time = Fluent::Engine.now
+ else
+ time = @time_parser.parse(doc.elements[@time_xpath[0]].method(@time_xpath[1]).call)
+ end
+ record = {}
+ if !@time_key.nil?
+ record = {@time_key => format_time(@time)}
+ end
+ attrs = @attr_xpaths.map do |attr_xpath|
+ if attr_xpath[0].nil? # when null is specified
+ attr_xpath[1] # second parameter is used as the attribute name
+ else # otherwise, the target attribute name is extracted from XML
+ el = doc.elements[attr_xpath[0]]
+ unless el.nil? and attr_xpath.size > 2
+ el.method(attr_xpath[1]).call
+ else # unless it's not in the XML and we have a third parameter
+ attr_xpath[2] # then the third parameter is used as the target value
end
end
- values = @value_xpaths.map do |value_xpath|
- if value_xpath[0].nil? # when null is specified
- value_xpath[1] # second parameter is used as the target value
- else # otherwise, the target value is extracted from XML
- el = doc.elements[value_xpath[0]]
- unless el.nil? and value_xpath.size > 2
- el.method(value_xpath[1]).call
- else # unless it's not in the XML and we have a third parameter
- value_xpath[2] # then the third parameter is used as the target value
- end
+ end
+ values = @value_xpaths.map do |value_xpath|
+ if value_xpath[0].nil? # when null is specified
+ value_xpath[1] # second parameter is used as the target value
+ else # otherwise, the target value is extracted from XML
+ el = doc.elements[value_xpath[0]]
+ unless el.nil? and value_xpath.size > 2
+ el.method(value_xpath[1]).call
+ else # unless it's not in the XML and we have a third parameter
+ value_xpath[2] # then the third parameter is used as the target value
end
end
- attrs.size.times do |i|
- record[attrs[i]] = values[i]
- end
- yield @time, record
- rescue REXML::ParseException => e
- $log.warn "Parse error", :error => e.to_s
- $log.debug_backtrace(e.backtrace)
- rescue Exception => e
- $log.warn "error", :error => e.to_s
- $log.debug_backtrace(e.backtrace)
end
+ attrs.size.times do |i|
+ record[attrs[i]] = values[i]
+ end
+ yield time, record
+ rescue REXML::ParseException => e
+ $log.warn "Parse error", :error => e.to_s
+ $log.debug_backtrace(e.backtrace)
+ rescue Exception => e
+ $log.warn "error", :error => e.to_s
+ $log.debug_backtrace(e.backtrace)
end
+ end
- def format_time(time)
- if @time_format.nil?
- Time.at(time).iso8601
- else
- Time.at(time).strftime(@time_format)
- end
+ def format_time(time)
+ if @time_format.nil?
+ Time.at(time).iso8601
+ else
+ Time.at(time).strftime(@time_format)
end
+ end
- def json_parse message
- begin
- y = Yajl::Parser.new
- y.parse(message)
- rescue
- $log.error "JSON parse error", :error => $!.to_s, :error_class => $!.class.to_s
- $log.warn_backtrace $!.backtrace
- end
+ def json_parse message
+ begin
+ y = Yajl::Parser.new
+ y.parse(message)
+ rescue
+ $log.error "JSON parse error", :error => $!.to_s, :error_class => $!.class.to_s
+ $log.warn_backtrace $!.backtrace
end
end
end
end