# encoding: utf-8
require "logstash/devutils/rspec/spec_helper"
require "insist"
require "logstash/filters/xml"
describe LogStash::Filters::Xml do
describe "parse standard xml (Deprecated checks)" do
config <<-CONFIG
filter {
xml {
source => "raw"
target => "data"
}
}
CONFIG
sample("raw" => '') do
insist { subject.get("tags") }.nil?
insist { subject.get("data")} == {"key" => "value"}
end
#From parse xml with array as a value
sample("raw" => 'value1value2') do
insist { subject.get("tags") }.nil?
insist { subject.get("data")} == {"key" => ["value1", "value2"]}
end
#From parse xml with hash as a value
sample("raw" => 'value') do
insist { subject.get("tags") }.nil?
insist { subject.get("data")} == {"key1" => [{"key2" => ["value"]}]}
end
# parse xml in single item array
sample("raw" => [""]) do
insist { subject.get("tags") }.nil?
insist { subject.get("data")} == {"bar" => "baz"}
end
# fail in multi items array
sample("raw" => ["", "jojoba"]) do
insist { subject.get("tags") }.include?("_xmlparsefailure")
insist { subject.get("data")} == nil
end
# fail in empty array
sample("raw" => []) do
insist { subject.get("tags") }.include?("_xmlparsefailure")
insist { subject.get("data")} == nil
end
# fail for non string field
sample("raw" => {"foo" => "bar"}) do
insist { subject.get("tags") }.include?("_xmlparsefailure")
insist { subject.get("data")} == nil
end
# fail for non string single item array
sample("raw" => [{"foo" => "bar"}]) do
insist { subject.get("tags") }.include?("_xmlparsefailure")
insist { subject.get("data")} == nil
end
#From bad xml
sample("raw" => ' "raw"
target => "data"
store_xml => false
}
}
CONFIG
sample("raw" => '') do
insist { subject.get("tags") }.nil?
insist { subject.get("data")} == nil
end
end
describe "parse xml and store values with xpath (Deprecated checks)" do
config <<-CONFIG
filter {
xml {
source => "raw"
target => "data"
xpath => [ "/foo/key/text()", "xpath_field" ]
}
}
CONFIG
# Single value
sample("raw" => 'value') do
insist { subject.get("tags") }.nil?
insist { subject.get("xpath_field")} == ["value"]
end
#Multiple values
sample("raw" => 'value1value2') do
insist { subject.get("tags") }.nil?
insist { subject.get("xpath_field")} == ["value1","value2"]
end
end
## New tests
describe "parse standard xml" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
target => "data"
}
}
CONFIG
sample("xmldata" => '') do
insist { subject.get("tags") }.nil?
insist { subject.get("data") } == {"key" => "value"}
end
#From parse xml with array as a value
sample("xmldata" => 'value1value2') do
insist { subject.get("tags") }.nil?
insist { subject.get("data") } == {"key" => ["value1", "value2"]}
end
#From parse xml with hash as a value
sample("xmldata" => 'value') do
insist { subject.get("tags") }.nil?
insist { subject.get("data") } == {"key1" => [{"key2" => ["value"]}]}
end
#From bad xml
sample("xmldata" => ' "xmldata"
target => "data"
store_xml => false
}
}
CONFIG
sample("xmldata" => '') do
insist { subject.get("tags") }.nil?
insist { subject.get("data")} == nil
end
end
describe "parse xml and store values with xpath" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
target => "data"
xpath => [ "/foo/key/text()", "xpath_field" ]
}
}
CONFIG
# Single value
sample("xmldata" => 'value') do
insist { subject.get("tags") }.nil?
insist { subject.get("xpath_field") } == ["value"]
end
#Multiple values
sample("xmldata" => 'value1value2') do
insist { subject.get("tags") }.nil?
insist { subject.get("xpath_field") } == ["value1","value2"]
end
end
describe "parse correctly non ascii content with xpath" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
target => "data"
xpath => [ "/foo/key/text()", "xpath_field" ]
}
}
CONFIG
# Single value
sample("xmldata" => 'Français') do
insist { subject.get("tags") }.nil?
insist { subject.get("xpath_field")} == ["Français"]
end
end
describe "parse including namespaces" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
xpath => [ "/foo/h:div", "xpath_field" ]
remove_namespaces => false
store_xml => false
}
}
CONFIG
# Single value
sample("xmldata" => 'Content') do
insist { subject.get("xpath_field") } == ["Content"]
end
end
describe "parse including namespaces declarations on root" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
xpath => [ "/foo/h:div", "xpath_field" ]
namespaces => {"h" => "http://www.w3.org/TR/html4/"}
remove_namespaces => false
store_xml => false
}
}
CONFIG
# Single value
sample("xmldata" => 'Content') do
insist { subject.get("xpath_field") } == ["Content"]
end
end
describe "parse including namespaces declarations on child" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
xpath => [ "/foo/h:div", "xpath_field" ]
namespaces => {"h" => "http://www.w3.org/TR/html4/"}
remove_namespaces => false
store_xml => false
}
}
CONFIG
# Single value
sample("xmldata" => 'Content') do
insist { subject.get("xpath_field") } == ["Content"]
end
end
describe "parse removing namespaces" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
xpath => [ "/foo/div", "xpath_field" ]
remove_namespaces => true
store_xml => false
}
}
CONFIG
# Single value
sample("xmldata" => 'Content') do
insist { subject.get("xpath_field") } == ["Content
"]
end
end
describe "parse with forcing array (Default)" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
target => "parseddata"
}
}
CONFIG
# Single value
sample("xmldata" => 'Content') do
insist { subject.get("parseddata") } == { "bar" => ["Content"] }
end
end
describe "parse disabling forcing array" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
target => "parseddata"
force_array => false
}
}
CONFIG
# Single value
sample("xmldata" => 'Content') do
insist { subject.get("parseddata") } == { "bar" => "Content" }
end
end
describe "parse disabling forcing with nested elements" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
store_xml => "false"
force_array => "false"
xpath => [
"/element/field1/text()", "field1"
]
}
}
CONFIG
# Single value
sample("xmldata" => 'bbb789e3f') do
insist { subject.get("field1") } == "bbb"
end
end
context "Using suppress_empty option" do
describe "suppress_empty => false" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
target => "data"
suppress_empty => false
}
}
CONFIG
sample("xmldata" => 'value1') do
insist { subject.get("tags") }.nil?
insist { subject.get("data") } == {"key" => ["value1", {}]}
end
end
describe "suppress_empty => true" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
target => "data"
suppress_empty => true
}
}
CONFIG
sample("xmldata" => 'value1') do
insist { subject.get("tags") }.nil?
insist { subject.get("data") } == {"key" => ["value1"]}
end
end
end
context "Using force content option" do
describe "force_content => false" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
target => "data"
force_array => false
force_content => false
}
}
CONFIG
sample("xmldata" => 'text1text2') do
insist { subject.get("tags") }.nil?
insist { subject.get("data") } == { 'x' => 'text1', 'y' => { 'a' => '2', 'content' => 'text2' } }
end
end
describe "force_content => true" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
target => "data"
force_array => false
force_content => true
}
}
CONFIG
sample("xmldata" => 'text1text2') do
insist { subject.get("tags") }.nil?
insist { subject.get("data") } == { 'x' => { 'content' => 'text1' }, 'y' => { 'a' => '2', 'content' => 'text2' } }
end
end
describe "does not set empty array event on failed xpath" do
config <<-CONFIG
filter {
xml {
source => "xmldata"
target => "data"
xpath => [ "//foo/text()","xpath_field" ]
}
}
CONFIG
sample("raw" => '') do
insist { subject.get("tags") }.nil?
insist { subject.get("xpath_field")}.nil?
end
end
end
describe "parsing invalid xml" do
subject { described_class.new(options) }
let(:options) { ({ 'source' => 'xmldata', 'store_xml' => false }) }
let(:xmldata) { " " }
let(:event) { LogStash::Event.new(data) }
let(:data) { { "xmldata" => xmldata } }
before { subject.register }
after { subject.close }
it 'does not fail (by default)' do
subject.filter(event)
expect( event.get("tags") ).to be nil
end
context 'strict option' do
let(:options) { super.merge({ 'parse_options' => 'strict' }) }
it 'does fail parsing' do
subject.filter(event)
expect( event.get("tags") ).to_not be nil
expect( event.get("tags") ).to include '_xmlparsefailure'
end
end
end
describe 'when an exception is thrown in XML#filter' do
let(:logger_stub) { double('Logger').as_null_object }
before(:each) do
allow_any_instance_of(described_class).to receive(:logger).and_return(logger_stub)
end
subject(:xml_filter_plugin) { described_class.new(options).tap(&:register) }
let(:options) { ({ 'source' => 'xmldata', 'store_xml' => true, 'target' => 'decoded' }) }
let(:xmldata) { '' }
let(:event) { LogStash::Event.new(data) }
let(:data) { { "xmldata" => xmldata } }
after { xml_filter_plugin.close }
# In order to test how we handle and propagate exceptions, we inject an
# intentional failure when `filter_matched` is called, and parse an XML
# that would not otherwise fail.
before(:each) do
expect(xml_filter_plugin).to receive(:filter_matched) { |_| fail('intentional') }
end
it 'does not propagate the exception' do
expect{ xml_filter_plugin.filter(event) }.to_not raise_error
end
it 'emits the event with an error tag' do
xml_filter_plugin.filter(event)
expect(event.get("tags")).to_not be nil
expect(event.get('tags')).to include '_xmlparsefailure'
end
it 'logs a helpful message' do
xml_filter_plugin.filter(event)
expect(logger_stub).to have_received(:warn) do |message, metadata|
expect(message).to include('XML Parse Error')
expect(metadata).to include(:value)
expect(metadata).to include(:source)
end
end
end
describe "parse_options" do
subject { described_class.new(options) }
let(:options) { ({ 'source' => 'xmldata', 'store_xml' => false, 'parse_options' => parse_options }) }
context 'strict (supported option)' do
let(:parse_options) { 'strict' }
it 'registers filter' do
subject.register
expect( subject.send(:xml_parse_options) ).
to eql Nokogiri::XML::ParseOptions::STRICT
end
end
context 'valid' do
let(:parse_options) { 'no_error,NOWARNING' }
it 'registers filter' do
subject.register
expect( subject.send(:xml_parse_options) ).
to eql Nokogiri::XML::ParseOptions::NOERROR | Nokogiri::XML::ParseOptions::NOWARNING
end
end
context 'invalid' do
let(:parse_options) { 'strict,invalid0' }
it 'fails to register' do
expect { subject.register }.to raise_error(LogStash::ConfigurationError, 'unsupported parse option: "invalid0"')
end
end
end
end