require 'test_helper'

# Specs for Traject::Indexer::NokogiriIndexer: record splitting via
# "nokogiri.each_record_xpath", namespace handling, and the extract_xpath
# macro (text output, raw node output, and attribute extraction).
describe "Traject::NokogiriIndexer" do
  before do
    # Thread pools are set to 0 both in the class-level defaults and on the
    # indexer instance built below.
    Traject::Indexer.send(
      :default_settings=,
      Traject::Indexer.default_settings.merge(
        "solr_writer.thread_pool" => 0,
        "processing_thread_pool" => 0
      )
    )

    @xml_sample_path = support_file_path("sample-oai-pmh.xml")

    # ArrayWriter is used so the examples can inspect output through
    # @indexer.writer.values.
    @indexer = Traject::Indexer::NokogiriIndexer.new(
      "writer_class_name" => "Traject::ArrayWriter",
      "solr_writer.thread_pool" => 0,
      "processing_thread_pool" => 0
    )

    @namespaces = {
      "oai" => "http://www.openarchives.org/OAI/2.0/",
      "dc" => "http://purl.org/dc/elements/1.1/",
      "oai_dc" => "http://www.openarchives.org/OAI/2.0/oai_dc/",
      "edm" => "http://www.europeana.eu/schemas/edm/"
    }
  end

  # Indexes the sample file and checks one output hash per oai:record, each
  # with exactly one "id" and at least one "title".
  it "smoke test" do
    # The configure block reads a local rather than the spec's instance
    # variable (presumably because the block is evaluated against the
    # indexer -- confirm).
    namespaces = @namespaces

    @indexer.configure do
      settings do
        provide "nokogiri.namespaces", namespaces
        provide "nokogiri.each_record_xpath", "//oai:record"
      end

      to_field "id", extract_xpath("//oai:metadata/oai_dc:dc/dc:identifier"), first_only
      to_field "title", extract_xpath("//oai:metadata/oai_dc:dc/dc:title")
    end

    # Block form so the file handle is closed once processing returns.
    File.open(@xml_sample_path) { |file| @indexer.process(file) }
    results = @indexer.writer.values

    source_doc = File.open(@xml_sample_path) { |file| Nokogiri::XML.parse(file) }

    assert_equal source_doc.xpath("//oai:record", @namespaces).count, results.count
    assert(
      results.all? { |hash| hash["id"] && hash["id"].length == 1 && hash["title"] && hash["title"].length >= 1 },
      "expected results have expected values"
    )
  end

  # A namespace passed through extract_xpath's ns: option is used for the
  # "edm" prefix even though a wrong URI was supplied in the settings.
  it "namespaces to extract_xpath" do
    # NOTE(review): @namespaces already has the String key "edm"; merging the
    # Symbol key :edm adds a second entry instead of replacing it. Confirm this
    # still exercises the override as intended.
    namespaces = @namespaces.merge(edm: "http://this.is.wrong")

    @indexer.configure do
      settings do
        provide "nokogiri.namespaces", namespaces
        provide "nokogiri.each_record_xpath", "//oai:record"
      end

      to_field "rights",
               extract_xpath("//oai:metadata/oai_dc:dc/edm:rights",
                             ns: { edm: "http://www.europeana.eu/schemas/edm/" })
    end

    File.open(@xml_sample_path) { |file| @indexer.process(file) }
    results = @indexer.writer.values

    refute_empty results.last["rights"]
  end

  # The "nokogiri.namespaces" setting is what #default_namespaces returns.
  it "exposes nokogiri.namespaces setting in default_namespaces" do
    namespaces = @namespaces

    @indexer.configure do
      settings do
        provide "nokogiri.namespaces", namespaces
      end
    end
    @indexer.settings.fill_in_defaults!

    assert_equal namespaces, @indexer.default_namespaces
  end

  describe "xpath to non-terminal element" do
    before do
      # NOTE(review): the markup of this fixture was missing from the reviewed
      # copy (only the text "José Lopez Sue Jones" survived). It is rebuilt
      # here to match the "/record/name" xpath and the expected values below;
      # the child element names are an assumption -- confirm against the
      # original fixture.
      @xml = <<-EOS
        <record>
          <name>
            <first>José</first>
            <last>Lopez</last>
          </name>
          <name>
            <first>Sue</first>
            <last>Jones</last>
          </name>
        </record>
      EOS

      @indexer.configure do
        settings do
          provide "nokogiri.each_record_xpath", "//record"
        end
      end
    end

    # With the default options each matched element yields one string.
    it "outputs text" do
      @indexer.configure do
        to_field "name", extract_xpath("/record/name")
      end
      @indexer.process(StringIO.new(@xml))
      results = @indexer.writer.values

      assert_equal(
        { "name" => ["José Lopez", "Sue Jones"] },
        results.first
      )
    end

    # With to_text: false the matched nodes themselves are emitted.
    it "outputs Nokogiri::XML::Element with to_text: false" do
      @indexer.configure do
        to_field "name", extract_xpath("/record/name", to_text: false)
      end
      @indexer.process(StringIO.new(@xml))
      results = @indexer.writer.values
      values = results.first["name"]

      # Array#each returns its receiver, so asserting on `values.each { ... }`
      # could never fail. Check every node with all?, and guard against an
      # empty list, for which all? is trivially true.
      refute_empty values
      assert(values.all? { |node| node.kind_of?(Nokogiri::XML::Element) && node.name == "name" })
    end
  end

  describe "xpath to attribute" do
    let(:indexer) do
      namespaces = @namespaces
      Traject::Indexer::NokogiriIndexer.new(
        "nokogiri.namespaces" => namespaces,
        "nokogiri.each_record_xpath" => "//oai:record"
      ) do
        to_field "status", extract_xpath("//oai:record/oai:header/@status")
      end
    end

    # NOTE(review): the markup of this fixture was missing from the reviewed
    # copy (only its text content survived). It is rebuilt here as a minimal
    # OAI-PMH ListRecords response whose header carries status="deleted", as
    # the example below requires; element layout and the absence of further
    # attributes are assumptions -- confirm against the original fixture.
    let(:records) do
      Traject::NokogiriReader.new(StringIO.new(
        <<-XML
          <OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/">
            <responseDate>2020-03-03T04:16:09Z</responseDate>
            <request>https://na02.alma.exlibrisgroup.com/view/oai/01TULI_INST/request</request>
            <ListRecords>
              <record>
                <header status="deleted">
                  <identifier>oai:alma.01TULI_INST:991025803889703811</identifier>
                  <datestamp>2020-03-03T03:54:35Z</datestamp>
                  <setSpec>blacklight</setSpec>
                  <setSpec>rapid_print_journals</setSpec>
                  <setSpec>blacklight_qa</setSpec>
                </header>
              </record>
            </ListRecords>
          </OAI-PMH>
        XML
      ), []).to_a
    end

    # An xpath ending in an attribute yields the attribute's value.
    it "extracts the correct attribute" do
      statuses = indexer.map_record(records.first)["status"]
      assert_equal ["deleted"], statuses
    end
  end
end