Sha256: 40d8b96b87de3400967110ac4cd4a18faff084e348825abb4c473691459106d4

Contents?: true

Size: 1.28 KB

Versions: 7

Compression:

Stored size: 1.28 KB

Contents

module Traject
  module Macros
    # Indexing macros that pull values out of records by XPath. A "record"
    # here is any object that responds to `xpath(expression, namespaces)`,
    # e.g. a Nokogiri document or node.
    module NokogiriMacros

      # Namespace prefix => URI mappings applied to every XPath evaluated by
      # these macros, read from the "nokogiri.namespaces" setting.
      #
      # The value is computed once and cached on the including object. When
      # the setting is absent an empty hash is used.
      #
      # @return [Hash] the configured namespace mappings
      # @raise [ArgumentError] if the setting is present but is not a Hash
      def default_namespaces
        @default_namespaces ||= begin
          configured = settings["nokogiri.namespaces"] || {}

          unless configured.kind_of?(Hash)
            raise ArgumentError, "nokogiri.namespaces must be a hash, not: #{configured.inspect}"
          end

          configured
        end
      end

      # Builds an indexing step that evaluates +xpath+ against a record and
      # appends the matches to the accumulator.
      #
      # @param xpath [String] XPath expression handed to `record.xpath`
      # @param ns [Hash, nil] extra namespace mappings for this step only;
      #   merged over #default_namespaces, so entries here win on conflict
      # @param to_text [Boolean] when true each match becomes a String built
      #   from its descendant text nodes; when false the matched nodes
      #   themselves are accumulated
      # @return [Proc] a two-argument lambda `(record, accumulator)`
      def extract_xpath(xpath, ns: {}, to_text: true)
        # Resolved once, when the step is defined, not on every record.
        namespaces =
          if ns && ns.length > 0
            default_namespaces.merge(ns)
          else
            default_namespaces
          end

        ->(record, accumulator) do
          matches = record.xpath(xpath, namespaces)

          values =
            if to_text
              # One string per match: gather every descendant text node, throw
              # away the ones made up solely of whitespace (the indentation
              # found "between the children"), and glue the rest together with
              # single spaces.
              matches.collect do |node|
                pieces = node.xpath('.//text()').collect(&:text)
                pieces.reject { |piece| piece =~ (/\A\s+\z/) }.join(" ")
              end
            else
              # Hand the matches over untouched, as Nokogiri::XML::Node's.
              matches.to_a
            end

          accumulator.concat values
        end
      end
    end
  end
end

Version data entries

7 entries across 7 versions & 1 rubygems

Version Path
traject-3.3.0 lib/traject/macros/nokogiri_macros.rb
traject-3.2.0 lib/traject/macros/nokogiri_macros.rb
traject-3.1.0 lib/traject/macros/nokogiri_macros.rb
traject-3.1.0.rc1 lib/traject/macros/nokogiri_macros.rb
traject-3.0.0 lib/traject/macros/nokogiri_macros.rb
traject-3.0.0.alpha.2 lib/traject/macros/nokogiri_macros.rb
traject-3.0.0.alpha.1 lib/traject/macros/nokogiri_macros.rb