Sha256: 8765233acbe3a99a2dd10ca589faa1ff4f7a3f6351f0595019afa843442cdb89

Contents?: true

Size: 1.46 KB

Versions: 8

Compression:

Stored size: 1.46 KB

Contents

module Traject
  module Macros
    # Macros for building indexing steps that pull values out of a record
    # via XPath. The including object is expected to provide a `settings`
    # method that can be indexed with a string key.
    module NokogiriMacros
      # Namespace prefix mappings taken from the "nokogiri.namespaces"
      # setting, falling back to an empty hash when the setting is absent.
      # The value is memoized after the first successful lookup.
      #
      # @return [Hash] the configured namespace mappings
      # @raise [ArgumentError] if the setting is present but is not a Hash
      def default_namespaces
        @default_namespaces ||= begin
          configured = settings["nokogiri.namespaces"] || {}
          unless configured.is_a?(Hash)
            raise ArgumentError, "nokogiri.namespaces must be a hash, not: #{configured.inspect}"
          end
          configured
        end
      end

      # Builds a two-argument lambda that evaluates `xpath` against a record
      # and appends the matches to an accumulator.
      #
      # Namespaces are resolved once, when this method is called, not each
      # time the returned lambda runs.
      #
      # @param xpath [String] expression handed to `record.xpath`
      # @param ns [Hash, nil] extra namespace mappings; merged over
      #   #default_namespaces, so entries here win on a key collision
      # @param to_text [Boolean] when truthy, each match is reduced to a
      #   string; otherwise the matched objects themselves are appended
      # @return [Proc] lambda taking (record, accumulator); record must
      #   respond to `xpath` (presumably a Nokogiri document or node)
      def extract_xpath(xpath, ns: {}, to_text: true)
        namespaces = (ns && ns.length > 0) ? default_namespaces.merge(ns) : default_namespaces

        lambda do |record, accumulator|
          matches = record.xpath(xpath, namespaces)

          values =
            if to_text
              matches.map do |node|
                # An attribute match contributes its value directly.
                next node.value if node.is_a?(Nokogiri::XML::Attr)

                # Any other match contributes all of its descendant text
                # nodes joined with single spaces. Whitespace-only text
                # (the formatting "between the children") is dropped first.
                pieces = node.xpath('.//text()').map(&:text)
                pieces.reject { |s| s =~ (/\A\s+\z/) }.join(" ")
              end
            else
              # Hand back the matched nodes untouched, as a plain Array.
              matches.to_a
            end

          accumulator.concat(values)
        end
      end
    end
  end
end

Version data entries

8 entries across 8 versions & 1 rubygems

Version Path
traject-3.8.3 lib/traject/macros/nokogiri_macros.rb
traject-3.8.2 lib/traject/macros/nokogiri_macros.rb
traject-3.8.1 lib/traject/macros/nokogiri_macros.rb
traject-3.8.0 lib/traject/macros/nokogiri_macros.rb
traject-3.7.0 lib/traject/macros/nokogiri_macros.rb
traject-3.6.0 lib/traject/macros/nokogiri_macros.rb
traject-3.5.0 lib/traject/macros/nokogiri_macros.rb
traject-3.4.0 lib/traject/macros/nokogiri_macros.rb