Sha256: ae2909e4e25d1a7f201797b275939557f1021bf19dc7f8f0b1ddd590e7c8f231

Contents?: true

Size: 1.64 KB

Versions: 6

Compression:

Stored size: 1.64 KB

Contents

# frozen_string_literal: true

require 'nokogiri'
require 'aranha/parsers/base'
require 'aranha/parsers/html/node/default'

module Aranha
  module Parsers
    module Html
      # Base class for HTML parsers. Subclasses declare the fields they extract
      # with {.field}; those declarations are handed to a node parser instance.
      class Base < ::Aranha::Parsers::Base
        class << self
          # A single field declaration: its name, its type and the XPath
          # expression associated with it.
          Field = Struct.new(:name, :type, :xpath)

          # Fields declared on this class so far.
          #
          # @return [Array<Field>] a shallow copy, so changes to the returned
          #   array do not affect the class-level list
          def fields
            (@fields ||= []).dup
          end

          # Appends a field declaration to this class.
          #
          # @param name [Object] stored as the field's name
          # @param type [Object] stored as the field's type
          # @param xpath [Object] stored as the field's XPath
          # @return [Array<Field>] the class-level list, including the new entry
          def field(name, type, xpath)
            (@fields ||= []) << Field.new(name, type, xpath)
          end

          # Builds a parser from a node by serializing it to HTML and passing
          # the markup to +from_string+ (not defined here; presumably inherited).
          #
          # @param node [Nokogiri::XML::Node]
          # @return [Aranha::Parsers::Html::Base]
          def from_node(node)
            markup = node.to_html
            from_string(markup)
          end

          # XPath predicate checking that +klass+ appears as a whole,
          # space-delimited token in the value of +node+.
          #
          # Both arguments are interpolated verbatim into the expression.
          #
          # @param klass [String] class name to look for
          # @param node [String] XPath expression whose value is inspected
          # @return [String]
          def xpath_contains_class(klass, node = '@class')
            padded_value = "concat(' ', normalize-space(#{node}), ' ')"
            "contains(#{padded_value}, ' #{klass} ')"
          end

          # XPath comparison checking that +haystack+ ends with +needle+, built
          # from +substring+ and +string-length+.
          #
          # Both arguments are interpolated verbatim as XPath expressions, so a
          # literal needle has to arrive already quoted.
          #
          # @param haystack [String]
          # @param needle [String]
          # @return [String]
          def xpath_ends_with(haystack, needle)
            tail_start = "string-length(#{haystack}) - string-length(#{needle}) + 1"
            "substring(#{haystack}, #{tail_start}) = #{needle}"
          end
        end

        # Memoized Nokogiri HTML document parsed from +content+, with the
        # +noblanks+ parse option applied.
        def nokogiri
          @nokogiri ||= Nokogiri::HTML(content) { |options| options.noblanks }
        end

        protected

        # Class instantiated by {#node_parser}.
        def node_parser_class
          ::Aranha::Parsers::Html::Node::Default
        end

        private

        # Memoized node parser, constructed with this parser's field triples.
        def node_parser
          @node_parser ||= node_parser_class.new(fields)
        end

        # Class-level field declarations flattened to +[name, type, xpath]+
        # arrays.
        def fields
          self.class.fields.map do |declared|
            %i[name type xpath].map { |attribute| declared.public_send(attribute) }
          end
        end
      end
    end
  end
end

Version data entries

6 entries across 6 versions & 1 rubygem

Version Path
aranha-parsers-0.25.0 lib/aranha/parsers/html/base.rb
aranha-parsers-0.24.0 lib/aranha/parsers/html/base.rb
aranha-parsers-0.23.1 lib/aranha/parsers/html/base.rb
aranha-parsers-0.23.0 lib/aranha/parsers/html/base.rb
aranha-parsers-0.22.0 lib/aranha/parsers/html/base.rb
aranha-parsers-0.21.0 lib/aranha/parsers/html/base.rb