Sha256: 0c51e76d015a7ed504525ba32373bccf38ea4a91041d4a31537941b5e55479c6

Contents?: true

Size: 1.4 KB

Versions: 11

Compression:

Stored size: 1.4 KB

Contents

# frozen_string_literal: true

require 'nokogiri'
require 'aranha/parsers/base'
require 'aranha/parsers/html/node/default'

module Aranha
  module Parsers
    module Html
      # Base class for HTML parsers whose fields are declared at class level
      # as (name, type, xpath) triples and whose document is read through
      # Nokogiri.
      class Base < ::Aranha::Parsers::Base
        class << self
          # One declared field: its name, its type and the XPath used to find it.
          Field = Struct.new(:name, :type, :xpath)

          # Fields declared on this class through {.field}.
          #
          # The list lives in an instance variable of the class object itself,
          # so each class keeps its own list. A copy is returned, so changing
          # the returned array does not change the declarations.
          #
          # @return [Array<Field>]
          def fields
            (@fields ||= []).dup
          end

          # Declares a field on this class.
          #
          # @param name [Object] stored as the field's name
          # @param type [Object] stored as the field's type
          # @param xpath [Object] stored as the field's XPath
          # @return [Array<Field>] the internal list, with the new field appended
          def field(name, type, xpath)
            declared = Field.new(name, type, xpath)
            (@fields ||= []) << declared
          end

          # Builds a parser from the HTML serialization of +node+.
          #
          # Delegates to +from_string+, which is not defined in this file
          # (presumably inherited from ::Aranha::Parsers::Base - confirm).
          #
          # @param node [Nokogiri::XML::Node]
          # @return [Aranha::Parsers::Html::Base]
          def from_node(node)
            html = node.to_html
            from_string(html)
          end

          # Builds an XPath expression that compares the tail of +haystack+
          # (as many characters as +needle+ has) with +needle+.
          #
          # Both arguments are interpolated verbatim as XPath expressions, so a
          # literal string must already carry its own quotes.
          #
          # @param haystack [String]
          # @param needle [String]
          # @return [String]
          def xpath_ends_with(haystack, needle)
            tail_start = "string-length(#{haystack}) - string-length(#{needle}) + 1"
            "substring(#{haystack}, #{tail_start}) = #{needle}"
          end
        end

        # Document parsed from +content+ with the +noblanks+ parse option
        # enabled; parsed on first access and memoized afterwards.
        #
        # +content+ is not defined in this file (presumably provided by
        # ::Aranha::Parsers::Base - confirm).
        #
        # @return [Nokogiri::HTML::Document]
        def nokogiri
          @nokogiri ||= Nokogiri::HTML(content, &:noblanks)
        end

        protected

        # Class instantiated by {#node_parser}.
        #
        # @return [Class]
        def node_parser_class
          ::Aranha::Parsers::Html::Node::Default
        end

        private

        # Memoized instance of {#node_parser_class}, built from this parser's
        # declared fields.
        def node_parser
          @node_parser ||= node_parser_class.new(fields)
        end

        # Class-level declarations flattened to plain arrays.
        #
        # @return [Array<Array>] one +[name, type, xpath]+ triple per field
        def fields
          self.class.fields.map do |declared|
            [declared.name, declared.type, declared.xpath]
          end
        end
      end
    end
  end
end

Version data entries

11 entries across 11 versions & 2 rubygems

Version Path
aranha-parsers-0.20.0 lib/aranha/parsers/html/base.rb
eac_tools-0.69.1 sub/aranha-parsers/lib/aranha/parsers/html/base.rb
aranha-parsers-0.19.1 lib/aranha/parsers/html/base.rb
eac_tools-0.69.0 sub/aranha-parsers/lib/aranha/parsers/html/base.rb
eac_tools-0.68.0 sub/aranha-parsers/lib/aranha/parsers/html/base.rb
aranha-parsers-0.19.0 lib/aranha/parsers/html/base.rb
eac_tools-0.67.1 sub/aranha-parsers/lib/aranha/parsers/html/base.rb
eac_tools-0.67.0 sub/aranha-parsers/lib/aranha/parsers/html/base.rb
eac_tools-0.66.0 sub/aranha-parsers/lib/aranha/parsers/html/base.rb
eac_tools-0.65.1 sub/aranha-parsers/lib/aranha/parsers/html/base.rb
aranha-parsers-0.18.0 lib/aranha/parsers/html/base.rb