Sha256: 4332ea52c1a90316c2f7e1c81f9cd5d5862536d8a8e3de3fbdd1b447d7489c7d
Contents?: true
Size: 1.29 KB
Versions: 2
Compression:
Stored size: 1.29 KB
Contents
module Yasuri
  # Behaviour shared by the nodes of a Yasuri parse tree.
  #
  # The module is meant to be mixed into a class. Including classes are
  # expected to override {#inject} and {#node_type_str} (both raise here) and
  # may override {#opts} to expose node-specific options.
  #
  # NOTE: +url+ is exposed as a reader, but +@url+ is never assigned inside
  # this module, so it reads as +nil+ unless the including class sets it.
  module Node
    attr_reader :url, :xpath, :name, :children

    # @param xpath [Object] path expression stored for this node
    # @param name [Object] node name; used by {#to_h} to build child keys
    # @param children [Array] child nodes (each must respond to
    #   +node_type_str+, +name+ and +to_h+ for {#to_h} to work)
    # @param _opt [Hash] extra keyword options; accepted and ignored here
    def initialize(xpath, name, children = [], **_opt)
      @xpath = xpath
      @name = name
      @children = children
    end

    # Scrapes +uri+ using a freshly created Mechanize agent.
    #
    # Mechanize is not required by this file; it has to be loaded already.
    #
    # @param uri [Object] location handed to the agent's +get+
    # @param opt [Hash] see {#scrape_with_agent}
    # @return [Object] whatever {#inject} returns
    def scrape(uri, opt = {})
      scrape_with_agent(uri, Mechanize.new, opt)
    end

    # Fetches +uri+ through +agent+ (wrapped in +Yasuri.with_retry+) and
    # hands the resulting page to {#inject}.
    #
    # @param uri [Object] location handed to +agent.get+
    # @param agent [#get] object used to fetch the page
    # @param opt [Hash] +:retry_count+ and +:interval_ms+ override
    #   +Yasuri::DefaultRetryCount+ and +Yasuri::DefaultInterval_ms+; a
    #   +nil+ or +false+ value falls back to the default as well. The hash
    #   is passed on to {#inject} untouched.
    # @return [Object] whatever {#inject} returns
    def scrape_with_agent(uri, agent, opt = {})
      retry_count = opt[:retry_count] || Yasuri::DefaultRetryCount
      interval_ms = opt[:interval_ms] || Yasuri::DefaultInterval_ms

      page = Yasuri.with_retry(retry_count, interval_ms) { agent.get(uri) }
      inject(agent, page, opt)
    end

    # Hook to be overridden by the including class.
    #
    # @raise [RuntimeError] always, in this default implementation
    def inject(agent, page, opt = {}, element = page)
      raise "#{__method__} is not implemented in included class."
    end

    # Serialises this node.
    #
    # @return [Object, Hash] the bare xpath when the node has an xpath, no
    #   children and no non-nil option; otherwise a Hash holding every
    #   non-nil entry of {#opts}, +:path+ (only when an xpath is set) and one
    #   +"<node_type_str>_<name>"+ String key per child mapped to that
    #   child's own +to_h+.
    def to_h
      # Only options that carry a value are serialised (+false+ is kept).
      node_hash = opts.reject { |_key, value| value.nil? }
      return @xpath if @xpath && @children.empty? && node_hash.empty?

      node_hash[:path] = @xpath if @xpath
      children.each do |child|
        node_hash["#{child.node_type_str}_#{child.name}"] = child.to_h
      end
      node_hash
    end

    # Node-specific options; empty by default.
    #
    # @return [Hash]
    def opts
      {}
    end

    # Hook to be overridden by the including class; its return value is used
    # by a parent node's {#to_h} as the prefix of the child's key.
    #
    # @raise [RuntimeError] always, in this default implementation
    def node_type_str
      raise "#{__method__} is not implemented in included class."
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
yasuri-3.3.2 | lib/yasuri/yasuri_node.rb |
yasuri-3.3.1 | lib/yasuri/yasuri_node.rb |