lib/maxwell.rb in maxwell-0.3.0 vs lib/maxwell.rb in maxwell-0.4.0

- old
+ new

@@ -1,86 +1,62 @@ +require 'parallel' + require "maxwell/converter" +require "maxwell/helper" module Maxwell + class NoParserDefinedErr; end + class Base class << self - def attr_scrape(*attr_scrapes) - @acquirer_class = Class.new do - attr_accessor *attr_scrapes - @attr_scrapes = attr_scrapes + def execute(urls) + Parallel. + map_with_index(urls, in_threads: @concurrency || 1) do |url, id| + p "scraping: #{ id + 1 }" - def self.attr_scrapes - @attr_scrapes - end + scraper = self.new + html = Maxwell::Converter.call(url, @use_poltergeist) - def initialize(nokogiri_obj) - @html = nokogiri_obj - end + scraper.parser html - def result - self.class.attr_scrapes.map { |k| [k, send(k)] }.to_h + scraper.handler ({ id: id + 1 }).merge(scraper.result) end - end end - def regist_strategy(link_selectore=nil, &strategy_blk) - @link_selectore = link_selectore - @strategy_blk = strategy_blk + def attr_accessor(*attrs) + @attrs ||= [] + @attrs.concat attrs + super end - def regist_handler(&handler_blk) - @handler_blk = handler_blk + def attrs + @attrs end - def use_poltergeist(value) + def javascript(value) @use_poltergeist = value end - end - def execute(root_url) - if self.link_selectore - html = Maxwell::Converter.call(root_url, use_poltergeist) - html.css(self.link_selectore).each do |a| - execute_for_result a[:href] - end - else - execute_for_result root_url + def concurrency(value) + @concurrency = value end end - def use_poltergeist - self.class.instance_eval("@use_poltergeist") + def parser html + raise NoParserDefinedErr "You need to define #{self}#parser" end - def link_selectore - self.class.instance_eval("@link_selectore") + def handler result + p result end - def strategy_blk - self.class.instance_eval("@strategy_blk") + def result + self.class.attrs.map { |k| [k, self.send(k)] }.to_h end - - def handler_blk - self.class.instance_eval("@handler_blk") - end - - def acquirer_class - self.class.instance_eval("@acquirer_class") - end - - private - def execute_for_result(tip_url) - acquirer = acquirer_class.new(Maxwell::Converter.call(tip_url, use_poltergeist)) - acquirer.instance_eval &self.strategy_blk - - acquirer.result.tap do |result| - self.handler_blk.call(result) if self.handler_blk - end - end end end class ::String def trim - delete("\r\n\t") + delete "\r\n\t" end end