lib/maxwell.rb in maxwell-0.3.0 vs lib/maxwell.rb in maxwell-0.4.0
- old
+ new
@@ -1,86 +1,62 @@
+require 'parallel'
+
require "maxwell/converter"
+require "maxwell/helper"
module Maxwell
+ class NoParserDefinedErr; end
+
class Base
class << self
- def attr_scrape(*attr_scrapes)
- @acquirer_class = Class.new do
- attr_accessor *attr_scrapes
- @attr_scrapes = attr_scrapes
+ def execute(urls)
+ Parallel.
+ map_with_index(urls, in_threads: @concurrency || 1) do |url, id|
+ p "scraping: #{ id + 1 }"
- def self.attr_scrapes
- @attr_scrapes
- end
+ scraper = self.new
+ html = Maxwell::Converter.call(url, @use_poltergeist)
- def initialize(nokogiri_obj)
- @html = nokogiri_obj
- end
+ scraper.parser html
- def result
- self.class.attr_scrapes.map { |k| [k, send(k)] }.to_h
+ scraper.handler ({ id: id + 1 }).merge(scraper.result)
end
- end
end
- def regist_strategy(link_selectore=nil, &strategy_blk)
- @link_selectore = link_selectore
- @strategy_blk = strategy_blk
+ def attr_accessor(*attrs)
+ @attrs ||= []
+ @attrs.concat attrs
+ super
end
- def regist_handler(&handler_blk)
- @handler_blk = handler_blk
+ def attrs
+ @attrs
end
- def use_poltergeist(value)
+ def javascript(value)
@use_poltergeist = value
end
- end
- def execute(root_url)
- if self.link_selectore
- html = Maxwell::Converter.call(root_url, use_poltergeist)
- html.css(self.link_selectore).each do |a|
- execute_for_result a[:href]
- end
- else
- execute_for_result root_url
+ def concurrency(value)
+ @concurrency = value
end
end
- def use_poltergeist
- self.class.instance_eval("@use_poltergeist")
+ def parser html
+ raise NoParserDefinedErr "You need to define #{self}#parser"
end
- def link_selectore
- self.class.instance_eval("@link_selectore")
+ def handler result
+ p result
end
- def strategy_blk
- self.class.instance_eval("@strategy_blk")
+ def result
+ self.class.attrs.map { |k| [k, self.send(k)] }.to_h
end
-
- def handler_blk
- self.class.instance_eval("@handler_blk")
- end
-
- def acquirer_class
- self.class.instance_eval("@acquirer_class")
- end
-
- private
- def execute_for_result(tip_url)
- acquirer = acquirer_class.new(Maxwell::Converter.call(tip_url, use_poltergeist))
- acquirer.instance_eval &self.strategy_blk
-
- acquirer.result.tap do |result|
- self.handler_blk.call(result) if self.handler_blk
- end
- end
end
end
class ::String
def trim
- delete("\r\n\t")
+ delete "\r\n\t"
end
end