Sha256: 0a3b2fb07fef92db778a22e3916d597ad54b7e16141c927c79bf14fd976c3e36

Contents?: true

Size: 1.12 KB

Versions: 1

Compression:

Stored size: 1.12 KB

Contents

require 'parallel'

require "maxwell/converter"
require "maxwell/helper"

require "maxwell/overide_ohter_class"

module Maxwell
  # Raised by Base#parser when a scraper class has not overridden #parser.
  class NoParserDefinedErr < StandardError; end

  # Base class for scrapers.
  #
  # A subclass declares the fields it extracts with `attr_accessor`,
  # overrides #parser to fill them in from the fetched HTML, and may
  # override #handler to consume the collected result hash.
  class Base
    class << self
      # Runs one scraper instance per URL through Parallel.map_with_index.
      #
      # For each URL a fresh instance is created, the page is fetched via
      # Maxwell::Converter.call, #parser is invoked with the fetched HTML and
      # #handler is invoked with `{ url: url }` merged with the instance's
      # #result.
      #
      # @param urls [Enumerable] URLs to scrape
      # @return whatever Parallel.map_with_index returns for the block
      #   (the block's value is the return value of #handler)
      def execute(urls)
        # Thread count defaults to 1 unless `concurrency` was called on this class.
        Parallel.map_with_index(urls, in_threads: @concurrency || 1) do |url, id|
          p "scraping: #{ id + 1 }"

          scraper = new
          html = Maxwell::Converter.call(url, @use_poltergeist)

          scraper.parser(html)
          scraper.handler({ url: url }.merge(scraper.result))
        end
      end

      # Defines accessors as usual and additionally records their names so
      # that Base#result knows which values to collect.
      #
      # @param attrs [Array<Symbol, String>] accessor names
      def attr_accessor(*attrs)
        @attrs ||= []
        @attrs.concat(attrs)
        super
      end

      # Names recorded by `attr_accessor` for this class.
      #
      # When this class declared none itself, the list of the nearest
      # ancestor that did is returned; a class that declares its own
      # accessors therefore replaces (does not extend) the inherited list.
      #
      # @return [Array] recorded names, or an empty array when none exist
      def attrs
        return @attrs if @attrs

        # Walk up the ancestry; stop at the first class that is not a scraper.
        superclass.respond_to?(:attrs) ? superclass.attrs : []
      end

      # Stores the flag that #execute passes as the second argument to
      # Maxwell::Converter.call.
      # NOTE(review): presumably enables JavaScript rendering via Poltergeist;
      # confirm against Maxwell::Converter.
      def javascript(value)
        @use_poltergeist = value
      end

      # Sets the number of threads #execute asks Parallel to use.
      def concurrency(value)
        @concurrency = value
      end
    end

    # Extracts data from the fetched page. Must be overridden by subclasses.
    #
    # @param html the value returned by Maxwell::Converter.call
    # @raise [NoParserDefinedErr] always, in this default implementation
    def parser(html)
      raise NoParserDefinedErr, "You need to define #{self.class}#parser"
    end

    # Receives the result hash for one URL. The default implementation prints
    # it with `p`; override to store or post-process it.
    #
    # @param result [Hash] `{ url: ... }` merged with #result
    def handler(result)
      p result
    end

    # Collects the current value of every recorded accessor.
    #
    # @return [Hash] accessor name => current value
    def result
      self.class.attrs.map { |name| [name, send(name)] }.to_h
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
maxwell-0.4.3 lib/maxwell.rb