Sha256: 3454fd606acc9c83f4534a4ad9aab7ca4bf3cd89310894e23770b8cb33c46e23

Contents?: true

Size: 1.3 KB

Versions: 4

Compression:

Stored size: 1.3 KB

Contents

# frozen_string_literal: true

require 'scrapework'

# The archive index page: exposes every comic page listed in its "comic"
# <select> menu through the `pages` association.
class Archive < Scrapework::Object
  ROOT = 'http://www.ma3comic.com/strips-ma3/'

  has_many :pages

  # Loads the archive index. Arguments supplied by the caller are accepted but
  # ignored; the fixed archive URL under ROOT is always used.
  def self.load(*)
    super("#{ROOT}archive/")
  end

  # Produces one { url:, number: } hash per <option> of the "comic" select.
  # The first option is dropped (presumably a placeholder entry -- confirm
  # against the live page) and the rest are numbered from 1 in document order.
  map :pages do |html|
    options = html.css('select[name=comic] option').drop(1)

    options.each.with_index(1).map do |option, number|
      { url: ROOT + option['value'], number: number }
    end
  end
end

# A single comic page. Both scraped attributes are read from the page's
# main comic image (the <img> element with id "cc-comic").
class Page < Scrapework::Object
  attribute :src
  attribute :number, type: Integer

  # The comic image's `src` attribute.
  map :src do |html|
    comic = img(html)
    comic['src']
  end

  # The first run of digits in the comic image's `title`, as an Integer.
  map :number do |html|
    title = img(html)['title']
    title[/\d+/].to_i
  end

  # Locates the comic image in the parsed document: the first match for
  # `img#cc-comic`.
  def img(html)
    matches = html.css('img#cc-comic')
    matches.first
  end

  # Local file name for this page: the number left-padded with zeros to at
  # least three characters, with a ".png" extension.
  def filename
    padded = number.to_s.rjust(3, '0')
    "#{padded}.png"
  end
end

require 'open-uri'

# Download every page listed in the archive into ./ma3, one file per page.
# Pages whose file already exists are skipped, so an interrupted run can be
# resumed without refetching what is already on disk.
archive = Archive.load

# Upper bound on load attempts per page. Without a bound, a page that always
# fails to load would be retried forever and its batch would never finish.
max_attempts = 5

Dir.mkdir('ma3') unless Dir.exist?('ma3')
Dir.chdir('ma3') do
  # Work through the pages in batches of 20: one thread per page, and the
  # whole batch is joined before the next batch starts.
  archive.pages.each_slice(20) do |pages|
    threads = []
    pages.each do |page|
      next if File.exist?(page.filename)

      threads << Thread.new(page) do |this_page|
        attempts = 0
        begin
          attempts += 1
          this_page.load
        rescue StandardError => e
          puts "error (#{this_page.url}): #{e.message}"
          retry if attempts < max_attempts
          next # out of attempts: skip this page and let the rest of the batch continue
        end

        uri = URI.parse(this_page.src)
        # binwrite: the body is image data, so it must not pass through
        # text-mode newline or encoding conversion.
        File.binwrite(this_page.filename, uri.read)
      end
    end
    threads.each(&:join)
  end
end

Version data entries

4 entries across 4 versions & 1 rubygems

Version Path
scrapework-0.1.3 examples/ma3.rb
scrapework-0.1.2 examples/ma3.rb
scrapework-0.1.1 examples/ma3.rb
scrapework-0.1.0 examples/ma3.rb