Sha256: 34b205057b14a656ddd08a76c81418945236c792eecb46b725319d1868f4dc4f

Contents?: true

Size: 1.49 KB

Versions: 7

Compression:

Stored size: 1.49 KB

Contents

require "yaml"
require "httparty"
require "nokogiri"
require "debug"

module Zettacode
  # Reads a YAML list of problems (each entry exposing +:name+ and +:url+),
  # downloads every problem page and prints the language names found on it.
  class Scrap
    # Matches a category link ("/wiki/Category:NAME") and captures NAME.
    # The hyphen is kept last inside the character class so that it is a
    # literal "-"; written as ".-_" it would form the range "." through "_",
    # which drops "-" and accepts characters such as "/", ":" and "?".
    CATEGORY_LINK = %r{/wiki/Category:([\w.-]+)}.freeze

    # @return [Array, nil] the parsed settings, or nil before {#load_settings}
    attr_reader :settings

    # @param filepath [String] path of the YAML settings file
    def initialize(filepath)
      @configpath = filepath
    end

    # Parses the settings file into {#settings} and prints one numbered line
    # per problem.
    #
    # @return [Array] the loaded settings
    def load_settings
      puts "==> Loading settings (#{@configpath})"
      @settings = YAML.load(File.read(@configpath))
      # Show settings
      @settings.each_with_index do |problem, index|
        number = format("%02d", index + 1)
        puts "    #{number} #{problem[:name]}: #{problem[:url]}"
      end
    end

    # Downloads each configured problem page and prints two language lists:
    # the ids of the section headings, then the names taken from the
    # category links. Requires {#load_settings} to have been called first.
    #
    # @return [Array] the settings that were iterated
    def find_langs
      @settings.each do |problem|
        document = Nokogiri::HTML(fetch_page(problem[:url]))
        report(problem, heading_langs(document))
        report(problem, category_langs(document))
      end
    end

    private

    # Fetches +url+ and returns the response body. Any status other than 200
    # is reported and terminates the process with exit status 1.
    def fetch_page(url)
      response = HTTParty.get(url)
      return response.body if response.code == 200

      puts "==> httparty: [Error] #{response.code}"
      exit 1
    end

    # Collects the +id+ attribute values of every "h2 > span" element,
    # skipping elements that have no id.
    def heading_langs(document)
      document.css("h2 > span").map { |span| span["id"] }.compact
    end

    # Collects the category name of every link whose +href+ matches
    # CATEGORY_LINK. Links without an href are skipped (nil.to_s is "").
    def category_langs(document)
      document.css("a").map { |link| link["href"].to_s[CATEGORY_LINK, 1] }.compact
    end

    # Prints the problem name with the number of languages, then one
    # language per line.
    def report(problem, langs)
      puts "==> Problem: #{problem[:name]} (langs=#{langs.size})"
      puts langs.join("\n")
    end
  end
end

Version data entries

7 entries across 7 versions & 1 rubygems

Version Path
zettacode-0.1.6 lib/zettacode/scrap/scrap.rb
zettacode-0.1.5 lib/zettacode/scrap/scrap.rb
zettacode-0.1.4 lib/zettacode/scrap/scrap.rb
zettacode-0.1.3 lib/zettacode/scrap/scrap.rb
zettacode-0.1.2 lib/zettacode/scrap/scrap.rb
zettacode-0.1.1 lib/zettacode/scrap/scrap.rb
zettacode-0.1.0 lib/zettacode/scrap/scrap.rb