Sha256: e60ea10245a03dc7368330b75013bc67a9716d3f1858509fcc08c063f1f7b408

Contents?: true

Size: 1 KB

Versions: 1

Compression:

Stored size: 1 KB

Contents

# frozen_string_literal: true

require_relative "simple_wiki_converter/version"
require 'open-uri'
require 'nokogiri'

module SimpleWikiConverter
  class Error < StandardError; end
  
  class WikiScraper  # Renamed class for clarity
    def initialize(url)
      @url = url
    end

    def scrape
      doc = fetch_document
      remove_unwanted_elements(doc)
      extract_content(doc)
    end

    private

    def fetch_document
      Nokogiri::HTML(URI.open(@url).read)
    end

    def remove_unwanted_elements(doc)
      doc.css('.reference, .mw-headline#See_also, .mw-headline#References, .mw-headline#External_links').each(&:remove)
    end

    def extract_content(doc)
      content_array = doc.css('.mw-headline, p').map(&:content)
      content_array.join("\n\n")
    end
  end
end


# Command-line entry point: runs only when this file is executed directly,
# not when it is required as a library.
if __FILE__ == $PROGRAM_NAME
  # Scrapes the URL given as the first command-line argument and prints the
  # extracted text to stdout.
  #
  # When no argument is given, the usage line is written to stderr and the
  # process exits with a non-zero status so that shells and scripts can tell
  # the invocation failed. Failures signalled with SimpleWikiConverter::Error
  # are reported the same way, as a one-line message instead of a backtrace.
  def main
    abort "Usage: simple_wiki_converter URL" if ARGV.empty?

    puts SimpleWikiConverter::WikiScraper.new(ARGV.first).scrape
  rescue SimpleWikiConverter::Error => e
    abort e.message
  end

  main
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
simple_wiki_converter-0.1.1 lib/simple_wiki_converter.rb