Sha256: 03cd050bc5ff083962ece752234f016fc56cffc1ca24c852da85822f3e122e9b

Contents?: true

Size: 1.02 KB

Versions: 1

Compression:

Stored size: 1.02 KB

Contents

# frozen_string_literal: true

require_relative "simple_wiki_converter/version"
require 'open-uri'
require 'nokogiri'

module SimpleWikiConverter
  class Error < StandardError; end
  
  class WikiScraper  # Renamed class for clarity
    def initialize(url)
      @url = url
    end

    def scrape
      doc = fetch_document
      remove_unwanted_elements(doc)
      extract_content(doc)
    end

    private

    def fetch_document
      Nokogiri::HTML(URI.open(@url).read)
    end

    def remove_unwanted_elements(doc)
      doc.css('.reference, .mw-headline#See_also, .mw-headline#References, .mw-headline#External_links').each(&:remove)
    end

    def extract_content(doc)
      content_array = doc.css('.mw-headline, p').map(&:content)
      content_array.join("\n\n")
    end
  end
end

# Command-line entry point: scrapes the page at the URL given as the first
# command-line argument and prints the extracted text to standard output.
#
# When no argument is given, the usage line is written to standard error and
# the process exits with status 1, so calling scripts can detect the misuse.
#
# @return [void]
def main
  abort "Usage: simple_wiki_converter URL" if ARGV.empty?

  puts SimpleWikiConverter::WikiScraper.new(ARGV.first).scrape
end

# NOTE(review): this call is unconditional, so `main` runs every time this
# file is loaded, including through `require`, not only when it is run as a
# script. Confirm that is intended before guarding or moving it.
main

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
simple_wiki_converter-0.1.0 lib/simple_wiki_converter.rb