Sha256: 9df394dbd19e839db965927fc1ba32ce0ab7530139408ce5e2e34adaff0cbf40
Contents?: true
Size: 1.22 KB
Versions: 1
Compression:
Stored size: 1.22 KB
Contents
require 'nokogiri'
require 'open-uri'

module Wiki::Yggdrasil
  # A single Wikipedia article, identified by its URI.
  #
  # The article page is fetched lazily the first time #summary is needed, and
  # both the summary and the derived child links are memoized per instance.
  class Article
    # Prefix prepended to the href values found in the summary.
    WIKI_HOST = 'https://en.wikipedia.org'.freeze

    # Pattern a URI must match to be treated as a Wikipedia article.
    WIKI_ARTICLE_PATTERN = %r{.*wikipedia\.org/wiki/.+}

    # Markup that opens the table of contents; everything before it in the
    # serialized page is treated as the article summary.
    TOC_MARKER = '<div id="toc" class="toc">'.freeze

    private_constant :WIKI_HOST, :WIKI_ARTICLE_PATTERN, :TOC_MARKER

    attr_reader :uri

    # @param uri [String] URI of the article
    # @raise [ArgumentError] if +uri+ fails Article.is_valid_wiki_article?
    def initialize(uri:)
      unless Article.is_valid_wiki_article?(uri: uri)
        raise ArgumentError, "not a Wikipedia article URI: #{uri.inspect}"
      end

      @uri = uri
      @summary = nil
      @child_links = nil
    end

    # Paragraph nodes of the page that appear before the table of contents.
    #
    # The page is parsed, serialized, cut at TOC_MARKER, and the leading part
    # is parsed again so that only its <p> elements are selected.
    #
    # @return the result of Nokogiri's +css('p')+ on the leading part
    def summary
      @summary ||= begin
        # URI.open replaces the Kernel#open override that open-uri dropped in
        # Ruby 3.0; the block form closes the handle once the page is parsed.
        page_html = URI.open(uri) { |page| Nokogiri::HTML(page).to_s }
        lead_html = page_html.split(TOC_MARKER).first
        Nokogiri::HTML(lead_html).css('p')
      end
    end

    # URIs linked from the summary that themselves look like Wikipedia
    # articles. Computed once per instance.
    #
    # @return [Array<String>]
    def child_links
      @child_links ||= format_links.select do |link|
        Article.is_valid_wiki_article?(uri: link)
      end
    end

    # All anchor elements found inside the summary paragraphs.
    def scrape_all_summary_links
      summary.css('p a')
    end

    # Turns anchors into absolute URI strings by prefixing WIKI_HOST to each
    # href.
    #
    # nil anchors and anchors without an href are skipped: nil href attributes
    # are often self refs (but possibly not always), so they are ignored.
    #
    # @param anchors [Enumerable] objects answering +['href']+; defaults to
    #   the anchors scraped from the summary
    # @return [Array<String>]
    def format_links(anchors: scrape_all_summary_links)
      anchors.map { |anchor| anchor['href'] unless anchor.nil? }
             .compact
             .map { |href| "#{WIKI_HOST}#{href}" }
    end

    # Is this URI a wikipedia article?
    #
    # @param uri [Object] candidate URI; anything that is not a String is
    #   reported as invalid
    # @return [Boolean]
    def self.is_valid_wiki_article?(uri:)
      uri.is_a?(String) && uri.match?(WIKI_ARTICLE_PATTERN)
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
wiki-yggdrasil-0.1.0 | lib/wiki/article.rb |