Sha256: ebf1a34fca45324adfba024f7e134ec816631b0aef07b8fb193db8d8532adb85
Contents?: true
Size: 1.35 KB
Versions: 2
Compression:
Stored size: 1.35 KB
Contents
require 'nokogiri'
require 'open-uri'

module Google
  module Scholar
    # Fetches Google Scholar pages and keeps the parsed documents in the order
    # they were loaded, so that paginated results can be walked one page at a
    # time with #load_next_page.
    class Scraper
      # Documents loaded so far; the last element is the most recent page.
      attr_accessor :documents

      # url              - address of the first page to fetch; when nil/false
      #                    nothing is fetched.
      # initial_document - optional already-built document placed ahead of
      #                    whatever is loaded from url.
      def initialize(url, initial_document = nil)
        @documents = []
        @documents << initial_document if initial_document
        @documents << self.class.load_url(url) if url
      end

      # Chooses the document class used to wrap the page at +url+, based only
      # on the URL's query parameters:
      #
      # * a "view_op=search_authors" parameter -> Google::Scholar::AuthorsDocument
      # * a "user" parameter                   -> Google::Scholar::AuthorsProfileDocument
      # * anything else                        -> Google::Scholar::Document
      #
      # The three classes are defined elsewhere in the gem.
      def self.class_lookup(url = "")
        parameters = query_parameters(url)
        return Google::Scholar::AuthorsDocument if parameters.include?("view_op=search_authors")
        return Google::Scholar::AuthorsProfileDocument if parameters.any? { |parameter| parameter.start_with?("user=") }

        Google::Scholar::Document
      end

      # Splits the query string of +url+ into its raw "key=value" pieces.
      # Returns an empty array when there is no query string, so the path is
      # never mistaken for a parameter; any "#fragment" is discarded first.
      def self.query_parameters(url)
        without_fragment = url.to_s.split("#", 2).first.to_s
        query = without_fragment.split("?", 2)[1]
        query.nil? ? [] : query.split("&")
      end
      private_class_method :query_parameters

      # True when every loaded document reports itself valid (and therefore
      # also when no document has been loaded).
      def valid?
        @documents.all? { |document| document.valid? }
      end

      # Fetches the page following the most recent document and appends it to
      # #documents. Returns nil without fetching when there is no further page.
      def load_next_page
        return unless has_more_pages?

        @documents << self.class.load_url(@documents.last.next_page_url)
      end

      # Downloads +url+, parses it with Nokogiri and wraps the result in the
      # class picked by .class_lookup.
      #
      # Raises RuntimeError when the URL scheme is not http or https. Errors
      # from URI parsing or from the HTTP request itself propagate unchanged.
      def self.load_url(url)
        uri = URI(url)
        raise "Invalid scheme for #{url}" unless %w[http https].include?(uri.scheme)

        document_class = class_lookup(url)
        # Kernel#open no longer fetches URLs on Ruby 3.0+, so go through
        # open-uri's URI#open; the block form also closes the stream once
        # Nokogiri has read it.
        page = uri.open { |io| Nokogiri::HTML(io) }
        document_class.new(page)
      end

      # Whether the most recently loaded document has a following page.
      # False when nothing has been loaded yet.
      def has_more_pages?
        latest = @documents.last
        return false if latest.nil?

        latest.has_next_page?
      end
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
google-scholar-0.0.2 | lib/google/scholar/scraper.rb |
google-scholar-0.0.1 | lib/google/scholar/scraper.rb |