Sha256: ebf1a34fca45324adfba024f7e134ec816631b0aef07b8fb193db8d8532adb85

Contents?: true

Size: 1.35 KB

Versions: 2

Compression:

Stored size: 1.35 KB

Contents

require 'nokogiri'
require 'open-uri'
module Google
  module Scholar
    # Collects the parsed pages ("documents") of one Google Scholar result
    # set. Each fetched page is wrapped in a document class picked from the
    # URL's query parameters and appended to #documents in fetch order.
    class Scraper
      # Documents loaded so far, oldest first.
      attr_accessor :documents

      # url              - address fetched and appended as a document; pass
      #                    nil to skip the fetch.
      # initial_document - optional already-built document, stored ahead of
      #                    the fetched one.
      #
      # Raises RuntimeError (via load_url) when url is not http/https.
      def initialize(url, initial_document = nil)
        @documents = []
        @documents << initial_document if initial_document
        @documents << self.class.load_url(url) if url
      end

      # Picks the document class that should wrap the page at +url+.
      #
      # Only the text after the first "?" is inspected, split on "&". A
      # string with no "?" is inspected as a single parameter.
      #
      # Returns AuthorsDocument for "view_op=search_authors",
      # AuthorsProfileDocument when a "user" parameter is present, and
      # Document otherwise.
      def self.class_lookup(url = "")
        pieces = url.split("?")
        params = pieces.length > 1 ? pieces[1].split("&") : pieces

        return Google::Scholar::AuthorsDocument if params.include?("view_op=search_authors")

        # Prefix match, not substring: parameters such as "authuser=0"
        # contain "user=" but do not identify a profile page.
        return Google::Scholar::AuthorsProfileDocument if params.any? { |param| param.start_with?("user=") }

        Google::Scholar::Document
      end

      # True when every loaded document reports itself valid (and therefore
      # also true when nothing has been loaded).
      def valid?
        @documents.all?(&:valid?)
      end

      # Fetches the page the last document points at and appends it.
      #
      # Returns nil without fetching when there is no further page,
      # otherwise the updated documents array.
      def load_next_page
        return unless has_more_pages?

        @documents << self.class.load_url(@documents.last.next_page_url)
      end

      # Downloads +url+, parses it with Nokogiri::HTML and wraps the result
      # in the class chosen by class_lookup.
      #
      # Raises RuntimeError when the URL has no scheme or a scheme other
      # than http/https.
      def self.load_url(url)
        uri = URI(url)
        raise "Invalid scheme for #{url}" unless %w[http https].include?(uri.scheme)

        # Open through the URI object: Kernel#open stopped handling URL
        # strings in Ruby 3.0, while URI::HTTP#open (from open-uri) works on
        # old and new Rubies alike.
        class_lookup(url).new(Nokogiri::HTML(uri.open))
      end

      # Whether the most recently loaded document advertises a next page.
      # False when no document has been loaded yet.
      def has_more_pages?
        return false if @documents.empty?

        @documents.last.has_next_page?
      end
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
google-scholar-0.0.2 lib/google/scholar/scraper.rb
google-scholar-0.0.1 lib/google/scholar/scraper.rb