Sha256: 244c12e5eace5dfb7006223308e4187d838e709728b4d97c765c511a4b0d4870

Contents?: true

Size: 1 KB

Versions: 1

Compression:

Stored size: 1 KB

Contents

require 'open-uri'
require 'nokogiri'
require 'json'

# Scrapes a single arXiv abstract page and exposes the paper's metadata.
#
# The page is downloaded and parsed once, in the constructor; the readers
# afterwards return the values extracted at that time.
#
# NOTE: the +abstruct+ spelling (reader and +fetch_abstruct+) is kept as-is
# because it is part of the public interface.
#
# NOTE(review): every XPath below is positional (div[2], li[1], ...), so it
# presumably depends on arXiv's abstract-page layout at the time of writing —
# verify against the live markup.
class Arxiv
  attr_reader :title, :authors, :abstruct, :pdfurl
  # Starts out as nil; this class never fills it in itself.
  attr_accessor :references

  BASE_URL = 'https://arxiv.org'.freeze

  # Downloads and parses the abstract page for a paper.
  #
  # @param id [String] either a bare arXiv identifier (resolved against
  #   "#{BASE_URL}/abs/") or a full http(s) URL, which is fetched unchanged
  # @raise [OpenURI::HTTPError] when the server answers with an error status
  # @raise [NoMethodError] when an expected element is missing from the page
  def initialize(id)
    # A full URL is used verbatim; anything else is treated as a paper id.
    url = id.match?(%r{\Ahttps?://}i) ? id : "#{BASE_URL}/abs/#{id}"

    charset = nil
    # URI.open rather than Kernel#open: the open-uri patch of Kernel#open was
    # removed in Ruby 3.0, and Kernel#open also interprets a leading "|".
    html = URI.open(url) do |io|
      charset = io.charset
      io.read
    end

    @page = Nokogiri::HTML.parse(html, nil, charset)
    @title = fetch_title
    @authors = fetch_authors
    @abstruct = fetch_abstruct
    @pdfurl = fetch_pdfurl
    @references = nil
  end

  # @return [String] text of the first bare text node inside the title
  #   heading, with newlines removed
  def fetch_title
    heading = @page.xpath('//*[@id="abs"]/div[2]/h1')
    heading.children.find { |node| node.name == 'text' }.text.delete("\n")
  end

  # @return [Array<String>] text of each link in the authors block
  def fetch_authors
    @page.xpath('//*[@id="abs"]/div[2]/div[2]/a').map(&:text)
  end

  # @return [String] text of the last bare text node inside the abstract
  #   blockquote
  def fetch_abstruct
    quote = @page.xpath('//*[@id="abs"]/div[2]/blockquote')
    # Compare with ==; an assignment here would rename every child node and
    # make the filter accept everything.
    quote.children.select { |node| node.name == 'text' }.last.text
  end

  # @return [String] BASE_URL followed by the href of the first link in the
  #   page's first list
  def fetch_pdfurl
    href = @page.xpath('//*[@id="abs"]/div[1]/div[1]/ul/li[1]/a').attr('href').value
    "#{BASE_URL}#{href}"
  end
end


Version data entries

1 entry across 1 version & 1 rubygem

Version Path
arxiv-references-0.1.7.0 lib/arxiv/references/Arxiv.rb