lib/arx.rb in arx-0.3.0 vs lib/arx.rb in arx-0.3.1
- old
+ new
@@ -1,23 +1,49 @@
# frozen_string_literal: true
+require 'cgi'
require 'nokogiri'
require 'open-uri'
+require 'happymapper'
require 'arx/version'
+require 'arx/cleaner'
+require 'arx/inspector'
require 'arx/categories'
-require 'arx/query/query'
+require 'arx/error'
require 'arx/query/validate'
+require 'arx/query/query'
require 'arx/entities/author'
require 'arx/entities/category'
+require 'arx/entities/link'
require 'arx/entities/paper'
# A Ruby interface for querying academic papers on the arXiv search API.
module Arx
# The arXiv search API endpoint.
ENDPOINT = 'http://export.arxiv.org/api/query?'
+ # The current arXiv paper identifier scheme (1 April 2007 and onwards).
+ # The last block of digits is either five digits (if the paper was published in or after 1501 - January 2015),
+ # or four digits (if the paper was published before 1501).
+ #
+ # @see https://arxiv.org/help/arxiv_identifier#new arXiv identifier (new)
+ # @example
+ # 1501.00001
+ # 1705.01662v1
+ # 1412.0135
+ # 0706.0001v2
+ NEW_IDENTIFIER_FORMAT = %r"^\d{4}\.\d{4,5}(v\d+)?$"
+
+ # The legacy arXiv paper identifier scheme (before 1 April 2007).
+ #
+ # @see https://arxiv.org/help/arxiv_identifier#old arXiv identifier (old)
+ # @example
+ # math/0309136v1
+ # cond-mat/0211034
+ OLD_IDENTIFIER_FORMAT = %r"^[a-z]+(\-[a-z]+)?\/\d{7}(v\d+)?$"
+
class << self
# Performs a search query for papers on the arXiv search API.
#
# @note The +sort_by+ and +sort_order+ arguments are ignored if passing in your own +query+.
@@ -35,10 +61,10 @@
end
document = Nokogiri::XML(open ENDPOINT + query.to_s + '&max_results=10000').remove_namespaces!
results = Paper.parse(document, single: false).reject {|paper| paper.id.empty?}
- raise MissingPaper.new(ids.first) if results.empty? && ids.size == 1
+ raise Error::MissingPaper.new(ids.first) if results.empty? && ids.size == 1
ids.size == 1 && results.size == 1 ? results.first : results
end
alias_method :find, :search
alias_method :get, :search
\ No newline at end of file