Sha256: be09259e5f478d6b829b819be09ae000df5ba33b93dacfd207d06ac3ccb0c1e1
Contents?: true
Size: 1.91 KB
Versions: 3
Compression:
Stored size: 1.91 KB
Contents
module Arx

  # Class for cleaning strings.
  #
  # @private
  class Cleaner

    # arXiv paper URL prefix format: optional scheme, optional "www.", then "arxiv.org/abs/".
    # Anchored with \A so the prefix is only recognised at the very start of the string.
    URL_PREFIX = %r{\A(https?://)?(www\.)?arxiv\.org/abs/}

    # A string that starts with {URL_PREFIX} and has at least one more character on the same line.
    # Built once here rather than re-interpolated on every call.
    URL_FORMAT = /#{URL_PREFIX}.+\z/
    private_constant :URL_FORMAT

    class << self

      # Cleans strings by replacing newline/return characters with spaces,
      # stripping surrounding whitespace and collapsing runs of spaces.
      #
      # @param string [String] The string to clean.
      # @return [String] The cleaned string.
      def clean(string)
        string.gsub(/\r\n|\r|\n/, ' ').strip.squeeze(' ')
      end

      # Attempt to extract an arXiv identifier from a string such as a URL.
      #
      # @param string [String] The string to extract the ID from.
      # @param version [Boolean] Whether or not to include the paper's version.
      # @return [String] The extracted ID.
      # @raise [TypeError] If +version+ is not +true+/+false+, or +string+ is not a String.
      # @raise [ArgumentError] If +Validate.id?+ rejects the candidate identifier.
      def extract_id(string, version: false)
        unless [true, false].include?(version)
          raise TypeError, "Expected `version` to be boolean (TrueClass or FalseClass), got: #{version.class}"
        end
        raise TypeError, "Expected `string` to be a String, got: #{string.class}" unless string.is_a?(String)

        # Only URL-shaped input is trimmed (prefix and one trailing slash); anything else is validated as-is.
        trimmed = URL_FORMAT.match?(string) ? string.sub(URL_PREFIX, '').chomp('/') : string
        raise ArgumentError, "Couldn't extract arXiv identifier from: #{string}" unless Validate.id?(trimmed)

        version ? trimmed : trimmed.sub(/v[0-9]+\z/, '')
      end

      # Attempt to extract a version number from an arXiv identifier.
      #
      # @param string [String] The arXiv identifier to extract the version number from.
      # @return [Integer] The extracted version number.
      # @raise [TypeError] If +string+ is not a String (raised by {extract_id}).
      # @raise [ArgumentError] If no identifier can be extracted, or it has no trailing version suffix.
      def extract_version(string)
        suffix = extract_id(string, version: true).match(/v([0-9]+)\z/)
        raise ArgumentError, "Couldn't extract version number from identifier: #{string}" unless suffix

        suffix[1].to_i
      end
    end
  end
end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
arx-1.3.0 | lib/arx/cleaner.rb |
arx-1.2.1 | lib/arx/cleaner.rb |
arx-1.2.0 | lib/arx/cleaner.rb |