Sha256: 5ab6a8b71ee756f56d8e3020e3b3231d56cfbe84322afc74aee2ea22c88141ae

Contents?: true

Size: 1.77 KB

Versions: 13

Compression:

Stored size: 1.77 KB

Contents

# frozen_string_literal: true

module RelatonIetf
  # Resolves IETF document references (RFCs, the BCP/FYI/STD series and
  # Internet-Draft "I-D" references) to bibliographic items by downloading
  # YAML records from the relaton-data-* repositories on GitHub.
  module Scrapper
    extend Scrapper

    # Base URLs of the raw YAML data files, one per reference family:
    # IDS serves "I-D" references, RFC serves "RFC" references and
    # RSS serves "BCP", "FYI" and "STD" references.
    IDS = "https://raw.githubusercontent.com/relaton/relaton-data-ids/main/data/"
    RFC = "https://raw.githubusercontent.com/relaton/relaton-data-rfcs/main/data/"
    RSS = "https://raw.githubusercontent.com/relaton/relaton-data-rfcsubseries/main/data/"

    # Fetch the bibliographic item for a document reference.
    #
    # @param text [String] reference such as "RFC 8341", "IETF BCP 47" or
    #   "I-D.some-draft-name"; the argument itself is never modified
    # @return [RelatonIetf::IetfBibliographicItem, nil] nil when the reference
    #   family is not recognised, the server does not answer 200, or the
    #   downloaded document is not a YAML mapping
    # @raise [RelatonBib::RequestError] when one of the rescued network or
    #   protocol errors occurs while downloading
    def scrape_page(text)
      # Strip the prefix only at the very start of the string (\A), not at
      # the start of every line as a ^-anchored gsub would.
      ref = text.sub(/\AIETF /, "")
      # Data files are named with the series prefix glued to a number that is
      # zero-padded to at least four digits: "RFC 1" -> "RFC0001".
      ref = ref.sub(/\A(RFC|BCP|FYI|STD)\s(\d+)/) do
        Regexp.last_match(1) + Regexp.last_match(2).rjust(4, "0")
      end
      rfc_item ref
    rescue Timeout::Error, Errno::EINVAL, Errno::ECONNRESET, EOFError,
           Net::HTTPBadResponse, Net::HTTPHeaderSyntaxError,
           Net::ProtocolError, SocketError
      raise RelatonBib::RequestError, "No document found for #{ref} reference"
    end

    private

    # Download and parse the YAML record for a normalised reference.
    #
    # @param ref [String] normalised reference (e.g. "RFC0001"); not mutated
    # @return [RelatonIetf::IetfBibliographicItem, nil] nil when the reference
    #   family is unknown or no usable record could be downloaded
    def rfc_item(ref) # rubocop:disable Metrics/MethodLength
      # Pick the repository for the reference family; Internet-Draft file
      # names omit the leading "I-D." / "I-D " marker.
      ghurl, name = case ref
                    when /\ARFC/ then [RFC, ref]
                    when /\A(?:BCP|FYI|STD)/ then [RSS, ref]
                    when /\AI-D/ then [IDS, ref.sub(/\AI-D[.\s]/, "")]
                    end
      return unless ghurl

      # Only the first whitespace (or no-break space, which \s does not
      # match) is turned into a dot.
      uri = "#{ghurl}#{name.sub(/\s|\u00a0/, '.')}.yaml"
      resp = get_page uri
      return unless resp

      hash = YAML.safe_load resp
      # An empty or non-mapping body carries no record; report it as
      # "not found" instead of failing on the assignment below.
      return unless hash.is_a?(Hash)

      hash["fetched"] = Date.today.to_s
      IetfBibliographicItem.from_hash hash
    end

    # Perform a single GET request (redirects are not followed).
    #
    # @param uri [String]
    # @return [String, nil] HTTP response body, or nil unless the status
    #   code is 200
    def get_page(uri)
      res = Net::HTTP.get_response(URI(uri))
      res.body if res.code == "200"
    end
  end
end

Version data entries

13 entries across 13 versions & 1 rubygem

Version Path
relaton-ietf-1.14.3 lib/relaton_ietf/scrapper.rb
relaton-ietf-1.14.2 lib/relaton_ietf/scrapper.rb
relaton-ietf-1.14.1 lib/relaton_ietf/scrapper.rb
relaton-ietf-1.14.0 lib/relaton_ietf/scrapper.rb
relaton-ietf-1.13.10 lib/relaton_ietf/scrapper.rb
relaton-ietf-1.13.9 lib/relaton_ietf/scrapper.rb
relaton-ietf-1.13.8 lib/relaton_ietf/scrapper.rb
relaton-ietf-1.13.7 lib/relaton_ietf/scrapper.rb
relaton-ietf-1.13.6 lib/relaton_ietf/scrapper.rb
relaton-ietf-1.13.5 lib/relaton_ietf/scrapper.rb
relaton-ietf-1.13.4 lib/relaton_ietf/scrapper.rb
relaton-ietf-1.13.3 lib/relaton_ietf/scrapper.rb
relaton-ietf-1.13.2 lib/relaton_ietf/scrapper.rb