Sha256: d3f9af6879a6961a6c737f1db709870c60c0b0a262ae0a8364c4131265f82943

Contents?: true

Size: 1.47 KB

Versions: 5

Compression:

Stored size: 1.47 KB

Contents

module RelatonXsf
  #
  # Downloads the XEP reference documents listed on the XSF site, parses each
  # one with BibXMLParser, writes it to the output directory in the requested
  # format, and registers it in the Relaton index.
  #
  class DataFetcher
    # @param output [String] directory the serialized documents are written to
    # @param format [String] output format: "yaml", "xml", or any name for
    #   which the parsed item has a public `to_<format>` method
    def initialize(output, format)
      @output = output
      @format = format
      # Strip a leading "bib" so that e.g. "bibxml" yields the "xml" extension.
      @ext = format.sub(/\Abib/, "")
      # Paths written during this run; used only to warn about duplicates.
      @files = []
    end

    #
    # Create the output directory, run a full fetch, and report progress and
    # timing on stderr.
    #
    # @param output [String] output directory (created if missing)
    # @param format [String] output format, see {#initialize}
    #
    # @return [void]
    #
    def self.fetch(output: "data", format: "yaml")
      warn "fetching data to #{output} in #{format} format"
      started_at = Time.now
      # Wall-clock time is only displayed; the duration comes from the
      # monotonic clock so that system clock adjustments cannot skew it.
      started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      warn "start at #{started_at}"
      FileUtils.mkdir_p output
      new(output, format).fetch
      elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
      warn "finished at #{Time.now} (#{elapsed.round} seconds)"
    end

    #
    # Memoized index obtained from Relaton::Index for the :xsf type and the
    # "index-v1.yaml" file.
    #
    def index
      @index ||= Relaton::Index.find_or_create :xsf, file: "index-v1.yaml"
    end

    #
    # Fetch the listing page, follow every link whose href contains "XEP-",
    # parse and store each document, then save the index.
    #
    # @return [void]
    #
    def fetch
      agent = Mechanize.new
      resp = agent.get "https://xmpp.org/extensions/refs/"
      resp.xpath("//a[contains(@href, 'XEP-')]").each do |link|
        doc = agent.get link[:href]
        bib = BibXMLParser.parse doc.body
        write_doc bib
      end
      index.save
    end

    #
    # Serialize one parsed item to a file named after its identifier and add
    # it to the index.
    #
    # The identifier flagged as primary is preferred; when none is flagged the
    # first identifier is used instead. An item without any identifier cannot
    # be named or indexed, so it is skipped with a warning.
    #
    # @param bib [#docidentifier] item returned by BibXMLParser.parse
    #
    # @return [void]
    #
    def write_doc(bib)
      docid = bib.docidentifier.find(&:primary) || bib.docidentifier.first
      unless docid
        warn "WARNING: document without identifier is skipped"
        return
      end

      id = docid.id
      # "XEP 0001" style identifiers become "xep-0001.<ext>" file names.
      file = File.join @output, "#{id.tr(' ', '-').downcase}.#{@ext}"
      # A repeated path is overwritten below; the warning just makes it visible.
      warn "WARNING: #{file} already exists" if @files.include? file
      File.write file, serialize(bib), encoding: "UTF-8"
      @files << file
      index.add_or_update id, file
    end

    #
    # Convert the item to a string in the configured format.
    #
    # @param bib [Object] item returned by BibXMLParser.parse
    #
    # @return [String] serialized document
    #
    # @raise [NoMethodError] if the item has no public `to_<format>` method
    #
    def serialize(bib)
      case @format
      when "yaml" then bib.to_hash.to_yaml
      when "xml" then bib.to_xml bibdata: true
      # public_send keeps a caller-supplied format name from reaching
      # private methods of the item.
      else bib.public_send "to_#{@format}"
      end
    end
  end
end

Version data entries

5 entries across 5 versions & 1 rubygem

Version Path
relaton-xsf-1.18.1 lib/relaton_xsf/data_fetcher.rb
relaton-xsf-1.18.0 lib/relaton_xsf/data_fetcher.rb
relaton-xsf-1.17.0 lib/relaton_xsf/data_fetcher.rb
relaton-xsf-1.16.2 lib/relaton_xsf/data_fetcher.rb
relaton-xsf-1.16.1 lib/relaton_xsf/data_fetcher.rb