Sha256: 9c45a4300f53039bf327d93a1f35fe0f67425ee31c1fbf856536a023131b0b4c

Contents?: true

Size: 811 Bytes

Versions: 6

Compression:

Stored size: 811 Bytes

Contents

# encoding: UTF-8

require 'tmpdir'
require 'shellwords'
require 'nokogiri'

# Wrapper around the bundled `extract.pl` script: runs it on an input file
# and turns the XML it writes into a plain Ruby hash.
module CiteSeer

  # Directory expected to hold `extract.pl`, located relative to this file.
  PERL_DIR = "#{File.dirname(__FILE__)}/../../svm-header-parse"

  # Runs the extractor on +in_file+ and returns the parsed header fields.
  #
  # @param in_file [#path] object whose +path+ names the file to process
  # @param opts [Hash] accepted for interface compatibility; currently ignored
  # @return [Hash] see ParseOperation#result
  def self.extract(in_file, opts={})
    ParseOperation.new(in_file).result
  end

  # One run of `extract.pl` against a single input file. All work happens in
  # the constructor; the outcome is exposed through #result.
  class ParseOperation

    # @return [Hash] with keys +:title+ (String), +:authors+ (Array of
    #   String), +:abstract+ (String) and +:valid+ (true/false)
    attr_reader :result

    # @param in_file [#path] object whose +path+ names the file to process
    # @raise [SystemCallError] if the script did not produce `out.header`
    def initialize(in_file)
      # The temporary directory is removed when the block exits, so the
      # output must be read and parsed inside it.
      Dir.mktmpdir do |out_dir|
        `#{extract_command(in_file.path, out_dir)}`
        output = IO.read(File.join(out_dir, 'out.header'))
        xml = Nokogiri::XML output
        @result = parse(xml)
      end
    end

  private

    # Builds the shell command line for the extractor. Every word is
    # shell-escaped so that paths containing spaces or shell metacharacters
    # are passed to the script as single, literal arguments.
    def extract_command(in_path, out_dir)
      words = [File.join(PERL_DIR, 'extract.pl'), in_path, out_dir]
      words.map { |word| Shellwords.escape(word.to_s) }.join(' ')
    end

    # Pulls the fields of interest out of the parsed XML document.
    def parse(xml)
      # The element may be absent; a missing <validHeader> is reported as
      # not valid rather than raising NoMethodError on nil.
      valid_header = xml.css('validHeader').first
      {
        title: xml.css('title').text,
        authors: xml.css('author > name').map { |n| n.text },
        abstract: xml.css('abstract').text,
        valid: !valid_header.nil? && valid_header.text == '1',
      }
    end

  end

end

Version data entries

6 entries across 6 versions & 1 rubygems

Version Path
biblicit-2.0.8 lib/biblicit/citeseer.rb
biblicit-2.0.7 lib/biblicit/citeseer.rb
biblicit-2.0.6 lib/biblicit/citeseer.rb
biblicit-2.0.5 lib/biblicit/citeseer.rb
biblicit-2.0.4 lib/biblicit/citeseer.rb
biblicit-2.0.3 lib/biblicit/citeseer.rb