Sha256: 9c45a4300f53039bf327d93a1f35fe0f67425ee31c1fbf856536a023131b0b4c
Contents?: true
Size: 811 Bytes
Versions: 6
Compression:
Stored size: 811 Bytes
Contents
# encoding: UTF-8
require 'tmpdir'
require 'shellwords'
require 'nokogiri'

# Wrapper around the bundled `svm-header-parse` Perl extractor: runs
# `extract.pl` on an input file and turns the XML it produces into a Hash.
module CiteSeer
  # Directory containing `extract.pl`, resolved relative to this source file.
  PERL_DIR = "#{File.dirname(__FILE__)}/../../svm-header-parse"

  # Runs the header extractor on +in_file+ and returns the parsed fields.
  #
  # @param in_file [#path] object whose +path+ names the file to analyse
  #   (e.g. a File or Tempfile)
  # @param opts [Hash] accepted for interface compatibility; currently ignored
  # @return [Hash] with keys +:title+ (String), +:authors+ (Array<String>),
  #   +:abstract+ (String) and +:valid+ (true/false)
  # @raise [Errno::ENOENT] if the extractor did not produce `out.header`
  def self.extract(in_file, opts={})
    ParseOperation.new(in_file).result
  end

  # One run of the extractor. All work happens in the constructor; the
  # outcome is exposed through +result+.
  class ParseOperation
    # @return [Hash] the parsed header fields (see CiteSeer.extract)
    attr_reader :result

    # @param in_file [#path] object whose +path+ names the file to analyse
    def initialize(in_file)
      # The block form of mktmpdir removes the scratch directory afterwards,
      # so the output must be read and parsed before the block exits.
      Dir.mktmpdir do |out_dir|
        # Escape every word so that paths containing spaces or shell
        # metacharacters are passed through to the script verbatim instead
        # of being split or interpreted by the shell.
        command = Shellwords.join(
          ["#{PERL_DIR}/extract.pl", in_file.path, out_dir]
        )
        `#{command}`

        # The script is expected to leave its XML in <out_dir>/out.header.
        output = IO.read("#{out_dir}/out.header")
        xml = Nokogiri::XML(output)
        @result = parse(xml)
      end
    end

    private

    # Pulls the fields of interest out of the extractor's XML document.
    #
    # @param xml [Nokogiri::XML::Document]
    # @return [Hash]
    def parse(xml)
      # A document without a <validHeader> element is reported as invalid
      # rather than raising NoMethodError on nil.
      valid_node = xml.css('validHeader').first

      {
        title: xml.css('title').text,
        authors: xml.css('author > name').map { |n| n.text },
        abstract: xml.css('abstract').text,
        valid: !valid_node.nil? && valid_node.text == '1',
      }
    end
  end
end
Version data entries
6 entries across 6 versions & 1 rubygems