Sha256: d5cb777d249206391ed78f2b47e5a755adb88d20680376cdd2fe05caea840c1c

Contents?: true

Size: 830 Bytes

Versions: 6

Compression:

Stored size: 830 Bytes

Contents

# encoding: UTF-8

require 'tmpdir'
require 'shellwords'
require 'nokogiri'

# Ruby wrapper that shells out to the ParsCit +citeExtract.pl+ script and
# pulls header metadata out of the XML the script prints.
module ParsCit

  # Location of the ParsCit distribution, resolved relative to this file.
  PERL_DIR = "#{File.dirname(__FILE__)}/../../parscit"

  # Runs a ParseOperation on +in_file+ and returns its result.
  #
  # @param in_file [#path] object whose +path+ names the file handed to ParsCit
  # @param opts [Hash] forwarded to ParseOperation, which does not read it
  # @return [Hash] +:title+ (String), +:authors+ (Array<String>) and
  #   +:abstract+ (String)
  def self.extract(in_file, opts={})
    ParseOperation.new(in_file, opts).result
  end

  # One invocation of +citeExtract.pl+; the parsed hash is exposed as +result+.
  class ParseOperation

    # @return [Hash] the fields extracted by #parse
    attr_reader :result

    # Runs the extractor on +in_txt.path+ and stores the parsed output.
    #
    # @param in_txt [#path] object whose +path+ names the input file
    # @param opts [Hash] accepted for interface compatibility; not read here
    def initialize(in_txt, opts={})
      # Only set when the caller has not provided one: the parent of the
      # directory containing +crf_test+. If +which+ prints nothing this
      # becomes "./../".
      # NOTE(review): presumably the Perl script reads CRFPP_HOME - confirm.
      ENV['CRFPP_HOME'] ||= "#{File.dirname(`which crf_test`)}/../"
      output = `#{extract_command(in_txt.path)}`
      @result = parse(Nokogiri::XML output)
    end

  private

    # Builds the shell command line for +citeExtract.pl+.
    #
    # Every argument is shell-escaped so that a script or input path holding
    # spaces or shell metacharacters is passed through as one literal word
    # instead of being interpreted by the shell.
    #
    # @param path [#to_s] input file path
    # @return [String] command line safe to hand to the shell
    def extract_command(path)
      Shellwords.join(["#{PERL_DIR}/bin/citeExtract.pl", '-q', '-m', 'extract_all', path.to_s])
    end

    # Collects title, authors and abstract from the +algorithm+ element(s)
    # whose +name+ attribute is +ParsHed+.
    #
    # Title and author text have whitespace runs collapsed to single spaces
    # and are stripped; the abstract text is returned as found.
    #
    # @param xml [Nokogiri::XML::Document] document built from the script output
    # @return [Hash] +:title+, +:authors+ and +:abstract+
    def parse(xml)
      parsed = xml.css("algorithm[name=ParsHed]")
      {
        title: parsed.css('title').text.gsub(/\s+/,' ').strip,
        authors: parsed.css('author').map { |a| a.text.gsub(/\s+/,' ').strip },
        abstract: parsed.css('abstract').text
      }
    end

  end

end

Version data entries

6 entries across 6 versions & 1 rubygem

Version Path
biblicit-2.0.8 lib/biblicit/parscit.rb
biblicit-2.0.7 lib/biblicit/parscit.rb
biblicit-2.0.6 lib/biblicit/parscit.rb
biblicit-2.0.5 lib/biblicit/parscit.rb
biblicit-2.0.4 lib/biblicit/parscit.rb
biblicit-2.0.3 lib/biblicit/parscit.rb