Sha256: 9b4156e6377b0b857b7b5f828002e403ec4547763ad37be9e95d64ae1bd1e064

Contents?: true

Size: 1.87 KB

Versions: 1

Compression:

Stored size: 1.87 KB

Contents

require 'digest/sha2'
require 'expect'
require 'fileutils'
require 'open-uri'
require 'pty'
require 'time'

require 'pdf-reader'

# Fetches a paper PDF, runs it through the external `k2pdfopt` command and
# extracts the lines of its reference list with PDF::Reader.
#
# This section relies on names that are not defined here and must be provided
# elsewhere: makeId, makeDir, makeFile, removeDir, removeFile, getK2Pdf,
# REFERENCE_START_REGEXP and REFERENCE_REGEXP.
class P3
  # Copies the PDF found at +pdfUrl+ into the local file +file_name+.
  #
  # @param pdfUrl [String, URI] http/https/ftp URL (opened through open-uri);
  #   anything else is treated as a local file path
  # @param file_name [String] destination path, opened in binary write mode
  # @return [Integer] number of bytes copied
  # @raise [SystemCallError, OpenURI::HTTPError] when source or destination
  #   cannot be opened
  def self.fetchPdfFile(pdfUrl, file_name)
    # File.open / URI#open are used instead of Kernel#open: Kernel#open no
    # longer opens URLs on Ruby 3+, and it executes a command when the string
    # starts with "|".
    File.open(file_name, 'wb') do |out|
      openPdfSource(pdfUrl) { |data| IO.copy_stream(data, out) }
    end
  end

  # Runs `k2pdfopt -dev kpw <file_name>` in a pseudo terminal, answers its
  # interactive prompt with a newline and echoes the tool's output until a
  # line containing "written" appears or the output ends.
  #
  # @param job_id [Object] forwarded to getK2Pdf
  # @param work_dir [Object] forwarded to getK2Pdf
  # @param file_name [String] path of the PDF handed to k2pdfopt
  # @param use_dir [Object] forwarded to getK2Pdf
  # @return [Object] whatever getK2Pdf(job_id, work_dir, use_dir) returns
  #   (presumably the converted PDF's path; fetchFromPdfUrl passes it to
  #   fetchReference)
  def self.convertSingleColPdf(job_id, work_dir, file_name, use_dir)
    # An argument vector bypasses the shell, so spaces or metacharacters in
    # file_name can neither split the argument nor inject commands.
    PTY.spawn('k2pdfopt', '-dev', 'kpw', file_name.to_s) do |reader, writer|
      writer.sync = true
      reader.expect(/\S.*Enter option above \(h=help, q=quit\):/, 10) do
        writer.puts "\n"
        writer.flush
      end
      begin
        # gets returns nil at end of output, which also ends the loop.
        while (line = reader.gets)
          print line
          break if line.include?('written')
        end
      rescue Errno::EIO
        # Reading a pty whose child has already exited can raise EIO on some
        # platforms; treat it as end of output.
      end
    end
    getK2Pdf(job_id, work_dir, use_dir)
  end

  # Extracts the reference entries from the PDF at +file_name+.
  #
  # The first page whose text matches REFERENCE_START_REGEXP marks the start
  # of the reference section. The text of that page and all later pages is
  # whitespace-normalised, a line break is inserted in front of every
  # REFERENCE_REGEXP match, "- " sequences are removed, and the lines that
  # follow the heading line are returned.
  #
  # @param file_name [String] path handed to PDF::Reader.new
  # @return [Array<String>] lines longer than five characters found after the
  #   heading; empty when no heading can be located
  def self.fetchReference(file_name)
    pages = PDF::Reader.new(file_name).pages
    page_no = pages.
      reject { |page| page.text.index(REFERENCE_START_REGEXP).nil? }.
      map(&:number).
      min
    # No page carries the heading: nothing to extract.
    return [] if page_no.nil?

    puts "Detect References page=> #{page_no} "
    lines = pages.
      select { |page| page.number >= page_no }.
      map { |page| page.text.gsub(/\n+/, "\n").gsub(/ +/, ' ') }.
      join(' ').
      gsub(REFERENCE_REGEXP, "\n\\1").
      gsub('- ', '').
      split("\n")

    heading_at = lines.index { |line| line =~ REFERENCE_START_REGEXP }
    # The heading may no longer match a single line after normalisation.
    return [] if heading_at.nil?

    lines.drop(heading_at + 1).select { |line| line.length > 5 }
  end

  # Downloads the PDF at +pdfUrl+, converts it and returns its references.
  # The work directory (when +use_dir+ is truthy) or work file is removed
  # afterwards, whether or not a step raised.
  #
  # @param pdfUrl [String, URI] source of the PDF, see fetchPdfFile
  # @param work_dir [Object] forwarded to the makeDir/makeFile/remove* helpers
  # @param use_dir [Boolean] selects directory-based or file-based cleanup
  # @return [Array<String>] result of fetchReference
  def self.fetchFromPdfUrl(pdfUrl, work_dir, use_dir)
    job_id = makeId
    makeDir(job_id, work_dir) if use_dir
    begin
      file_name = makeFile(job_id, work_dir, use_dir)
      fetchPdfFile(pdfUrl, file_name)
      executed_pdf = convertSingleColPdf(job_id, work_dir, file_name, use_dir)
      fetchReference(executed_pdf)
    ensure
      # Clean up even when download, conversion or parsing fails.
      if use_dir
        removeDir(job_id, work_dir)
      else
        removeFile(job_id, work_dir)
      end
    end
  end

  # Yields a readable stream for +pdfUrl+: URI#open for URIs that open-uri
  # can open, a binary-mode File for everything else.
  def self.openPdfSource(pdfUrl, &block)
    uri = parsePdfUri(pdfUrl)
    # Kernel#open is private, so respond_to? is true only for open-uri URIs.
    return uri.open(&block) if uri.respond_to?(:open)

    File.open(pdfUrl.to_s, 'rb', &block)
  end
  private_class_method :openPdfSource

  # Returns +pdfUrl+ as a URI object, or nil when it is not a valid URI.
  def self.parsePdfUri(pdfUrl)
    return pdfUrl if pdfUrl.is_a?(URI::Generic)

    URI.parse(pdfUrl.to_s)
  rescue URI::InvalidURIError
    nil
  end
  private_class_method :parsePdfUri
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
arxiv-references-0.1.7.0 lib/arxiv/references/FetchPaperPDF.rb