lib/arxiv/references/myUtil.rb in arxiv-references-0.1.5.1 vs lib/arxiv/references/myUtil.rb in arxiv-references-0.1.6.0
- old
+ new
@@ -21,20 +21,28 @@
# Deletes the per-job directory "<work_dir>/<id>" and everything below it.
#
# Uses FileUtils.rm_rf, so a directory that does not exist is not an error.
#
# @param id [#to_s] job identifier used as the directory name
# @param work_dir [#to_s] parent directory holding the job directories
# @return whatever FileUtils.rm_rf returns
def self.removeDir(id, work_dir)
  job_dir = "#{work_dir}/#{id}"
  FileUtils.rm_rf(job_dir)
end
# Builds the path of the downloaded PDF for a job.
#
# Two layouts are supported:
# * directory layout (use_dir truthy): "<work_dir>/<id>/output.pdf"
# * flat layout (use_dir falsy):       "<work_dir>-<id>-output.pdf"
#
# use_dir defaults to true so that callers written against the older
# two-argument signature keep getting the directory layout.
#
# @param id [#to_s] job identifier
# @param work_dir [#to_s] working directory (or path prefix in flat layout)
# @param use_dir [Boolean] whether to place the file in a per-job directory
# @return [String] path of the PDF file; nothing is created on disk here
def self.makeFile(id, work_dir, use_dir = true)
  if use_dir
    "#{work_dir}/#{id}/output.pdf"
  else
    "#{work_dir}-#{id}-output.pdf"
  end
end
# Builds the path of the "_k2opt" PDF that belongs to a job.
#
# Two layouts are supported:
# * directory layout (use_dir truthy): "<work_dir>/<id>/output_k2opt.pdf"
# * flat layout (use_dir falsy):       "<work_dir>-<id>-output_k2opt.pdf"
#
# use_dir defaults to true so that callers written against the older
# two-argument signature keep getting the directory layout.
#
# @param id [#to_s] job identifier
# @param work_dir [#to_s] working directory (or path prefix in flat layout)
# @param use_dir [Boolean] whether the file lives in a per-job directory
# @return [String] path of the converted PDF; nothing is created on disk here
def self.getK2Pdf(id, work_dir, use_dir = true)
  if use_dir
    "#{work_dir}/#{id}/output_k2opt.pdf"
  else
    "#{work_dir}-#{id}-output_k2opt.pdf"
  end
end
- def self.fetchFromUrl(urlName, work_dir)
+ def self.fetchFromUrl(urlName, work_dir, use_dir)
puts "fetch => #{urlName}"
charset = nil
html = open(urlName) do |f|
charset = f.charset
f.read
@@ -44,11 +52,11 @@
result = {}
result[:title] = page.xpath('//*[@id="abs"]/div[2]/h1').text
result[:authors] = page.xpath('//*[@id="abs"]/div[2]/div[2]/a').text
result[:abstruct] = page.xpath('//*[@id="abs"]/div[2]/blockquote').text
result[:pdfurl] = "#{BASE_URL}#{page.xpath('//*[@id="abs"]/div[1]/div[1]/ul/li[1]/a').attr('href').value}"
- result[:references] = fetchFromPdfUrl(result[:pdfurl], work_dir)
+ result[:references] = fetchFromPdfUrl(result[:pdfurl], work_dir, use_dir)
return result.to_json
end
def self.fetchFromArxivId(id, work_dir)
target_url = "#{BASE_URL}/abs/#{id}"
@@ -61,11 +69,11 @@
o.write(data.read)
end
end
end
- def self.convertSingleColPdf(job_id, work_dir,file_name)
+ def self.convertSingleColPdf(job_id, work_dir,file_name, use_dir)
cmd = "k2pdfopt -dev kpw #{file_name}"
PTY.spawn(cmd) do |i,o|
o.sync = true
i.expect(/\S.*Enter option above \(h=help, q=quit\):/,10){
o.puts "\n"
@@ -75,11 +83,11 @@
res = i.gets
print res
break unless res.index('written').nil?
end
end
- return getK2Pdf(job_id, work_dir)
+ return getK2Pdf(job_id, work_dir, use_dir)
end
def self.fetchReference(file_name)
reader = PDF::Reader.new(file_name)
page_no = reader.
@@ -88,37 +96,37 @@
i.text.index(REFERENCE_START_REGEXP).nil?
}.
map(&:number).
sort.
shift
- puts "Detect References page=> #{page_no} "
- ref_page = reader.
- pages.
- select{|i|
- i.number >= page_no
- }.
- map{|i|
- i.text.gsub(/\n+/,"\n").gsub(/ +/,' ')
- }.
- join(' ').
- gsub(REFERENCE_REGEXP,"\n\\1").
- gsub('- ','').
- split("\n")
+ puts "Detect References page=> #{page_no} "
+ ref_page = reader.
+ pages.
+ select{|i|
+ i.number >= page_no
+ }.
+ map{|i|
+ i.text.gsub(/\n+/,"\n").gsub(/ +/,' ')
+ }.
+ join(' ').
+ gsub(REFERENCE_REGEXP,"\n\\1").
+ gsub('- ','').
+ split("\n")
- return ref_page[(ref_page.index{|i| i =~ REFERENCE_START_REGEXP}+1)..ref_page.length].
- select{|i|
- i.length > 5
- }
+ return ref_page[(ref_page.index{|i| i =~ REFERENCE_START_REGEXP}+1)..ref_page.length].
+ select{|i|
+ i.length > 5
+ }
end
# Runs the whole PDF pipeline for one URL and returns the extracted references.
#
# Steps: generate a job id (makeId), build the target path (makeFile),
# download the PDF (fetchPdfFile), convert it (convertSingleColPdf) and
# extract the reference list (fetchReference).
#
# The per-job directory is created and removed only in the directory layout,
# because only that layout writes to "<work_dir>/<id>/...". In the flat layout
# the downloaded and converted files are deleted individually. Cleanup runs in
# an ensure clause so temporary files do not pile up when a step raises.
#
# @param pdfUrl [String] URL of the PDF to process
# @param work_dir [String] working directory (or path prefix in flat layout)
# @param use_dir [Boolean] true for the per-job directory layout (the default,
#   matching the older two-argument behaviour), false for the flat layout
# @return the value returned by fetchReference for the converted PDF
def self.fetchFromPdfUrl(pdfUrl, work_dir, use_dir = true)
  job_id = makeId
  # NOTE(review): makeDir is assumed to create "<work_dir>/<id>", the same
  # directory removeDir deletes -- confirm against its definition.
  makeDir(job_id, work_dir) if use_dir
  file_name = makeFile(job_id, work_dir, use_dir)

  fetchPdfFile(pdfUrl, file_name)
  executed_pdf = convertSingleColPdf(job_id, work_dir, file_name, use_dir)
  fetchReference(executed_pdf)
ensure
  if use_dir
    removeDir(job_id, work_dir) if job_id
  else
    # Locals are nil here when an earlier step raised before assigning them.
    FileUtils.rm_f([file_name, executed_pdf].compact)
  end
end
end