lib/arxiv/references/myUtil.rb in arxiv-references-0.1.5.1 vs lib/arxiv/references/myUtil.rb in arxiv-references-0.1.6.0

- old
+ new

@@ -21,20 +21,28 @@ def self.removeDir(id, work_dir) FileUtils.rm_rf("#{work_dir}/#{id}") end - def self.makeFile(id, work_dir) - return "#{work_dir}/#{id}/output.pdf" + def self.makeFile(id, work_dir, use_dir) + if use_dir + return "#{work_dir}/#{id}/output.pdf" + else + return "#{work_dir}-#{id}-output.pdf" + end end - def self. getK2Pdf(id, work_dir) - return "#{work_dir}/#{id}/output_k2opt.pdf" + def self. getK2Pdf(id, work_dir, use_dir) + if use_dir + return "#{work_dir}/#{id}/output_k2opt.pdf" + else + return "#{work_dir}-#{id}-output_k2opt.pdf" + end end - def self.fetchFromUrl(urlName, work_dir) + def self.fetchFromUrl(urlName, work_dir, use_dir) puts "fetch => #{urlName}" charset = nil html = open(urlName) do |f| charset = f.charset f.read @@ -44,11 +52,11 @@ result = {} result[:title] = page.xpath('//*[@id="abs"]/div[2]/h1').text result[:authors] = page.xpath('//*[@id="abs"]/div[2]/div[2]/a').text result[:abstruct] = page.xpath('//*[@id="abs"]/div[2]/blockquote').text result[:pdfurl] = "#{BASE_URL}#{page.xpath('//*[@id="abs"]/div[1]/div[1]/ul/li[1]/a').attr('href').value}" - result[:references] = fetchFromPdfUrl(result[:pdfurl], work_dir) + result[:references] = fetchFromPdfUrl(result[:pdfurl], work_dir, use_dir) return result.to_json end def self.fetchFromArxivId(id, work_dir) target_url = "#{BASE_URL}/abs/#{id}" @@ -61,11 +69,11 @@ o.write(data.read) end end end - def self.convertSingleColPdf(job_id, work_dir,file_name) + def self.convertSingleColPdf(job_id, work_dir,file_name, use_dir) cmd = "k2pdfopt -dev kpw #{file_name}" PTY.spawn(cmd) do |i,o| o.sync = true i.expect(/\S.*Enter option above \(h=help, q=quit\):/,10){ o.puts "\n" @@ -75,11 +83,11 @@ res = i.gets print res break unless res.index('written').nil? end end - return getK2Pdf(job_id, work_dir) + return getK2Pdf(job_id, work_dir, use_dir) end def self.fetchReference(file_name) reader = PDF::Reader.new(file_name) page_no = reader. @@ -88,37 +96,37 @@ i.text.index(REFERENCE_START_REGEXP).nil? }. map(&:number). sort. shift - puts "Detect References page=> #{page_no} " - ref_page = reader. - pages. - select{|i| - i.number >= page_no - }. - map{|i| - i.text.gsub(/\n+/,"\n").gsub(/ +/,' ') - }. - join(' '). - gsub(REFERENCE_REGEXP,"\n\\1"). - gsub('- ',''). - split("\n") + puts "Detect References page=> #{page_no} " + ref_page = reader. + pages. + select{|i| + i.number >= page_no + }. + map{|i| + i.text.gsub(/\n+/,"\n").gsub(/ +/,' ') + }. + join(' '). + gsub(REFERENCE_REGEXP,"\n\\1"). + gsub('- ',''). + split("\n") - return ref_page[(ref_page.index{|i| i =~ REFERENCE_START_REGEXP}+1)..ref_page.length]. - select{|i| - i.length > 5 - } + return ref_page[(ref_page.index{|i| i =~ REFERENCE_START_REGEXP}+1)..ref_page.length]. + select{|i| + i.length > 5 + } end - def self.fetchFromPdfUrl(pdfUrl, work_dir) + def self.fetchFromPdfUrl(pdfUrl, work_dir, use_dir) job_id = makeId - makeDir(job_id, work_dir) - file_name = makeFile(job_id, work_dir) - + makeDir(job_id, work_dir) unless use_dir + file_name = makeFile(job_id, work_dir, use_dir) + fetchPdfFile(pdfUrl, file_name) - executed_pdf = convertSingleColPdf(job_id, work_dir, file_name) + executed_pdf = convertSingleColPdf(job_id, work_dir, file_name, use_dir) references = fetchReference(executed_pdf) - removeDir(job_id, work_dir) + removeDir(job_id, work_dir) unless use_dir return references end end