spec/downloader_spec.rb in oddb2xml-2.2.3 vs spec/downloader_spec.rb in oddb2xml-2.2.4
- old
+ new
@@ -1,10 +1,59 @@
# encoding: utf-8
require 'spec_helper'
VCR.eject_cassette # we use insert/eject around each example
# Manual debugging helper; the original author marked it as not used.
# Scans an AIPS download XML and reports which German/French
# medicalInformation entries mention one of the wanted authorisation numbers,
# either in their authNrs element or in the first 5-digit numbers found in
# their content.
#
# @param filename [String] path of the AIPS XML file to read
# @param ids_to_keep [Array<Integer>] authorisation numbers to look for
# @return [nil] findings are only printed to stdout
#
# Side effects: keeps the raw XML in @xml and (re)creates 'tst.out' in the
# current directory, writing one empty line per authNrs hit.
def filter_aips_xml(filename='AipsDownload_ng.xml', ids_to_keep = [55558, 61848])
  puts "File #{filename} exists? #{File.exist?(filename)}"
  @xml = IO.read(filename)
  data = {}
  # Block form guarantees tst.out is closed, even when parsing raises.
  File.open('tst.out', 'w+') do |ausgabe|
    result = MedicalInformationsContent.parse(@xml.sub(Strip_For_Sax_Machine, ''), :lazy => true)
    result.medicalInformation.each do |pac|
      lang = pac.lang.to_s
      next unless lang =~ /de|fr/
      keep_it = false

      # First wanted id listed in the authNrs element (to_s guards a missing element).
      listed = pac.authNrs.to_s.split(/[, ]+/).map(&:to_i).find { |id| ids_to_keep.include?(id) }
      if listed
        data[[lang, listed]] = pac
        keep_it = true
        ausgabe.puts
      end

      html = Nokogiri::HTML.fragment(pac.content.force_encoding('UTF-8'))
      # NOTE(review): the first alternative always wins, so groups 3-5 are
      # never set and at most two numbers are captured.
      numbers = /(\d{5})[,\s]*(\d{5})?|(\d{5})[,\s]*(\d{5})?[,\s]*(\d{5})?/.match(html)
      if numbers
        mentioned = numbers.captures.compact.map(&:to_i).find { |id| ids_to_keep.include?(id) }
        if mentioned
          data[[lang, mentioned]] = pac
          keep_it = true
        end
        puts "Must keep #{keep_it} #{pac.authNrs}"
      end
    end
  end
  puts data.size
  puts data.keys
end
+
# Matches the version attribute of an XML 1.0 declaration. The dot is escaped
# so that only a literal "1.0" matches, not e.g. "1x0".
XML_VERSION_1_0 = /xml\sversion=["']1\.0["']/
# File name of the preparations XML.
PREP_XML = 'Preparations.xml'
shared_examples_for 'any downloader' do
# this takes 5 sec. by call for sleep
it 'should count retry times as retrievable or not', :slow => true do
@@ -27,10 +76,16 @@
end
# Shared teardown: returns to the directory remembered in @savedDir, ejects
# the current VCR cassette and pretty-prints the oddb2xml.json cassette in
# place with Python's json.tool.
#
# The cassette is only replaced when the formatter succeeded; otherwise the
# temporary file is discarded, so a failed or missing `python` can no longer
# overwrite the cassette with an empty file.
def common_after
  Dir.chdir(@savedDir) if @savedDir and File.directory?(@savedDir)
  VCR.eject_cassette
  vcr_file = File.expand_path(File.join(Oddb2xml::SpecData, '..', 'fixtures', 'vcr_cassettes', 'oddb2xml.json'))
  cassette_present = File.exist?(vcr_file)
  puts "Pretty-printing #{vcr_file} exists? #{cassette_present}"
  return unless cassette_present

  vcr_file_new = vcr_file.sub('.json', '.new')
  # Argument-list form of system bypasses the shell, so paths containing
  # spaces or shell metacharacters are passed through unchanged.
  if system('python', '-mjson.tool', vcr_file, vcr_file_new)
    FileUtils.mv(vcr_file_new, vcr_file)
  else
    FileUtils.rm_f(vcr_file_new)
  end
end
# Zips input_filenames (using the basename)
def zip_files(zipfile_name, input_filenames)
FileUtils.rm_f(zipfile_name)
@@ -41,34 +96,36 @@
end
end
end
# Unzips into a specific directory
# Extracts every entry of a zip archive into a directory, replacing files
# that already exist there.
#
# @param zipfile_name [String] path of the archive
# @param directory [String] target directory, created when missing;
#   defaults to the current working directory
# @return whatever Zip::File.open returns for its block
#
# NOTE(review): a relative zipfile_name is opened after the chdir, i.e. it is
# resolved against `directory` -- confirm callers pass absolute paths.
def unzip_files(zipfile_name, directory=Dir.pwd)
  FileUtils.makedirs(directory)
  # Block form restores the previous working directory, also on errors.
  Dir.chdir(directory) do
    Zip::File.open(zipfile_name) do |zip_file|
      # Handle entries one by one
      zip_file.each do |entry|
        already_there = File.exist?(entry.name)
        puts "downloader_spec.rb: Extracting #{entry.name} exists? #{already_there} into #{directory}"
        # Remove a stale copy first so that extraction cannot trip over it.
        FileUtils.rm_f(entry.name, :verbose => true) if already_there
        # Extract to file/directory/symlink
        entry.extract(entry.name)
      end
    end
  end
end
+
describe Oddb2xml::RefdataDownloader do
include ServerMockHelper
before(:all) do
VCR.eject_cassette
VCR.configure do |c|
c.before_record(:Refdata_DE) do |i|
if not /WSDL$/.match(i.request.uri) and /refdatabase.refdata.ch\/Service/.match(i.request.uri) and i.response.body.size > 1024*1024
- puts "#{Time.now}: #{__LINE__}: Parsing response.body (#{i.response.body.size} bytes) will take some time. URI was #{i.request.uri}"
+ puts "#{Time.now}: #{__LINE__}: Parsing response.body (#{i.response.body.size/(1024*1024)} MB ) will take some time. URI was #{i.request.uri}"
doc = REXML::Document.new(i.response.body)
items = doc.root.children.first.elements.first
nrItems = doc.root.children.first.elements.first.elements.size
puts "#{Time.now}: #{__LINE__}: Removing most of the #{nrItems} items will take some time"
nrSearched = 0
@@ -77,11 +134,11 @@
nrSearched += 1
puts "#{Time.now}: #{__LINE__}: nrSearched #{nrSearched}/#{nrItems}" if nrSearched % 1000 == 0
items.delete x unless x.elements['GTIN'] and Oddb2xml::GTINS_DRUGS.index(x.elements['GTIN'].text)
}
i.response.body = doc.to_s
- puts "#{Time.now}: response.body is now #{i.response.body.size} bytes long"
+ puts "#{Time.now}: response.body is now #{i.response.body.size/(1024*1024)} MB long"
i.response.headers['Content-Length'] = i.response.body.size
end
end
end
VCR.insert_cassette('oddb2xml', :tag => :Refdata_DE)
@@ -127,49 +184,50 @@
end
end
end
end
-if true
describe Oddb2xml::SwissmedicDownloader do
include ServerMockHelper
- before(:all) do VCR.eject_cassette end
before(:each) do
VCR.configure do |c|
c.before_record(:swissmedic) do |i|
- if i.response.headers['Content-Disposition'] and /www.swissmedic.ch/.match(i.request.uri)
- puts "#{Time.now}: URI was #{i.request.uri}"
+ if i.response.headers['Content-Disposition'] and /www.swissmedic.ch/.match(i.request.uri) and i.response.body.size > 1024*1024
+ puts "#{Time.now}: #{__LINE__} URI was #{i.request.uri}"
m = /filename=.([^\d]+)/.match(i.response.headers['Content-Disposition'][0])
- puts "#{Time.now}: SwissmedicDownloader #{m[1]} (#{i.response.body.size} bytes)."
+ puts "#{Time.now}: #{__LINE__} SwissmedicDownloader #{m[1]} (#{i.response.body.size/(1024*1024)} MB )."
if m and true
name = m[1].chomp('_')
swissmedic_dir = File.join(Oddb2xml::WorkDir, 'swissmedic')
FileUtils.makedirs(swissmedic_dir)
xlsx_name = File.join(swissmedic_dir, name + '.xlsx')
if /Packungen/i.match(xlsx_name)
+ FileUtils.rm(xlsx_name, :verbose => true) if File.exists?(xlsx_name)
File.open(xlsx_name, 'wb+') { |f| f.write(i.response.body) }
- puts "#{Time.now}: Openening saved #{xlsx_name} (#{File.size(xlsx_name)} bytes) will take some time. URI was #{i.request.uri}"
+ FileUtils.cp(xlsx_name, File.join(Oddb2xml::SpecData, 'swissmedic_package_downloaded.xlsx'), :verbose => true, :preserve => true)
+ puts "#{Time.now}: #{__LINE__}: Openening saved #{xlsx_name} (#{File.size(xlsx_name)} bytes) will take some time. URI was #{i.request.uri}"
workbook = RubyXL::Parser.parse(xlsx_name)
worksheet = workbook[0]
drugs = []
Oddb2xml::GTINS_DRUGS.each{ |x| next unless x.to_s.size == 13; drugs << [x.to_s[4..8].to_i, x.to_s[9..11].to_i] };
idx = 6; to_delete = []
puts "#{Time.now}: Finding items to delete will take some time"
while (worksheet.sheet_data[idx])
idx += 1
- next unless worksheet.sheet_data[idx-1][0]
- to_delete << (idx-1) unless drugs.find{ |x| x[0]== worksheet.sheet_data[idx-1][0].value.to_i and
- x[1]== worksheet.sheet_data[idx-1][10].value.to_i
+ next unless worksheet.sheet_data[idx-1][Oddb2xml::COLUMNS_JULY_2015.keys.index(:iksnr)]
+ to_delete << (idx-1) unless drugs.find{ |x| x[0]== worksheet.sheet_data[idx-1][Oddb2xml::COLUMNS_JULY_2015.keys.index(:iksnr)].value.to_i and
+ x[1]== worksheet.sheet_data[idx-1][Oddb2xml::COLUMNS_JULY_2015.keys.index(:ikscd)].value.to_i
}
end
if to_delete.size > 0
puts "#{Time.now}: Deleting #{to_delete.size} of the #{idx} items will take some time"
to_delete.reverse.each{ |row_id| worksheet.delete_row(row_id) }
workbook.write(xlsx_name)
+ FileUtils.cp(xlsx_name, File.join(Oddb2xml::SpecData, 'swissmedic_package_shortened.xlsx'), :verbose => true, :preserve => true)
i.response.body = IO.binread(xlsx_name)
i.response.headers['Content-Length'] = i.response.body.size
- puts "#{Time.now}: response.body is now #{i.response.body.size} bytes long. #{xlsx_name} was #{File.size(xlsx_name)}"
+ puts "#{Time.now}: response.body is now #{i.response.body.size/(1024*1024)} MB long. #{xlsx_name} was #{File.size(xlsx_name)}"
end
end
end
end
end
@@ -177,10 +235,11 @@
end
# 2015-06-10 18:54:40 UTC: SwissmedicDownloader attachment; filename="Zugelassene_Packungen_310515.xlsx" (785630 bytes). URI was https://www.swissmedic.ch/arzneimittel/00156/00221/00222/00230/index.html?download=NHzLpZeg7t,lnp6I0NTU042l2Z6ln1acy4Zn4Z2qZpnO2Yuq2Z6gpJCDdHx7hGym162epYbg2c_JjKbNoKSn6A--&lang=de
context 'orphan' do
before(:each) do
+ VCR.eject_cassette
VCR.insert_cassette('oddb2xml', :tag => :swissmedic, :exclusive => false)
common_before
@downloader = Oddb2xml::SwissmedicDownloader.new(:orphan)
end
after(:each) do common_after end
@@ -203,10 +262,11 @@
end
end
end
context 'fridge' do
before(:each) do
+ VCR.eject_cassette
VCR.insert_cassette('oddb2xml', :tag => :swissmedic, :exclusive => false)
common_before
@downloader = Oddb2xml::SwissmedicDownloader.new(:fridge)
end
after(:each) do common_after end
@@ -220,12 +280,12 @@
end
end
end
context 'package' do
before(:each) do
+ VCR.eject_cassette
VCR.insert_cassette('oddb2xml', :tag => :swissmedic, :exclusive => false)
-# VCR.insert_cassette('oddb2xml', :tag => :swissmedic, :record => :all)
common_before
@downloader = Oddb2xml::SwissmedicDownloader.new(:package)
end
after(:each) do common_after end
context 'download_by for package xls' do
@@ -240,12 +300,11 @@
end
end
describe Oddb2xml::EphaDownloader do
include ServerMockHelper
- before(:all) do VCR.eject_cassette end
- before(:each) do
+ before(:all) do
VCR.configure do |c|
c.before_record(:epha) do |i|
if /epha/.match(i.request.uri)
puts "#{Time.now}: #{__LINE__}: URI was #{i.request.uri}"
lines = i.response.body.split("\n")
@@ -256,15 +315,16 @@
i.response.body = i.response.body.split("\n")[0..5].join("\n")
i.response.headers['Content-Length'] = i.response.body.size
end
end
end
+ VCR.eject_cassette
VCR.insert_cassette('oddb2xml', :tag => :epha)
@downloader = Oddb2xml::EphaDownloader.new
common_before
end
- after(:each) do
+ after(:all) do
common_after
end
it_behaves_like 'any downloader'
context 'when download is called' do
@@ -333,11 +393,11 @@
puts "Saved #{bag_tmp} (#{File.size(tmp_zip)} bytes)"
zip_files(tmp_zip, Dir.glob("#{bag_dir}/*"))
puts "Saved #{tmp_zip} (#{File.size(tmp_zip)} bytes)"
i.response.body = IO.binread(tmp_zip)
i.response.headers['Content-Length'] = i.response.body.size
- puts "#{Time.now}: response.body is now #{i.response.body.size} bytes long. #{tmp_zip} was #{File.size(tmp_zip)}"
+ puts "#{Time.now}: response.body is now #{i.response.body.size/(1024*1024)} MB long. #{tmp_zip} was #{File.size(tmp_zip)}"
end
end
end
VCR.eject_cassette
VCR.use_cassette('oddb2xml', :tag => :bag_xml) do
@@ -454,11 +514,11 @@
before(:all) do VCR.eject_cassette end
before(:each) do
VCR.configure do |c|
c.before_record(:medreg) do |i|
if /medregbm.admin.ch/i.match(i.request.uri)
- puts "#{Time.now}: #{__LINE__}: URI was #{i.request.uri} containing #{i.response.body.size} bytes"
+ puts "#{Time.now}: #{__LINE__}: URI was #{i.request.uri} containing #{i.response.body.size/(1024*1024)} MB "
medreg_dir = File.join(Oddb2xml::WorkDir, 'medreg')
FileUtils.makedirs(medreg_dir)
xlsx_name = File.join(medreg_dir, /ListBetrieb/.match(i.request.uri) ? 'Betriebe.xlsx' : 'Personen.xlsx')
File.open(xlsx_name, 'wb+') { |f| f.write(i.response.body) }
puts "#{Time.now}: Openening saved #{xlsx_name} (#{File.size(xlsx_name)} bytes) will take some time. URI was #{i.request.uri}"
@@ -474,11 +534,11 @@
puts "#{Time.now}: Deleting #{to_delete.size} of the #{idx} items will take some time"
to_delete.reverse.each{ |row_id| worksheet.delete_row(row_id) }
workbook.write(xlsx_name)
i.response.body = IO.binread(xlsx_name)
i.response.headers['Content-Length'] = i.response.body.size
- puts "#{Time.now}: response.body is now #{i.response.body.size} bytes long. #{xlsx_name} was #{File.size(xlsx_name)}"
+ puts "#{Time.now}: response.body is now #{i.response.body.size/(1024*1024)} MB long. #{xlsx_name} was #{File.size(xlsx_name)}"
end
end
end
end
common_before
@@ -530,43 +590,36 @@
end
end
describe Oddb2xml::SwissmedicInfoDownloader do
include ServerMockHelper
- before(:all) do VCR.eject_cassette end
- before(:each) do
+ before(:all) do
VCR.configure do |c|
c.before_record(:swissmedicInfo) do |i|
- puts "#{Time.now}: #{__LINE__}: URI was #{i.request.uri} returning #{i.response.body.size} bytes"
+ puts "#{Time.now}: #{__LINE__}: URI was #{i.request.uri} returning #{i.response.body.size/(1024*1024)} MB "
if i.response.headers['Content-Disposition']
m = /filename=([^\d]+)/.match(i.response.headers['Content-Disposition'][0])
if m
name = m[1].chomp('_')
if /AipsDownload/i.match(name)
- swissmedic_dir = File.join(Oddb2xml::WorkDir, 'swissmedicInfo')
- # as reading the unzipped xml takes over 15 minutes using rexml,
- # we read the xml from the spec/data
- spec_xml = Dir.glob("#{Oddb2xml::SpecData}/AipsDownload.xml")[0]
- tmp_zip = File.join(Oddb2xml::WorkDir, 'AipsDownload.zip')
- File.open(tmp_zip, 'wb+') { |f| f.write(i.response.body) }
- unzip_files(tmp_zip, swissmedic_dir)
- FileUtils.cp(spec_xml, Dir.glob("#{swissmedic_dir}/*.xml")[0], :verbose => true)
- zip_files(tmp_zip, Dir.glob("#{swissmedic_dir}/*.x??"))
+ # we replace this by manually reduced xml file from spec/data
+ # As we only use to create the fachinfo, we don't need many elements
+ tmp_zip = File.join(Oddb2xml::SpecData, 'AipsDownload.zip')
i.response.body = IO.binread(tmp_zip)
i.response.headers['Content-Length'] = i.response.body.size
- puts "#{Time.now}: #{__LINE__}: response.body is now #{i.response.body.size} bytes long. #{tmp_zip} was #{File.size(tmp_zip)}"
+ puts "#{Time.now}: #{__LINE__}: response.body is now #{i.response.body.size/(1024*1024)} MB long. #{tmp_zip} was #{File.size(tmp_zip)}"
end
end
end
end
end
VCR.eject_cassette
VCR.insert_cassette('oddb2xml', :tag => :swissmedicInfo)
common_before
@downloader = Oddb2xml::SwissmedicInfoDownloader.new
end
- after(:each) do common_after end
+ after(:all) do common_after end
it_behaves_like 'any downloader'
context 'when download is called' do
let(:xml) { @downloader.download }
it 'should parse zip to String' do
xml.should be_a String
@@ -581,6 +634,6 @@
expect { xml }.not_to raise_error
File.exist?('swissmedic_info.zip').should eq(false)
end
end
end
-end
+