spec/downloader_spec.rb in oddb2xml-2.2.3 vs spec/downloader_spec.rb in oddb2xml-2.2.4

- old
+ new

@@ -1,10 +1,59 @@ # encoding: utf-8 require 'spec_helper' VCR.eject_cassette # we use insert/eject around each example +# not used but, as I still don't know how to generate +def filter_aips_xml(filename='AipsDownload_ng.xml', ids_to_keep = [55558, 61848]) + puts "File #{filename} exists? #{File.exists?(filename)}" + tst = %(<?xml version="1.0" encoding="utf-8"?> +<medicalInformations> + <medicalInformation type="fi" version="5" lang="de" safetyRelevant="false" informationUpdate="07.2008"> + <title>Zyvoxid®</title> + <authHolder>Pfizer AG</authHolder> + <atcCode>J01XX08</atcCode> + <substances>Linezolid</substances> + <authNrs>55558, 55559, 55560</authNrs> +) + @xml = IO.read(filename) + ausgabe = File.open('tst.out', 'w+') + data = {} + result = MedicalInformationsContent.parse(@xml.sub(Strip_For_Sax_Machine, ''), :lazy => true) + result.medicalInformation.each do |pac| + lang = pac.lang.to_s + next unless lang =~ /de|fr/ + item = {} + keepIt = false + pac.authNrs.split(/[, ]+/).each{ + |id| + if ids_to_keep.index(id.to_i) + data[ [lang, id.to_i] ] = pac + keepIt = true; + ausgabe.puts + break + end + } + html = Nokogiri::HTML.fragment(pac.content.force_encoding('UTF-8')) + item[:paragraph] = html + numbers = /(\d{5})[,\s]*(\d{5})?|(\d{5})[,\s]*(\d{5})?[,\s]*(\d{5})?/.match(html) + if numbers + [$1, $2, $3].compact.each { + |id| + if ids_to_keep.index(id.to_i) + data[ [lang, id.to_i] ] = pac + keepIt = true; + break + end + } + puts "Must keep #{keepIt} #{pac.authNrs}" + end + end + puts data.size + puts data.keys +end + XML_VERSION_1_0 = /xml\sversion=["']1.0["']/ PREP_XML = 'Preparations.xml' shared_examples_for 'any downloader' do # this takes 5 sec. by call for sleep it 'should count retry times as retrievable or not', :slow => true do @@ -27,10 +76,16 @@ end def common_after Dir.chdir(@savedDir) if @savedDir and File.directory?(@savedDir) VCR.eject_cassette + vcr_file = File.expand_path(File.join(Oddb2xml::SpecData, '..', 'fixtures', 'vcr_cassettes', 'oddb2xml.json')) + puts "Pretty-printing #{vcr_file} exists? #{File.exists?(vcr_file)}" + vcr_file_new = vcr_file.sub('.json', '.new') + cmd = "cat #{vcr_file} | python -mjson.tool > #{vcr_file_new}" + res = system(cmd) + FileUtils.mv(vcr_file_new, vcr_file) end # Zips input_filenames (using the basename) def zip_files(zipfile_name, input_filenames) FileUtils.rm_f(zipfile_name) @@ -41,34 +96,36 @@ end end end # Unzips into a specific directory -def unzip_files(zipfile_name, directory) +def unzip_files(zipfile_name, directory=Dir.pwd) savedDir = Dir.pwd FileUtils.makedirs(directory) Dir.chdir(directory) Zip::File.open(zipfile_name) do |zip_file| # Handle entries one by one zip_file.each do |entry| # Extract to file/directory/symlink - puts "Extracting #{entry.name} into #{directory}" + puts "downloader_spec.rb: Extracting #{entry.name} exists? #{File.exists?(entry.name)} into #{directory}" + FileUtils.rm_f(entry.name, :verbose => true) if File.exists?(entry.name) entry.extract(entry.name) end end ensure Dir.chdir(savedDir) end + describe Oddb2xml::RefdataDownloader do include ServerMockHelper before(:all) do VCR.eject_cassette VCR.configure do |c| c.before_record(:Refdata_DE) do |i| if not /WSDL$/.match(i.request.uri) and /refdatabase.refdata.ch\/Service/.match(i.request.uri) and i.response.body.size > 1024*1024 - puts "#{Time.now}: #{__LINE__}: Parsing response.body (#{i.response.body.size} bytes) will take some time. URI was #{i.request.uri}" + puts "#{Time.now}: #{__LINE__}: Parsing response.body (#{i.response.body.size/(1024*1024)} MB ) will take some time. URI was #{i.request.uri}" doc = REXML::Document.new(i.response.body) items = doc.root.children.first.elements.first nrItems = doc.root.children.first.elements.first.elements.size puts "#{Time.now}: #{__LINE__}: Removing most of the #{nrItems} items will take some time" nrSearched = 0 @@ -77,11 +134,11 @@ nrSearched += 1 puts "#{Time.now}: #{__LINE__}: nrSearched #{nrSearched}/#{nrItems}" if nrSearched % 1000 == 0 items.delete x unless x.elements['GTIN'] and Oddb2xml::GTINS_DRUGS.index(x.elements['GTIN'].text) } i.response.body = doc.to_s - puts "#{Time.now}: response.body is now #{i.response.body.size} bytes long" + puts "#{Time.now}: response.body is now #{i.response.body.size/(1024*1024)} MB long" i.response.headers['Content-Length'] = i.response.body.size end end end VCR.insert_cassette('oddb2xml', :tag => :Refdata_DE) @@ -127,49 +184,50 @@ end end end end -if true describe Oddb2xml::SwissmedicDownloader do include ServerMockHelper - before(:all) do VCR.eject_cassette end before(:each) do VCR.configure do |c| c.before_record(:swissmedic) do |i| - if i.response.headers['Content-Disposition'] and /www.swissmedic.ch/.match(i.request.uri) - puts "#{Time.now}: URI was #{i.request.uri}" + if i.response.headers['Content-Disposition'] and /www.swissmedic.ch/.match(i.request.uri) and i.response.body.size > 1024*1024 + puts "#{Time.now}: #{__LINE__} URI was #{i.request.uri}" m = /filename=.([^\d]+)/.match(i.response.headers['Content-Disposition'][0]) - puts "#{Time.now}: SwissmedicDownloader #{m[1]} (#{i.response.body.size} bytes)." + puts "#{Time.now}: #{__LINE__} SwissmedicDownloader #{m[1]} (#{i.response.body.size/(1024*1024)} MB )." if m and true name = m[1].chomp('_') swissmedic_dir = File.join(Oddb2xml::WorkDir, 'swissmedic') FileUtils.makedirs(swissmedic_dir) xlsx_name = File.join(swissmedic_dir, name + '.xlsx') if /Packungen/i.match(xlsx_name) + FileUtils.rm(xlsx_name, :verbose => true) if File.exists?(xlsx_name) File.open(xlsx_name, 'wb+') { |f| f.write(i.response.body) } - puts "#{Time.now}: Openening saved #{xlsx_name} (#{File.size(xlsx_name)} bytes) will take some time. URI was #{i.request.uri}" + FileUtils.cp(xlsx_name, File.join(Oddb2xml::SpecData, 'swissmedic_package_downloaded.xlsx'), :verbose => true, :preserve => true) + puts "#{Time.now}: #{__LINE__}: Openening saved #{xlsx_name} (#{File.size(xlsx_name)} bytes) will take some time. URI was #{i.request.uri}" workbook = RubyXL::Parser.parse(xlsx_name) worksheet = workbook[0] drugs = [] Oddb2xml::GTINS_DRUGS.each{ |x| next unless x.to_s.size == 13; drugs << [x.to_s[4..8].to_i, x.to_s[9..11].to_i] }; idx = 6; to_delete = [] puts "#{Time.now}: Finding items to delete will take some time" while (worksheet.sheet_data[idx]) idx += 1 - next unless worksheet.sheet_data[idx-1][0] - to_delete << (idx-1) unless drugs.find{ |x| x[0]== worksheet.sheet_data[idx-1][0].value.to_i and - x[1]== worksheet.sheet_data[idx-1][10].value.to_i + next unless worksheet.sheet_data[idx-1][Oddb2xml::COLUMNS_JULY_2015.keys.index(:iksnr)] + to_delete << (idx-1) unless drugs.find{ |x| x[0]== worksheet.sheet_data[idx-1][Oddb2xml::COLUMNS_JULY_2015.keys.index(:iksnr)].value.to_i and + x[1]== worksheet.sheet_data[idx-1][Oddb2xml::COLUMNS_JULY_2015.keys.index(:ikscd)].value.to_i } end if to_delete.size > 0 puts "#{Time.now}: Deleting #{to_delete.size} of the #{idx} items will take some time" to_delete.reverse.each{ |row_id| worksheet.delete_row(row_id) } workbook.write(xlsx_name) + FileUtils.cp(xlsx_name, File.join(Oddb2xml::SpecData, 'swissmedic_package_shortened.xlsx'), :verbose => true, :preserve => true) i.response.body = IO.binread(xlsx_name) i.response.headers['Content-Length'] = i.response.body.size - puts "#{Time.now}: response.body is now #{i.response.body.size} bytes long. #{xlsx_name} was #{File.size(xlsx_name)}" + puts "#{Time.now}: response.body is now #{i.response.body.size/(1024*1024)} MB long. #{xlsx_name} was #{File.size(xlsx_name)}" end end end end end @@ -177,10 +235,11 @@ end # 2015-06-10 18:54:40 UTC: SwissmedicDownloader attachment; filename="Zugelassene_Packungen_310515.xlsx" (785630 bytes). URI was https://www.swissmedic.ch/arzneimittel/00156/00221/00222/00230/index.html?download=NHzLpZeg7t,lnp6I0NTU042l2Z6ln1acy4Zn4Z2qZpnO2Yuq2Z6gpJCDdHx7hGym162epYbg2c_JjKbNoKSn6A--&lang=de context 'orphan' do before(:each) do + VCR.eject_cassette VCR.insert_cassette('oddb2xml', :tag => :swissmedic, :exclusive => false) common_before @downloader = Oddb2xml::SwissmedicDownloader.new(:orphan) end after(:each) do common_after end @@ -203,10 +262,11 @@ end end end context 'fridge' do before(:each) do + VCR.eject_cassette VCR.insert_cassette('oddb2xml', :tag => :swissmedic, :exclusive => false) common_before @downloader = Oddb2xml::SwissmedicDownloader.new(:fridge) end after(:each) do common_after end @@ -220,12 +280,12 @@ end end end context 'package' do before(:each) do + VCR.eject_cassette VCR.insert_cassette('oddb2xml', :tag => :swissmedic, :exclusive => false) -# VCR.insert_cassette('oddb2xml', :tag => :swissmedic, :record => :all) common_before @downloader = Oddb2xml::SwissmedicDownloader.new(:package) end after(:each) do common_after end context 'download_by for package xls' do @@ -240,12 +300,11 @@ end end describe Oddb2xml::EphaDownloader do include ServerMockHelper - before(:all) do VCR.eject_cassette end - before(:each) do + before(:all) do VCR.configure do |c| c.before_record(:epha) do |i| if /epha/.match(i.request.uri) puts "#{Time.now}: #{__LINE__}: URI was #{i.request.uri}" lines = i.response.body.split("\n") @@ -256,15 +315,16 @@ i.response.body = i.response.body.split("\n")[0..5].join("\n") i.response.headers['Content-Length'] = i.response.body.size end end end + VCR.eject_cassette VCR.insert_cassette('oddb2xml', :tag => :epha) @downloader = Oddb2xml::EphaDownloader.new common_before end - after(:each) do + after(:all) do common_after end it_behaves_like 'any downloader' context 'when download is called' do @@ -333,11 +393,11 @@ puts "Saved #{bag_tmp} (#{File.size(tmp_zip)} bytes)" zip_files(tmp_zip, Dir.glob("#{bag_dir}/*")) puts "Saved #{tmp_zip} (#{File.size(tmp_zip)} bytes)" i.response.body = IO.binread(tmp_zip) i.response.headers['Content-Length'] = i.response.body.size - puts "#{Time.now}: response.body is now #{i.response.body.size} bytes long. #{tmp_zip} was #{File.size(tmp_zip)}" + puts "#{Time.now}: response.body is now #{i.response.body.size/(1024*1024)} MB long. #{tmp_zip} was #{File.size(tmp_zip)}" end end end VCR.eject_cassette VCR.use_cassette('oddb2xml', :tag => :bag_xml) do @@ -454,11 +514,11 @@ before(:all) do VCR.eject_cassette end before(:each) do VCR.configure do |c| c.before_record(:medreg) do |i| if /medregbm.admin.ch/i.match(i.request.uri) - puts "#{Time.now}: #{__LINE__}: URI was #{i.request.uri} containing #{i.response.body.size} bytes" + puts "#{Time.now}: #{__LINE__}: URI was #{i.request.uri} containing #{i.response.body.size/(1024*1024)} MB " medreg_dir = File.join(Oddb2xml::WorkDir, 'medreg') FileUtils.makedirs(medreg_dir) xlsx_name = File.join(medreg_dir, /ListBetrieb/.match(i.request.uri) ? 'Betriebe.xlsx' : 'Personen.xlsx') File.open(xlsx_name, 'wb+') { |f| f.write(i.response.body) } puts "#{Time.now}: Openening saved #{xlsx_name} (#{File.size(xlsx_name)} bytes) will take some time. URI was #{i.request.uri}" @@ -474,11 +534,11 @@ puts "#{Time.now}: Deleting #{to_delete.size} of the #{idx} items will take some time" to_delete.reverse.each{ |row_id| worksheet.delete_row(row_id) } workbook.write(xlsx_name) i.response.body = IO.binread(xlsx_name) i.response.headers['Content-Length'] = i.response.body.size - puts "#{Time.now}: response.body is now #{i.response.body.size} bytes long. #{xlsx_name} was #{File.size(xlsx_name)}" + puts "#{Time.now}: response.body is now #{i.response.body.size/(1024*1024)} MB long. #{xlsx_name} was #{File.size(xlsx_name)}" end end end end common_before @@ -530,43 +590,36 @@ end end describe Oddb2xml::SwissmedicInfoDownloader do include ServerMockHelper - before(:all) do VCR.eject_cassette end - before(:each) do + before(:all) do VCR.configure do |c| c.before_record(:swissmedicInfo) do |i| - puts "#{Time.now}: #{__LINE__}: URI was #{i.request.uri} returning #{i.response.body.size} bytes" + puts "#{Time.now}: #{__LINE__}: URI was #{i.request.uri} returning #{i.response.body.size/(1024*1024)} MB " if i.response.headers['Content-Disposition'] m = /filename=([^\d]+)/.match(i.response.headers['Content-Disposition'][0]) if m name = m[1].chomp('_') if /AipsDownload/i.match(name) - swissmedic_dir = File.join(Oddb2xml::WorkDir, 'swissmedicInfo') - # as reading the unzipped xml takes over 15 minutes using rexml, - # we read the xml from the spec/data - spec_xml = Dir.glob("#{Oddb2xml::SpecData}/AipsDownload.xml")[0] - tmp_zip = File.join(Oddb2xml::WorkDir, 'AipsDownload.zip') - File.open(tmp_zip, 'wb+') { |f| f.write(i.response.body) } - unzip_files(tmp_zip, swissmedic_dir) - FileUtils.cp(spec_xml, Dir.glob("#{swissmedic_dir}/*.xml")[0], :verbose => true) - zip_files(tmp_zip, Dir.glob("#{swissmedic_dir}/*.x??")) + # we replace this by manually reduced xml file from spec/data + # As we only use to create the fachinfo, we don't need many elements + tmp_zip = File.join(Oddb2xml::SpecData, 'AipsDownload.zip') i.response.body = IO.binread(tmp_zip) i.response.headers['Content-Length'] = i.response.body.size - puts "#{Time.now}: #{__LINE__}: response.body is now #{i.response.body.size} bytes long. #{tmp_zip} was #{File.size(tmp_zip)}" + puts "#{Time.now}: #{__LINE__}: response.body is now #{i.response.body.size/(1024*1024)} MB long. #{tmp_zip} was #{File.size(tmp_zip)}" end end end end end VCR.eject_cassette VCR.insert_cassette('oddb2xml', :tag => :swissmedicInfo) common_before @downloader = Oddb2xml::SwissmedicInfoDownloader.new end - after(:each) do common_after end + after(:all) do common_after end it_behaves_like 'any downloader' context 'when download is called' do let(:xml) { @downloader.download } it 'should parse zip to String' do xml.should be_a String @@ -581,6 +634,6 @@ expect { xml }.not_to raise_error File.exist?('swissmedic_info.zip').should eq(false) end end end -end +