lib/oddb2xml/util.rb in oddb2xml-2.4.8 vs lib/oddb2xml/util.rb in oddb2xml-2.4.9
- old
+ new
@@ -15,31 +15,30 @@
# WorkDir and Downloads are derived from the current directory (Dir.pwd).
# They are only defined here when the RSpec constant is not defined.
unless defined?(RSpec)
WorkDir = Dir.pwd
Downloads = "#{Dir.pwd}/downloads"
end
@options = {}
# Location of the ATC CSV file that add_epha_changes_for_ATC opens and reads.
# NOTE(review): the 2.4.9 URL uses plain http while the 2.4.8 URL used https -- confirm this is intended.
- @atc_csv_origin = 'https://raw.githubusercontent.com/epha/robot/master/data/manual/swissmedic/atc.csv'
+ @atc_csv_origin = 'http://download.epha.ch/data/atc/atc.csv'
# Starts out empty; add_epha_changes_for_ATC reads the CSV when its size is 0.
# Presumably filled from the CSV afterwards -- that code is outside this excerpt, verify there.
@atc_csv_content = {}
+ # Returns a copy of +line+ in which a few problematic characters are swapped
+ # for replacements and the trailing line break is removed:
+ #   U+0089 -> "‰", U+0092 -> "’", U+0096 -> "-", U+2013 -> "-", U+201D -> '"'
+ # (the first three look like Windows-1252 bytes read as Latin-1 -- TODO confirm).
+ # If any step raises, the error is printed and +line+ is returned untouched.
+ def Oddb2xml.patch_some_utf8(line)
+   # Applied one after the other, in this order, as plain string substitutions.
+   swaps = [["\u0089", "‰"], ["\u0092", '’'], ["\u0096", '-'], ["\u2013", '-'], ["\u201D", '"']]
+   patched = swaps.inject(line) { |text, (from, to)| text.gsub(from, to) }
+   patched.chomp
+ rescue => error
+   puts "#{error}: in #{line}"
+   line
+ end
+
+ # Converts +line+ to ISO-8859-1 after passing it through
+ # Oddb2xml.patch_some_utf8.
+ #
+ # If the strict conversion raises, the error is printed and the conversion is
+ # repeated in lossy mode: characters that are invalid in the source encoding
+ # or have no ISO-8859-1 representation are dropped. The method then returns
+ # a String instead of stopping in a debugger.
def Oddb2xml.convert_to_8859_1(line)
begin
# We want to ignore lines which are not really UTF-8 encoded
- return line.encode('ISO-8859-1')
+ patched = Oddb2xml.patch_some_utf8(line)
+ patched.encode('ISO-8859-1')
rescue => error
- ausgabe = ''
- 0.upto(line.size-1).each do |idx|
- begin
- if line[idx].ord == 8211
- ausgabe += '-'
- else
- ausgabe += line[idx].encode('ISO-8859-1')
- end
- rescue => error
- puts "#{error}: in #{line} at #{idx}"
- end
- end
+ puts "#{error}: in #{line}"
+ # Lossy fallback (no debugger breakpoint in library code): drop whatever
+ # cannot be converted. patched is nil if the failure happened before it was set.
+ (patched || line).to_s.encode('ISO-8859-1', invalid: :replace, undef: :replace, replace: '')
end
- ausgabe.encode('ISO-8859-1')
end
def Oddb2xml.add_epha_changes_for_ATC(iksnr, atc_code)
if @atc_csv_content.size == 0
open(@atc_csv_origin).readlines.each{