Sha256: b9f161753767082f07a61ef862b479093b8eacf5005208e5771120cf20dc4892
Contents?: true
Size: 1.36 KB
Versions: 8
Compression:
Stored size: 1.36 KB
Contents
#!/usr/bin/env ruby
# nnparse: reads a file of scientific names (one name per line), parses each
# name and writes the results to a file as a JSON array.
#
# Usage: nnparse file_with_scientific_names [output_file]
# The output file defaults to parsed.json.

require 'rubygems'

# Prefer an installed gem, trying the 'dimus-biodiversity' name first.
# Kernel#gem reports a missing gem with Gem::LoadError, which descends from
# LoadError (a ScriptError) and not from StandardError, so an inline `rescue`
# modifier cannot catch it; it has to be rescued explicitly. If neither gem
# can be activated, the ../lib directory pushed onto $LOAD_PATH below is used.
%w[dimus-biodiversity biodiversity].each do |gem_name|
  begin
    gem gem_name
    break
  rescue LoadError, StandardError
    # Best effort only: try the next candidate.
  end
end

$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
require 'biodiversity'
require 'json'

if ARGV.empty?
  puts "Usage:\n\nnnparse file_with_scientific_names [output_file]\n\ndefault output_file is parsed.json\n\n"
  exit
end

parser = ScientificNameParser.new
dirty_parser = ScientificNameDirtyParser.new
output = ARGV[1] || 'parsed.json'

total = 0     # number of lines read from the input file
unparsed = 0  # number of names neither parser could handle

# The block form closes (and flushes) the output file even if parsing raises.
File.open(output, 'w') do |out|
  out.write("[\n")
  wrote_record = false

  # parse a file with names
  IO.foreach(ARGV[0]) do |line|
    puts 'Parsing names' if total == 0
    total += 1
    p total if total % 5000 == 0 # progress indicator

    name = line.strip
    record = { :input => name }
    # Fall back to the dirty parser when the strict parser rejects the name.
    parsed = parser.parse(name) || dirty_parser.parse(name)

    json = nil
    if parsed
      begin
        record[:output] = parsed.value
        # The misspelled key is emitted in the output JSON; it is kept as is
        # so existing consumers of that output keep working.
        record[:canononical] = parsed.canonical
        record[:details] = parsed.details
        # NOTE(review): the original had `name_dict[:parsed => true]` here, a
        # hash lookup with no effect. It presumably meant to flag the record
        # as parsed; confirm where the flag belongs before adding it.
        json = JSON.generate(record)
      rescue StandardError
        puts 'PROBLEM: ' + name
      end
    else
      record[:details] = { :parsed => false }
      json = JSON.generate(record)
      puts name
      unparsed += 1
    end

    # A record that failed above produces no JSON and is skipped. Each record
    # is written exactly once, so a failure can no longer cause the previous
    # record to be emitted a second time.
    next unless json

    out.write(",\n") if wrote_record
    out.write(json)
    wrote_record = true
  end

  out.write("\n") if wrote_record
  out.write("]")
end

puts "Found #{total} records, #{unparsed} of them could not be parsed."
Version data entries
8 entries across 8 versions & 1 rubygems