Sha256: 0199c782e6e8cef28bda104dca2a9275e13aefa698074209482f9e52bac64631
Contents?: true
Size: 848 Bytes
Versions: 25
Compression:
Stored size: 848 Bytes
Contents
#!/usr/bin/env ruby
# encoding: utf-8

# Batch command-line front end for ScientificNameParser.
#
# Reads the input file line by line, strips any leading digits and
# whitespace from each line, parses the remainder, and writes one result
# per line to the output file.
#
# Usage: nnparse file_with_scientific_names [output_file]
# The output file defaults to parsed.json.

require 'biodiversity'
require 'json'

# Builds the fallback record written when a name cannot be parsed.
#
# @param name [String] the name string that was handed to the parser
# @return [String] JSON document marking the name as not parsed
def parser_error(name)
  {
    'scientificName' => {
      'parsed' => false,
      'verbatim' => name,
      'error' => 'Parser error'
    }
  }.to_json
end

if ARGV.empty?
  puts "Usage:\n\n" +
       "nnparse file_with_scientific_names [output_file]\n\n" +
       "default output_file is parsed.json\n\n"
  exit
end

input = ARGV[0]
output = ARGV[1] || 'parsed.json'
parser = ScientificNameParser.new
count = 0
puts 'Parsing...'

# File.open (rather than Kernel#open) treats the arguments strictly as file
# paths, and the block form closes both handles even if an error escapes.
File.open(output, 'w') do |out|
  File.open(input, 'r:utf-8') do |names|
    names.each do |line|
      count += 1
      puts("%s lines parsed" % count) if count % 10000 == 0

      # Drop a leading run of digits (if any) and surrounding whitespace.
      name = line.gsub(/^[\d]*\s*/, '').strip

      # Any StandardError raised while parsing or serializing falls back to
      # the generic error record so one bad name does not abort the batch.
      parsed_data =
        begin
          parser.parse(name)
          parser.parsed.all_json
        rescue
          parser_error(name)
        end

      out.write parsed_data + "\n"
    end
  end
end
Version data entries
25 entries across 25 versions & 2 rubygems