Sha256: b9f161753767082f07a61ef862b479093b8eacf5005208e5771120cf20dc4892

Contents?: true

Size: 1.36 KB

Versions: 8

Compression:

Stored size: 1.36 KB

Contents

#!/usr/bin/env ruby
require 'rubygems'

# Activate an installed 'dimus-biodiversity' gem if there is one, otherwise try
# 'biodiversity'. If neither is installed, carry on: the lib directory next to
# this script is put on the load path below and `require` can resolve from it.
#
# Kernel#gem raises Gem::LoadError, which descends from LoadError rather than
# StandardError, so a bare `rescue` modifier does not catch it. Rescue
# LoadError explicitly so the fallback chain actually runs.
begin
  gem 'dimus-biodiversity'
rescue LoadError
  begin
    gem 'biodiversity'
  rescue LoadError
    nil
  end
end

$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
require 'biodiversity'
require 'json'

# nnparse: read scientific names from a file (one per line), parse each one and
# write the results as a JSON array.
#
#   ARGV[0] - path of the input file with one name per line (required)
#   ARGV[1] - path of the output file (optional, defaults to 'parsed.json')
#
# Progress, names that could not be parsed, and names whose result could not be
# serialized are reported on standard output.
if ARGV.empty?
  puts "Usage:\n\nnnparse file_with_scientific_names [output_file]\n\ndefault output_file is parsed.json\n\n"
  exit
end

parser = ScientificNameParser.new
dirty_parser = ScientificNameDirtyParser.new

output = ARGV[1] || 'parsed.json'

# total counts every line read; unparsed counts lines neither parser accepted.
total = unparsed = 0

# The block form closes (and therefore flushes) the output file even if
# parsing raises part-way through.
File.open(output, 'w') do |out|
  out.write("[\n")
  first_record = true

  IO.foreach(ARGV[0]) do |line|
    puts 'Parsing names' if total == 0
    total += 1
    p total if total % 5000 == 0

    name = line.strip
    record = { :input => name }
    # Try the strict parser first and fall back to the dirty parser.
    parsed = parser.parse(name) || dirty_parser.parse(name)

    if parsed
      begin
        record[:output] = parsed.value
        # NOTE(review): the key is spelled :canononical (sic). It is kept as is
        # because consumers of the JSON output may depend on it.
        record[:canononical] = parsed.canonical
        record[:details] = parsed.details
        # NOTE(review): the original evaluated `name_dict[:parsed => true]`
        # here, a hash lookup with no effect. It may have been meant to set a
        # "parsed" flag; confirm before adding one, since that would change
        # the output schema.
        json = JSON.generate(record)
      rescue StandardError
        # Skip this record entirely. Previously the preceding record was
        # written a second time whenever this branch was taken.
        puts 'PROBLEM: ' + name
        next
      end
    else
      record[:details] = { :parsed => false }
      json = JSON.generate(record)
      puts name
      unparsed += 1
    end

    # Emit the separator before every record except the first, so a skipped
    # record can never leave a dangling comma or a duplicate behind.
    out.write(",\n") unless first_record
    out.write(json)
    first_record = false
  end

  out.write("\n") unless first_record
  out.write("]")
end

puts "Found #{total} records, #{unparsed} of them could not be parsed."

Version data entries

8 entries across 8 versions & 1 rubygems

Version Path
dimus-biodiversity-0.0.13 bin/nnparse
dimus-biodiversity-0.0.15 bin/nnparse
dimus-biodiversity-0.0.16 bin/nnparse
dimus-biodiversity-0.0.18 bin/nnparse
dimus-biodiversity-0.5.1 bin/nnparse
dimus-biodiversity-0.5.2 bin/nnparse
dimus-biodiversity-0.5.3 bin/nnparse
dimus-biodiversity-0.5.4 bin/nnparse