Sha256: 51f7afb85bf31d5bd208c55110bdde39d8ca4651aef43c1db99d19b2f208d5d2

Contents?: true

Size: 1.23 KB

Versions: 61

Compression:

Stored size: 1.23 KB

Contents

#!/usr/bin/env ruby
# encoding: utf-8
# Collapse the interpreter's "major.minor" into an integer
# (e.g. "1.8.7" -> 18, "1.9.3" -> 19) so the rest of the script can branch on it.
ruby_min_version = RUBY_VERSION.split(".")[0..1].join('').to_i
if ruby_min_version < 19
  require 'rubygems'
  gem_name = 'biodiversity'
else
  gem_name = 'biodiversity19'
end
# Best-effort activation of the installed gem: a failure here is ignored on
# purpose. Gem::LoadError descends from LoadError (a ScriptError), which a
# `rescue` modifier does not catch, so the error classes are named explicitly.
begin
  gem gem_name
rescue LoadError, StandardError
  nil
end

$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
require 'biodiversity'
require 'json'

# Builds the JSON document emitted for a name that could not be parsed.
#
# name - the name string to report as the verbatim input.
#
# Returns a JSON String whose 'scientificName' object carries
# 'parsed' => false, the verbatim name and a fixed 'Parser error' message.
def parser_error(name)
  failure = {
    'parsed'   => false,
    'verbatim' => name,
    'error'    => 'Parser error'
  }
  { 'scientificName' => failure }.to_json
end

# With no command-line arguments there is nothing to parse:
# print the usage text and stop.
if ARGV.size.zero?
  usage = "Usage:\n\nnnparse file_with_scientific_names [output_file]\n\ndefault output_file is parsed.json\n\n"
  puts usage
  exit
end

# First argument is the file of names; the optional second argument is the
# destination file, defaulting to parsed.json in the current directory.
input, output = ARGV.values_at(0, 1)
output ||= 'parsed.json'


# Main loop: read the input file line by line, parse each name and write one
# JSON document per line to the output file.
parser = ScientificNameParser.new
# On Ruby 1.9+ the input is read as UTF-8; older rubies take the default mode.
input_mode = ruby_min_version < 19 ? 'r' : 'r:utf-8'
count = 0

# File.open is used instead of Kernel#open so that a path beginning with "|"
# is never executed as a command, and the block form guarantees both handles
# are flushed and closed even when the loop is interrupted by an error.
File.open(output, 'w') do |out|
  puts 'Parsing...'
  File.open(input, input_mode) do |names|
    names.each do |line|
      count += 1
      # Progress report every 10000 lines.
      puts("%s lines parsed" % count) if count % 10000 == 0
      # Drop an optional leading run of digits and whitespace before the name.
      name = line.gsub(/^[\d]*\s*/, '').strip
      begin
        if ruby_min_version < 19
          old_kcode = $KCODE
          $KCODE = 'NONE'
        end
        parser.parse(name)
        parsed_data = parser.parsed.all_json
      rescue
        # Any failure while parsing or serializing yields the fallback record.
        parsed_data = parser_error(name)
      ensure
        # Restore the global even when the parser raised, so a single bad
        # name cannot leave $KCODE changed.
        $KCODE = old_kcode if ruby_min_version < 19
      end
      out.write parsed_data + "\n"
    end
  end
end

Version data entries

61 entries across 61 versions & 2 rubygems

Version Path
biodiversity-0.6.3 bin/nnparse