#!/usr/bin/env ruby
# encoding: UTF-8
require File.expand_path(File.dirname(__FILE__) + '/../config/boot')
require 'druid-tools'
require 'optparse'
require 'json'
# Gazetteer used for every keyword lookup in resolve_placenames. Held in a
# constant rather than a top-level class variable, because Ruby 3.0+ raises
# RuntimeError on class variable access from the toplevel scope.
GAZETTEER = GeoHydra::Gazetteer.new

#
# Resolves placenames using local gazetteer
#
# * Changes subject/geographic with GeoNames as authority to have the correct valueURI
# * Adds correct rdf:resource to geo extension
# * Adds a LCSH or LCNAF keyword if needed
#
# The file at modsFn is parsed, updated in memory and then rewritten in place.
# Keywords that have no gazetteer URI are left untouched (a warning is printed
# when flags[:verbose] is set).
#
# @param modsFn [String] path to the MODS XML file to update
# @param flags [Hash] run options; :verbose and :debug are the keys read here
def resolve_placenames(modsFn, flags)
  puts "Processing #{modsFn}" if flags[:verbose]

  # Block form closes the input handle as soon as parsing is done.
  mods = File.open(modsFn, 'rb') { |f| Nokogiri::XML(f) }

  ns = {
    'mods' => 'http://www.loc.gov/mods/v3',
    'dc' => 'http://purl.org/dc/elements/1.1/'
  }

  # The set of dc:coverage nodes does not depend on the keyword being
  # processed (the loop only edits attributes and appends subject elements),
  # so it is queried once instead of once per geographic element.
  coverages = mods.xpath('//mods:extension//dc:coverage', ns)

  mods.xpath('//mods:geographic', ns).each do |i|
    ap({:i => i}) if flags[:debug]
    k = i.content

    # Verify Gazetteer keyword
    uri = GAZETTEER.find_uri_by_keyword(k)
    if uri.nil?
      puts "WARNING: Missing gazetteer entry for '#{k}'" if flags[:verbose]
      next
    end

    # Ensure correct valueURI for subject/geographic for GeoNames
    i['valueURI'] = uri
    i['authority'] = 'geonames'
    i['authorityURI'] = 'http://www.geonames.org/ontology#'

    # Correct any linkages for placenames in the geo extension
    coverages.each do |j|
      next unless j['dc:title'] == k
      puts "Correcting dc:coverage@rdf:resource for #{k}" if flags[:debug]
      j['rdf:resource'] = uri
    end

    # Add a LC heading if needed
    lc = GAZETTEER.find_lc_by_keyword(k)
    ap({:lc => lc}) if flags[:debug]
    unless lc.nil? || k == lc
      puts "Adding Library of Congress entry to end of MODS record" if flags[:verbose]
      lcauth = GAZETTEER.find_lcauth_by_keyword(k)
      unless lcauth.nil?
        lcuri = GAZETTEER.find_lcuri_by_keyword(k)
        # Becomes an optional attribute; interpolates to nothing when nil.
        lcuri = " valueURI='#{lcuri}'" unless lcuri.nil?
        # NOTE(review): the markup in this literal had been lost (only the
        # heading text remained, leaving lcauth and lcuri unused and no root
        # element to append). The element and attribute names here are
        # reconstructed from MODS subject/geographic conventions -- confirm
        # against the intended output.
        i.parent.parent << Nokogiri::XML("
<subject>
  <geographic authority='#{lcauth}'#{lcuri}>#{lc}</geographic>
</subject>
").root
      end
    end
    ap({:i => i}) if flags[:debug]
  end

  # Save XML tree; the block form flushes and closes the output handle.
  File.open(modsFn, 'wb') do |f|
    mods.write_to(f, :encoding => 'UTF-8', :indent => 2)
  end
end
# Finds every descMetadata.xml that sits under a druid-named directory in the
# workspace and runs resolve_placenames on it.
#
# @param flags [Hash] run options; reads :workspacedir and :verbose, and is
#   handed unchanged to resolve_placenames
def main(flags)
  # From here on, mask out only the world-write bit on files this process creates.
  File.umask(0002)
  puts "Searching for MODS records..." if flags[:verbose]

  pattern = flags[:workspacedir] + "/**/#{DruidTools::Druid.glob}/metadata/descMetadata.xml"
  Dir.glob(pattern) { |mods_path| resolve_placenames(mods_path, flags) }
end
# __MAIN__
# Builds the default flags, applies command-line options on top of them, and
# then processes the workspace. System-level failures (SystemCallError, e.g.
# a file that cannot be opened) are reported on stderr and end the run with
# exit(-1).
#
# NOTE(review): the original option definitions were lost -- the banner line
# was truncated straight into the debug dump of flags, leaving a syntax error
# and an unclosed block. The switches below are reconstructed from the keys
# present in flags; confirm their spellings against how this script is
# actually invoked.
begin
  flags = {
    :debug => false,
    :verbose => false,
    :workspacedir => GeoHydra::Config.geohydra.workspace || 'workspace'
  }

  OptionParser.new do |opts|
    opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
    opts.on('-d', '--debug', 'Print debugging output') do
      flags[:debug] = true
    end
    opts.on('-v', '--verbose', 'Run verbosely') do
      flags[:verbose] = true
    end
    opts.on('--workspace DIR', "Workspace directory (default: #{flags[:workspacedir]})") do |dir|
      flags[:workspacedir] = dir
    end
  end.parse!

  ap({:flags => flags}) if flags[:debug]
  main flags
rescue SystemCallError => e
  $stderr.puts "ERROR: #{e.message}"
  exit(-1)
end