# Copyright: Copyright 2009 Topic Maps Lab, University of Leipzig.
# License:   Apache License, Version 2.0
# Ruby Topic Maps (RTM) http://rtm.rubyforge.org/

class RTM::RTMAR
  # Imports an XTM 2.0 document using this object as the base topic map system.
  # All arguments are forwarded, after +self+, to
  # RTM::AR::IO::FROMXTM2.from_xtm2 (source, base_locator, target, options).
  def from_xtm2(*args)
    RTM::AR::IO::FROMXTM2.from_xtm2(self, *args)
  end
end

module RTM::AR::IO
  # XTM2 Import.
  # Parses an XTM 2.0 document with the REXML SAX2 parser and creates the
  # corresponding constructs in a topic map while the document is being read.
  module FROMXTM2
    require 'rexml/document'
    require 'rexml/parsers/sax2parser'
    require 'rexml/sax2listener'
    #require 'jrexml'

    XTM2DEBUG = false

    # Reads XTM2 from source (handed as-is to REXML::Parsers::SAX2Parser.new).
    # Example:
    #   RTM::AR::IO::FROMXTM2.from_xtm2(base_tms, File.open(file_name), "http://rtm.rubyforge.org/topicmaps/tm1/")
    #
    # base_tms::     object responding to +create(base_locator)+; only used when no target is given
    # source::       the XTM 2.0 input
    # base_locator:: locator against which references are resolved
    # target::       topic map to import into; a new one is created from base_tms when nil
    # options::      supported options:
    #                :strip_whitespace (defaults to false, may be set to true),
    #                :deprefix (defaults to nil, may be set to a string (or regex) which will be
    #                removed from the beginning of an (unresolved) item_identifier if it is there.
    #
    # Always returns true; parser and listener errors propagate as exceptions.
    def self.from_xtm2(base_tms, source, base_locator, target=nil, options={})
      target ||= base_tms.create(base_locator)
      listener = XTM2Listener.new(base_locator, target, options)
      parser = REXML::Parsers::SAX2Parser.new(source)
      parser.listen(listener)
      parser.parse
      true
    end

    # SAX2 listener that turns XTM 2.0 events into topic map constructs.
    #
    # State:
    # @targets:: stack of the constructs currently being filled (innermost last)
    # @path::    stack of the names of the currently open XTM elements
    class XTM2Listener
      include REXML::SAX2Listener

      # Namespace of XTM 2.0 elements; events from any other namespace are ignored.
      XTM2_NS = "http://www.topicmaps.org/xtm/".freeze

      def initialize(base_locator, target, options={})
        @base_locator = base_locator
        @target = target
        @targets = []
        @path = []
        @options = options
      end

      # Called for every start tag. Creates (or looks up) the construct that
      # belongs to the element and pushes it onto @targets where applicable.
      # Raises RuntimeError for a nested topicMap element, for an XTM version
      # other than 2.0 and for a reifier that already reifies a construct of
      # a different kind.
      def start_element(uri, name, qname, attrs)
        return unless uri == XTM2_NS
        #puts "Start of tag #{name} - attrs: #{attrs.inspect}"
        case name
        when "topic"
          @topic = @targets.last.topic_by_item_identifier! resolve(attrs["id"])
          @targets.push @topic
        when "association"
          @targets.push(reified_or_new(attrs["reifier"], :Association, "association") {
            @targets.last.create_association
          })
        when "role"
          @targets.push(reified_or_new(attrs["reifier"], :AssociationRole, "association role") {
            @targets.last.create_role
          })
        when "name"
          @targets.push(reified_or_new(attrs["reifier"], :TopicName, "topic name") {
            @targets.last.create_name
          })
        when "value"
          # this is handled in characters() / cdata()
        when "variant"
          @targets.push(reified_or_new(attrs["reifier"], :Variant, "topic variant") {
            @targets.last.create_variant
          })
        when "scope", "instanceOf", "type"
          # nothing to be done here :) - the nested topicRef does the work
        when "occurrence"
          @targets.push(reified_or_new(attrs["reifier"], :Occurrence, "topic occurrence") {
            @targets.last.create_occurrence
          })
        when "resourceData"
          # special handling of the collected value is done in end_element
          @targets.last.datatype = attrs["datatype"] || RTM::PSI[:String]
        when "topicRef"
          # the meaning of a topicRef depends on the element enclosing it
          case @path.last
          when "scope"
            @targets.last.scope << attrs["href"]
          when "instanceOf"
            assoc = @targets.last.topic_map.create_association :type => RTM::PSI[:type_instance]
            assoc.create_role @targets.last, RTM::PSI[:instance]
            assoc.create_role attrs["href"], RTM::PSI[:type]
          when "type"
            @targets.last.type = attrs["href"]
          when "role"
            @targets.last.player = attrs["href"]
          end
        when "resourceRef"
          occurrence = @targets.last
          occurrence.datatype = RTM::PSI[:IRI]
          occurrence.value = attrs["href"]
        when "subjectLocator"
          @targets.last.subject_locators << attrs["href"]
        when "subjectIdentifier"
          @targets.last.subject_identifiers << attrs["href"]
        when "itemIdentity"
          @targets.last.item_identifiers << resolve(attrs["href"])
        when "topicMap"
          # check where we are
          raise "unexpected element topicMap" unless @path.empty?
          # check version
          raise "Sorry, only XTM 2.0 is supported" if attrs["version"] != "2.0"
          # reifier. if target topic map is already reified we have to merge, if not create topic
          if attrs["reifier"]
            if @target.reifier
              # TODO this might be no topic here. In this case something should be raised!
              @target.reifier.item_identifiers << resolve(attrs["reifier"])
            else
              # NOTE(review): the topic is created/looked up here but not assigned
              # as the reifier of @target - confirm whether that is intended.
              @target.topic_by_item_identifier! resolve(attrs["reifier"])
            end
          end
          @targets.push @target
        when "mergeMap"
          # TODO mergeMap
          warn "mergeMap is not supported yet!"
        else
          warn "Unhandled element: #{name}"
        end
        @path.push name
      end

      # Called when the end tag is reached. In the case of <tag/>, end_element
      # will be called immediately after start_element.
      # Raises RuntimeError if the closed element is not the one opened last.
      def end_element(uri, name, qname)
        return unless uri == XTM2_NS
        #puts "end of tag: #{name}"
        opened = @path.pop
        raise "Tag_end did not match: expected: #{opened}, got: #{name}." unless opened == name
        case name
        when "topicMap", "topic", "association", "role", "name", "variant", "occurrence"
          @targets.pop
        when "resourceData"
          occur = @targets.last
          if occur.datatype == RTM::PSI[:IRI]
            # the value was collected from character data and is resolved only now
            occur.value = resolve(occur.value)
          elsif occur.datatype == RTM::PSI[:anyType]
            # TODO Content must be canonicalized according to http://www.isotopicmaps.org/sam/sam-xtm/#sect-xml-canonicalization
            warn("Content canonicalization is not yet implemented!")
          end
        end
      end

      # Called when text is encountered in the document.
      # text:: the text content
      def characters(text)
        append_character_data(text, "text")
      end

      # Called when a CDATA section is encountered; treated like plain text.
      def cdata(content)
        append_character_data(content, "cdata")
      end

      # Called for the XML declaration; only emits warnings.
      def xmldecl(version, encoding, standalone)
        warn "XML Version 1.0 expected. Not sure if we can handle this, but we will try." unless version == "1.0"
        warn "Be aware there is no encoding manipulation done, everything gets in as-is." if encoding
        # what about standalone?
      end

      private

      # Resolves iri against the base locator via the target topic map.
      # If the :deprefix option is set, that prefix is removed from the start
      # of iri first. A String prefix is matched literally, a Regexp is used
      # as given. A nil iri only triggers a warning and is passed on unchanged.
      def resolve(iri)
        warn("resolving nil iri can have unexpected results") unless iri
        # TODO handle %HH sequences, here or in backend/active_record.rb
        deprefix = @options[:deprefix]
        if iri && deprefix
          prefix = deprefix.is_a?(Regexp) ? deprefix : Regexp.escape(deprefix.to_s)
          # \A (not ^) so that only the very beginning of the iri can match
          iri = iri.sub(/\A#{prefix}/, '')
        end
        @target.resolve(iri, @base_locator)
      end

      # Returns the construct belonging to an element with an optional reifier.
      #
      # Without reifier_iri the construct produced by the block is returned.
      # With reifier_iri the reifying topic is looked up (or created); if it
      # already reifies a construct of the expected kind that construct is
      # reused, otherwise the block's construct is created and gets the topic
      # as reifier.
      #
      # type_name::   name of the expected construct module/class below RTM;
      #               looked up lazily, only when an existing reification is checked
      # description:: wording used in the error message
      # Raises RuntimeError if the topic reifies a construct of another kind.
      def reified_or_new(reifier_iri, type_name, description)
        return yield unless reifier_iri
        ref = @targets.last.topic_map.topic_by_item_identifier! resolve(reifier_iri)
        reified = ref.reified
        if reified
          unless reified.is_a?(RTM.const_get(type_name))
            raise "The topic reifiing this #{description} already reifies something else: #{ref}."
          end
          return reified
        end
        construct = yield
        construct.reifier = ref
        construct
      end

      # Appends character data to the value of the current construct when
      # inside a value or resourceData element; other non-blank data is
      # reported with a warning. kind ("text" or "cdata") is used for that
      # warning only. The string passed in by the parser is never modified.
      def append_character_data(content, kind)
        if @options[:strip_whitespace]
          content = content.strip
          return if content.empty?
        end
        case @path.last
        when "value", "resourceData"
          construct = @targets.last
          if construct.value
            construct.value += content
          else
            construct.value = content
          end
        else
          warn "Found #{kind} but don't know that to do: #{content}" unless content.strip.empty?
        end
      end
    end
  end
end