require 'java'

# `java_import` is the supported replacement for the deprecated `include_class`.
java_import 'javax.xml.parsers.SAXParser'
java_import 'javax.xml.parsers.SAXParserFactory'

module RTM::IO
  # XTM2 import using JAXP (the Java SAX parser), for use under JRuby.
  module FROMXTM2JAXP
    XTM2DEBUG = false

    # Reads XTM2 from +source+ and feeds the SAX events to a
    # FROMXTM2::XTM2Listener.
    #
    # Example:
    #   RTM::IO::FROMXTM2JAXP.from_xtm2(File.open(file_name), "http://rtm.rubyforge.org/topicmaps/tm1/")
    #
    # source::       the input handed to the JAXP parser (e.g. an io object).
    # base_locator:: base locator passed to the listener; also used to create
    #                a new topic map when no +target+ is given.
    # target::       existing topic map to import into. When nil, a new one is
    #                created with RTM.create(base_locator).
    # options::      passed through unchanged to the listener. Supported options:
    #                :strip_whitespace (defaults to false, may be set to true),
    #                :deprefix (defaults to nil, may be set to a string (or regex)
    #                which will be removed from the beginning of an (unresolved)
    #                item_identifier if it is there).
    #
    # Returns whatever the JAXP +parse+ call returns.
    def self.from_xtm2(source, base_locator, target = nil, options = {})
      tm = RTM.create(base_locator) unless target

      factory = javax.xml.parsers.SAXParserFactory.newInstance
      # The parser produced by this factory will support XML namespaces.
      factory.setNamespaceAware(true)
      # The parser produced by this factory will validate documents as they are parsed.
      factory.setValidating(true)
      parser = factory.newSAXParser

      internal_handler = FROMXTM2::XTM2Listener.new(base_locator, target || tm, options)
      # For debugging, swap in the tracing handler instead:
      # internal_handler = EmptyHandler.new
      handler = XML::JAXPSax2wrapper.new(internal_handler)
      parser.parse(source, handler)
    end
  end

  # Debugging stand-in for a real listener: prints every element and
  # character event it receives and ignores document start/end.
  class EmptyHandler
    def start_document(*args)
    end

    def end_document(*args)
    end

    def start_element(*args)
      puts "start_element: #{args.inspect}"
    end

    def end_element(*args)
      puts "end_element: #{args.inspect}"
    end

    def characters(*args)
      puts "characters: #{args.inspect}"
    end
  end
end

module XML
  java_import 'org.xml.sax.helpers.DefaultHandler'

  # Acts as callback structure for the JAXP SAX parser and calls
  # a REXML SAX2Listener API.
  class JAXPSax2wrapper < DefaultHandler
    # rexml_sax2listener:: object receiving start_document, end_document,
    #                      start_element, end_element and characters calls.
    def initialize(rexml_sax2listener)
      super()
      @dest = rexml_sax2listener
    end

    def startDocument
      @dest.start_document
    end

    def endDocument
      @dest.end_document
    end

    # Copies the attributes into a hash keyed by qname and forwards the event.
    #
    # This is NOT namespace aware! Notes for implementing namespace-awareness:
    # * if qname == localname, the namespace uri seems to be == ""
    # * if the uri is "" it should be expanded to the default ns, which might
    #   be http://www.topicmaps.org/xtm/ if we are "main"
    # The code below just takes all attributes and passes them on with their
    # qnames, so it possibly won't work if attributes we need have a namespace
    # declared, even if it is correct.
    def startElement(namespaceURI, localName, qName, attr)
      attributes = {}
      attr.length.times { |i| attributes[attr.qname(i)] = attr.value(i) }
      @dest.start_element(namespaceURI, localName, qName, attributes)
    end

    # Forwards character data to the listener.
    #
    # Whitespace-only chunks are suppressed (not 100% correct, but it keeps
    # indentation between elements away from the listener). Any other chunk is
    # forwarded unmodified: a SAX parser may deliver one text node in several
    # chunks, so stripping each chunk would delete whitespace that belongs to
    # the text itself.
    def characters(ch, start, length)
      # Going through java.lang.String is how we get a Ruby string out of a Java char[].
      text = java.lang.String.new(ch, start, length).to_s
      return if text.strip.empty?

      @dest.characters(text)
    end

    def endElement(namespaceURI, localName, qName)
      # puts "endElement: #{namespaceURI.inspect} #{localName.inspect} #{qName.inspect}"
      @dest.end_element(namespaceURI, localName, qName)
    end
  end
end