require 'time' # Time#iso8601 lives in the stdlib 'time' extension before Ruby 3.4

require_relative 'term'

module Opener
  module KAF
    # Thin wrapper around a parsed KAF XML document.
    #
    # Offers read access to the language, terms, word forms and raw text, and
    # helpers that append linguistic-processor, word-form and term elements.
    #
    # NOTE(review): Nokogiri and the Text wrapper are referenced but not
    # required from this file; presumably they are loaded by the caller.
    class Document

      # The underlying XML document handed to #initialize.
      attr_reader :document
      # Never assigned inside this class.
      attr_reader :lexicons
      attr_accessor :map

      # @param xml the already-parsed XML document to wrap
      def initialize xml
        @document = xml
      end

      # Parses +xml+ with Nokogiri::XML and wraps the result.
      #
      # @param xml the XML source accepted by Nokogiri::XML
      # @return [Document]
      def self.from_xml xml
        new Nokogiri::XML xml
      end

      # Value of the `xml:lang` attribute on the `KAF` root (memoized).
      # Raises NoMethodError when the document has no `KAF` element.
      def language
        @language ||= @document.at_xpath('KAF').attr 'xml:lang'
      end

      # Term wrappers for every `KAF/terms/term` node. Memoized, so terms
      # appended later through #add_term are not reflected once this was called.
      def terms
        @terms ||= collection 'KAF/terms/term', Term
      end

      # Text wrappers for every `KAF/texts/wf` node (memoized).
      def texts
        @texts ||= collection 'KAF/texts/wf', Text
      end

      # Text content of the first `raw` element.
      # Raises NoMethodError when no such element exists.
      def raw
        @document.at('raw').text
      end

      # Appends an `lp` entry to the `linguisticProcessors` group of the given
      # layer, creating the `kafHeader` and the group when they are missing.
      #
      # @param name      value for the `name` attribute
      # @param version   value for the `version` attribute
      # @param layer     layer the processor belongs to
      # @param timestamp when truthy the current time (ISO 8601) is written,
      #                  otherwise the placeholder '*'
      # @return the node set holding the new `lp` element
      def add_linguistic_processor name, version, layer, timestamp: false
        # add_child with an empty string yields an empty node set, so the
        # markup must name the element for `.first` to return a node.
        header = @document.at('kafHeader') || @document.root.add_child('<kafHeader/>').first
        procs  = header.css('linguisticProcessors').find{ |l| l.attr(:layer) == layer }
        # NOTE(review): +layer+ is interpolated into markup unescaped.
        procs ||= header.add_child("<linguisticProcessors layer='#{layer}'/>").first
        lp = procs.add_child('<lp/>')
        lp.attr(
          timestamp: if timestamp then Time.now.iso8601 else '*' end,
          version:   version,
          name:      name,
        )
        lp
      end

      # Appends a `wf` (word form) element to the `text` layer, creating the
      # layer when it is missing.
      #
      # @param params [Hash] reads :text, :wid, :sid, :para, :offset, :length
      # @return the result of NodeSet#attr on the new `wf` element
      def add_word_form params
        text  = @document.at('text') || @document.root.add_child('<text/>').first
        # NOTE(review): params[:text] is interpolated into markup unescaped;
        # confirm callers escape '<' and '&' or switch to setting node content.
        wf    = text.add_child("<wf>#{params[:text]}</wf>")
        attrs = {
          wid:    "w#{params[:wid]}",
          sent:   params[:sid],
          para:   params[:para],
          offset: params[:offset],
          length: params[:length],
        }
        wf.attr attrs
      end

      # Appends a `term` element to the `terms` layer, creating the layer when
      # it is missing, and links it to its word form.
      #
      # @param params [Hash] reads :tid, :type, :lemma, :text, :pos,
      #                      :morphofeat and :wid
      # @return the node set added beneath the new `term`
      def add_term params
        text  = @document.at('terms') || @document.root.add_child('<terms/>').first
        term  = text.add_child("<term/>")
        attrs = {
          tid:        "t#{params[:tid]}",
          type:       params[:type],
          lemma:      params[:lemma],
          text:       params[:text],
          pos:        params[:pos],
          morphofeat: params[:morphofeat],
        }
        term.attr attrs
        # NOTE(review): span/target shape and the "w<wid>" id are assumed from
        # KAF conventions and the wid format used in #add_word_form - confirm.
        term.first.add_child("<span><target id='w#{params[:wid]}'/></span>")
      end

      # Serializes the document with two-space indentation.
      def to_xml
        @document.to_xml indent: 2
      end

      protected

      # Wraps every node matching the XPath +query+ in +wrapper+, passing this
      # document and the node to the wrapper's constructor.
      def collection query, wrapper
        @document.xpath(query).map{ |node| wrapper.new self, node }
      end

    end
  end
end