require_relative 'term'
module Opener
  module KAF
    ##
    # Thin wrapper around an XML document object holding a KAF file.
    #
    # Offers readers for the document language, its terms, word forms and raw
    # text, plus helpers that append header, word-form and term elements to
    # the wrapped document.
    #
    class Document
      # @return [Object] the XML document object handed to {#initialize}.
      attr_reader :document

      # NOTE(review): nothing in this class assigns @lexicons, so this reader
      # returns nil unless it is set elsewhere — confirm it is still needed.
      attr_reader :lexicons

      # Free-form slot readable and writable by callers; this class never
      # touches it itself.
      attr_accessor :map

      ##
      # @param xml [Object] an already parsed XML document (in practice what
      #   Nokogiri::XML returns, see {.from_xml}).
      #
      def initialize(xml)
        @document = xml
      end

      ##
      # Parses +xml+ with Nokogiri::XML and wraps the result.
      #
      # @param xml [String, IO] anything Nokogiri::XML accepts.
      # @return [Document]
      #
      def self.from_xml(xml)
        new(Nokogiri::XML(xml))
      end

      ##
      # Value of the +xml:lang+ attribute on the +KAF+ element (memoized).
      #
      def language
        @language ||= @document.at_xpath('KAF').attr('xml:lang')
      end

      ##
      # Every +KAF/terms/term+ node wrapped in a Term.
      #
      # The list is memoized on first access and is not refreshed by
      # {#add_term}.
      #
      # @return [Array<Term>]
      #
      def terms
        @terms ||= collection('KAF/terms/term', Term)
      end

      ##
      # Every +KAF/texts/wf+ node wrapped in a Text (memoized on first access).
      #
      # NOTE(review): {#add_word_form} appends to a +text+ element while this
      # query reads from +texts+ — confirm which element name is intended.
      #
      # @return [Array<Text>]
      #
      def texts
        @texts ||= collection('KAF/texts/wf', Text)
      end

      ##
      # Text content of the first +raw+ element. Raises NoMethodError when the
      # document has no such element.
      #
      # @return [String]
      #
      def raw
        @document.at('raw').text
      end

      ##
      # Records a linguistic processor in the KAF header, creating the
      # +kafHeader+ element and the +linguisticProcessors+ group for +layer+
      # when they do not exist yet.
      #
      # @param name [String] processor name, stored in the +name+ attribute.
      # @param version [String] processor version, stored in +version+.
      # @param layer [String, Symbol] layer whose group receives the entry.
      # @param timestamp [Boolean] when truthy the current time in ISO 8601
      #   form is stored, otherwise the literal '*'.
      # @return [Object] the node set holding the new +lp+ element.
      #
      def add_linguistic_processor(name, version, layer, timestamp: false)
        # An empty fragment would add nothing and make `.first` nil, so the
        # element to create has to be spelled out.
        header = @document.at('kafHeader') ||
                 @document.root.add_child('<kafHeader/>').first

        # Attribute values read back from the document are strings; compare
        # against the string form so a Symbol layer reuses its group too.
        procs = header.css('linguisticProcessors')
                      .find { |group| group.attr(:layer) == layer.to_s }
        # The layer is set through the attribute API rather than interpolated
        # into the markup, so quoting in the value cannot break the fragment.
        procs ||= header.add_child('<linguisticProcessors/>')
                        .attr(layer: layer)
                        .first

        # NOTE(review): Time#iso8601 lives in the 'time' stdlib on older
        # Rubies; this file does not require it — presumably loaded elsewhere.
        stamp = timestamp ? Time.now.iso8601 : '*'

        lp = procs.add_child('<lp/>')
        lp.attr(timestamp: stamp, version: version, name: name)
        lp
      end

      ##
      # Appends a +wf+ (word form) element to the +text+ element, creating the
      # latter under the document root when missing.
      #
      # @param params [Hash] reads +:text+ (element content), +:wid+ (stored as
      #   "w<wid>"), +:sid+ (stored as +sent+), +:para+, +:offset+, +:length+.
      # @return [Object] the node set holding the new +wf+ element.
      #
      def add_word_form(params)
        text = @document.at('text') || @document.root.add_child('<text/>').first

        # NOTE(review): params[:text] is interpolated into markup unescaped —
        # confirm callers pass XML-safe text.
        wf = text.add_child("<wf>#{params[:text]}</wf>")
        wf.attr(
          wid: "w#{params[:wid]}",
          sent: params[:sid],
          para: params[:para],
          offset: params[:offset],
          length: params[:length]
        )
      end

      ##
      # Appends a +term+ element to the +terms+ element, creating the latter
      # under the document root when missing.
      #
      # @param params [Hash] reads +:tid+ (stored as "t<tid>"), +:type+,
      #   +:lemma+, +:text+, +:pos+ and +:morphofeat+.
      # @return [Object] whatever the final add_child call returns.
      #
      def add_term(params)
        terms = @document.at('terms') || @document.root.add_child('<terms/>').first
        term = terms.add_child('<term/>')
        term.attr(
          tid: "t#{params[:tid]}",
          type: params[:type],
          lemma: params[:lemma],
          text: params[:text],
          pos: params[:pos],
          morphofeat: params[:morphofeat]
        )

        # NOTE(review): this fragment is empty, so nothing is added below the
        # term. Presumably a child element referencing the word form(s) was
        # intended — its content cannot be told from this file; confirm.
        term.first.add_child("")
      end

      ##
      # Serializes the wrapped document with an indent of two.
      #
      # @return [String]
      #
      def to_xml
        @document.to_xml(indent: 2)
      end

      protected

      ##
      # Runs the XPath +query+ and wraps each matching node as
      # <tt>wrapper.new(self, node)</tt>.
      #
      def collection(query, wrapper)
        @document.xpath(query).map { |node| wrapper.new(self, node) }
      end
    end
  end
end