Sha256: 60dea2e176273b109fa7b1b08b87916ba68f31e6a7cd83dcd0a0a1d99f48520d
Contents?: true
Size: 1.9 KB
Versions: 1
Compression:
Stored size: 1.9 KB
Contents
require 'digest/md5'
require 'rbbt/corpus/document'
require 'rbbt/corpus/document_repo'

# A Corpus ties together a document repository and the annotation
# persistence area found under a single corpora path, and hands out
# Document objects built from them.
class Corpus
  attr_accessor :corpora_path, :document_repo, :persistence_dir, :global_annotations

  # Sets up the repository and annotation storage for a corpus.
  #
  # corpora_path - location of the corpus:
  #                * nil: Rbbt.corpora is used;
  #                * a Resource::Path: used as given;
  #                * anything else: wrapped with Resource::Path.path.
  def initialize(corpora_path = nil)
    @corpora_path = case corpora_path
                    when nil then Rbbt.corpora
                    when Resource::Path then corpora_path
                    else Resource::Path.path(corpora_path)
                    end

    # NOTE(review): the meaning of the second argument (false) is defined by
    # DocumentRepo.get, which is not visible here -- confirm before changing.
    @document_repo = DocumentRepo.get @corpora_path.document_repo, false
    @persistence_dir = File.join(@corpora_path, "annotations")

    # Corpus-wide annotation table, keyed by "ID" and backed by a TCHash
    # stored at <persistence_dir>/global_annotations.
    @global_annotations = TSV.new(
      TCHash.get(File.join(@persistence_dir, "global_annotations"), :list),
      :list,
      :key => "ID",
      :fields => ["Start", "End", "Info", "Document ID", "Entity Type"]
    )
    @global_annotations.unnamed = true
  end

  # Returns the path under persistence_dir used for the given document id.
  def persistence_for(docid)
    File.join(persistence_dir, docid)
  end

  # Returns the Document identified by the four id components, which are
  # joined with ":" to form the document id.
  def document(namespace, id, type, hash)
    build_document([namespace, id, type, hash] * ":")
  end

  # Returns the Document for an already assembled document id.
  def docid(docid)
    build_document(docid)
  end

  # Adds +text+ to the document repository under namespace/id/type, using
  # the MD5 hex digest of the text as the hash component. Returns whatever
  # the repository's #add returns.
  def add_document(text, namespace, id, type = nil)
    hash = Digest::MD5.hexdigest(text)
    @document_repo.add(text, namespace, id, type, hash)
  end

  # Returns a Document for every document id the repository's #find yields
  # for the given (optional) id components.
  def find(namespace=nil, id = nil, type = nil, hash = nil)
    @document_repo.find(namespace, id, type, hash).map { |found| build_document(found) }
  end

  # Returns a Document for every document id the repository's #find_docid
  # yields for +docid+.
  def find_docid(docid)
    # The block variable is deliberately not called +docid+, so it does not
    # shadow the method parameter.
    @document_repo.find_docid(docid).map { |found| build_document(found) }
  end

  # True when #find returns at least one document for the given components.
  def exists?(namespace=nil, id = nil, type = nil, hash = nil)
    find(namespace, id, type, hash).any?
  end

  private

  # Builds the Document for +docid+ from its persistence path, its text in
  # the repository and the shared global annotations table.
  def build_document(docid)
    Document.new(persistence_for(docid), docid, @document_repo[docid], @global_annotations)
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
rbbt-text-0.5.0 | lib/rbbt/corpus/corpus.rb |