Sha256: 3727e759f5a3f6808d8d4b90a47d516496292eaa62891b9153fb0539c621d372

Contents?: true

Size: 1.56 KB

Versions: 2

Compression:

Stored size: 1.56 KB

Contents

require 'rbbt-util'

module Document::Corpus

  def self.setup(corpus)
    corpus = Persist.open_tokyocabinet(corpus, true, :single, "BDB") if String === corpus
    corpus.extend Document::Corpus unless Document::Corpus === corpus
    corpus.extend Persist::TSVAdapter unless Persist::TSVAdapter === corpus
    corpus.close
    corpus
  end

  def add_document(document)
    docid = document.docid
    return self[docid] if self.include?(docid)
    self.write_and_close do
      self[docid] = document
    end
  end
  
  def docids(*prefix)
    prefix = prefix * ":" 
    prefix += ":" unless prefix == :all || prefix == "all" || prefix[-1] == ":"
    docids = self.read_and_close do
      prefix == "all" ? self.keys : self.prefix(prefix)
    end
    DocID.setup(docids, :corpus => self)
  end

  def documents(*prefix)
    self.docids(*prefix).document
  end

  def [](*args)
    docid, *rest = args

    res = self.with_read do
      super(*args)
    end
    
    res.force_encoding(Encoding.default_external) if res 
    return res if args.length > 1

    namespace, id, type  = docid.split(":")

    if res.nil?
      if Document::Corpus.claims && Document::Corpus.claims.include?(namespace.to_s)
        res = self.instance_exec(id, type, &Document::Corpus.claims[namespace.to_s])
      end
    end

    res.force_encoding(Encoding.default_external) if res 
    Document.setup(res, namespace, id, type, self) unless res.nil?
    
    res
  end

  class << self
    attr_accessor :claims
    def claim(namespace, &block)
      @claims = {}
      @claims[namespace.to_s] = block
    end
  end

end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
rbbt-text-1.3.8 lib/rbbt/document/corpus.rb
rbbt-text-1.3.7 lib/rbbt/document/corpus.rb