Sha256: 688bdcaaa578ff43d4a37f7f8738fba50d7925c77114154d34319d0853d8821d

Contents?: true

Size: 1.68 KB

Versions: 2

Compression:

Stored size: 1.68 KB

Contents

require 'rbbt/util/misc'
require 'tokyocabinet'

class Corpus
  module DocumentRepo
    class OpenError < StandardError;end
    class KeyFormatError < StandardError;end

    TC_CONNECTIONS = {}
    def self.open_tokyocabinet(path, write)
      database = Persist.open_tokyocabinet(path, write, :single, TokyoCabinet::BDB)
      database.extend DocumentRepo
      database
    end

    def docid2fields(docid)
      docid.split(":", -1).values_at 0,1,2,3
    end

    def fields2docid(namespace = nil, id = nil, type = nil, hash = nil)
      [namespace, id, type, hash] * ":"
    end

    def docid(docid)
      get(docid)
    end

    def add(text, namespace, id, type, hash)
      docid = fields2docid(namespace, id, type, hash)

      return docid if self.include?(docid)

      write_and_close do
        self[docid] = text
      end

      docid
    end

    def find(namespace=nil, id = nil, type = nil, hash = nil)
      case
      when namespace.nil?
        self.keys
      when id.nil?
        range_start = [namespace] * ":" + ':'
        range_end   = [namespace] * ":" + ';'
        self.range(range_start, true, range_end, false)
      when (type and hash)
        [[namespace, id, type, hash] * ":"]
      when hash
        [[namespace, id, "", hash] * ":"]
      when type
        range_start = [namespace, id, type] * ":" + ':'
        range_end   = [namespace, id, type] * ":" + ';'
        self.range(range_start, true, range_end, false)
      else
        range_start = [namespace, id] * ":" + ':'
        range_end   = [namespace, id] * ":" + ';'
        self.range(range_start, true, range_end, false)
      end
    end

    def find_docid(docid)
      find(*docid2fields(docid))
    end

  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
rbbt-text-1.2.0 lib/rbbt/text/corpus/document_repo.rb
rbbt-text-1.1.9 lib/rbbt/text/corpus/document_repo.rb