# -*- coding: utf-8 -*-
#
# Copyright (C) 2011  Miho SUZUKI
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

require 'groonga'
require 'cgi'
require 'fileutils'

module Logaling
  # Thin wrapper around the Groonga database file "logaling.db" that stores
  # glossaries, glossary sources and their translations, and offers lookup
  # queries over them.
  class GlossaryDB
    # Schema version. When the version stored in the "configurations" table
    # differs, #recreate_table drops and rebuilds every table.
    VERSION = 1

    # Markers wrapped around matched keywords in snippet output. They must
    # stay in sync with SNIPPET_SEGMENT / SNIPPET_KEYWORD below, otherwise
    # #struct_snipped_text can never recognize a keyword.
    SNIPPET_OPEN_TAG = "<snippet>"
    SNIPPET_CLOSE_TAG = "</snippet>"
    # Splits snipped text into tagged and untagged pieces (the capture group
    # keeps the tagged pieces in the split result).
    SNIPPET_SEGMENT = /(<snippet>[^<]*<\/snippet>)/
    # Extracts the keyword enclosed by the snippet tags.
    SNIPPET_KEYWORD = /<snippet>([^<]*)<\/snippet>/

    # Convenience constructor: builds an instance and opens it.
    # With a block, the instance is yielded and closed afterwards
    # (see #open); the return value is whatever #open returns.
    def self.open(base_path, encoding, &blk)
      # Forwarding a nil block with & is the same as passing no block.
      GlossaryDB.new.open(base_path, encoding, &blk)
    end

    def initialize
      @database = nil
    end

    # Opens "logaling.db" under base_path, creating the directory and an
    # empty database when the file does not exist yet.
    # With a block, yields self and closes the database when the block
    # finishes, even if it raises.
    def open(base_path, encoding)
      reset_context(encoding)
      path = File.join(base_path, "logaling.db")
      if File.exist?(path)
        @database = Groonga::Database.open(path)
      else
        FileUtils.mkdir_p(base_path)
        populate(path)
      end
      if block_given?
        begin
          yield(self)
        ensure
          close unless closed?
        end
      end
    end

    # Rebuilds the whole schema when the stored schema version differs from
    # VERSION (a missing "configurations" table counts as version 0).
    def recreate_table
      version = Groonga["configurations"] ? get_config("version") : 0
      if version.to_i != VERSION
        remove_schema
        populate_schema
        add_config("version", VERSION.to_s)
      end
    end

    # Closes the underlying database. Safe to call when the database was
    # never opened or has already been closed.
    def close
      @database.close unless closed?
      @database = nil
    end

    # Removes everything indexed for the given glossary: its translations,
    # the glossary entry and the glossary source entry.
    def deindex_glossary(glossary_name, glossary_source)
      delete_translations_by_glossary_source(glossary_source)
      delete_glossary(glossary_name)
      delete_glossary_source(glossary_source)
    end

    # Re-indexes a glossary from scratch: drops any previous index for it,
    # then registers the source, the glossary name and every term.
    # Each element of glossary is read through the 'source_term',
    # 'target_term' and 'note' keys.
    def index_glossary(glossary, glossary_name, glossary_source, source_language, target_language, indexed_at)
      deindex_glossary(glossary_name, glossary_source)

      add_glossary_source(glossary_source, indexed_at)
      add_glossary(glossary_name)
      glossary.each do |term|
        source_term = term['source_term']
        target_term = term['target_term']
        note = term['note']
        add_translation(glossary_name, glossary_source, source_language, target_language, source_term, target_term, note)
      end
    end

    # Searches translations whose source_term matches source_term.
    # When glossary_source is given, its source/target languages (if set)
    # narrow the search and records of its glossary get a score bonus so
    # they sort first. Returns an array of result hashes (see #struct_result)
    # including snipped source/target terms.
    def lookup(source_term, glossary_source=nil)
      records_selected = Groonga["translations"].select do |record|
        conditions = [record.source_term =~ source_term]
        if glossary_source
          if glossary_source.source_language
            conditions << (record.source_language =~ glossary_source.source_language)
          end
          if glossary_source.target_language
            conditions << (record.target_language =~ glossary_source.target_language)
          end
        end
        conditions
      end
      if glossary_source
        specified_glossary = records_selected.select do |record|
          record.glossary == glossary_source.glossary
        end
        # Boost hits that belong to the requested glossary.
        specified_glossary.each do |record|
          record.key._score += 10
        end
      end
      records = records_selected.sort([
        {:key=>"_score", :order=>'descending'},
        {:key=>"glossary", :order=>'ascending'},
        {:key=>"source_term", :order=>'ascending'},
        {:key=>"target_term", :order=>'ascending'}])

      snippet = build_snippet(records_selected)
      struct_result(records, snippet)
    ensure
      snippet.close if snippet
      records_selected.expression.close if records_selected
      specified_glossary.expression.close if specified_glossary
    end

    # Searches translations whose source_term or target_term matches
    # search_word. Exact source_term matches get a score bonus.
    # Returns an array of result hashes (see #struct_result) including
    # snipped source/target terms.
    def lookup_dictionary(search_word)
      records_selected_source = Groonga["translations"].select do |record|
        # NOTE(review): "* 2" presumably weights source_term matches twice
        # as heavily -- confirm against the rroonga match_target docs.
        target = record.match_target do |match_record|
          match_record.source_term * 2
        end
        target =~ search_word
      end
      completely_match = records_selected_source.select do |record|
        record.source_term == search_word
      end
      completely_match.each do |record|
        record.key._score += 10
      end

      records_selected_target = Groonga["translations"].select do |record|
        record.target_term =~ search_word
      end

      records_selected = records_selected_target.union!(records_selected_source)
      records = records_selected.sort([
        {:key=>"_score", :order=>'descending'},
        {:key=>"source_term", :order=>'ascending'},
        {:key=>"target_term", :order=>'ascending'}])

      snippet = build_snippet(records_selected)
      struct_result(records, snippet)
    ensure
      snippet.close if snippet
      records_selected.expression.close if records_selected
    end

    # Lists every translation of the glossary/language pair described by
    # glossary_source, ordered by source_term then target_term.
    def translation_list(glossary_source)
      records_raw = Groonga["translations"].select do |record|
        [
          record.glossary == glossary_source.glossary,
          record.source_language == glossary_source.source_language,
          record.target_language == glossary_source.target_language
        ]
      end
      records = records_raw.sort([
        {:key=>"source_term", :order=>'ascending'},
        {:key=>"target_term", :order=>'ascending'}])

      struct_result(records)
    ensure
      # Guarded so a failure inside select is not masked by a NoMethodError.
      records_raw.expression.close if records_raw
    end

    # Returns the translations of glossary whose source and target terms
    # both equal the given values.
    def get_bilingual_pair(source_term, target_term, glossary)
      records = Groonga["translations"].select do |record|
        [
          record.glossary == glossary,
          record.source_term == source_term,
          record.target_term == target_term
        ]
      end
      struct_result(records)
    ensure
      records.expression.close if records
    end

    # Same as #get_bilingual_pair, additionally requiring an equal note.
    def get_bilingual_pair_with_note(source_term, target_term, note, glossary)
      records = Groonga["translations"].select do |record|
        [
          record.glossary == glossary,
          record.source_term == source_term,
          record.target_term == target_term,
          record.note == note
        ]
      end
      struct_result(records)
    ensure
      records.expression.close if records
    end

    # True when glossary_source is registered with exactly this indexed_at.
    def glossary_source_exist?(glossary_source, indexed_at)
      glossary = Groonga["glossary_sources"].select do |record|
        [
          record.key == glossary_source,
          record.indexed_at == indexed_at
        ]
      end
      !glossary.size.zero?
    ensure
      glossary.expression.close if glossary
    end

    # Keys of every record in "glossary_sources", sorted ascending by _key.
    def get_all_glossary_source
      Groonga["glossary_sources"].sort([
        {:key=>"_key", :order=>'ascending'}
      ]).map{|record| record.key}
    end

    # Keys of every record in "glossaries", sorted ascending by _key.
    def get_all_glossary
      Groonga["glossaries"].sort([
        {:key=>"_key", :order=>'ascending'}
      ]).map{|record| record.key}
    end

    private
    # Deletes the "glossary_sources" records keyed by glossary_source.
    def delete_glossary_source(glossary_source)
      records = Groonga["glossary_sources"].select do |record|
        record.key == glossary_source
      end

      records.each do |record|
        record.key.delete
      end
    ensure
      records.expression.close if records
    end

    def add_glossary_source(glossary_source, indexed_at)
      Groonga["glossary_sources"].add(glossary_source, :indexed_at => indexed_at)
    end

    # Deletes the "glossaries" records keyed by glossary_name.
    def delete_glossary(glossary_name)
      records = Groonga["glossaries"].select do |record|
        record.key == glossary_name
      end

      records.each do |record|
        record.key.delete
      end
    ensure
      records.expression.close if records
    end

    def add_glossary(glossary_name)
      Groonga["glossaries"].add(glossary_name)
    end

    # Deletes every translation that was indexed from glossary_source.
    def delete_translations_by_glossary_source(glossary_source)
      records = Groonga["translations"].select do |record|
        record.glossary_source == glossary_source
      end

      records.each do |record|
        record.key.delete
      end
    ensure
      records.expression.close if records
    end

    def add_translation(glossary_name, glossary_source, source_language, target_language, source_term, target_term, note)
      Groonga["translations"].add(:glossary => glossary_name,
                                  :glossary_source => glossary_source,
                                  :source_language => source_language,
                                  :target_language => target_language,
                                  :source_term => source_term,
                                  :target_term => target_term,
                                  :note => note)
    end

    # Sets the encoding for Groonga contexts and discards the current
    # default context (it is assigned nil here).
    def reset_context(encoding)
      Groonga::Context.default_options = {:encoding => encoding}
      Groonga::Context.default = nil
    end

    # Creates a new, empty database file at path.
    def populate(path)
      @database = Groonga::Database.create(:path => path)
    end

    # Defines all tables: configurations, glossary_sources, glossaries,
    # translations, and the "terms" index over the translation terms.
    def populate_schema
      Groonga::Schema.define do |schema|
        schema.create_table("configurations") do |table|
          table.short_text("conf_key")
          table.text("conf_value")
        end

        schema.create_table("glossary_sources",
                            :type => :hash,
                            :key_type => "ShortText") do |table|
          table.time("indexed_at")
        end

        schema.create_table("glossaries",
                            :type => :hash,
                            :key_type => "ShortText") do |table|
        end

        schema.create_table("translations") do |table|
          table.reference("glossary", "glossaries")
          table.reference("glossary_source", "glossary_sources")
          table.short_text("source_language")
          table.short_text("target_language")
          table.short_text("source_term")
          table.text("target_term")
          table.text("note")
        end

        schema.create_table("terms",
                            :type => :patricia_trie,
                            :key_type => "ShortText",
                            :key_normalize => true,
                            :default_tokenizer => "TokenBigram") do |table|
          table.index("translations.source_term")
          table.index("translations.target_term")
        end
      end
    end

    # Removes every table that currently exists.
    def remove_schema
      Groonga::Schema.define do |schema|
        schema.remove_table("configurations") if Groonga["configurations"]
        schema.remove_table("translations") if Groonga["translations"]
        schema.remove_table("glossaries") if Groonga["glossaries"]
        schema.remove_table("glossary_sources") if Groonga["glossary_sources"]
        schema.remove_table("terms") if Groonga["terms"]
      end
    end

    def closed?
      @database.nil? || @database.closed?
    end

    # Builds a snippet object from the expression of a select result, using
    # the snippet tags that #struct_snipped_text knows how to parse.
    # The caller is responsible for closing the returned snippet.
    def build_snippet(records_selected)
      options = {:width => 100, :html_escape => true, :normalize => true}
      records_selected.expression.snippet([SNIPPET_OPEN_TAG, SNIPPET_CLOSE_TAG], options)
    end

    # Converts records into an array of plain hashes. The snipped_* entries
    # are empty arrays when no snippet is given.
    def struct_result(records, snippet=nil)
      records.map do |record|
        term = record.key
        snipped_source_term = snippet ? snip_source_term(term, snippet) : []
        snipped_target_term = snippet ? snip_target_term(term, snippet) : []
        {:glossary_name => term.glossary.key,
         :source_language => term.source_language,
         :target_language => term.target_language,
         :source_term => term.source_term,
         :snipped_source_term => snipped_source_term,
         :target_term => term.target_term,
         :snipped_target_term => snipped_target_term,
         :note => term.note || ''}
      end
    end

    # Splits snipped text into a list where plain text stays a String and
    # each tagged keyword becomes {:keyword => text}. HTML entities are
    # unescaped in both cases.
    # e.g. "a<snippet>b</snippet>c" => ["a", {:keyword => "b"}, "c"]
    def struct_snipped_text(snipped_text)
      return [] if snipped_text.empty?
      snipped_text.split(SNIPPET_SEGMENT).map do |word|
        keyword = word[SNIPPET_KEYWORD, 1]
        if keyword
          {:keyword => CGI.unescapeHTML(keyword)}
        else
          CGI.unescapeHTML(word)
        end
      end
    end

    def snip_source_term(term, snippet)
      snipped_text = snippet.execute(term.source_term).join
      struct_snipped_text(snipped_text)
    end

    def snip_target_term(term, snippet)
      snipped_text = snippet.execute(term.target_term).join
      struct_snipped_text(snipped_text)
    end

    # Returns the conf_value of the first "configurations" record whose
    # conf_key matches, or "" when there is none.
    def get_config(conf_key)
      records = Groonga["configurations"].select do |record|
        record.conf_key == conf_key
      end
      value = records.map do |record|
        config = record.key
        config.conf_value
      end
      value.empty? ? "" : value.first
    ensure
      records.expression.close if records
    end

    def add_config(conf_key, conf_value)
      Groonga["configurations"].add(:conf_key => conf_key, :conf_value => conf_value)
    end
  end
end