=begin
    mo.rb - A simple class for operating GNU MO file.

    Copyright (C) 2003-2006  Masao Mutoh
    Copyright (C) 2002  Masahiro Sakai, Masao Mutoh
    Copyright (C) 2001  Masahiro Sakai

        Masahiro Sakai
        Masao Mutoh

    You can redistribute this file and/or modify it under the same term
    of Ruby.  License of Ruby is included with Ruby distribution in
    the file "README".

    $Id: mo.rb,v 1.4 2006/04/29 17:17:15 mutoh Exp $
=end

# The converter is only used when a MOFile is created with an output charset,
# so its absence must not prevent plain reading and writing of MO files.
begin
  require 'gettext/iconv'
rescue LoadError
  # Charset conversion falls back to String#encode where it is available.
end

# A Hash of "original message" => "translated message" pairs that can be
# loaded from and saved to the GNU MO binary catalog format.
class MOFile < Hash #:nodoc:
  # Raised when a stream does not contain a supported, complete MO catalog.
  class InvalidFormat < RuntimeError; end

  # The seven header fields of a MO file, in on-disk order.
  Header = Struct.new(:magic, :revision, :nstrings, :orig_table_offset,
                      :translated_table_offset, :hash_table_size,
                      :hash_table_offset)

  # Built with pack so the constants are binary strings; IO#read(n) returns
  # binary data, and source-encoded literals with these bytes would never
  # compare equal to it on Ruby 1.9 and later.
  MAGIC_BIG_ENDIAN    = [0x95, 0x04, 0x12, 0xde].pack('C4')
  MAGIC_LITTLE_ENDIAN = [0xde, 0x12, 0x04, 0x95].pack('C4')

  # Word size used by hash_string.
  HASHWORDBITS = 32

  attr_accessor :little_endian, :path, :last_modified
  attr_reader :charset, :nplurals, :plural

  # Creates a MOFile and loads +arg+ (a file name or an IO) into it.
  # Returns the MOFile.
  def self.open(arg = nil, output_charset = nil)
    result = new(output_charset)
    result.load(arg)
  end

  # +output_charset+:: charset the translated messages are converted to while
  #                    loading; nil leaves them as stored in the file.
  def initialize(output_charset = nil)
    @filename = nil
    @last_modified = Time.now
    @little_endian = true
    @output_charset = output_charset
    super()
  end

  # Reloads the catalog when the ctime of the remembered file differs from the
  # one recorded at load time; empties the catalog when the file is gone.
  # Returns self.
  def update!
    if FileTest.exist?(@filename)
      load(@filename) unless @last_modified == File.ctime(@filename)
    else
      puts "#{@filename} was lost." if $DEBUG
      clear
    end
    self
  end

  # Loads the catalog from +arg+: a String is treated as a file name, an IO is
  # read directly, anything else is ignored. +arg+ is remembered for update!.
  # Returns self.
  def load(arg)
    case arg
    when String
      begin
        @last_modified = File.ctime(arg)
      rescue StandardError
        # Best effort only: load_from_file below reports an unreadable file.
      end
      load_from_file(arg)
    when IO
      load_from_stream(arg)
    end
    @filename = arg
    self
  end

  # Replaces the contents of this Hash with the catalog read from +io+, which
  # must support read and pos=. The entry with the empty msgid supplies
  # charset, nplurals and plural; every other translation is converted to the
  # output charset when one was given to the constructor.
  # Raises InvalidFormat for an unknown signature, an unsupported revision or
  # a truncated header/table. Returns self.
  def load_from_stream(io)
    magic = io.read(4)
    case magic
    when MAGIC_BIG_ENDIAN
      @little_endian = false
    when MAGIC_LITTLE_ENDIAN
      @little_endian = true
    else
      # magic is nil for an empty stream, hence to_s.
      raise InvalidFormat.new(sprintf("Unknown signature %s", magic.to_s.dump))
    end

    raw_header = io.read(4 * 6)
    if raw_header.nil? || byte_length(raw_header) < 4 * 6
      raise InvalidFormat.new("Truncated header")
    end
    header = Header.new(magic, *raw_header.unpack(@little_endian ? 'V6' : 'N6'))
    if header.revision > 0
      raise InvalidFormat.new(sprintf("file format revision %d isn't supported", header.revision))
    end

    # Each table holds nstrings (length, offset) pairs.
    orig_table_data = read_offset_table(io, header.orig_table_offset, header.nstrings)
    trans_table_data = read_offset_table(io, header.translated_table_offset, header.nstrings)

    original_strings = Array.new(header.nstrings) do |i|
      io.pos = orig_table_data[i * 2 + 1]
      io.read(orig_table_data[i * 2])
    end

    clear
    header.nstrings.times do |i|
      io.pos = trans_table_data[i * 2 + 1]
      str = io.read(trans_table_data[i * 2])
      msgid = original_strings[i]

      if msgid == ""
        parse_meta_information(str) if str
      else
        str = convert_to_output_charset(str, msgid)
      end
      self[msgid] = str
    end
    self
  end

  # From gettext-0.12.1/gettext-tools/lib/hash.c
  # Trial division by 3, 5, 7, ... up to the square root of +candidate+;
  # even divisors are never tried.
  def prime?(candidate)
    divn = 3
    sq = divn * divn

    while sq < candidate && candidate % divn != 0
      divn += 1
      sq += 4 * divn # keeps sq equal to the square of the next odd divisor
      divn += 1
    end

    candidate % divn != 0
  end

  # From gettext-0.12.1/gettext-tools/lib/hash.c
  # Returns the first odd number at or above +seed+ that prime? accepts.
  def next_prime(seed)
    seed |= 1
    seed += 2 until prime?(seed)
    seed
  end

  # From gettext-0.12.1/gettext-runtime/intl/hash-string.h
  # Defines the so called `hashpjw' function by P.J. Weinberger
  # [see Aho/Sethi/Ullman, COMPILERS: Principles, Techniques and Tools,
  # 1986, 1987 Bell Telephone Laboratories, Inc.]
  #
  # Hashing stops at the first NUL byte, so a plural msgid
  # ("singular\0plural") hashes like its singular form.
  def hash_string(str)
    hval = 0
    str.each_byte do |b|
      break if b == 0 # each_byte yields Integers
      hval <<= 4
      hval += b
      g = hval & (0xf << (HASHWORDBITS - 4))
      if g != 0
        hval ^= g >> (HASHWORDBITS - 8)
        hval ^= g
      end
    end
    hval
  end

  # Writes the catalog to +io+ in little endian MO format: header, original
  # string table, translation table, hash table, then the NUL-terminated
  # original and translated strings. Returns self.
  def save_to_stream(io)
    # Save data as little endian format.
    header_size = 4 * 7
    table_size  = 4 * 2 * size

    hash_table_size = next_prime((size * 4) / 3)
    hash_table_size = 3 if hash_table_size <= 2
    header = Header.new(
      MAGIC_LITTLE_ENDIAN,          # magic
      0,                            # revision
      size,                         # nstrings
      header_size,                  # orig_table_offset
      header_size + table_size,     # translated_table_offset
      hash_table_size,              # hash_table_size
      header_size + table_size * 2  # hash_table_offset
    )
    io.write(header.to_a.pack('a4V*'))

    entries = to_a.sort { |a, b| a[0] <=> b[0] } # sort by original string

    # String data starts right after the hash table. Lengths and offsets are
    # byte counts, not character counts.
    pos = header.hash_table_size * 4 + header.hash_table_offset

    orig_table_data = []
    entries.each do |msgid, _|
      length = byte_length(msgid)
      orig_table_data.push(length, pos)
      pos += length + 1 # +1 for the terminating NUL
    end
    io.write(orig_table_data.pack('V*'))

    trans_table_data = []
    entries.each do |_, msgstr|
      length = byte_length(msgstr)
      trans_table_data.push(length, pos)
      pos += length + 1 # +1 for the terminating NUL
    end
    io.write(trans_table_data.pack('V*'))

    # Open addressing with a key-dependent step; slots hold index + 1 so that
    # 0 can mark an empty slot.
    hash_tab = Array.new(hash_table_size)
    entries.each_with_index do |(msgid, _), index|
      hash_val = hash_string(msgid)
      idx = hash_val % hash_table_size
      if hash_tab[idx]
        incr = 1 + (hash_val % (hash_table_size - 2))
        loop do
          if idx >= hash_table_size - incr
            idx -= hash_table_size - incr
          else
            idx += incr
          end
          break if hash_tab[idx].nil?
        end
      end
      hash_tab[idx] = index + 1
    end
    io.write(hash_tab.map { |slot| slot || 0 }.pack('V*'))

    entries.each do |msgid, _|
      io.write(msgid)
      io.write("\0")
    end
    entries.each do |_, msgstr|
      io.write(msgstr)
      io.write("\0")
    end

    self
  end

  # Reads the catalog from the file +filename+ (opened in binary mode).
  def load_from_file(filename)
    File.open(filename, 'rb') { |f| load_from_stream(f) }
  end

  # Writes the catalog to the file +filename+ (opened in binary mode).
  def save_to_file(filename)
    File.open(filename, 'wb') { |f| save_to_stream(f) }
  end

  def set_comment(msgid_or_sym, comment)
    # Do nothing
  end

  private

  # Number of bytes in +str+; String#size counts characters on Ruby >= 1.9.
  def byte_length(str)
    str.respond_to?(:bytesize) ? str.bytesize : str.size
  end

  # Reads +nstrings+ (length, offset) pairs stored at +offset+ and returns
  # them as a flat Array of Integers. Raises InvalidFormat if the stream ends
  # before the table is complete.
  def read_offset_table(io, offset, nstrings)
    io.pos = offset
    data = io.read((4 * 2) * nstrings)
    words = data ? data.unpack(@little_endian ? 'V*' : 'N*') : []
    raise InvalidFormat.new("Truncated string table") if words.size < nstrings * 2
    words
  end

  # Extracts charset, nplurals and plural from the translation of the empty
  # msgid. nplurals and plural default to "1" and "0" when not declared.
  def parse_meta_information(str)
    @charset = nil
    @nplurals = nil
    @plural = nil
    str.each_line do |line|
      if /^Content-Type:/i =~ line and /charset=((?:\w|-)+)/i =~ line
        @charset = Regexp.last_match(1)
      elsif /^Plural-Forms:\s*nplurals\s*\=\s*(\d*);\s*plural\s*\=\s*([^;]*)\n?/ =~ line
        @nplurals = Regexp.last_match(1)
        @plural = Regexp.last_match(2)
      end
      break if @charset and @nplurals
    end
    @nplurals = "1" unless @nplurals
    @plural = "0" unless @plural
  end

  # Returns +str+ converted from the catalog charset to the output charset.
  # +str+ is returned unchanged when either charset is unknown, when no
  # converter is available, or when the conversion fails.
  def convert_to_output_charset(str, msgid)
    return str unless @output_charset && @charset && str

    if defined?(::Iconv)
      begin
        Iconv.iconv(@output_charset, @charset, str).join
      rescue Iconv::Failure
        report_conversion_failure(msgid, str)
        str
      end
    elsif str.respond_to?(:encode)
      begin
        # The data was read as binary; tag it with its real charset first.
        str.dup.force_encoding(@charset).encode(@output_charset)
      rescue EncodingError, ArgumentError
        report_conversion_failure(msgid, str)
        str
      end
    else
      str
    end
  end

  # Prints the details of a failed conversion to $stderr when $DEBUG is set.
  def report_conversion_failure(msgid, str)
    return unless $DEBUG

    $stderr.print "@charset = ", @charset, "\n"
    $stderr.print "@output_charset = ", @output_charset, "\n"
    $stderr.print "msgid = ", msgid, "\n"
    $stderr.print "msgstr = ", str, "\n"
  end
end

# Test

if $0 == __FILE__
  if (ARGV.include? "-h") or (ARGV.include? "--help")
    STDERR.puts("mo.rb [filename.mo ...]")
    exit
  end

  ARGV.each do |item|
    mo = MOFile.open(item)
    puts "------------------------------------------------------------------"
    puts "charset  = \"#{mo.charset}\""
    puts "nplurals = \"#{mo.nplurals}\""
    puts "plural   = \"#{mo.plural}\""
    puts "------------------------------------------------------------------"
    mo.each do |key, value|
      puts "original message = \"#{key}\""
      puts "translated message = \"#{value}\""
      puts "--------------------------------------------------------------------"
    end
  end
end