// Copyright 2009 The Closure Library Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS-IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * @fileoverview The decompressor for Base88 compressed character lists.
 *
 * The compression is by base 88 encoding the delta between two adjacent
 * characters in the list. The deltas can be positive or negative. Also, there
 * would be character ranges. These three types of values
 * are given enum values 0, 1 and 2 respectively. Initial 3 bits are used for
 * encoding the type and total length of the encoded value. Length enums 0, 1
 * and 2 represents lengths 1, 2 and 4. So (value * 8 + type * 3 + length enum)
 * is encoded in base 88 by following characters for numbers from 0 to 87:
 * 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ (continued in next line)
 * abcdefghijklmnopqrstuvwxyz!#$%()*+,-.:;<=>?@[]^_`{|}~
 *
 * Value uses 0 based counting. That is value for the range [a, b] is 0 and
 * that of [a, c] is 1. Similarly, the delta of "ab" is 0.
 *
 * Following python script can be used to compress character lists taken
 * from standard input: http://go/charlistcompressor.py
 *
 */

goog.provide('goog.i18n.CharListDecompressor');

goog.require('goog.array');
goog.require('goog.i18n.uChar');



/**
 * Class to decompress base88 compressed character list.
* @constructor */ goog.i18n.CharListDecompressor = function() { this.buildCharMap_('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqr' + 'stuvwxyz!#$%()*+,-.:;<=>?@[]^_`{|}~'); }; /** * 1-1 mapping from ascii characters used in encoding to an integer in the * range 0 to 87. * @type {Object} * @private */ goog.i18n.CharListDecompressor.prototype.charMap_ = null; /** * Builds the map from ascii characters used for the base88 scheme to number * each character represents. * @param {string} str The string of characters used in base88 scheme. * @private */ goog.i18n.CharListDecompressor.prototype.buildCharMap_ = function(str) { if (!this.charMap_) { this.charMap_ = {}; for (var i = 0; i < str.length; i++) { this.charMap_[str.charAt(i)] = i; } } }; /** * Gets the number encoded in base88 scheme by a substring of given length * and placed at the a given position of the string. * @param {string} str String containing sequence of characters encoding a * number in base 88 scheme. * @param {number} start Starting position of substring encoding the number. * @param {number} leng Length of the substring encoding the number. * @return {number} The encoded number. * @private */ goog.i18n.CharListDecompressor.prototype.getCodeAt_ = function(str, start, leng) { var result = 0; for (var i = 0; i < leng; i++) { var c = this.charMap_[str.charAt(start + i)]; result += c * Math.pow(88, i); } return result; }; /** * Add character(s) specified by the value and type to given list and return * the next character in the sequence. * @param {Array.} list The list of characters to which the specified * characters are appended. * @param {number} lastcode The last codepoint that was added to the list. * @param {number} value The value component that representing the delta or * range. * @param {number} type The type component that representing whether the value * is a positive or negative delta or range. * @return {number} Last codepoint that is added to the list. 
* @private */ goog.i18n.CharListDecompressor.prototype.addChars_ = function(list, lastcode, value, type) { if (type == 0) { lastcode += value + 1; goog.array.extend(list, goog.i18n.uChar.fromCharCode(lastcode)); } else if (type == 1) { lastcode -= value + 1; goog.array.extend(list, goog.i18n.uChar.fromCharCode(lastcode)); } else if (type == 2) { for (var i = 0; i <= value; i++) { lastcode++; goog.array.extend(list, goog.i18n.uChar.fromCharCode(lastcode)); } } return lastcode; }; /** * Gets the list of characters specified in the given string by base 88 scheme. * @param {string} str The string encoding character list. * @return {Array.} The list of characters specified by the given string * in base 88 scheme. */ goog.i18n.CharListDecompressor.prototype.toCharList = function(str) { var metasize = 8; var result = []; var lastcode = 0; var i = 0; while (i < str.length) { var c = this.charMap_[str.charAt(i)]; var meta = c % metasize; var type = Math.floor(meta / 3); var leng = (meta % 3) + 1; if (leng == 3) { leng++; } var code = this.getCodeAt_(str, i, leng); var value = Math.floor(code / metasize); lastcode = this.addChars_(result, lastcode, value, type); i += leng; } return result; };