#!/usr/bin/env ruby -w
# encoding: UTF-8

#--
#
# Simple Declarative Language (SDL) for Ruby
# Copyright 2005 Ikayzo, inc.
#
# This program is free software. You can distribute or modify it under the
# terms of the GNU Lesser General Public License version 2.1 as published by
# the Free Software Foundation.
#
# This program is distributed AS IS and WITHOUT WARRANTY. OF ANY KIND,
# INCLUDING MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, contact the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#++

module SDL4R

  require 'csv'
  require 'thread'

  if __FILE__ == $PROGRAM_NAME
    require 'rubygems'
    require 'tzinfo'
    require 'ostruct'
  end

  require 'sdl4r/constant_timezone'

  # Gathers an index database of time zone abbreviations (e.g. "PST", "JST").
  #
  # For each abbreviation, there are 4 basic cases:
  # - no ambiguity: the abbreviation is always used with the same offsets (even if in different
  #   countries).
  # - time ambiguity: the abbreviation has been used in the same places but with different offsets
  #   at different times. In this case, we base the timezone on one of the geographical time zones.
  # - modern time ambiguity: the abbreviation has been used in different places but only before
  #   1970. It's been stable since. As it is the case of widely used abbreviations like "CET", a
  #   modernly-used geographical timezone should be used as for the "time ambiguity" case, at least
  #   by default.
  # - location ambiguity: the abbreviation has been used in different places. In this case, there
  #   is no way to tell, which place is the right one and therefore an error should be raised.
  #
  # Note that 'utc_offset' and 'std_offset' of Record are meaningful only if the abbreviation is
  # not ambiguous.
  #
  class TZAbbreviationDB # :nodoc:

    # One entry of the abbreviation index: an abbreviation, its offsets, how ambiguous it is
    # (see #annotation) and the identifiers of the zones that use it.
    #
    class Record
      attr_reader :identifier, :utc_offset, :std_offset, :use
      attr_accessor :linked_zone_ids

      # One of:
      # :not_ambiguous
      # :time_ambiguous
      # :modern_time_ambiguous
      # :too_ambiguous
      #
      attr_accessor :annotation

      def initialize(identifier, utc_offset, std_offset, annotation, use, linked_zone_ids)
        @identifier = identifier
        @utc_offset = utc_offset
        @std_offset = std_offset
        @annotation = annotation
        @use = use
        @linked_zone_ids = linked_zone_ids
      end
    end

    DB_FILENAME = File.dirname(__FILE__) + "/tz_abbreviation_db.csv"

    @@index = nil
    @@index_mutex = Mutex.new

    # Returns the timezone designated by 'identifier'.
    #
    # TZInfo is asked first. If it rejects the identifier, the abbreviation index is consulted:
    # - a :not_ambiguous abbreviation gives a ConstantTimezone built from the record's offsets,
    # - a :time_ambiguous abbreviation (or a :modern_time_ambiguous one when
    #   'consider_modern_abbreviations' is true) gives a RelativeTimezone based on the first
    #   linked zone of the record.
    #
    # Raises the TZInfo::InvalidTimezoneIdentifier originally raised by TZInfo if the identifier
    # could not be resolved through the abbreviation index either.
    #
    def self.get_timezone(identifier, consider_modern_abbreviations = true)
      tz = nil

      begin
        tz = TZInfo::Timezone.get(identifier)

      rescue TZInfo::InvalidTimezoneIdentifier => error1
        # check whether we can find an abbreviation
        abbreviation = TZAbbreviationDB.get_record(identifier)

        if abbreviation
          annotation = abbreviation.annotation

          if annotation == :not_ambiguous
            tz = ConstantTimezone.new(
              abbreviation.identifier, abbreviation.utc_offset, abbreviation.std_offset)

          elsif annotation == :time_ambiguous ||
              (annotation == :modern_time_ambiguous && consider_modern_abbreviations)
            begin
              base_tz = TZInfo::Timezone.get(abbreviation.linked_zone_ids[0])
              tz = RelativeTimezone.new(abbreviation.identifier, "", 0, base_tz)
            rescue TZInfo::InvalidTimezoneIdentifier
              # The linked zone is unknown to the installed TZInfo: report the original error.
              tz = nil
            end
          end
        end

        raise error1 if tz.nil?
      end

      tz
    end

    # Returns an AbbreviationTimezoneProxy created with the given parameters.
    #
    def self.get_timezone_proxy(identifier, consider_modern_abbreviations = true)
      AbbreviationTimezoneProxy.new(identifier, consider_modern_abbreviations)
    end

    # Returns the Record corresponding to the specified identifier or nil if not found.
    # Be sure to check the 'annotation' property before using the data of the record.
    #
    # The index is loaded from the CSV file on first use, under a mutex.
    #
    def self.get_record(identifier)
      @@index_mutex.synchronize do
        load_file unless @@index
      end

      @@index[identifier]
    end

    # Loads the CSV file (#DB_FILENAME) into memory.
    #
    # Columns: identifier, utc_offset, std_offset, annotation, use, then the linked zone ids.
    #
    def self.load_file
      index = {}

      CSV.foreach(DB_FILENAME) do |row|
        record = Record.new(
          row[0], row[1].to_i, row[2].to_i, row[3].to_sym, row[4].to_sym, row[5..-1].sort!)
        index[record.identifier] = record
      end

      @@index = index
    end

    # Turns a raw index (identifier => Array of Records) into an index holding one annotated
    # Record per identifier.
    #
    def self.clean_raw_record_index(raw_index)
      index = {}

      # Clean up and annotate the ambiguous cases
      raw_index.each_pair do |_identifier, records|
        if records.length == 1
          record = records[0]
          record.annotation = :not_ambiguous

        else
          # check whether locations differ
          record = create_ambiguous_record(records, nil, :time_ambiguous)

          if record.annotation == :too_ambiguous
            # Second chance: only look at the records still in use in modern times.
            salvaged_record = create_ambiguous_record(records, :modern, :modern_time_ambiguous)
            record = salvaged_record if salvaged_record
          end
        end

        index[record.identifier] = record
      end

      index
    end

    # Check the locations of the records and create a corresponding record annotated as ambiguous
    # according to its level: either 'default_annotation' or :too_ambiguous if the locations where
    # the abbreviation is used differ along time (for the given 'use').
    #
    # The returned record is a copy of the first considered record: the records passed in are
    # left untouched, so that this method can be called several times on the same records (as
    # #clean_raw_record_index does) without one pass altering the input of the next one.
    #
    # Returns nil if no record matches 'use'.
    #
    # _use_:: indicates the only kind of use considered or nil for all of them
    # _default_annotation_:: annotation to set if locations do not differ
    #
    def self.create_ambiguous_record(records, use, default_annotation)
      records = records.select { |item| item.use == use } if use
      return nil if records.empty?

      # dup is shallow, which is enough: both modified attributes are reassigned below, never
      # changed in place.
      record = records[0].dup

      reference_locations = records[0].linked_zone_ids
      locations_differ = records.any? { |item| item.linked_zone_ids != reference_locations }
      record.annotation = locations_differ ? :too_ambiguous : default_annotation

      # The resulting record is linked to every zone used by the considered records.
      record.linked_zone_ids = records.map { |item| item.linked_zone_ids }.flatten.uniq.sort

      record
    end

    # Creates a CSV index file of abbreviations and their corresponding offsets and unambiguous
    # corresponding zones.
    #
    # This method relies on unpublished internals of TZInfo. Therefore, it might easily break in
    # the future.
    #
    def self.generate_file
      abbreviation_index = {}

      TZInfo::Timezone.all_data_zone_identifiers.each do |tz_id|
        tz = TZInfo::Timezone.get(tz_id)
        info = tz.instance_variable_get(:@info)
        offsets = info.instance_variable_get(:@offsets)
        transitions = info.instance_variable_get(:@transitions)
        next unless offsets

        offsets.each_value do |offset|
          abbreviation = offset.abbreviation.id2name
          # Skip the offsets whose abbreviation is the identifier of the zone itself.
          next if tz.identifier == abbreviation

          offset_key = [abbreviation, offset.utc_offset, offset.std_offset]
          abbreviation_index[offset_key] ||=
            OpenStruct.new(:timezones => [], :use => :historical)
          offset_record = abbreviation_index[offset_key]

          unless offset_record.timezones.include?(tz.identifier)
            offset_record.timezones << tz.identifier
          end

          # Find the last use of that offset: walking backwards, 'following_transition' is the
          # transition that ended the period during which the offset was in use (nil if the
          # offset is the one of the last transition).
          following_transition = nil
          transitions.reverse_each do |transition|
            if offset == transition.offset
              if following_transition.nil? || following_transition.at.year >= 1970
                offset_record.use = :modern
              end
              break
            end
            following_transition = transition
          end
        end
      end

      # Create a raw index of Records keyed by identifiers
      raw_record_index = {}
      abbreviation_index.each_pair do |offset, item|
        record = Record.new(offset[0], offset[1], offset[2], nil, item.use, item.timezones)
        raw_record_index[record.identifier] ||= []
        raw_record_index[record.identifier] << record
      end

      record_index = clean_raw_record_index(raw_record_index)
      sorted = record_index.sort { |a, b| a[0] <=> b[0] }

      CSV.open(DB_FILENAME, "w") do |writer|
        sorted.each do |_identifier, record|
          writer << [
            record.identifier,
            record.utc_offset,
            record.std_offset,
            record.annotation,
            record.use] + record.linked_zone_ids
        end
      end
    end

  end

  if __FILE__ == $PROGRAM_NAME
    TZAbbreviationDB.generate_file
  end

end