# encoding: UTF-8

# Copyright 2012 Twitter, Inc
# http://www.apache.org/licenses/LICENSE-2.0

require 'spec_helper'

include TwitterCldr::Tokenizers

# Checks the token lists produced when the tokenizer handed out by a
# DateDataReader is run over that same reader's pattern.
describe DateTokenizer do
  # Expected attributes of a token of type :pattern.
  def pattern_token(value)
    { :value => value, :type => :pattern }
  end

  # Expected attributes of a token of type :plaintext.
  def plaintext_token(value)
    { :value => value, :type => :plaintext }
  end

  # Feeds the reader's pattern to the reader's own tokenizer.
  def tokenize_pattern(reader)
    reader.tokenizer.tokenize(reader.pattern)
  end

  describe "#tokens" do
    it "should tokenize plaintext segments correctly (i.e. Spanish)" do
      reader = TwitterCldr::DataReaders::DateDataReader.new(:es, :type => :full)
      actual = tokenize_pattern(reader)

      check_token_list(actual, [
        pattern_token("EEEE"),
        plaintext_token(", "),
        pattern_token("d"),
        plaintext_token(" 'de' "),
        pattern_token("MMMM"),
        plaintext_token(" 'de' "),
        pattern_token("y")
      ])
    end

    it "should tokenize patterns with non-latin characters correctly (i.e. Japanese)" do
      reader = TwitterCldr::DataReaders::DateDataReader.new(:ja, :type => :full)
      actual = tokenize_pattern(reader)

      check_token_list(actual, [
        pattern_token("y"),
        plaintext_token("年"),
        pattern_token("M"),
        plaintext_token("月"),
        pattern_token("d"),
        plaintext_token("日"),
        pattern_token("EEEE")
      ])
    end

    it "should tokenize composites correctly" do
      # Guard: fail with an explicit message when the :th locale resource has
      # no buddhist calendar entry, before the reader is even constructed.
      thai_calendars = TwitterCldr.get_locale_resource(:th, :calendars)[:th][:calendars]
      expect(thai_calendars[:buddhist]).not_to(
        be_nil, 'buddhist calendar is missing for :th locale (check resources/locales/th/calendars.yml)'
      )

      reader = TwitterCldr::DataReaders::DateDataReader.new(:th, :type => :long, :calendar_type => :buddhist)
      actual = tokenize_pattern(reader)

      # NOTE(review): the plaintext literal below ends with a line break inside
      # the quotes, reproduced exactly as it appears in this file -- confirm
      # that the trailing break is intended.
      check_token_list(actual, [
        pattern_token("d"),
        plaintext_token(" "),
        pattern_token("MMMM"),
        plaintext_token(" พ.ศ. 
"),
        # The composite token is described by :to_s rather than :value.
        { :to_s => "y + 543", :type => :composite }
      ])
    end
  end
end