test/cases/gigo_test.rb in gigo-1.3.0 vs test/cases/gigo_test.rb in gigo-1.4.0

- old
+ new

@@ -9,11 +9,15 @@ let(:data_bad_readin) { "�20 � �Woohoo�" } let(:data_cp1252) { data_utf8.encode('CP1252') } let(:data_bin_apos) { "won\x92t".force_encoding('binary') } let(:data_really_bad) { "ed.Ã\u0083Ã\u0083\xC3" } + let(:data_medico_utf8) { "Med\u00EDco".force_encoding('UTF-8') } + let(:data_medico_iso88591) { "Med\xEDco".force_encoding('iso8859-1') } + let(:data_medico_unknown) { "Med\uFFFDco".force_encoding('UTF-8') } + describe '.encoding' do it 'defaults to UTF-8 encoding' do GIGO.encoding.must_equal Encoding::UTF_8 end @@ -40,23 +44,27 @@ it 'fixes windows apostrophe' do GIGO.load(data_bin_apos).must_equal "won’t" end - it 'should allows properly encoded and marked strings to be passed thru' do + it 'should allow properly encoded and marked strings to be passed thru' do GIGO.load(data_utf8).must_equal data_utf8 GIGO.load(data_utf8_emoji).must_equal data_utf8_emoji end it 'allows data already read in with question marks to pass thru' do GIGO.load(data_bad_readin).must_equal data_bad_readin end it 'allows really bad data to be encoded using default replace and question marks' do - GIGO.load(data_utf8_emoji.force_encoding('ASCII-8BIT')).must_equal data_utf8_emoji + GIGO.load(data_medico_unknown).must_equal "Med�co" end + it 'makes sure UTF-8 data read in as US-ASCII us fixed' do + GIGO.load(data_medico_utf8.force_encoding('US-ASCII')).must_equal 'Medíco' + end + it 'converts windows codepages that are poorly marked as another encoding' do db_data1 = data_cp1252.dup.force_encoding('ASCII-8BIT') GIGO.load(db_data1).must_equal data_utf8 db_data2 = data_cp1252.dup.force_encoding('US-ASCII') GIGO.load(db_data2).must_equal data_utf8 @@ -64,10 +72,15 @@ GIGO.load(db_data3).must_equal data_utf8 db_data4 = data_cp1252.dup GIGO.load(db_data4).must_equal data_utf8 end - it 'can make sure to it is really a valid encoding afteward' do + it 'converts iso8859 when poorly marked as another encoding' do + GIGO.load(data_medico_iso88591).must_equal 'Medíco' + GIGO.load(data_medico_iso88591.force_encoding('US-ASCII')).must_equal 'Medíco' + end + + it 'can make sure to it is really a valid encoding afterward' do html_escape GIGO.load(data_really_bad) end end