spec/name_spec.rb in icu_name-0.0.4 vs spec/name_spec.rb in icu_name-0.0.5
- old
+ new
@@ -56,49 +56,95 @@
end
it "should canconicalise last names" do
Name.new('John', 'O Reilly').last.should == "O'Reilly"
Name.new('dave', 'mcmanus').last.should == "McManus"
- Name.new('pete', 'MACMANUS').last.should == "MacManus"
+ Name.new('pete', 'MACMANUS').last.should == "Macmanus"
end
+
+ it "characters and encoding" do
+ josef = ICU::Name.new('Józef', 'Żabiński')
+ josef.name.should == "Józef Abiski"
+ bu = ICU::Name.new('Bǔ Xiángzhì')
+ bu.name.should == "B. Xiángzhì"
+ eric = ICU::Name.new('éric', 'PRIÉ')
+ eric.rname.should == "Prié, Éric"
+ eric.rname.encoding.name.should == "UTF-8"
+ eric = ICU::Name.new('éric'.encode("ISO-8859-1"), 'PRIÉ'.force_encoding("ASCII-8BIT"))
+ eric.rname.should == "Prié, Éric"
+ eric.rname.encoding.name.should == "UTF-8"
+ eric.name(:ascii => true).should == "Eric Prie"
+ eric_ascii = ICU::Name.new('éric', 'PRIÉ', :ascii => true)
+ eric_ascii.name.should == "Eric Prie"
+ eric.match('Éric', 'Prié').should be_true
+ eric.match('Eric', 'Prie').should be_false
+ eric.match('Eric', 'Prie', :ascii => true).should be_true
+ end
end
context "names that are already canonical" do
it "should not be altered" do
Name.new('Mark J. L.', 'Orr').name.should == 'Mark J. L. Orr'
Name.new('Anna-Marie J.-K.', 'Liviu-Dieter').name.should == 'Anna-Marie J.-K. Liviu-Dieter'
+ Name.new('Èric Cantona').name.should == 'Èric Cantona'
end
end
- context "last names beginning with a single letter followed by a quote" do
+ context "last names involving a quote" do
it "should be handled correctly" do
Name.new('una', "O'boyle").name.should == "Una O'Boyle"
Name.new('jonathan', 'd`arcy').name.should == "Jonathan D'Arcy"
Name.new('erwin e', "L'AMI").name.should == "Erwin E. L'Ami"
Name.new('cormac', "o brien").name.should == "Cormac O'Brien"
+ Name.new('türko', "o özgür").name.should == "Türko O'Özgür"
+ Name.new('türko', "l`özgür").name.should == "Türko L'Özgür"
end
end
- context "last beginning with Mc" do
+ context "last beginning with Mc or Mac" do
it "should be handled correctly" do
Name.new('shane', "mccabe").name.should == "Shane McCabe"
- Name.new('shawn', "macDonagh").name.should == "Shawn MacDonagh"
Name.new('shawn', "macdonagh").name.should == "Shawn Macdonagh"
Name.new('bartlomiej', "macieja").name.should == "Bartlomiej Macieja"
+ Name.new('türko', "mcözgür").name.should == "Türko McÖzgür"
+ Name.new('TÜRKO', "MACÖZGÜR").name.should == "Türko Macözgür"
end
end
+ context "first name initials" do
+ it "should be handled correctly" do
+ Name.new('m j l', 'Orr').first.should == 'M. J. L.'
+ Name.new('Ö. é m', 'Panno').first.should == "Ö. É. M."
+ end
+ end
+
context "doubled barrelled names or initials" do
it "should be handled correctly" do
Name.new('anna-marie', 'den-otter').name.should == 'Anna-Marie Den-Otter'
Name.new('j-k', 'rowling').name.should == 'J.-K. Rowling'
Name.new("mark j. - l", 'ORR').name.should == 'Mark J.-L. Orr'
Name.new('JOHANNA', "lowry-o'REILLY").name.should == "Johanna Lowry-O'Reilly"
Name.new('hannah', "lowry - o reilly").name.should == "Hannah Lowry-O'Reilly"
+ Name.new('hannah', "lowry - o reilly").name.should == "Hannah Lowry-O'Reilly"
+ Name.new('ètienne', "gèrard - mcözgür").name.should == "Ètienne Gèrard-McÖzgür"
end
end
+ context "accented characters and capitalisation" do
+ it "should downcase upper case accented characters where appropriate" do
+ name = Name.new('GEARÓIDÍN', 'UÍ LAIGHLÉIS')
+ name.first.should == 'Gearóidín'
+ name.last.should == 'Uí Laighléis'
+ end
+
+ it "should upcase upper case accented characters where appropriate" do
+ name = Name.new('èric özgür')
+ name.first.should == 'Èric'
+ name.last.should == 'Özgür'
+ end
+ end
+
context "extraneous white space" do
it "should be handled correctly" do
Name.new(' mark j l ', " \t\r\n orr \n").name.should == 'Mark J. L. Orr'
end
end
@@ -108,35 +154,113 @@
Name.new('. mark j..l', 'orr.').name.should == 'Mark J. L. Orr'
end
end
context "construction from a single string" do
- before(:each) do
- @mark1 = Name.new('ORR, mark j l')
- @mark2 = Name.new('MARK J L ORR')
- @oreil = Name.new("O'Reilly, j-k")
- end
-
it "should be possible in simple cases" do
- @mark1.first.should == 'Mark J. L.'
- @mark1.last.should == 'Orr'
- @mark2.first.should == 'Mark J. L.'
- @mark2.last.should == 'Orr'
- @oreil.name.should == "J.-K. O'Reilly"
+ Name.new('ORR, mark j l').rname.should == 'Orr, Mark J. L.'
+ Name.new('MARK J L ORR').rname.should == 'Orr, Mark J. L.'
+ Name.new("j-k O'Reilly").rname.should == "O'Reilly, J.-K."
+ Name.new("j-k O Reilly").rname.should == "O'Reilly, J.-K."
+ Name.new('ètienne o o özgür').name.should == "Ètienne O. O'Özgür"
end
end
context "construction from an instance" do
it "should be possible" do
Name.new(Name.new('ORR, mark j l')).name.should == 'Mark J. L. Orr'
end
end
+ context "encoding" do
+ before(:each) do
+ @first = 'Gearóidín'
+ @last = 'Uí Laighléis'
+ end
+
+ it "should handle UTF-8" do
+ name = Name.new(@first, @last)
+ name.first.should == @first
+ name.last.should == @last
+ name.first.encoding.name.should == "UTF-8"
+ name.last.encoding.name.should == "UTF-8"
+ end
+
+ it "should handle ISO-8859-1" do
+ name = Name.new(@first.encode("ISO-8859-1"), @last.encode("ISO-8859-1"))
+ name.first.should == @first
+ name.last.should == @last
+ name.first.encoding.name.should == "UTF-8"
+ name.last.encoding.name.should == "UTF-8"
+ end
+
+ it "should handle Windows-1252" do
+ name = Name.new(@first.encode("Windows-1252"), @last.encode("Windows-1252"))
+ name.first.should == @first
+ name.last.should == @last
+ name.first.encoding.name.should == "UTF-8"
+ name.last.encoding.name.should == "UTF-8"
+ end
+
+ it "should handle ASCII-8BIT" do
+ name = Name.new(@first.dup.force_encoding('ASCII-8BIT'), @last.dup.force_encoding('ASCII-8BIT'))
+ name.first.should == @first
+ name.last.should == @last
+ name.first.encoding.name.should == "UTF-8"
+ name.last.encoding.name.should == "UTF-8"
+ end
+
+ it "should handle US-ASCII" do
+ @first = 'Gearoidin'
+ @last = 'Ui Laighleis'
+ name = Name.new(@first.encode("US-ASCII"), @last.encode("US-ASCII"))
+ name.first.should == @first
+ name.last.should == @last
+ name.first.encoding.name.should == "UTF-8"
+ name.last.encoding.name.should == "UTF-8"
+ end
+ end
+
+ context "transliteration" do
+ before(:all) do
+ @opt = { :ascii => true }
+ end
+
+ it "should be a no-op for names that already ASCII" do
+ name = Name.new('Mark J. L.', 'Orr')
+ name.first(@opt).should == 'Mark J. L.'
+ name.last(@opt).should == 'Orr'
+ name.name(@opt).should == 'Mark J. L. Orr'
+ name.rname(@opt).should == 'Orr, Mark J. L.'
+ name.to_s(@opt).should == 'Orr, Mark J. L.'
+ end
+
+ it "should remove the accents from accented characters" do
+ name = Name.new('Gearóidín', 'Uí Laighléis')
+ name.first(@opt).should == 'Gearoidin'
+ name.last(@opt).should == 'Ui Laighleis'
+ name.name(@opt).should == 'Gearoidin Ui Laighleis'
+ name.rname(@opt).should == 'Ui Laighleis, Gearoidin'
+ name.to_s(@opt).should == 'Ui Laighleis, Gearoidin'
+ name = Name.new('èric PRIÉ')
+ name.first(@opt).should == 'Eric'
+ name.last(@opt).should == 'Prie'
+ end
+
+ it "should work for the constructor as well as accessors" do
+ name = Name.new('Gearóidín', 'Uí Laighléis', @opt)
+ name.first.should == 'Gearoidin'
+ name.last.should == 'Ui Laighleis'
+ end
+ end
+
context "constuction corner cases" do
it "should be handled correctly" do
Name.new('Orr').name.should == 'Orr'
Name.new('Orr').rname.should == 'Orr'
+ Name.new('Uí Laighléis').rname.should == 'Laighléis, Uí'
+ Name.new('', 'Uí Laighléis', :ascii => true).last.should == 'Ui Laighleis'
Name.new('').name.should == ''
Name.new('').rname.should == ''
Name.new.name.should == ''
Name.new.rname.should == ''
end
@@ -162,18 +286,21 @@
end
it "should be flexible with regards to hyphens in double barrelled names" do
Name.new('J.-K.', 'Rowling').match('J. K.', 'Rowling').should be_true
Name.new('Joanne-K.', 'Rowling').match('Joanne K.', 'Rowling').should be_true
+ Name.new('Èric-K.', 'Cantona').match('Èric K.', 'Cantona').should be_true
end
it "should match initials" do
Name.new('M. J. L.', 'Orr').match('Mark John Legard', 'Orr').should be_true
Name.new('M.', 'Orr').match('Mark', 'Orr').should be_true
Name.new('M. J. L.', 'Orr').match('Mark', 'Orr').should be_true
Name.new('M.', 'Orr').match('M. J.', 'Orr').should be_true
Name.new('M. J. L.', 'Orr').match('M. G.', 'Orr').should be_false
+ Name.new('È', 'Cantona').match('Èric K.', 'Cantona').should be_true
+ Name.new('E. K.', 'Cantona').match('Èric K.', 'Cantona').should be_false
end
it "should not match on full names not in first position or without an exact match" do
Name.new('J. M.', 'Orr').match('John', 'Orr').should be_true
Name.new('M. J.', 'Orr').match('John', 'Orr').should be_false
@@ -204,18 +331,23 @@
Name.new('Alan', 'McDonagh').match('Alan', 'MacDonagh').should be_true
Name.new('Darko', 'Polimac').match('Darko', 'Polimc').should be_false
end
end
- context "accented characters" do
- before(:each) do
- @first = 'Gearóidín'
- @last = 'Uí Laighléis'
+ context "matches involving accented characters" do
+ it "should work for identical names" do
+ Name.new('Gearóidín', 'Uí Laighléis').match('Gearóidín', 'Uí Laighléis').should be_true
+ Name.new('Gearóidín', 'Uí Laighléis').match('Gearoidin', 'Ui Laighleis').should be_false
end
- it "should not yet deal with UTF-8" do
- name = Name.new(@first, @last)
- name.first.should_not == @first
+ it "should work for first name initials" do
+ Name.new('Èric-K.', 'Cantona').match('È. K.', 'Cantona').should be_true
+ Name.new('Èric-K.', 'Cantona').match('E. K.', 'Cantona').should be_false
+ end
+
+ it "the matching of accented characters can be relaxed" do
+ Name.new('Gearóidín', 'Uí Laighléis').match('Gearoidin', 'Ui Laíghleis', :ascii => true).should be_true
+ Name.new('Èric-K.', 'Cantona').match('E. K.', 'Cantona', :ascii => true).should be_true
end
end
end
end