spec/lib/namor_spec.rb in namor-0.0.1 vs spec/lib/namor_spec.rb in namor-0.0.2
- old
+ new
@@ -1,63 +1,94 @@
# spec for name component extraction
require "spec_helper"
describe "name extract" do
+ before(:all) do
+ @namor = Namor::Namor.new
+ @namor.config(:suppress => ['MD', 'DDS'])
+ end
+
it "should handle 2-part names without commas" do
- Namor::extract("john smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
+ @namor.extract("john smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
end
it "should handle 2-part names with commas" do
- Namor::extract("SMITH, JOHN").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
+ @namor.extract("SMITH, JOHN").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
end
it "should handle 2-part names with commas and middle initials" do
- Namor::extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
+ @namor.extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
end
it "should handle 2-part names with commas and middle initials" do
- Namor::extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
+ @namor.extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
end
it "should strip elements within parentheses" do
- Namor::extract("SMITH, JOHN (Jacko) R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
+ @namor.extract("SMITH, JOHN (Jacko) R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
end
it "should drop periods" do
- Namor::extract("John R. Smith").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
+ @namor.extract("John R. Smith").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
end
it "should drop spaces in last name (only when input has a comma)" do
- Namor::extract("Smith Jones, Mary").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
+ @namor.extract("Smith Jones, Mary").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
end
it "should drop dashes & apostrophes" do
- Namor::extract("Mary Smith-Jones").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
- Namor::extract("Mary S. O'Keefe").should == ['MARY', 'S', 'OKEEFE', 'OKEEFE,MARY S']
- Namor::extract("Jean-Michel Claude").should == ['JEANMICHEL', nil, 'CLAUDE', 'CLAUDE,JEANMICHEL']
+ @namor.extract("Mary Smith-Jones").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
+ @namor.extract("Mary S. O'Keefe").should == ['MARY', 'S', 'OKEEFE', 'OKEEFE,MARY S']
+ @namor.extract("Jean-Michel Claude").should == ['JEANMICHEL', nil, 'CLAUDE', 'CLAUDE,JEANMICHEL']
end
it "should concatenate extract name pieces" do
- Namor::extract("rajesh kumar vishnu garuda").should == ['RAJESH', nil, 'KUMARVISHNUGARUDA', 'KUMARVISHNUGARUDA,RAJESH']
- Namor::extract("Kumar, Rajesh Vishnu Garuda").should == ['RAJESH', 'VISHNUGARUDA', 'KUMAR', 'KUMAR,RAJESH VISHNUGARUDA']
+ @namor.extract("rajesh kumar vishnu garuda").should == ['RAJESH', nil, 'KUMARVISHNUGARUDA', 'KUMARVISHNUGARUDA,RAJESH']
+ @namor.extract("Kumar, Rajesh Vishnu Garuda").should == ['RAJESH', 'VISHNUGARUDA', 'KUMAR', 'KUMAR,RAJESH VISHNUGARUDA']
end
it "should excise suffixes like 'Jr.' from lastnames" do
- Namor::extract("Smith Jr, Edward M").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
+ @namor.extract("Smith Jr, Edward M").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
end
it "should excise terms from optional suppression list" do
- Namor::extract("Smith Jr, Edward M MD DDS", :suppress => ['MD', 'DDS']).should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
- Namor::extract("Smith Jr, Edward III MD PHD", :suppress => ['MD', 'DDS']).should == ['EDWARD', 'PHD', 'SMITH', 'SMITH,EDWARD PHD']
+ @namor.extract("Smith Jr, Edward M MD DDS").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
+ @namor.extract("Smith Jr, Edward III MD PHD").should == ['EDWARD', 'PHD', 'SMITH', 'SMITH,EDWARD PHD']
end
it "should handle pathological cases" do
- Namor::extract(", Mary Smith").should == ['MARY', 'SMITH', nil, 'MARY SMITH']
+ @namor.extract(", Mary Smith").should == ['MARY', 'SMITH', nil, 'MARY SMITH']
end
end
describe "with cluster coding" do
+ before(:all) do
+ @namor = Namor::Namor.new
+ @namor.config(:suppress => ['MD', 'DDS'])
+ end
+
it "should generate cluster labels" do
- Namor::extract_with_cluster("Smith Jr, Edward III MD PHD", :suppress => ['MD', 'DDS']).last.should == 'SMITH_EDWARD_PHD'
+ @namor.extract_with_cluster("Smith Jr, Edward III MD PHD").last.should == 'SMITH_EDWARD_PHD'
+ end
+end
+
+describe "name componentization" do
+ before(:all) do
+ @namor = Namor::Namor.new
+ @namor.config(:suppress => ['esq'])
+ end
+
+ it "should include initials" do
+ @namor.components("john q. smith").should == ['JOHN', 'Q', 'SMITH']
+ end
+
+ it "should excise common suffixes" do
+ @namor.components("john smith III").should == ['JOHN', 'SMITH']
+ @namor.components("john smith jr").should == ['JOHN', 'SMITH']
+ end
+
+ it "should excise from suppression list" do
+ @namor.components("john smith esk.").should == ['ESK', 'JOHN', 'SMITH']
+ @namor.components("john smith esq.").should == ['JOHN', 'SMITH']
end
end