# -*- encoding : utf-8 -*-
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
describe String do
# Examples for String#word_count.
#
# The expected totals for the Arabic, Cyrillic and Hebrew fixtures equal the
# number of whitespace-separated tokens in each fixture. For mixed
# English/Chinese text every Chinese character is expected to count as one word.
describe "word_count" do
  it "should return 2" do
    "two words".word_count.should == 2
  end

  it "should handle hyphenated words" do
    "one-way street".word_count.should == 2
  end

  it "should handle possessive apostrophe" do
    "this is max's ball".word_count.should == 4
  end

  context "when string is a different encoding" do
    it "should not blow up" do
      # force_encoding only relabels the literal's bytes as ISO-8859-1 (no
      # transcoding), so word_count is exercised on a non-UTF-8 string.
      str = "two ¢words".force_encoding("ISO-8859-1")
      str.valid_encoding?.should == true
      str.word_count.should == 2
    end
  end

  # these examples generated with http://generator.lorem-ipsum.info/
  context "when string is in Arabic" do
    # The heredoc body must stay flush left: <<-END keeps leading whitespace.
    # NOTE(review): the trailing double quote on the last line looks like a
    # leftover from an earlier quoted literal; it is attached to the final
    # token, so it does not add a whitespace-separated word.
    let(:sample_text) do
      <<-END
من وقد جحافل ليرتفع بولندا, تعد أم العالم العمليات اقتصادية. مما ثم غضون عشوائية الدنمارك, كل يبق وبعد التاريخ،, تم الفاشي والألمانية، للإمبراطورية أضف. شبح واعتلاء بالهجوم و. الإطلاق الربيع، والإتحاد عن على, تسبب وبداية بتخصيص وضم من. تم بها وبدأت واستمر الإنجليز،, كنقطة انتباه لان تم.
قد فقد دخول."
      END
    end

    it "should count the right number of words" do
      sample_text.word_count.should == 50
    end
  end

  context "when string is in Cyrillic" do
    let(:sample_text) { "Йн пэр аффэрт ыпикурэи интылльэгэбат! Эи ючю витаэ экшырки адвыржаряюм. Эож тебиквюэ патриоквюы ан, вим дёко омниюм конкльюдатюрквюэ но! Ыт ыюм нонумй альяквюам, ыам ед эррэм фюйзчыт, вяш ку эквюедым." }

    it "should count the right number of words" do
      sample_text.word_count.should == 30
    end
  end

  context "when string is in Hebrew" do
    let(:sample_text) { "אל סדר וקשקש תקשורת. רבה לעתים מיותר ומהימנה את, הרוח חשמל כלשהו ויש אל. לעריכת האטמוספירה שער על. הטבע אנגלית שיתופית בקר גם, צעד או מושגי אודות. כתב אם שונה." }

    it "should count the right number of words" do
      sample_text.word_count.should == 30
    end
  end

  context "when string contains Chinese" do
    # see http://stackoverflow.com/questions/12488565/how-to-count-words-in-a-multi-language-text-using-ruby-javascript
    # Six English words plus two Chinese characters => 8.
    let(:sample_text) { 'The last Olympics was held in 北京' }

    it "should count the english words as words, plus one word for each Chinese character" do
      sample_text.word_count.should == 8
    end
  end
end
# Examples for String#contains_cjk?: the predicate is expected to be true for
# text that includes Chinese characters and false for purely Latin text.
describe "contains_cjk?" do
  context "when the string contains Chinese characters" do
    let(:mixed_text) { 'The last Olympics was held in 北京' }

    it "should be true" do
      result = mixed_text.contains_cjk?
      result.should == true
    end
  end

  context "when the string does not contain Chinese characters" do
    let(:latin_text) { 'The last Olympics was held in London' }

    it "should be false" do
      result = latin_text.contains_cjk?
      result.should == false
    end
  end
end
# Examples for String#to_plain_text_preserving_links.
#
# Each fixture carries the markup its example description talks about, so an
# example can only pass when anchors are kept, other tags are dropped, and
# non-href anchor attributes are removed.
#
# NOTE(review): the markup and the exact expected serialisation are assumed
# from the example descriptions (inline tags only, double-quoted href) --
# confirm against the implementation's actual output.
describe "to_plain_text_preserving_links" do
  context "when it contains a tags" do
    # Expected result shared by every example: text plus a bare href-only anchor.
    let(:text_with_link) { 'here is <a href="http://example.com/foo">foo</a>' }
    let(:content) { 'here is <a href="http://example.com/foo">foo</a>' }

    it "preserves the a tags" do
      content.to_plain_text_preserving_links.should == text_with_link
    end

    context "and other tags" do
      # <em> is an inline, non-anchor tag wrapped around ordinary text.
      let(:content) { 'here <em>is</em> <a href="http://example.com/foo">foo</a>' }

      it "removes any non-a tags" do
        content.to_plain_text_preserving_links.should == text_with_link
      end
    end

    context "and the a tags contain attributes other than href" do
      let(:content) { 'here is <a href="http://example.com/foo" class="bar" title="Foo">foo</a>' }

      it "removes any non-href attributes but leaves the href attributes unaffected" do
        content.to_plain_text_preserving_links.should == text_with_link
      end
    end
  end
end
end