# -*- encoding : utf-8 -*-
#
# Specs for the project's String extensions exercised below:
#   * String#word_count                     - number of words in the string
#   * String#contains_cjk?                  - whether the string holds CJK characters
#   * String#to_plain_text_preserving_links - reduce markup to text, keeping links
#
# The extensions themselves are not defined in this file; presumably they are
# loaded through spec_helper.
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

describe String do
  describe "word_count" do
    it "should return 2" do
      "two words".word_count.should == 2
    end

    it "should handle hypenated words" do
      # A hyphenated compound counts as a single word.
      "one-way street".word_count.should == 2
    end

    it "should handle possessive apostrophe" do
      # "max's" counts as a single word.
      "this is max's ball".word_count.should == 4
    end

    context "when string is a different encoding" do
      it "should not blow up" do
        # force_encoding relabels the literal's UTF-8 bytes as ISO-8859-1
        # without transcoding them (and mutates the literal in place).
        str = "two ¢words".force_encoding("ISO-8859-1")
        str.valid_encoding?.should == true
        str.word_count.should == 2
      end
    end

    # The non-Latin sample texts below were generated with
    # http://generator.lorem-ipsum.info/
    context "when string is in Arabic" do
      before do
        @string = <<-END
          من وقد جحافل ليرتفع بولندا, تعد أم العالم العمليات اقتصادية. مما ثم غضون عشوائية الدنمارك, كل يبق وبعد التاريخ،, تم الفاشي والألمانية، للإمبراطورية أضف. شبح واعتلاء بالهجوم و. الإطلاق الربيع، والإتحاد عن على, تسبب وبداية بتخصيص وضم من. تم بها وبدأت واستمر الإنجليز،, كنقطة انتباه لان تم. قد فقد دخول."
        END
      end

      it "should count the right number of words" do
        @string.word_count.should == 50
      end
    end

    context "when string is in Cyrillic" do
      before do
        @string = "Йн пэр аффэрт ыпикурэи интылльэгэбат! Эи ючю витаэ экшырки адвыржаряюм. Эож тебиквюэ патриоквюы ан, вим дёко омниюм конкльюдатюрквюэ но! Ыт ыюм нонумй альяквюам, ыам ед эррэм фюйзчыт, вяш ку эквюедым."
      end

      it "should count the right number of words" do
        @string.word_count.should == 30
      end
    end

    context "when string is in Hebrew" do
      before do
        @string = "אל סדר וקשקש תקשורת. רבה לעתים מיותר ומהימנה את, הרוח חשמל כלשהו ויש אל. לעריכת האטמוספירה שער על. הטבע אנגלית שיתופית בקר גם, צעד או מושגי אודות. כתב אם שונה."
      end

      it "should count the right number of words" do
        @string.word_count.should == 30
      end
    end

    context "when string contains Chinese" do
      # See http://stackoverflow.com/questions/12488565/how-to-count-words-in-a-multi-language-text-using-ruby-javascript
      before { @string = 'The last Olympics was held in 北京' }

      it "should count the english words as words, plus one word for each Chinese character" do
        # 6 English words + 2 Chinese characters = 8
        @string.word_count.should == 8
      end
    end
  end

  describe "contains_cjk?" do
    context "when the string contains Chinese characters" do
      it "should be true" do
        'The last Olympics was held in 北京'.contains_cjk?.should == true
      end
    end

    context "when the string does not contain Chinese characters" do
      it "should be false" do
        'The last Olympics was held in London'.contains_cjk?.should == false
      end
    end
  end

  describe "to_plain_text_preserving_links" do
    # NOTE(review): the descriptions below talk about <a> tags, non-<a> tags and
    # non-href attributes, yet every fixture and expectation visible here is the
    # plain string 'here is foo'. The HTML markup looks to have been stripped
    # from this copy of the file - confirm against the original fixtures before
    # relying on these examples; as written they cannot tell a correct
    # implementation from a no-op.
    context "when it contains a tags" do
      let(:content) { 'here is foo' }

      it "preserves the a tags" do
        content.to_plain_text_preserving_links.should == 'here is foo'
      end

      context "and other tags" do
        let(:content) { 'here is foo' }

        it "removes any non-a tags" do
          content.to_plain_text_preserving_links.should == 'here is foo'
        end
      end

      context "and the a tags contain attributes other than href" do
        let(:content) { 'here is foo' }

        it "removes any non-href attributes but leaves the href attributes unaffected" do
          content.to_plain_text_preserving_links.should == 'here is foo'
        end
      end
    end
  end
end