require 'spec_helper' RSpec.describe PragmaticSegmenter::Languages::English, "(en)" do context "Golden Rules" do it "Simple period to end sentence #001" do ps = PragmaticSegmenter::Segmenter.new(text: "Hello World. My name is Jonas.", language: "en") expect(ps.segment).to eq(["Hello World.", "My name is Jonas."]) end it "Question mark to end sentence #002" do ps = PragmaticSegmenter::Segmenter.new(text: "What is your name? My name is Jonas.", language: "en") expect(ps.segment).to eq(["What is your name?", "My name is Jonas."]) end it "Exclamation point to end sentence #003" do ps = PragmaticSegmenter::Segmenter.new(text: "There it is! I found it.", language: "en") expect(ps.segment).to eq(["There it is!", "I found it."]) end it "One letter upper case abbreviations #004" do ps = PragmaticSegmenter::Segmenter.new(text: "My name is Jonas E. Smith.", language: "en") expect(ps.segment).to eq(["My name is Jonas E. Smith."]) end it "One letter lower case abbreviations #005" do ps = PragmaticSegmenter::Segmenter.new(text: "Please turn to p. 55.", language: "en") expect(ps.segment).to eq(["Please turn to p. 55."]) end it "Two letter lower case abbreviations in the middle of a sentence #006" do ps = PragmaticSegmenter::Segmenter.new(text: "Were Jane and co. at the party?", language: "en") expect(ps.segment).to eq(["Were Jane and co. at the party?"]) end it "Two letter upper case abbreviations in the middle of a sentence #007" do ps = PragmaticSegmenter::Segmenter.new(text: "They closed the deal with Pitt, Briggs & Co. at noon.", language: "en") expect(ps.segment).to eq(["They closed the deal with Pitt, Briggs & Co. at noon."]) end it "Two letter lower case abbreviations at the end of a sentence #008" do ps = PragmaticSegmenter::Segmenter.new(text: "Let's ask Jane and co. 
They should know.", language: "en") expect(ps.segment).to eq(["Let's ask Jane and co.", "They should know."]) end it "Two letter upper case abbreviations at the end of a sentence #009" do ps = PragmaticSegmenter::Segmenter.new(text: "They closed the deal with Pitt, Briggs & Co. It closed yesterday.", language: "en") expect(ps.segment).to eq(["They closed the deal with Pitt, Briggs & Co.", "It closed yesterday."]) end it "Two letter (prepositive) abbreviations #010" do ps = PragmaticSegmenter::Segmenter.new(text: "I can see Mt. Fuji from here.", language: "en") expect(ps.segment).to eq(["I can see Mt. Fuji from here."]) end it "Two letter (prepositive & postpositive) abbreviations #011" do ps = PragmaticSegmenter::Segmenter.new(text: "St. Michael's Church is on 5th st. near the light.", language: "en") expect(ps.segment).to eq(["St. Michael's Church is on 5th st. near the light."]) end it "Possesive two letter abbreviations #012" do ps = PragmaticSegmenter::Segmenter.new(text: "That is JFK Jr.'s book.", language: "en") expect(ps.segment).to eq(["That is JFK Jr.'s book."]) end it "Multi-period abbreviations in the middle of a sentence #013" do ps = PragmaticSegmenter::Segmenter.new(text: "I visited the U.S.A. last year.", language: "en") expect(ps.segment).to eq(["I visited the U.S.A. last year."]) end it "Multi-period abbreviations at the end of a sentence #014" do ps = PragmaticSegmenter::Segmenter.new(text: "I live in the E.U. How about you?", language: "en") expect(ps.segment).to eq(["I live in the E.U.", "How about you?"]) end it "U.S. as sentence boundary #015" do ps = PragmaticSegmenter::Segmenter.new(text: "I live in the U.S. How about you?", language: "en") expect(ps.segment).to eq(["I live in the U.S.", "How about you?"]) end it "U.S. as non sentence boundary with next word capitalized #016" do ps = PragmaticSegmenter::Segmenter.new(text: "I work for the U.S. Government in Virginia.", language: "en") expect(ps.segment).to eq(["I work for the U.S. 
Government in Virginia."]) end it "U.S. as non sentence boundary #017" do ps = PragmaticSegmenter::Segmenter.new(text: "I have lived in the U.S. for 20 years.", language: "en") expect(ps.segment).to eq(["I have lived in the U.S. for 20 years."]) end it "A.M. / P.M. as non sentence boundary and sentence boundary #018" do skip "NOT IMPLEMENTED" ps = PragmaticSegmenter::Segmenter.new(text: "At 5 a.m. Mr. Smith went to the bank. He left the bank at 6 P.M. Mr. Smith then went to the store.", language: "en") expect(ps.segment).to eq(["At 5 a.m. Mr. Smith went to the bank.", "He left the bank at 6 P.M.", "Mr. Smith then went to the store."]) end it "Number as non sentence boundary #019" do ps = PragmaticSegmenter::Segmenter.new(text: "She has $100.00 in her bag.", language: "en") expect(ps.segment).to eq(["She has $100.00 in her bag."]) end it "Number as sentence boundary #020" do ps = PragmaticSegmenter::Segmenter.new(text: "She has $100.00. It is in her bag.", language: "en") expect(ps.segment).to eq(["She has $100.00.", "It is in her bag."]) end it "Parenthetical inside sentence #021" do ps = PragmaticSegmenter::Segmenter.new(text: "He teaches science (He previously worked for 5 years as an engineer.) at the local University.", language: "en") expect(ps.segment).to eq(["He teaches science (He previously worked for 5 years as an engineer.) at the local University."]) end it "Email addresses #022" do ps = PragmaticSegmenter::Segmenter.new(text: "Her email is Jane.Doe@example.com. I sent her an email.", language: "en") expect(ps.segment).to eq(["Her email is Jane.Doe@example.com.", "I sent her an email."]) end it "Web addresses #023" do ps = PragmaticSegmenter::Segmenter.new(text: "The site is: https://www.example.50.com/new-site/awesome_content.html. 
Please check it out.", language: "en") expect(ps.segment).to eq(["The site is: https://www.example.50.com/new-site/awesome_content.html.", "Please check it out."]) end it "Single quotations inside sentence #024" do ps = PragmaticSegmenter::Segmenter.new(text: "She turned to him, 'This is great.' she said.", language: "en") expect(ps.segment).to eq(["She turned to him, 'This is great.' she said."]) end it "Double quotations inside sentence #025" do ps = PragmaticSegmenter::Segmenter.new(text: "She turned to him, \"This is great.\" she said.", language: "en") expect(ps.segment).to eq(["She turned to him, \"This is great.\" she said."]) end it "Double quotations at the end of a sentence #026" do ps = PragmaticSegmenter::Segmenter.new(text: "She turned to him, \"This is great.\" She held the book out to show him.", language: "en") expect(ps.segment).to eq(["She turned to him, \"This is great.\"", "She held the book out to show him."]) end it "Double punctuation (exclamation point) #027" do ps = PragmaticSegmenter::Segmenter.new(text: "Hello!! Long time no see.", language: "en") expect(ps.segment).to eq(["Hello!!", "Long time no see."]) end it "Double punctuation (question mark) #028" do ps = PragmaticSegmenter::Segmenter.new(text: "Hello?? Who is there?", language: "en") expect(ps.segment).to eq(["Hello??", "Who is there?"]) end it "Double punctuation (exclamation point / question mark) #029" do ps = PragmaticSegmenter::Segmenter.new(text: "Hello!? Is that you?", language: "en") expect(ps.segment).to eq(["Hello!?", "Is that you?"]) end it "Double punctuation (question mark / exclamation point) #030" do ps = PragmaticSegmenter::Segmenter.new(text: "Hello?! Is that you?", language: "en") expect(ps.segment).to eq(["Hello?!", "Is that you?"]) end it "List (period followed by parens and no period to end item) #031" do ps = PragmaticSegmenter::Segmenter.new(text: "1.) The first item 2.) The second item", language: "en") expect(ps.segment).to eq(["1.) The first item", "2.) 
The second item"]) end it "List (period followed by parens and period to end item) #032" do ps = PragmaticSegmenter::Segmenter.new(text: "1.) The first item. 2.) The second item.", language: "en") expect(ps.segment).to eq(["1.) The first item.", "2.) The second item."]) end it "List (parens and no period to end item) #033" do ps = PragmaticSegmenter::Segmenter.new(text: "1) The first item 2) The second item", language: "en") expect(ps.segment).to eq(["1) The first item", "2) The second item"]) end it "List (parens and period to end item) #034" do ps = PragmaticSegmenter::Segmenter.new(text: "1) The first item. 2) The second item.", language: "en") expect(ps.segment).to eq(["1) The first item.", "2) The second item."]) end it "List (period to mark list and no period to end item) #035" do ps = PragmaticSegmenter::Segmenter.new(text: "1. The first item 2. The second item", language: "en") expect(ps.segment).to eq(["1. The first item", "2. The second item"]) end it "List (period to mark list and period to end item) #036" do ps = PragmaticSegmenter::Segmenter.new(text: "1. The first item. 2. The second item.", language: "en") expect(ps.segment).to eq(["1. The first item.", "2. The second item."]) end it "List with bullet #037" do ps = PragmaticSegmenter::Segmenter.new(text: "• 9. The first item • 10. The second item", language: "en") expect(ps.segment).to eq(["• 9. The first item", "• 10. The second item"]) end it "List with hypthen #038" do ps = PragmaticSegmenter::Segmenter.new(text: "⁃9. The first item ⁃10. The second item", language: "en") expect(ps.segment).to eq(["⁃9. The first item", "⁃10. The second item"]) end it "Alphabetical list #039" do ps = PragmaticSegmenter::Segmenter.new(text: "a. The first item b. The second item c. The third list item", language: "en") expect(ps.segment).to eq(["a. The first item", "b. The second item", "c. 
The third list item"]) end it "Errant newlines in the middle of sentences (PDF) #040" do ps = PragmaticSegmenter::Segmenter.new(text: "This is a sentence\ncut off in the middle because pdf.", language: "en", doc_type: "pdf") expect(ps.segment).to eq(["This is a sentence cut off in the middle because pdf."]) end it "Errant newlines in the middle of sentences #041" do ps = PragmaticSegmenter::Segmenter.new(text: "It was a cold \nnight in the city.", language: "en") expect(ps.segment).to eq(["It was a cold night in the city."]) end it "Lower case list separated by newline #042" do ps = PragmaticSegmenter::Segmenter.new(text: "features\ncontact manager\nevents, activities\n", language: "en") expect(ps.segment).to eq(["features", "contact manager", "events, activities"]) end it "Geo Coordinates #043" do ps = PragmaticSegmenter::Segmenter.new(text: "You can find it at N°. 1026.253.553. That is where the treasure is.", language: "en") expect(ps.segment).to eq(["You can find it at N°. 1026.253.553.", "That is where the treasure is."]) end it "Named entities with an exclamation point #044" do ps = PragmaticSegmenter::Segmenter.new(text: "She works at Yahoo! in the accounting department.", language: "en") expect(ps.segment).to eq(["She works at Yahoo! in the accounting department."]) end it "I as a sentence boundary and I as an abbreviation #045" do ps = PragmaticSegmenter::Segmenter.new(text: "We make a good team, you and I. Did you see Albert I. Jones yesterday?", language: "en") expect(ps.segment).to eq(["We make a good team, you and I.", "Did you see Albert I. Jones yesterday?"]) end it "Ellipsis at end of quotation #046" do ps = PragmaticSegmenter::Segmenter.new(text: "Thoreau argues that by simplifying one’s life, “the laws of the universe will appear less complex. . . .”", language: "en") expect(ps.segment).to eq(["Thoreau argues that by simplifying one’s life, “the laws of the universe will appear less complex. . . 
.”"]) end it "Ellipsis with square brackets #047" do ps = PragmaticSegmenter::Segmenter.new(text: "\"Bohr [...] used the analogy of parallel stairways [...]\" (Smith 55).", language: "en") expect(ps.segment).to eq(["\"Bohr [...] used the analogy of parallel stairways [...]\" (Smith 55)."]) end it "Ellipsis as sentence boundary (standard ellipsis rules) #048" do ps = PragmaticSegmenter::Segmenter.new(text: "If words are left off at the end of a sentence, and that is all that is omitted, indicate the omission with ellipsis marks (preceded and followed by a space) and then indicate the end of the sentence with a period . . . . Next sentence.", language: "en") expect(ps.segment).to eq(["If words are left off at the end of a sentence, and that is all that is omitted, indicate the omission with ellipsis marks (preceded and followed by a space) and then indicate the end of the sentence with a period . . . .", "Next sentence."]) end it "Ellipsis as sentence boundary (non-standard ellipsis rules) #049" do ps = PragmaticSegmenter::Segmenter.new(text: "I never meant that.... She left the store.", language: "en") expect(ps.segment).to eq(["I never meant that....", "She left the store."]) end it "Ellipsis as non sentence boundary #050" do ps = PragmaticSegmenter::Segmenter.new(text: "I wasn’t really ... well, what I mean...see . . . what I'm saying, the thing is . . . I didn’t mean it.", language: "en") expect(ps.segment).to eq(["I wasn’t really ... well, what I mean...see . . . what I'm saying, the thing is . . . I didn’t mean it."]) end it "4-dot ellipsis #051" do ps = PragmaticSegmenter::Segmenter.new(text: "One further habit which was somewhat weakened . . . was that of combining words into self-interpreting compounds. . . . The practice was not abandoned. . . .", language: "en") expect(ps.segment).to eq(["One further habit which was somewhat weakened . . . was that of combining words into self-interpreting compounds.", ". . . The practice was not abandoned. . . 
."]) end it "No whitespace in between sentences #052" do ps = PragmaticSegmenter::Segmenter.new(text: "Hello world.Today is Tuesday.Mr. Smith went to the store and bought 1,000.That is a lot.", language: "en") expect(ps.segment).to eq(["Hello world.", "Today is Tuesday.", "Mr. Smith went to the store and bought 1,000.", "That is a lot."]) end end describe '#segment' do it 'correctly segments text #001' do ps = PragmaticSegmenter::Segmenter.new(text: "Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, 'and what is the use of a book,' thought Alice 'without pictures or conversations?'\nSo she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her.", language: 'en') expect(ps.segment).to eq(["Alice was beginning to get very tired of sitting by her sister on the bank, and of having nothing to do: once or twice she had peeped into the book her sister was reading, but it had no pictures or conversations in it, 'and what is the use of a book,' thought Alice 'without pictures or conversations?'", "So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid), whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies, when suddenly a White Rabbit with pink eyes ran close by her."]) end it 'correctly segments text #002' do ps = PragmaticSegmenter::Segmenter.new(text: "Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next. 
First, she tried to look down and make out what she was coming to, but it was too dark to see anything; then she looked at the sides of the well, and noticed that they were filled with cupboards and book-shelves; here and there she saw maps and pictures hung upon pegs. She took down a jar from one of the shelves as she passed; it was labelled 'ORANGE MARMALADE', but to her great disappointment it was empty: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it.\n'Well!' thought Alice to herself, 'after such a fall as this, I shall think nothing of tumbling down stairs! How brave they'll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!' (Which was very likely true.)", language: 'en', doc_type: 'pdf') expect(ps.segment).to eq(["Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next.", "First, she tried to look down and make out what she was coming to, but it was too dark to see anything; then she looked at the sides of the well, and noticed that they were filled with cupboards and book-shelves; here and there she saw maps and pictures hung upon pegs.", "She took down a jar from one of the shelves as she passed; it was labelled 'ORANGE MARMALADE', but to her great disappointment it was empty: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it.", "'Well!' thought Alice to herself, 'after such a fall as this, I shall think nothing of tumbling down stairs! How brave they'll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!' 
(Which was very likely true.)"]) end it 'correctly segments text #003' do ps = PragmaticSegmenter::Segmenter.new(text: "Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next. First, she tried to look down and make out what she was coming to, but it was too dark to see anything; then she looked at the sides of the well, and noticed that they were filled with cupboards and book-shelves; here and there she saw maps and pictures hung upon pegs. She took down a jar from one of the shelves as she passed; it was labelled 'ORANGE MARMALADE', but to her great disappointment it was empty: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it.\r'Well!' thought Alice to herself, 'after such a fall as this, I shall think nothing of tumbling down stairs! How brave they'll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!' (Which was very likely true.)", language: 'en', doc_type: 'pdf') expect(ps.segment).to eq(["Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next.", "First, she tried to look down and make out what she was coming to, but it was too dark to see anything; then she looked at the sides of the well, and noticed that they were filled with cupboards and book-shelves; here and there she saw maps and pictures hung upon pegs.", "She took down a jar from one of the shelves as she passed; it was labelled 'ORANGE MARMALADE', but to her great disappointment it was empty: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it.", "'Well!' thought Alice to herself, 'after such a fall as this, I shall think nothing of tumbling down stairs! 
How brave they'll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!' (Which was very likely true.)"]) end it 'correctly segments text #004' do ps = PragmaticSegmenter::Segmenter.new(text: "'Well!' thought Alice to herself, 'after such a fall as this, I shall think nothing of tumbling down stairs! How brave they'll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!' (Which was very likely true.)", language: 'en') expect(ps.segment).to eq(["'Well!' thought Alice to herself, 'after such a fall as this, I shall think nothing of tumbling down stairs! How brave they'll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!' (Which was very likely true.)"]) end it 'correctly segments text #005' do ps = PragmaticSegmenter::Segmenter.new(text: "Down, down, down. Would the fall NEVER come to an end! 'I wonder how many miles I've fallen by this time?' she said aloud.", language: 'en') expect(ps.segment).to eq(["Down, down, down.", "Would the fall NEVER come to an end!", "'I wonder how many miles I've fallen by this time?' she said aloud."]) end it 'correctly segments text #006' do ps = PragmaticSegmenter::Segmenter.new(text: "Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next. First, she tried to look down and make out what she was coming to, but it was too dark to see anything; then she looked at the sides of the well, and noticed that they were filled with cupboards and book-shelves; here and there she saw maps and pictures hung upon pegs. She took down a jar from one of the shelves as she passed; it was labelled 'ORANGE MARMALADE', but to her great disappointment it was empty: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it. 'Well!' 
thought Alice to herself, 'after such a fall as this, I shall think nothing of tumbling down stairs! How brave they'll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!' (Which was very likely true.)", language: 'en') expect(ps.segment).to eq(["Either the well was very deep, or she fell very slowly, for she had plenty of time as she went down to look about her and to wonder what was going to happen next.", "First, she tried to look down and make out what she was coming to, but it was too dark to see anything; then she looked at the sides of the well, and noticed that they were filled with cupboards and book-shelves; here and there she saw maps and pictures hung upon pegs.", "She took down a jar from one of the shelves as she passed; it was labelled 'ORANGE MARMALADE', but to her great disappointment it was empty: she did not like to drop the jar for fear of killing somebody, so managed to put it into one of the cupboards as she fell past it.", "'Well!' thought Alice to herself, 'after such a fall as this, I shall think nothing of tumbling down stairs! How brave they'll all think me at home! Why, I wouldn't say anything about it, even if I fell off the top of the house!' (Which was very likely true.)"]) end it 'correctly segments text #007' do ps = PragmaticSegmenter::Segmenter.new(text: 'A minute is a unit of measurement of time or of angle. The minute is a unit of time equal to 1/60th of an hour or 60 seconds by 1. In the UTC time scale, a minute occasionally has 59 or 61 seconds; see leap second. The minute is not an SI unit; however, it is accepted for use with SI units. The symbol for minute or minutes is min. The fact that an hour contains 60 minutes is probably due to influences from the Babylonians, who used a base-60 or sexagesimal counting system. Colloquially, a min. 
may also refer to an indefinite amount of time substantially longer than the standardized length.', language: 'en') expect(ps.segment).to eq(["A minute is a unit of measurement of time or of angle.", "The minute is a unit of time equal to 1/60th of an hour or 60 seconds by 1.", "In the UTC time scale, a minute occasionally has 59 or 61 seconds; see leap second.", "The minute is not an SI unit; however, it is accepted for use with SI units.", "The symbol for minute or minutes is min.", "The fact that an hour contains 60 minutes is probably due to influences from the Babylonians, who used a base-60 or sexagesimal counting system.", "Colloquially, a min. may also refer to an indefinite amount of time substantially longer than the standardized length."]) end it 'correctly segments text #008' do text = <<-EOF About Me...............................................................................................5 Chapter 2 ...................................................................... 6 Three Weeks Later............................................................................ 7 Better Eating........................................................................................ 8 What's the Score?.............................................................. 9 How To Calculate the Score................... 16-17 EOF ps = PragmaticSegmenter::Segmenter.new(text: text, language: 'en') expect(ps.segment).to eq(["About Me", "Chapter 2", "Three Weeks Later", "Better Eating", "What's the Score?", "How To Calculate the Score"]) end it 'correctly segments text #009' do ps = PragmaticSegmenter::Segmenter.new(text: 'I think Jun. is a great month, said Mr. Suzuki.', language: 'en') expect(ps.segment).to eq(["I think Jun. is a great month, said Mr. Suzuki."]) end it 'correctly segments text #010' do ps = PragmaticSegmenter::Segmenter.new(text: 'Jun. is a great month, said Mr. Suzuki.', language: 'en') expect(ps.segment).to eq(["Jun. is a great month, said Mr. 
Suzuki."]) end it 'correctly segments text #011' do ps = PragmaticSegmenter::Segmenter.new(text: "I have 1.000.00. Yay $.50 and .50! That's 600.", language: 'en') expect(ps.segment).to eq(["I have 1.000.00.", "Yay $.50 and .50!", "That's 600."]) end it 'correctly segments text #012' do ps = PragmaticSegmenter::Segmenter.new(text: '1.) This is a list item with a parens.', language: 'en') expect(ps.segment).to eq(["1.) This is a list item with a parens."]) end it 'correctly segments text #013' do ps = PragmaticSegmenter::Segmenter.new(text: '1. This is a list item.', language: 'en') expect(ps.segment).to eq(['1. This is a list item.']) end it 'correctly segments text #014' do ps = PragmaticSegmenter::Segmenter.new(text: 'I live in the U.S.A. I went to J.C. Penney.', language: 'en') expect(ps.segment).to eq(["I live in the U.S.A.", "I went to J.C. Penney."]) end it 'correctly segments text #015' do ps = PragmaticSegmenter::Segmenter.new(text: 'His name is Alfred E. Sloan.', language: 'en') expect(ps.segment).to eq(['His name is Alfred E. Sloan.']) end it 'correctly segments text #016' do ps = PragmaticSegmenter::Segmenter.new(text: 'Q. What is his name? A. His name is Alfred E. Sloan.', language: 'en') expect(ps.segment).to eq(['Q. What is his name?', 'A. His name is Alfred E. Sloan.']) end it 'correctly segments text #017' do ps = PragmaticSegmenter::Segmenter.new(text: 'Today is 11.18.2014.', language: 'en') expect(ps.segment).to eq(['Today is 11.18.2014.']) end it 'correctly segments text #018' do ps = PragmaticSegmenter::Segmenter.new(text: 'I need you to find 3 items, e.g. a hat, a coat, and a bag.', language: 'en') expect(ps.segment).to eq(['I need you to find 3 items, e.g. a hat, a coat, and a bag.']) end it 'correctly segments text #019' do ps = PragmaticSegmenter::Segmenter.new(text: "The game is the Giants vs. the Tigers at 10 p.m. I'm going are you?", language: 'en') expect(ps.segment).to eq(["The game is the Giants vs. 
the Tigers at 10 p.m.", "I'm going are you?"]) end it 'correctly segments text #020' do ps = PragmaticSegmenter::Segmenter.new(text: 'He is no. 5, the shortstop.', language: 'en') expect(ps.segment).to eq(['He is no. 5, the shortstop.']) end it 'correctly segments text #021' do ps = PragmaticSegmenter::Segmenter.new(text: "Remove long strings of dots........please.", language: 'en') expect(ps.segment).to eq(["Remove long strings of dots please."]) end it 'correctly segments text #022' do ps = PragmaticSegmenter::Segmenter.new(text: "See our additional services section or contact us for pricing\n.\n\n\nPricing Additionl Info\n", language: 'en') expect(ps.segment).to eq(["See our additional services section or contact us for pricing.", "Pricing Additionl Info"]) end it 'correctly segments text #023' do ps = PragmaticSegmenter::Segmenter.new(text: "As payment for 1. above, pay us a commission fee of 0 yen and for 2. above, no fee will be paid.", language: 'en') expect(ps.segment).to eq(["As payment for 1. above, pay us a commission fee of 0 yen and for 2. above, no fee will be paid."]) end it 'correctly segments text #024' do ps = PragmaticSegmenter::Segmenter.new(text: "features\ncontact manager\nevents, activities\n", language: 'en') expect(ps.segment).to eq(['features', 'contact manager', 'events, activities']) end it 'correctly segments text #025' do ps = PragmaticSegmenter::Segmenter.new(text: "Git rid of unnecessary white space.", language: 'en') expect(ps.segment).to eq(["Git rid of unnecessary white space."]) end it 'correctly segments text #026' do ps = PragmaticSegmenter::Segmenter.new(text: "See our additional services section or contact us for pricing\n. 
Pricing Additionl Info", language: 'en') expect(ps.segment).to eq(["See our additional services section or contact us for pricing.", "Pricing Additionl Info"]) end it 'correctly segments text #027' do ps = PragmaticSegmenter::Segmenter.new(text: "Organising your care early \nmeans you'll have months to build a good relationship with your midwife or doctor, ready for \nthe birth.", language: 'en', doc_type: 'pdf') expect(ps.segment).to eq(["Organising your care early means you'll have months to build a good relationship with your midwife or doctor, ready for the birth."]) end it 'correctly segments text #028' do ps = PragmaticSegmenter::Segmenter.new(text: "10. Get some rest \n \nYou have the best chance of having a problem-free pregnancy and a healthy baby if you follow \na few simple guidelines:", language: 'en', doc_type: 'pdf') expect(ps.segment).to eq(["10. Get some rest", "You have the best chance of having a problem-free pregnancy and a healthy baby if you follow a few simple guidelines:"]) end it 'correctly segments text #029' do ps = PragmaticSegmenter::Segmenter.new(text: "• 9. Stop smoking \n• 10. Get some rest \n \nYou have the best chance of having a problem-free pregnancy and a healthy baby if you follow \na few simple guidelines: \n\n1. Organise your pregnancy care early", language: 'en', doc_type: 'pdf') expect(ps.segment).to eq(["• 9. Stop smoking", "• 10. Get some rest", "You have the best chance of having a problem-free pregnancy and a healthy baby if you follow a few simple guidelines:", "1. Organise your pregnancy care early"]) end it 'correctly segments text #030' do ps = PragmaticSegmenter::Segmenter.new(text: "I have 600. 
How many do you have?", language: 'en') expect(ps.segment).to eq(["I have 600.", "How many do you have?"]) end it 'correctly segments text #031' do ps = PragmaticSegmenter::Segmenter.new(text: "\n3\n\nIntroduction\n\n", language: 'en') expect(ps.segment).to eq(["Introduction"]) end it 'correctly segments text #032' do ps = PragmaticSegmenter::Segmenter.new(text: "\nW\nA\nRN\nI\nNG\n", language: 'en') expect(ps.segment).to eq(["WARNING"]) end it 'correctly segments text #033' do ps = PragmaticSegmenter::Segmenter.new(text: "\n\n\nW\nA\nRN\nI\nNG\n \n/\n \nA\nV\nE\nR\nT\nI\nS\nE\nM\nE\nNT\n", language: 'en') expect(ps.segment).to eq(["WARNING", "AVERTISEMENT"]) end it 'correctly segments text #034' do ps = PragmaticSegmenter::Segmenter.new(text: '"Help yourself, sweetie," shouted Candy and gave her the cookie.', language: 'en') expect(ps.segment).to eq(["\"Help yourself, sweetie,\" shouted Candy and gave her the cookie."]) end it 'correctly segments text #035' do ps = PragmaticSegmenter::Segmenter.new(text: "Until its release, a generic mechanism was known, where the sear keeps the hammer in back position, and when one pulls the trigger, the sear slips out of hammer’s notches, the hammer falls initiating \na shot.", language: 'en') expect(ps.segment).to eq(["Until its release, a generic mechanism was known, where the sear keeps the hammer in back position, and when one pulls the trigger, the sear slips out of hammer’s notches, the hammer falls initiating a shot."]) end it 'correctly segments text #036' do ps = PragmaticSegmenter::Segmenter.new(text: "This is a test. 
Until its release, a generic mechanism was known, where the sear keeps the hammer in back position, and when one pulls the trigger, the sear slips out of hammer’s notches, the hammer falls initiating \na shot.", language: 'en') expect(ps.segment).to eq(["This is a test.", "Until its release, a generic mechanism was known, where the sear keeps the hammer in back position, and when one pulls the trigger, the sear slips out of hammer’s notches, the hammer falls initiating a shot."]) end it 'correctly segments text #037' do ps = PragmaticSegmenter::Segmenter.new(text: "This was because it was an offensive weapon, designed to fight at a distance up to 400 yd \n( 365.8 m ).", language: 'en') expect(ps.segment).to eq(["This was because it was an offensive weapon, designed to fight at a distance up to 400 yd ( 365.8 m )."]) end it 'correctly segments text #038' do ps = PragmaticSegmenter::Segmenter.new(text: "“Are demonstrations are evidence of the public anger and frustration at opaque environmental management and decision-making?” Others yet say: \"Should we be scared about these 'protests'?\"", language: 'en') expect(ps.segment).to eq(["“Are demonstrations are evidence of the public anger and frustration at opaque environmental management and decision-making?”", "Others yet say: \"Should we be scared about these 'protests'?\""]) end it 'correctly segments text #039' do ps = PragmaticSegmenter::Segmenter.new(text: "www.testurl.Awesome.com", language: 'en') expect(ps.segment).to eq(["www.testurl.Awesome.com"]) end it 'correctly segments text #040' do ps = PragmaticSegmenter::Segmenter.new(text: "http://testurl.Awesome.com", language: 'en') expect(ps.segment).to eq(["http://testurl.Awesome.com"]) end it 'correctly segments text #041' do ps = PragmaticSegmenter::Segmenter.new(text: "St. Michael's Church in is a church.", language: 'en') expect(ps.segment).to eq(["St. 
Michael's Church in is a church."]) end it 'correctly segments text #042' do ps = PragmaticSegmenter::Segmenter.new(text: "JFK Jr.'s book is on sale.", language: 'en') expect(ps.segment).to eq(["JFK Jr.'s book is on sale."]) end it 'correctly segments text #043' do ps = PragmaticSegmenter::Segmenter.new(text: "This is e.g. Mr. Smith, who talks slowly... And this is another sentence.", language: 'en') expect(ps.segment).to eq(["This is e.g. Mr. Smith, who talks slowly...", "And this is another sentence."]) end it 'correctly segments text #044' do ps = PragmaticSegmenter::Segmenter.new(text: "Leave me alone!, he yelled. I am in the U.S. Army. Charles (Ind.) said he.", language: 'en') expect(ps.segment).to eq(["Leave me alone!, he yelled.", "I am in the U.S. Army.", "Charles (Ind.) said he."]) end it 'correctly segments text #045' do ps = PragmaticSegmenter::Segmenter.new(text: "This is the U.S. Senate my friends. Yes. It is!", language: 'en') expect(ps.segment).to eq(["This is the U.S. Senate my friends.", "Yes.", "It is!"]) end it 'correctly segments text #046' do ps = PragmaticSegmenter::Segmenter.new(text: "Send it to P.O. box 6554", language: 'en') expect(ps.segment).to eq(["Send it to P.O. box 6554"]) end it 'correctly segments text #047' do ps = PragmaticSegmenter::Segmenter.new(text: "There were 500 cases in the U.S. The U.S. Commission asked the U.S. Government to give their opinion on the issue.", language: 'en') expect(ps.segment).to eq(["There were 500 cases in the U.S.", "The U.S. Commission asked the U.S. Government to give their opinion on the issue."]) end it 'correctly segments text #048' do ps = PragmaticSegmenter::Segmenter.new(text: "CELLULAR COMMUNICATIONS INC. sold 1,550,000 common shares at $21.75 each yesterday, according to lead underwriter L.F. Rothschild & Co. (cited from WSJ 05/29/1987)", language: 'en') expect(ps.segment).to eq(["CELLULAR COMMUNICATIONS INC. 
sold 1,550,000 common shares at $21.75 each yesterday, according to lead underwriter L.F. Rothschild & Co. (cited from WSJ 05/29/1987)"]) end it 'correctly segments text #049' do ps = PragmaticSegmenter::Segmenter.new(text: "Rolls-Royce Motor Cars Inc. said it expects its U.S. sales to remain steady at about 1,200 cars in 1990. `So what if you miss 50 tanks somewhere?' asks Rep. Norman Dicks (D., Wash.), a member of the House group that visited the talks in Vienna. Later, he recalls the words of his Marxist mentor: `The people! Theft! The holy fire!'", language: 'en') expect(ps.segment).to eq(["Rolls-Royce Motor Cars Inc. said it expects its U.S. sales to remain steady at about 1,200 cars in 1990.", "'So what if you miss 50 tanks somewhere?' asks Rep. Norman Dicks (D., Wash.), a member of the House group that visited the talks in Vienna.", "Later, he recalls the words of his Marxist mentor: 'The people! Theft! The holy fire!'"]) end it 'correctly segments text #050' do ps = PragmaticSegmenter::Segmenter.new(text: "He climbed Mt. Fuji.", language: 'en') expect(ps.segment).to eq(["He climbed Mt. Fuji."]) end it 'correctly segments text #051' do ps = PragmaticSegmenter::Segmenter.new(text: "He speaks !Xũ, !Kung, ǃʼOǃKung, !Xuun, !Kung-Ekoka, ǃHu, ǃKhung, ǃKu, ǃung, ǃXo, ǃXû, ǃXung, ǃXũ, and !Xun.", language: 'en') expect(ps.segment).to eq(["He speaks !Xũ, !Kung, ǃʼOǃKung, !Xuun, !Kung-Ekoka, ǃHu, ǃKhung, ǃKu, ǃung, ǃXo, ǃXû, ǃXung, ǃXũ, and !Xun."]) end it 'correctly segments text #052' do ps = PragmaticSegmenter::Segmenter.new(text: "Test strange period.Does it segment correctly.", language: 'en') expect(ps.segment).to eq(["Test strange period.", "Does it segment correctly."]) end it 'correctly segments text #053' do ps = PragmaticSegmenter::Segmenter.new(text: "

Hello

\n

This is a test. Another test.

\n

\n\n

", language: 'en') expect(ps.segment).to eq(["Hello", "This is a test.", "Another test."]) end it 'correctly segments text #054' do ps = PragmaticSegmenter::Segmenter.new(text: "This sentence ends with the psuedo-number x10. This one with the psuedo-number %3.00. One last sentence.", language: 'en') expect(ps.segment).to eq(["This sentence ends with the psuedo-number x10.", "This one with the psuedo-number %3.00.", "One last sentence."]) end it 'correctly segments text #055' do ps = PragmaticSegmenter::Segmenter.new(text: "Testing mixed numbers Jahr10. And another 0.3 %11. That's weird.", language: 'en') expect(ps.segment).to eq(["Testing mixed numbers Jahr10.", "And another 0.3 %11.", "That's weird."]) end it 'correctly segments text #056' do ps = PragmaticSegmenter::Segmenter.new(text: "Were Jane and co. at the party?", language: 'en') expect(ps.segment).to eq(["Were Jane and co. at the party?"]) end it 'correctly segments text #057' do ps = PragmaticSegmenter::Segmenter.new(text: "St. Michael's Church is on 5th st. near the light.", language: 'en') expect(ps.segment).to eq(["St. Michael's Church is on 5th st. near the light."]) end it 'correctly segments text #058' do ps = PragmaticSegmenter::Segmenter.new(text: "Let's ask Jane and co. They should know.", language: 'en') expect(ps.segment).to eq(["Let's ask Jane and co.", "They should know."]) end it 'correctly segments text #059' do ps = PragmaticSegmenter::Segmenter.new(text: "He works at Yahoo! and Y!J.", language: 'en') expect(ps.segment).to eq(["He works at Yahoo! and Y!J."]) end it 'correctly segments text #060' do ps = PragmaticSegmenter::Segmenter.new(text: 'The Scavenger Hunt ends on Dec. 31st, 2011.', language: 'en') expect(ps.segment).to eq(['The Scavenger Hunt ends on Dec. 
31st, 2011.']) end it 'correctly segments text #061' do ps = PragmaticSegmenter::Segmenter.new(text: "Putter King Scavenger Hunt Trophy\n(6 3/4\" Engraved Crystal Trophy - Picture Coming Soon)\nThe Putter King team will judge the scavenger hunt and all decisions will be final. The scavenger hunt is open to anyone and everyone. The scavenger hunt ends on Dec. 31st, 2011.", language: 'en') expect(ps.segment).to eq(["Putter King Scavenger Hunt Trophy", "(6 3/4\" Engraved Crystal Trophy - Picture Coming Soon)", "The Putter King team will judge the scavenger hunt and all decisions will be final.", "The scavenger hunt is open to anyone and everyone.", "The scavenger hunt ends on Dec. 31st, 2011."]) end it 'correctly segments text #062' do ps = PragmaticSegmenter::Segmenter.new(text: "Unauthorized modifications, alterations or installations of or to this equipment are prohibited and are in violation of AR 750-10. Any such unauthorized modifications, alterations or installations could result in death, injury or damage to the equipment.", language: 'en') expect(ps.segment).to eq(["Unauthorized modifications, alterations or installations of or to this equipment are prohibited and are in violation of AR 750-10.", "Any such unauthorized modifications, alterations or installations could result in death, injury or damage to the equipment."]) end it 'correctly segments text #063' do ps = PragmaticSegmenter::Segmenter.new(text: "Header 1.2; Attachment Z\n\n\td. Compliance Log – Volume 12 \n\tAttachment A\n\n\te. Additional Logistics Data\n\tSection 10", language: 'en') expect(ps.segment).to eq(["Header 1.2; Attachment Z", "d. Compliance Log – Volume 12", "Attachment A", "e. Additional Logistics Data", "Section 10"]) end it 'correctly segments text #064' do ps = PragmaticSegmenter::Segmenter.new(text: "a.) The first item b.) The second item c.) The third list item", language: 'en') expect(ps.segment).to eq(["a.) The first item", "b.) The second item", "c.) 
The third list item"]) end it 'correctly segments text #065' do ps = PragmaticSegmenter::Segmenter.new(text: "a) The first item b) The second item c) The third list item", language: 'en') expect(ps.segment).to eq(["a) The first item", "b) The second item", "c) The third list item"]) end it 'correctly segments text #066' do ps = PragmaticSegmenter::Segmenter.new(text: "Hello Wolrd. Here is a secret code AS750-10. Another sentence. Finally, this. 1. The first item 2. The second item 3. The third list item 4. Hello 5. Hello 6. Hello 7. Hello 8. Hello 9. Hello 10. Hello 11. Hello", language: 'en') expect(ps.segment).to eq(["Hello Wolrd.", "Here is a secret code AS750-10.", "Another sentence.", "Finally, this.", "1. The first item", "2. The second item", "3. The third list item", "4. Hello", "5. Hello", "6. Hello", "7. Hello", "8. Hello", "9. Hello", "10. Hello", "11. Hello"]) end it 'correctly segments text #067' do ps = PragmaticSegmenter::Segmenter.new(text: "He works for ABC Ltd. and sometimes for BCD Ltd. She works for ABC Co. and BCD Co. They work for ABC Corp. and BCD Corp.", language: 'en') expect(ps.segment).to eq(["He works for ABC Ltd. and sometimes for BCD Ltd.", "She works for ABC Co. and BCD Co.", "They work for ABC Corp. and BCD Corp."]) end it 'correctly segments text #068' do ps = PragmaticSegmenter::Segmenter.new(text: "<b>J1.txt</b>", language: 'en') expect(ps.segment).to eq(["J1.txt"]) end it 'correctly segments text #069' do ps = PragmaticSegmenter::Segmenter.new(text: "On Jan. 20, former Sen. Barack Obama became the 44th President of the U.S. Millions attended the Inauguration.", language: 'en') expect(ps.segment).to eq(["On Jan. 20, former Sen. Barack Obama became the 44th President of the U.S.", "Millions attended the Inauguration."]) end it 'correctly segments text #070' do ps = PragmaticSegmenter::Segmenter.new(text: "The U.K. Panel on enivronmental issues said it was true. Finally he left the U.K. 
He went to a new location.", language: 'en') expect(ps.segment).to eq(["The U.K. Panel on enivronmental issues said it was true.", "Finally he left the U.K.", "He went to a new location."]) end it 'correctly segments text #071' do ps = PragmaticSegmenter::Segmenter.new(text: "He left at 6 P.M. Travelers who didn't get the warning at 5 P.M. left later.", language: 'en') expect(ps.segment).to eq(["He left at 6 P.M.", "Travelers who didn't get the warning at 5 P.M. left later."]) end it 'correctly segments text #072' do ps = PragmaticSegmenter::Segmenter.new(text: "He left at 6 a.m. Travelers who didn't get the warning at 5 a.m. left later.", language: 'en') expect(ps.segment).to eq(["He left at 6 a.m.", "Travelers who didn't get the warning at 5 a.m. left later."]) end it 'correctly segments text #073' do ps = PragmaticSegmenter::Segmenter.new(text: "He left at 6 A.M. Travelers who didn't get the warning at 5 A.M. left later.", language: 'en') expect(ps.segment).to eq(["He left at 6 A.M.", "Travelers who didn't get the warning at 5 A.M. left later."]) end it 'correctly segments text #074' do ps = PragmaticSegmenter::Segmenter.new(text: "Hello World. My name is Jonas. What is your name? My name is Jonas. There it is! I found it. My name is Jonas E. Smith. Please turn to p. 55. Were Jane and co. at the party? They closed the deal with Pitt, Briggs & Co. at noon. Let's ask Jane and co. They should know. They closed the deal with Pitt, Briggs & Co. It closed yesterday. I can see Mt. Fuji from here. St. Michael's Church is on 5th st. near the light. That is JFK Jr.'s book. I visited the U.S.A. last year. I live in the E.U. How about you? I live in the U.S. How about you? I work for the U.S. Government in Virginia. I have lived in the U.S. for 20 years. She has $100.00 in her bag. She has $100.00. It is in her bag. He teaches science (He previously worked for 5 years as an engineer.) at the local University. Her email is Jane.Doe@example.com. I sent her an email. 
The site is: https://www.example.50.com/new-site/awesome_content.html. Please check it out. She turned to him, 'This is great.' she said. She turned to him, \"This is great.\" she said. She turned to him, \"This is great.\" She held the book out to show him. Hello!! Long time no see. Hello?? Who is there? Hello!? Is that you? Hello?! Is that you? 1.) The first item 2.) The second item 1.) The first item. 2.) The second item. 1) The first item 2) The second item 1) The first item. 2) The second item. 1. The first item 2. The second item 1. The first item. 2. The second item. • 9. The first item • 10. The second item ⁃9. The first item ⁃10. The second item a. The first item b. The second item c. The third list item \rIt was a cold \nnight in the city. features\ncontact manager\nevents, activities\n You can find it at N°. 1026.253.553. That is where the treasure is. She works at Yahoo! in the accounting department. We make a good team, you and I. Did you see Albert I. Jones yesterday? Thoreau argues that by simplifying one’s life, “the laws of the universe will appear less complex. . . .”. \"Bohr [...] used the analogy of parallel stairways [...]\" (Smith 55). If words are left off at the end of a sentence, and that is all that is omitted, indicate the omission with ellipsis marks (preceded and followed by a space) and then indicate the end of the sentence with a period . . . . Next sentence. I never meant that.... She left the store. I wasn’t really ... well, what I mean...see . . . what I'm saying, the thing is . . . I didn’t mean it. One further habit which was somewhat weakened . . . was that of combining words into self-interpreting compounds. . . . The practice was not abandoned. . . .", language: nil) expect(ps.segment).to eq(["Hello World.", "My name is Jonas.", "What is your name?", "My name is Jonas.", "There it is!", "I found it.", "My name is Jonas E. Smith.", "Please turn to p. 55.", "Were Jane and co. 
at the party?", "They closed the deal with Pitt, Briggs & Co. at noon.", "Let's ask Jane and co.", "They should know.", "They closed the deal with Pitt, Briggs & Co.", "It closed yesterday.", "I can see Mt. Fuji from here.", "St. Michael's Church is on 5th st. near the light.", "That is JFK Jr.'s book.", "I visited the U.S.A. last year.", "I live in the E.U.", "How about you?", "I live in the U.S.", "How about you?", "I work for the U.S. Government in Virginia.", "I have lived in the U.S. for 20 years.", "She has $100.00 in her bag.", "She has $100.00.", "It is in her bag.", "He teaches science (He previously worked for 5 years as an engineer.) at the local University.", "Her email is Jane.Doe@example.com.", "I sent her an email.", "The site is: https://www.example.50.com/new-site/awesome_content.html.", "Please check it out.", "She turned to him, 'This is great.' she said.", "She turned to him, \"This is great.\" she said.", "She turned to him, \"This is great.\"", "She held the book out to show him.", "Hello!!", "Long time no see.", "Hello??", "Who is there?", "Hello!?", "Is that you?", "Hello?!", "Is that you?", "1.) The first item", "2.) The second item", "1.) The first item.", "2.) The second item.", "1) The first item", "2) The second item", "1) The first item.", "2) The second item.", "1. The first item", "2. The second item", "1. The first item.", "2. The second item.", "• 9. The first item", "• 10. The second item", "⁃9. The first item", "⁃10. The second item", "a. The first item", "b. The second item", "c. The third list item", "It was a cold night in the city.", "features", "contact manager", "events, activities", "You can find it at N°. 1026.253.553.", "That is where the treasure is.", "She works at Yahoo! in the accounting department.", "We make a good team, you and I.", "Did you see Albert I. Jones yesterday?", "Thoreau argues that by simplifying one’s life, “the laws of the universe will appear less complex. . . .”.", "\"Bohr [...] 
used the analogy of parallel stairways [...]\" (Smith 55).", "If words are left off at the end of a sentence, and that is all that is omitted, indicate the omission with ellipsis marks (preceded and followed by a space) and then indicate the end of the sentence with a period . . . .", "Next sentence.", "I never meant that....", "She left the store.", "I wasn’t really ... well, what I mean...see . . . what I'm saying, the thing is . . . I didn’t mean it.", "One further habit which was somewhat weakened . . . was that of combining words into self-interpreting compounds.", ". . . The practice was not abandoned. . . ."])
    end

    # Middle initials ("Mark E.", "John A.", "Jane B.") do not split, while the
    # inline "a. b. c." list in between yields one segment per item.
    it 'correctly segments text #075' do
      ps = PragmaticSegmenter::Segmenter.new(text: "His name is Mark E. Smith. a. here it is b. another c. one more\n They went to the store. It was John A. Smith. She was Jane B. Smith.", language: "en")
      expect(ps.segment).to eq(["His name is Mark E. Smith.", "a. here it is", "b. another", "c. one more", "They went to the store.", "It was John A. Smith.", "She was Jane B. Smith."])
    end

    # Two separate "x)" lists (a-c and w-y) around an ordinary sentence.
    it 'correctly segments text #076' do
      ps = PragmaticSegmenter::Segmenter.new(text: "a) here it is b) another c) one more\n They went to the store. w) hello x) hello y) hello", language: "en")
      expect(ps.segment).to eq(["a) here it is", "b) another", "c) one more", "They went to the store.", "w) hello", "x) hello", "y) hello"])
    end

    # NOTE(review): the statement below opens example #077 but runs straight into
    # what looks like the tail of a later example's expectation (the next example
    # in the file is #118). The code in between appears to be missing from this
    # copy — TODO restore it from the upstream spec before editing here.
    it 'correctly segments text #077' do
      ps = PragmaticSegmenter::Segmenter.new(text: "Hello{b^>1<b^} hello{b^>115% decrease from baseline resulting in values <11 g/ dL) were observed in 6% of Tracleer-treated patients and 3% of placebo-treated patients.", "Bosentan is highly bound (>98%) to plasma proteins, mainly albumin."])
    end

    # "Inc." followed by a curly-quoted parenthetical, twice in one sentence; this
    # example builds the segmenter with clean: false and no explicit language.
    it 'correctly segments text #118' do
      text = "The parties to this Agreement are PragmaticSegmenterExampleCompanyA Inc. (“Company A”), and PragmaticSegmenterExampleCompanyB Inc. (“Company B”)." 
ps = PragmaticSegmenter::Segmenter.new(text: text, clean: false) expect(ps.segment).to eq(["The parties to this Agreement are PragmaticSegmenterExampleCompanyA Inc. (“Company A”), and PragmaticSegmenterExampleCompanyB Inc. (“Company B”)."]) end it 'correctly segments text #119' do ps = PragmaticSegmenter::Segmenter.new(text: "Unlike the abbreviations i.e. and e.g., viz. is used to indicate a detailed description of something stated before.") expect(ps.segment).to eq(["Unlike the abbreviations i.e. and e.g., viz. is used to indicate a detailed description of something stated before."]) end it 'correctly segments text #120' do ps = PragmaticSegmenter::Segmenter.new(text: "For example, ‘dragonswort… is said that it should be grown in dragon’s blood. It grows at the tops of mountains where there are groves of trees, chiefly in holy places and in the country that is called Apulia’ (translated by Anne Van Arsdall, in Medieval Herbal Remedies: The Old English Herbarium and Anglo-Saxon Medicine p. 154). The Herbal also includes lore about other plants, such as the mandrake.") expect(ps.segment).to eq(["For example, ‘dragonswort… is said that it should be grown in dragon’s blood. It grows at the tops of mountains where there are groves of trees, chiefly in holy places and in the country that is called Apulia’ (translated by Anne Van Arsdall, in Medieval Herbal Remedies: The Old English Herbarium and Anglo-Saxon Medicine p. 154).", "The Herbal also includes lore about other plants, such as the mandrake."]) end it 'correctly segments text #121' do ps = PragmaticSegmenter::Segmenter.new(text: "Here’s the - ahem - official citation: Baker, C., Anderson, Kenneth, Martin, James, & Palen, Leysia. 
Modeling Open Source Software Communities, ProQuest Dissertations and Theses.") expect(ps.segment).to eq(["Here’s the - ahem - official citation: Baker, C., Anderson, Kenneth, Martin, James, & Palen, Leysia.", "Modeling Open Source Software Communities, ProQuest Dissertations and Theses."]) end it 'correctly segments text #122' do ps = PragmaticSegmenter::Segmenter.new(text: "These include images of various modes of transport and members of the team, all available in .jpeg format. Images can be downloaded from our website. We also offer archives as .zip files.") expect(ps.segment).to eq(["These include images of various modes of transport and members of the team, all available in .jpeg format.", "Images can be downloaded from our website.", "We also offer archives as .zip files."]) end it 'correctly segments text #123' do ps = PragmaticSegmenter::Segmenter.new(text: "Saint Maximus (died 250) is a Christian saint and martyr.[1] The emperor Decius published a decree ordering the veneration of busts of the deified emperors.") expect(ps.segment).to eq(["Saint Maximus (died 250) is a Christian saint and martyr.[1]", "The emperor Decius published a decree ordering the veneration of busts of the deified emperors."]) end it 'correctly segments text #124' do ps = PragmaticSegmenter::Segmenter.new(text: "Differing agendas can potentially create an understanding gap in a consultation.11 12 Take the example of one of the most common presentations in ill health: the common cold.") expect(ps.segment).to eq(["Differing agendas can potentially create an understanding gap in a consultation.11 12", "Take the example of one of the most common presentations in ill health: the common cold."]) end it 'correctly segments text #125' do ps = PragmaticSegmenter::Segmenter.new(text: "Daniel Kahneman popularised the concept of fast and slow thinking: the distinction between instinctive (type 1 thinking) and reflective, analytical cognition (type 2).10 This model relates to doctors achieving a 
balance between efficiency and effectiveness.") expect(ps.segment).to eq(["Daniel Kahneman popularised the concept of fast and slow thinking: the distinction between instinctive (type 1 thinking) and reflective, analytical cognition (type 2).10", "This model relates to doctors achieving a balance between efficiency and effectiveness."]) end it 'correctly segments text #126' do ps = PragmaticSegmenter::Segmenter.new(text: "Its traditional use[1] is well documented in the ethnobotanical literature [2–11]. Leaves, buds, tar and essential oils are used to treat a wide spectrum of diseases.") expect(ps.segment).to eq(["Its traditional use[1] is well documented in the ethnobotanical literature [2–11].", "Leaves, buds, tar and essential oils are used to treat a wide spectrum of diseases."]) end it 'correctly segments text #127' do ps = PragmaticSegmenter::Segmenter.new(text: "Thus increasing the desire for political reform both in Lancashire and in the country at large.[7][8] This was a serious misdemeanour,[16] encouraging them to declare the assembly illegal as soon as it was announced on 31 July.[17][18] The radicals sought a second opinion on the meeting's legality.") expect(ps.segment).to eq(["Thus increasing the desire for political reform both in Lancashire and in the country at large.[7][8]", "This was a serious misdemeanour,[16] encouraging them to declare the assembly illegal as soon as it was announced on 31 July.[17][18]", "The radicals sought a second opinion on the meeting's legality."]) end it 'correctly segments text #128' do ps = PragmaticSegmenter::Segmenter.new(text: "The table in (4) is a sample from the Wall Street Journal (1987).1 According to the distribution all the pairs given in (4) count as candidates for abbreviations.") expect(ps.segment).to eq([ "The table in (4) is a sample from the Wall Street Journal (1987).1", "According to the distribution all the pairs given in (4) count as candidates for abbreviations."]) end end end