src/cue/lang/NGramIterator.java in ruby_wordcram-2.0.0 vs src/cue/lang/NGramIterator.java in ruby_wordcram-2.0.1
- old
+ new
@@ -25,127 +25,132 @@
/**
* Construct with a {@link String}, some integer n, and a {@link Locale};
* retrieve a sequence of {@link String}s, each of which has n words that appear
* contiguously within a sentence. "Words" are as defined by the
* {@link WordIterator}.
- *
+ *
* <p>
* If you don't provide a {@link Locale}, you get the default {@link Locale} for
* your system, which may or may not be what you want. The {@link Locale} is
* used by a {@link SentenceIterator} to find sentence breaks.
- *
+ *
* <p>
* Example:
- *
+ *
* <pre>
* final String lyric = "This happened once before. I came to your door. No reply.";
* for (final String s : new NGramIterator(3, lyric)) {
* System.out.println(s);
* }
* for (final String s : new NGramIterator(2, lyric)) {
* System.out.println(s);
* }
- *
+ *
* This happened once
* happened once before
* I came to
* came to your
* to your door
- *
+ *
* This happened
* happened once
* once before
* I came
* came to
* to your
* your door
* No reply
* </pre>
- *
+ *
* @author Jonathan Feinberg <jdf@us.ibm.com>
- *
+ *
*/
public class NGramIterator extends IterableText {
- private final SentenceIterator sentenceIterator;
- private final LinkedList<String> grams = new LinkedList<String>();
- private final int n;
- private final StopWords stopWords;
- private String next;
- private Iterator<String> currentWordIterator;
+ private final SentenceIterator sentenceIterator;
+ private final LinkedList<String> grams = new LinkedList<>();
+ private final int n;
+ private final StopWords stopWords;
- public NGramIterator(final int n, final String text) {
- this(n, text, Locale.getDefault());
- }
+ private String next;
+ private Iterator<String> currentWordIterator;
- public NGramIterator(final int n, final String text, final Locale locale) {
- this(n, text, locale, null);
- }
+ public NGramIterator(final int n, final String text) {
+ this(n, text, Locale.getDefault());
+ }
- public NGramIterator(final int n, final String text, final Locale locale,
- final StopWords stopWords) {
- this.n = n;
- this.sentenceIterator = new SentenceIterator(text, locale);
- this.stopWords = stopWords;
- loadNext();
- }
+ public NGramIterator(final int n, final String text, final Locale locale) {
+ this(n, text, locale, null);
+ }
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
+ public NGramIterator(final int n, final String text, final Locale locale,
+ final StopWords stopWords) {
+ this.n = n;
+ this.sentenceIterator = new SentenceIterator(text, locale);
+ this.stopWords = stopWords;
+ loadNext();
+ }
- @Override
- public String next() {
- if (next == null) {
- throw new NoSuchElementException();
- }
- final String result = next;
- loadNext();
- return result;
- }
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
- @Override
- public boolean hasNext() {
- return next != null;
- }
+ @Override
+ public String next() {
+ if (next == null) {
+ throw new NoSuchElementException();
+ }
+ final String result = next;
+ loadNext();
+ return result;
+ }
- private void loadNext() {
- next = null;
- if (!grams.isEmpty()) {
- grams.pop();
- }
- while (grams.size() < n) {
- while (currentWordIterator == null
- || !currentWordIterator.hasNext()) {
- if (!sentenceIterator.hasNext()) {
- return;
- }
- grams.clear();
- currentWordIterator = new WordIterator(sentenceIterator.next())
- .iterator();
- for (int i = 0; currentWordIterator.hasNext() && i < n - 1; i++) {
- maybeAddWord();
- }
- }
- // now grams has n-1 words in it and currentWordIterator hasNext
- maybeAddWord();
- }
- final StringBuilder sb = new StringBuilder();
- for (final String gram : grams) {
- if (sb.length() > 0) {
- sb.append(" ");
- }
- sb.append(gram);
- }
- next = sb.toString();
- }
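+ /**
+ * Reports whether another n-gram is available from this iterator.
+ * @return {@code true} if an n-gram has been preloaded and not yet returned by {@link #next()}
+ */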
+ @Override
+ public boolean hasNext() {
+ return next != null;
+ }
- private void maybeAddWord() {
- final String nextWord = currentWordIterator.next();
- if (stopWords != null && stopWords.isStopWord(nextWord)) {
- grams.clear();
- } else {
- grams.add(nextWord);
- }
- }
+ private void loadNext() {
+ next = null;
+ if (!grams.isEmpty()) {
+ grams.pop();
+ }
+ while (grams.size() < n) {
+ while (currentWordIterator == null
+ || !currentWordIterator.hasNext()) {
+ if (!sentenceIterator.hasNext()) {
+ return;
+ }
+ grams.clear();
+ currentWordIterator = new WordIterator(sentenceIterator.next())
+ .iterator();
+ for (int i = 0; currentWordIterator.hasNext() && i < n - 1; i++) {
+ maybeAddWord();
+ }
+ }
+ // grams holds at most n-1 words here (fewer if a stop word cleared it) and currentWordIterator hasNext
+ maybeAddWord();
+ }
+ final StringBuilder sb = new StringBuilder();
+ grams.forEach((gram) -> {
+ if (sb.length() > 0) {
+ sb.append(" ");
+ }
+ sb.append(gram);
+ });
+ next = sb.toString();
+ }
+
+ private void maybeAddWord() {
+ final String nextWord = currentWordIterator.next();
+ if (stopWords != null && stopWords.isStopWord(nextWord)) {
+ grams.clear();
+ } else {
+ grams.add(nextWord);
+ }
+ }
}