package wordcram; import java.util.*; import java.util.Map.Entry; import cue.lang.Counter; import cue.lang.WordIterator; import cue.lang.stop.StopWords; class WordCounter { private StopWords cueStopWords; private Set extraStopWords = new HashSet<>(); private boolean excludeNumbers; public WordCounter() { this(null); } public WordCounter(StopWords cueStopWords) { this.cueStopWords = cueStopWords; } public WordCounter withExtraStopWords(String extraStopWordsString) { String[] stopWordsArray = extraStopWordsString.toLowerCase().split(" "); extraStopWords = new HashSet<>(Arrays.asList(stopWordsArray)); return this; } public WordCounter shouldExcludeNumbers(boolean shouldExcludeNumbers) { excludeNumbers = shouldExcludeNumbers; return this; } public Word[] count(String text, RenderOptions renderOptions) { if (cueStopWords == null) { cueStopWords = StopWords.guess(text); if (cueStopWords == StopWords.Arabic || cueStopWords == StopWords.Farsi || cueStopWords == StopWords.Hebrew) { renderOptions.rightToLeft = true; } tellScripterAboutTheGuess(cueStopWords); } return countWords(text); } private void tellScripterAboutTheGuess(StopWords stopWords) { // TODO Find a better way to do this; it prints out during the tests. =p if (stopWords == null) { System.out.println("cue.language can't guess what language your text is in."); } else { System.out.println("cue.language guesses your text is in " + stopWords); } } private Word[] countWords(String text) { Counter counter = new Counter<>(); for (String word : new WordIterator(text)) { if (shouldCountWord(word)) { counter.note(word); } } List words = new ArrayList<>(); counter.entrySet().forEach((entry) -> { words.add(new Word(entry.getKey(), (int)entry.getValue())); }); return words.toArray(new Word[0]); } private boolean shouldCountWord(String word) { return !isStopWord(word) && !(excludeNumbers && isNumeric(word)); } private boolean isNumeric(String word) { try { Double.parseDouble(word); return true; } catch (NumberFormatException x) { return false; } } private boolean isStopWord(String word) { boolean cueSaysStopWord = cueStopWords != null && cueStopWords.isStopWord(word); boolean extraSaysStopWord = extraStopWords.contains(word.toLowerCase()); return cueSaysStopWord || extraSaysStopWord; } }