Sha256: b806572ca4f096cece5353e4698f36912c13c8697f3fa6e27d84d7078fda8490
Contents?: true
Size: 691 Bytes
Versions: 5
Compression:
Stored size: 691 Bytes
Contents
# coding=utf-8
"""Lower-case word lists exposed as module-level constants.

Each constant is a plain collection of string tokens. The two stop-word
collections are sets; the remaining ones are tuples and therefore keep their
declaration order.
"""

__author__ = 'Josu Bermudez <josu.bermudez@deusto.es>'

# Articles, prepositions, conjunctions, demonstratives, forms of "to be",
# the comma token and the possessive marker "'s".
stop_words = set((
    "a", "an", "the", "of", "at", "on", "upon", "in", "to", "from", "out",
    "as", "so", "such", "or", "and", "those", "this", "these", "that", "for",
    ",", "is", "was", "am", "are", "'s", "been", "were",
))

# Determiners, personal titles, company suffixes and the possessive marker.
extended_stop_words = set((
    "the", "this", "mr.", "miss", "mrs.", "dr.", "ms.", "inc.", "ltd.",
    "corp.", "'s",
))

# Filler / hesitation tokens.
non_words = ("mm", "hmm", "ahem", "um")

# NOTE(review): "u.k" has no trailing dot, unlike "u.s." and "u.s.s.r." --
# confirm against the tokenizer output whether "u.k." was intended.
# NOTE(review): "ltd." also appears in extended_stop_words -- confirm that
# the overlap is deliberate.
invalid_stop_words = ("u.s.", "u.k", "u.s.s.r.", "there", "ltd.")

# Single-element tuples: the trailing comma is what makes them tuples.
invalid_start_words = ("'s",)

invalid_end_words = ("etc.",)

# Compass and relative-position modifiers.
location_modifiers = (
    "east", "west", "north", "south",
    "eastern", "western", "northern", "southern",
    "upper", "lower",
)
Version data entries
5 entries across 5 versions & 1 rubygems