# This module is a mixin for classes that want to use a very basic alphabetic # shorthand to reduce text size. The module performs in-place operations, and # expects to find a @compressed instance variable to work from. # # Example: # # include Compress # @original = 'JavaScript' # @compressed = @original.dup # abbr # # => "JS" # # module Compress URL_HOLDER = '__PLACEHOLDER4URLS__' URL_LENGTH = 20 URL_PATTERN = %r{ \b ( (?: [a-z][\w-]+: (?: /{1,3} | [a-z0-9%] ) | www\d{0,3}[.] | [a-z0-9.\-]+[.][a-z]{2,4}/ ) (?: [^\s()<>]+ | \(([^\s()<>]+|(\([^\s()<>]+\)))*\) )+ (?: \(([^\s()<>]+|(\([^\s()<>]+\)))*\) | [^\s`!()\[\]{};:'".,<>?«»“”‘’] ) ) }ix # Calculate the current character count, taking the "virtual size" of # Twitter-shortened URLs into account. def char_count real_url_chars = @urls.join.size virt_url_chars = @urls.count * URL_LENGTH @compressed.size - real_url_chars + virt_url_chars end private # Special abbreviations to increase clarity. # # TODO: A YAML dictionary would be preferrable to case statements if the list # grows to any significant length. def abbr @compressed = @compressed.split.map do |word| case word.downcase when 'and' then '&' when 'javascript' then 'JS' when 'string' then 'str' when 'one' then '1' when 'two' then '2' when 'three' then '3' when 'four' then '4' when 'five' then '5' when 'six' then '6' when 'seven' then '7' when 'eight' then '8' when 'nine' then '9' when 'ten' then '10' when 'eleven' then '11' when 'twelve' then '12' when 'thirteen' then '13' when 'fourteen' then '14' when 'fifteen' then '15' when 'sixteen' then '15' when 'seventeen' then '17' when 'eighteen' then '18' when 'nineteen' then '19' when 'twenty' then '20' else word end end.join ' ' @compressed.gsub! /is (?:an?|the)/, '=' @compressed.gsub! /(in|with)? regards? (to)?/i, 're' @compressed.gsub! /about|regarding|related( to)?|(in response to)/, 're' end # Remove apostrophes from contractions to save more space. def apostrophes @compressed.gsub! /n't/i, 'nt' end # Identify common contractions, taking a few pains to preserve capitalization # of the initial letter. def contractions @compressed.gsub! /I would/i, %q{I'd} @compressed.gsub! /i will(?!= ?not)/i, %q{I'll} @compressed.gsub! /(i)t is/i, %q{\1t's} @compressed.gsub! /(i)s not/i, %q{\1sn't} @compressed.gsub! /(w)ill not/i, %q{\1on't} @compressed.gsub! /(c)an ?not/i, %q{\1an't} @compressed.gsub! /(d)o(es)? not/i, %q{\1o\2n't} @compressed.gsub! /(s)hould not/i, %q{\1houldn't} @compressed.gsub! /(m)ust not/i, %q{\1usn't} end # Fix common grammar mistakes that also save space. def correct_grammar @compressed.gsub! /s's/i, ?' end # Remove duplicate lowercase consonants. Assume duplicate capital letters # like 'LLC' are intentional. def dedupe_consonants consonants = [*'a'..'z'].flatten.reject { |c| c =~ /[aeiou]/ } regex = /(#{consonants})\1+/ @compressed = @compressed.split.map do |word| next word unless word =~ regex word.gsub! regex, $1.to_s end.join ' ' end # Remove duplicate punctuation characters. Make an exception for ellipses # and dashes. def dedupe_punct regex = /([[:punct:]])\1+/ @compressed = @compressed.split.map do |word| word.gsub! /\.{4,}/, '...' word.gsub! /-{3,}/, '--' next word if word.include? '...' or word.match /-{2,3}/ next word unless word =~ regex word.gsub! regex, '\1' end.join ' ' end # Replace 'ing' with 'g'. Excludes short words like "ring" and "sing," and # checks an exception list for special cases. def ing exceptions = %w[fling] @compressed = @compressed.split.map do |word| next word unless word.end_with? 'ing' next word if word.start_with? '#' next word if word.size <= 4 next word if exceptions.include? word word.sub(/ing$/, 'g') end.join ' ' end # Remove lowercase vowels in longer words, unless it is the starting letter. def remove_vowels @compressed = @compressed.split.map do |word| next word if word.start_with? '#' word.size >= 4 ? word.gsub(/(?