require 'rubypants'
require 'digest/md5'
# A collection of simple helpers for improving web
# typography. Based on TypographyHelper by Luke Hartman and Typogrify.
#
# @example Using all filters
# require 'typogruby'
# Typogruby.improve('my text')
#
# @example Using a single filter
# Typogruby.initial_quotes('my text')
#
# @see http://github.com/hunter/typography-helper
# @see http://code.google.com/p/typogrify
# @author Arjan van der Gaag
module Typogruby
# Reads the gem's version from the VERSION file located one directory
# above this source file.
#
# @return [String] the contents of the VERSION file, exactly as stored
def version
  version_path = File.join(File.dirname(__FILE__), '..', 'VERSION')
  File.read(version_path)
end
# Runs a piece of text through RubyPants and returns the HTML it produces.
#
# @example
#   smartypants('The "Green" man')
#   # => 'The “Green” man'
#   # (NOTE(review): RubyPants may emit the curly quotes as HTML entities
#   # rather than literal characters -- confirm against the gem.)
#
# @param [String] text input text
# @return [String] the result of RubyPants#to_html for the input text
def smartypants(text)
  converter = ::RubyPants.new(text)
  converter.to_html
end
# Converts an ampersand that is surrounded by whitespace or by non-breaking
# space entities into the &amp; HTML entity and wraps it in a span with the
# "amp" class so it can be styled.
#
# @example
#   amp('One & two')
#   # => 'One <span class="amp">&amp;</span> two'
#   amp('One &amp; two')
#   # => 'One <span class="amp">&amp;</span> two'
#   amp('One &#38; two')
#   # => 'One <span class="amp">&amp;</span> two'
#   amp('One&nbsp;&amp;&nbsp;two')
#   # => 'One&nbsp;<span class="amp">&amp;</span>&nbsp;two'
#
# @example It won't mess up ampersands that are already wrapped, in entities or URLs
#   amp('One <span class="amp">&amp;</span> two')
#   # => 'One <span class="amp">&amp;</span> two'
#   amp('&ldquo;this&rdquo; & <a href="/?that&amp;test">that</a>')
#   # => '&ldquo;this&rdquo; <span class="amp">&amp;</span> <a href="/?that&amp;test">that</a>'
#
# @example Standalone ampersands in attributes are escaped but not wrapped
#   amp('<link href="xyz.html" title="One & Two">xyz</link>')
#   # => '<link href="xyz.html" title="One &amp; Two">xyz</link>'
#
# @param [String] text input text
# @return [String] input text with ampersands wrapped
def amp(text)
  ignore_scripts(text) do |t|
    # $1 is an excluded code/pre element, $2 is the space before the
    # ampersand and $3 is the space after it.
    # NOTE: "." does not match newlines here, so only code/pre elements
    # that sit on a single line are left untouched.
    wrapped = t.gsub(/<(code|pre).+?<\/\1>|(\s|&nbsp;)&(?:amp;|#38;)?(\s|&nbsp;)/) do |match|
      excluded, before, after = $1, $2, $3
      excluded ? match : "#{before}<span class=\"amp\">&amp;</span>#{after}"
    end
    # A span inside an attribute value would break the markup, so undo the
    # wrapping there and keep only the escaped entity.
    wrapped.gsub(/(\w+)="(.*?)<span class="amp">&amp;<\/span>(.*?)"/, '\1="\2&amp;\3"')
  end
end
# Replaces the whitespace before the last word (or before a tag that
# precedes the last word) with a non-breaking space entity, so the last
# word is never left alone on a line. The last word may be followed by
# optional closing inline elements (a, em, span, strong, i, b) and must be
# followed by a closing block tag (p, h[1-6], li, dt, dd) or the end of a
# line.
#
# @example
#   widont('A very simple test')
#   # => 'A very simple&nbsp;test'
#   widont('<p>Hello <em>big world</em></p>')
#   # => '<p>Hello <em>big&nbsp;world</em></p>'
#
# @example Single word items shouldn't be changed
#   widont('Test')
#   # => 'Test'
#   widont(' Test')
#   # => ' Test'
#   widont('<ul><li>Test</li></ul>')
#   # => '<ul><li>Test</li></ul>'
#
# @see http://mucur.name/posts/widon-t-and-smartypants-helpers-for-rails
# @see http://shauninman.com/archive/2006/08/22/widont_wordpress_plugin
# @param [String] text input text
# @return [String] input text with non-breaking spaces inserted
def widont(text)
  ignore_scripts(text) do |t|
    t.gsub(%r{
      ((?:</?(?:a|em|span|strong|i|b)[^>]*>)|[^<>\s]) # must be preceded by an approved inline opening or closing tag or a non-tag non-space character
      \s+                                             # the space to replace
      (([^<>\s]+)                                     # must be followed by non-tag non-space characters
      \s*                                             # optional white space
      (</(a|em|span|strong|i|b)>\s*)*                 # optional closing inline tags with optional white space after each
      ((</(p|h[1-6]|li|dt|dd)>)|$))                   # end with a closing p, h1-6, li, dt or dd tag, or the end of a line
    }x) do |match|
      before, rest = $1, $2
      # Do not add another nbsp right before one that is already there.
      joiner = match.include?('&nbsp;') ? ' ' : '&nbsp;'
      "#{before}#{joiner}#{rest}"
    end
  end
end
# Surrounds two or more consecutive capital letters, perhaps with
# interspersed digits, periods and apostrophes, in a span with the "caps"
# class so they can be styled. Runs made up of digits and periods only are
# left alone, as is anything inside a code or pre element.
#
# @example
#   caps("A message from KU")
#   # => 'A message from <span class="caps">KU</span>'
#
# @example Allows digits
#   caps("A message from 2KU2 with digits")
#   # => 'A message from <span class="caps">2KU2</span> with digits'
#
# @example All caps with apostrophes in them shouldn't break. Only handles dumb apostrophes though.
#   caps("JIMMY'S")
#   # => '<span class="caps">JIMMY\'S</span>'
#   caps("D.O.T.HE34TRFID")
#   # => '<span class="caps">D.O.T.HE34TRFID</span>'
#
# @example Code and pre elements are skipped
#   caps("<code>CAPS</code> and CAPS")
#   # => '<code>CAPS</code> and <span class="caps">CAPS</span>'
#
# @param [String] text input text
# @return [String] input text with caps wrapped
def caps(text)
  ignore_scripts(text) do |t|
    # $1 is an excluded code/pre element, $2 is the part before the caps
    # and $3 is the caps match.
    # NOTE: "." does not match newlines here, so only code/pre elements
    # that sit on a single line are excluded.
    t.gsub(/(?i:<(code|pre).+?<\/\1>)|(\s|&nbsp;|^|'|"|>)([A-Z\d][A-Z\d\.']{1,})(?!\w)/) do |str|
      excluded, before, word = $1, $2, $3
      if excluded
        str
      elsif word =~ /\A[\d.]+\z/
        # Plain numbers such as "1999." are not capitals; leave them be.
        before + word
      else
        "#{before}<span class=\"caps\">#{word}</span>"
      end
    end
  end
end
# Encloses an initial single or double quote, or its entity, in a span
# that can be styled: class "quo" for single quotes and "dquo" for double
# quotes. The quote may be preceded by a block element (h1-6, p, li, dt,
# dd) and perhaps an inline element (a, em, strong, span); otherwise it
# must sit at the start of a line.
#
# @example
#   initial_quotes('"With primes"')
#   # => '<span class="dquo">"</span>With primes"'
#   initial_quotes("'With single primes'")
#   # => '<span class="quo">\'</span>With single primes\''
#
# @example With primes and links
#   initial_quotes('<a href="#">"With primes and a link"</a>')
#   # => '<a href="#"><span class="dquo">"</span>With primes and a link"</a>'
#
# @example With Smartypants-quotes
#   initial_quotes('&#8220;With smartypanted quotes&#8221;')
#   # => '<span class="dquo">&#8220;</span>With smartypanted quotes&#8221;'
#
# @param [String] text input text
# @return [String] input text with initial quotes wrapped
def initial_quotes(text)
  ignore_scripts(text) do |t|
    # $1 is the initial part of the string, $2 is the quote or entity, and
    # $3 is only set when that quote is a double quote. Both the literal
    # curly quotes and their numeric/named entities are recognised.
    t.gsub(/((?:<(?:h[1-6]|p|li|dt|dd)[^>]*>|^)\s*(?:<(?:a|em|strong|span)[^>]*>)?)('|\u2018|&#8216;|&lsquo;|("|\u201C|&#8220;|&ldquo;))/) do
      lead, quote, double = $1, $2, $3
      "#{lead}<span class=\"#{double ? 'dquo' : 'quo'}\">#{quote}</span>"
    end
  end
end
# Applies every filter in this module to the text, in this order:
# amp, widont, smartypants, caps and finally initial_quotes.
#
# @param [String] text input text
# @return [String] input text with all filters applied
def improve(text)
  result = amp(text)
  result = widont(result)
  result = smartypants(result)
  result = caps(result)
  initial_quotes(result)
end
private
# Hackish text filter that will make sure our text filters leave inline
# javascript alone without resorting to a full-blown HTML parser.
#
# The idea is simple: every text filter is applied as a block to this
# method. This will preprocess the text and replace any inline scripts
# with a MD5 hash of its entire contents. Then the filter is called,
# and then the hashes are replaced back with their original content.
def ignore_scripts(text)
@ignored_scripts = {}
modified_text = text.gsub(/