require 'rubypants'
require 'digest/md5'
# Ruby 1.8 needs $KCODE to make regular expressions UTF-8 aware. From Ruby 1.9
# onwards the variable has no effect and assigning it only triggers a warning,
# so it is set on 1.8 only.
$KCODE = 'U' if RUBY_VERSION < '1.9'
# A collection of simple helpers for improving web
# typography. Based on TypographyHelper by Luke Hartman and Typogrify.
#
# @example Using all filters
# require 'typogruby'
# Typogruby.improve('my text')
#
# @example Using a single filter
# Typogruby.initial_quotes('my text')
#
# @see http://github.com/hunter/typography-helper
# @see http://code.google.com/p/typogrify
# @author Arjan van der Gaag
module Typogruby
# Get the current gem version number
# @return [String]
def version
  # The VERSION file sits one directory above the directory of this source file.
  version_file = File.join(File.dirname(__FILE__), '..', 'VERSION')
  File.read(version_file)
end
# Applies smartypants to a given piece of text
#
# @example
# smartypants('The "Green" man')
# # => 'The “Green” man'
#
# @param [String] text input text
# @return [String] input text with smartypants applied
def smartypants(text)
  # The leading :: forces lookup of the top-level RubyPants class.
  converter = ::RubyPants.new(text)
  converter.to_html
end
# Converts an ampersand surrounded by whitespace or non-breaking spaces
# to the HTML entity and wraps it in a span with a styled class.
#
# @example
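# amp('One & two')
# # => 'One <span class="amp">&amp;</span> two'
# amp('One &amp; two')
# # => 'One <span class="amp">&amp;</span> two'
# amp('One &#38; two')
# # => 'One <span class="amp">&amp;</span> two'
# amp('One&nbsp;&amp;&nbsp;two')
# # => 'One&nbsp;<span class="amp">&amp;</span>&nbsp;two'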
#
# @example It won't mess up & that are already wrapped, in entities or URLs
#
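# amp('One <span class="amp">&amp;</span> two')
# # => 'One <span class="amp">&amp;</span> two'
# amp('&ldquo;this&rdquo; & <a href="/?that&amp;test">that</a>')
# # => '&ldquo;this&rdquo; <span class="amp">&amp;</span> <a href="/?that&amp;test">that</a>'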
#
# @example It should ignore standalone amps that are in attributes
# amp('<link href="xyz.html" title="One & Two">xyz</link>')
# # => '<link href="xyz.html" title="One &amp; Two">xyz</link>'
#
# @param [String] text input text
# @return [String] input text with ampersands wrapped
def amp(text)
  # Markup emitted for every standalone ampersand. The clean-up gsub below
  # searches for exactly this markup, so the two must stay in sync.
  wrapped = '<span class="amp">&amp;</span>'
  ignore_scripts(text) do |t|
    # $1 is the name of an excluded tag (code or pre), $2 is the whitespace or
    # &nbsp; before the ampersand and $3 is the whitespace or &nbsp; after it.
    t.gsub(/<(code|pre).+?<\/\1>|(\s|&nbsp;)&(?:amp;|#38;)?(\s|&nbsp;)/) { |str|
      # Excluded code/pre blocks are returned untouched.
      $1 ? str : $2 + wrapped + $3
    # Undo the wrapping where it ended up inside an attribute value, leaving
    # only the plain entity there.
    }.gsub(/(\w+)="(.*?)<span class="amp">&amp;<\/span>(.*?)"/, '\1="\2&amp;\3"')
  end
end
# replaces space(s) before the last word (or tag before the last word)
# before an optional closing element (a, em,
# span, strong) before a closing tag (p, h[1-6],
# li, dt, dd) or the end of the string.
#
# @example
# widont('A very simple test')
# # => 'A very simple&nbsp;test'
#
# @example Single word items shouldn't be changed
# widont('Test')
# # => 'Test'
# widont(' Test')
# # => ' Test'
# widont('<ul><li>Test</li></ul>')
# # => '<ul><li>Test</li></ul>'
#
# @see http://mucur.name/posts/widon-t-and-smartypants-helpers-for-rails
# @see http://shauninman.com/archive/2006/08/22/widont_wordpress_plugin
# @param [String] text input text
# @return [String] input text with non-breaking spaces inserted
def widont(text)
  # Replaces the whitespace before the last word of a block with a
  # non-breaking space entity so that the last word never wraps alone.
  # Group 1 is whatever precedes the whitespace, group 2 is everything after
  # it up to and including the closing block tag or end of line.
  ignore_scripts(text) do |t|
    t.gsub(%r{
      ((?:</?(?:a|em|span|strong|i|b)[^>]*>)|[^<>\s]) # must be preceded by an approved inline opening or closing tag or a nontag/nonspace
      \s+                                             # the space to replace
      (([^<>\s]+)                                     # must be followed by non-tag non-space characters
      \s*                                             # optional white space!
      (</(a|em|span|strong|i|b)>\s*)*                 # optional closing inline tags with optional white space after each
      ((</(p|h[1-6]|li|dt|dd)>)|$))                   # end with a closing p, h1-6, li, dt, dd or the end of the line
    }x) do |match|
      # When the matched tail already contains a non-breaking space, use a
      # plain space so that two nbsp entities never end up next to each other.
      joiner = match.include?('&nbsp;') ? ' ' : '&nbsp;'
      $1 + joiner + $2
    end
  end
end
# surrounds two or more consecutive capital letters, perhaps with interspersed digits and periods,
# in a span with a styled class.
#
# @example
# caps("A message from KU")
# # => 'A message from <span class="caps">KU</span>'
#
# @example Allows digits
# caps("A message from 2KU2 with digits")
# # => 'A message from <span class="caps">2KU2</span> with digits'
#
# @example All caps with apostrophes in them shouldn't break. Only handles dumb apostrophes though.
# caps("JIMMY'S")
# # => '<span class="caps">JIMMY\\'S</span>'
# caps("D.O.T.HE34TRFID")
# # => '<span class="caps">D.O.T.HE34TRFID</span>'
#
# @param [String] text input text
# @return [String] input text with caps wrapped
def caps(text)
  ignore_scripts(text) do |t|
    # $1 is the name of an excluded tag (code or pre, matched case-insensitively),
    # $2 is the boundary before the capitals and $3 is the run of capitals.
    # The tag name must be a capturing group because \1 refers back to it.
    t.gsub(/(?i:<(code|pre).+?<\/\1>)|(\s|&nbsp;|^|'|"|>)([A-Z\d][A-Z\d\.']{1,})(?!\w)/) do |str|
      excluded, before, word = $1, $2, $3
      if excluded
        # Leave code/pre blocks exactly as they are.
        str
      elsif word =~ /\A[\d\.]+\z/
        # Plain numbers such as 2010 or 3.50 are not capitals.
        before + word
      else
        before + '<span class="caps">' + word + '</span>'
      end
    end
  end
end
# encloses initial single or double quote, or their entities
# (optionally preceded by a block element and perhaps an inline element)
# with a span that can be styled.
#
# @example
# initial_quotes('"With primes"')
# # => '<span class="dquo">"</span>With primes"'
# initial_quotes("'With single primes'")
# # => '<span class="quo">\\'</span>With single primes\\''
#
# @example With primes and links
# initial_quotes('<a href="#">"With primes and a link"</a>')
# # => '<a href="#"><span class="dquo">"</span>With primes and a link"</a>'
#
# @example with Smartypants-quotes
# initial_quotes('&#8220;With smartypanted quotes&#8221;')
# # => '<span class="dquo">&#8220;</span>With smartypanted quotes&#8221;'
#
# @param [String] text input text
# @return [String] input text with initial quotes wrapped
def initial_quotes(text)
  # $1 is the initial part of the string (start of line or an opening block
  # tag, optional whitespace and an optional opening inline tag), $2 is the
  # quote character or entity, and $3 is set only when that quote is a double quote.
  ignore_scripts(text) do |t|
    t.gsub(/((?:<(?:h[1-6]|p|li|dt|dd)[^>]*>|^)\s*(?:<(?:a|em|strong|span)[^>]*>)?)('|&#8216;|&lsquo;|‘|("|&#8220;|&ldquo;|“))/) do
      lead, quote, double = $1, $2, $3
      # Double quotes get their own class so they can be styled separately.
      css_class = double ? 'dquo' : 'quo'
      %(#{lead}<span class="#{css_class}">#{quote}</span>)
    end
  end
end
# Converts special characters (excluding HTML tags) to HTML entities.
#
# @example
# entities("Aloë Vera") # => "Alo&euml; Vera"
#
# @param [String] text input text
# @return [String] input text with all special characters converted to
# HTML entities.
def entities(text)
  # Splits the input into markup and plain text. Group 1 captures things that
  # must pass through untouched: processing instructions, HTML comments, known
  # HTML tags and existing entities. Group 2 captures everything else, which
  # is handed to encode. No alternative in group 1 may be empty, otherwise it
  # would match zero-width at every position and swallow all plain text.
  pieces = text.scan(/
    ( <\?(?:[^?]*|\?(?!>))*\?>
    | <!-- (?m:.*?) -->
    | <\/? (?i:a|abbr|acronym|address|applet|area|b|base|basefont|bdo|big|blockquote|body|br|button|caption|center|cite|code|col|colgroup|dd|del|dfn|dir|div|dl|dt|em|fieldset|font|form|frame|frameset|h1|h2|h3|h4|h5|h6|head|hr|html|i|iframe|img|input|ins|isindex|kbd|label|legend|li|link|map|menu|meta|noframes|noscript|object|ol|optgroup|option|p|param|pre|q|s|samp|script|select|small|span|strike|strong|style|sub|sup|table|tbody|td|textarea|tfoot|th|thead|title|tr|tt|u|ul|var)\b
      (?:[^>"']|"[^"]*"|'[^']*')*
      >
    | &(?:[a-zA-Z0-9]+|\#[0-9]+|\#x[0-9a-fA-F]+);
    )
    |([^<&]+|[<&])
  /x)
  # Exactly one of tag/chunk is set per match; the other is nil.
  pieces.map { |tag, chunk| tag.to_s + encode(chunk.to_s) }.join
end
# Main entry point: applies all text filters (amp, widont, smartypants, caps and initial_quotes, in that order).
# @param [String] text input text
# @return [String] input text with all filters applied
def improve(text)
  # Each filter receives the output of the previous one.
  result = amp(text)
  result = widont(result)
  result = smartypants(result)
  result = caps(result)
  initial_quotes(result)
end
private
# Convert characters from the map in ./lib/characters.txt
# Code taken from TextMate HTML bundle
# @param [String] text input text
# @return [String] input text with all special characters converted to
# HTML entities.
def encode(text)
  # Lazily build and memoize the character-to-entity-name map. Each line of
  # characters.txt that starts with a decimal code point followed by a name
  # contributes one entry, keyed by the UTF-8 character for that code point.
  @char_to_entity ||= begin
    map = {}
    map_path = File.join(File.dirname(__FILE__), 'characters.txt')
    File.read(map_path).scan(/^(\d+)\s*(.+)$/) do |code_point, name|
      map[[code_point.to_i].pack('U')] = name
    end
    map
  end
  # Replace every non-ASCII character and every HTML-special ASCII character.
  text.gsub(/[^\x00-\x7F]|["'<>&]/) do |ch|
    name = @char_to_entity[ch]
    # Use the named entity when the map has one, otherwise fall back to a
    # hexadecimal numeric character reference.
    name ? "&#{name};" : format('&#x%02X;', ch.unpack('U').first)
  end
end
# Hackish text filter that will make sure our text filters leave inline
# javascript alone without resorting to a full-blown HTML parser.
#
# The idea is simple: every text filter is applied as a block to this
# method. This will preprocess the text and replace any inline scripts
# with a MD5 hash of its entire contents. Then the filter is called,
# and then the hashes are replaced back with their original content.
def ignore_scripts(text)
@ignored_scripts = {}
modified_text = text.gsub(/