lib/lucky_sneaks/string_extensions.rb in stringex-1.1.0 vs lib/lucky_sneaks/string_extensions.rb in stringex-1.2.0
- old
+ new
@@ -1,17 +1,17 @@
-# coding: utf-8
+# encoding: UTF-8
module LuckySneaks
# These methods are all added on String class.
module StringExtensions
def self.included(base) # :nodoc:
base.extend(ClassMethods)
end
-
+
# Returns the string converted (via Textile/RedCloth) to HTML format
# or self [with a friendly warning] if Redcloth is not available.
- #
+ #
# Using <tt>:lite</tt> argument will cause RedCloth to not wrap the HTML in a container
# P element, which is useful behavior for generating header element text, etc.
# This is roughly equivalent to ActionView's <tt>textilize_without_paragraph</tt>
# except that it makes RedCloth do all the work instead of just gsubbing the return
# from RedCloth.
@@ -29,27 +29,22 @@
else
warn "String#to_html was called without RedCloth being successfully required"
self
end
end
-
- # Create a URI-friendly representation of the string. This is used internally by
+
+ # Create a URI-friendly representation of the string. This is used internally by
# acts_as_url[link:classes/LuckySneaks/ActsAsUrl/ClassMethods.html#M000012]
# but can be called manually in order to generate a URI-friendly version of any string.
def to_url
  # Strip markup/entities/punctuation first, then normalise case.
  plain_text = remove_formatting.downcase
  # Every run of whitespace becomes a single dash ...
  dashed = plain_text.replace_whitespace("-")
  # ... and leading, trailing and repeated dashes are removed.
  dashed.collapse("-")
end
-
+
# Performs multiple text manipulations. Essentially a shortcut for typing them all. View source
# below to see which methods are run.
def remove_formatting
- strip_html_tags.
- convert_german_umlauts.
- convert_accented_entities.
- convert_misc_entities.
- convert_misc_characters.
- to_ascii.collapse
+ strip_html_tags.convert_accented_entities.convert_misc_entities.convert_misc_characters.to_ascii.collapse
end
# Removes HTML tags from text. This code is simplified from Tobias Luettke's regular expression
# in Typo[http://typosphere.org].
def strip_html_tags(leave_whitespace = false)
@@ -59,40 +54,26 @@
rx = /<[!\/?\[]?(#{name}|--)(\s+(#{attr}(\s+#{attr})*))?\s*([!\/?\]]+|--)?>/
(leave_whitespace) ? gsub(rx, "").strip : gsub(rx, "").gsub(/\s+/, " ").strip
end
# Converts HTML entities into the respective non-accented letters. Examples:
- #
+ #
# "&aacute;".convert_accented_entities # => "a"
# "&ccedil;".convert_accented_entities # => "c"
# "&egrave;".convert_accented_entities # => "e"
# "&icirc;".convert_accented_entities # => "i"
# "&oslash;".convert_accented_entities # => "o"
# "&uuml;".convert_accented_entities # => "u"
- #
+ #
# Note: This does not do any conversion of Unicode/Ascii accented-characters. For that
# functionality please use <tt>to_ascii</tt>.
def convert_accented_entities
  # Group 1 captures the base letter; group 2 is the accent name that is discarded.
  accented_entity = /&([A-Za-z])(grave|acute|circ|tilde|uml|ring|cedil|slash);/
  base_letter_only = '\1'
  gsub(accented_entity, base_letter_only)
end
- # Converts German Umlauts to their transliteration according to German conventions.
- def convert_german_umlauts
- map = {
- "Ä" => "ae",
- "Ö" => "oe",
- "Ü" => "ue",
- "ä" => "ae",
- "ö" => "oe",
- "ü" => "ue",
- "ß" => "ss"
- }
- gsub(/#{map.keys.join('|')}/) { |match| map[match] }
- end
-
# Converts HTML entities (taken from common Textile/RedCloth formattings) into plain text formats.
- #
+ #
# Note: This isn't an attempt at complete conversion of HTML entities, just those most likely
# to be generated by Textile.
def convert_misc_entities
dummy = dup
{
@@ -119,22 +100,37 @@
dummy.gsub!(/&#{textiled};/, normal)
end
dummy.gsub(/&[^;]+;/, "")
end
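+ # Converts MS Word 'smart punctuation' to ASCII: curly double and single quotes become
+ # straight quotes and the ellipsis character becomes three periods. Returns a new string.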
+ def convert_smart_punctuation
+ dummy = dup
+ {
+
+ "(“|”|\302\223|\302\224)" => '"',
+ "(‘|’|\302\221|\302\222)" => "'",
+ "…" => "...",
+ }.each do |smart, normal|
+ dummy.gsub!(/#{smart}/, normal)
+ end
+ dummy
+ end
+
# Converts various common plaintext characters to a more URI-friendly representation.
# Examples:
- #
+ #
# "foo & bar".convert_misc_characters # => "foo and bar"
# "Chanel #9".convert_misc_characters # => "Chanel number nine"
# "user@host".convert_misc_characters # => "user at host"
# "google.com".convert_misc_characters # => "google dot com"
# "$10".convert_misc_characters # => "10 dollars"
# "*69".convert_misc_characters # => "star 69"
# "100%".convert_misc_characters # => "100 percent"
# "windows/mac/linux".convert_misc_characters # => "windows slash mac slash linux"
- #
+ #
# Note: Because this method will convert any & symbols to the string "and",
# you should run any methods which convert HTML entities (convert_html_entities and convert_misc_entities)
# before running this method.
def convert_misc_characters
dummy = dup.gsub(/\.{3,}/, " dot dot dot ") # Catch ellipses before single dot rule!
@@ -156,33 +152,34 @@
/(\s|^)£(\d*)(\s|$)/u => '\2 pounds',
/(\s|^)¥(\d*)(\s|$)/u => '\2 yen',
/\s*\*\s*/ => "star",
/\s*%\s*/ => "percent",
/\s*(\\|\/)\s*/ => "slash",
+ /(\s*=\s*)/ => " equals "
}.each do |found, replaced|
replaced = " #{replaced} " unless replaced =~ /\\1/
dummy.gsub!(found, replaced)
end
dummy = dummy.gsub(/(^|\w)'(\w|$)/, '\1\2').gsub(/[\.,:;()\[\]\/\?!\^'"_]/, " ")
end
# Replace runs of whitespace in string. Defaults to a single space but any replacement
# string may be specified as an argument. Examples:
- #
+ #
# "Foo bar".replace_whitespace # => "Foo bar"
# "Foo bar".replace_whitespace("-") # => "Foo-bar"
def replace_whitespace(replace = " ")
  # One or more consecutive whitespace characters count as a single run.
  whitespace_run = /\s+/
  gsub(whitespace_run, replace)
end
# Removes specified character from the beginning and/or end of the string and then performs
# <tt>String#squeeze(character)</tt>, condensing runs of the character within the string.
- #
+ #
# Note: This method has been superseded by ActiveSupport's squish method.
def collapse(character = " ")
  # Escape the argument before interpolating it into the patterns so that regex
  # metacharacters (".", "*", "+", "|", ...) are matched as the literal character
  # to trim. Unescaped, collapse(".") would delete the whole first line and
  # collapse("+") would build a broken pattern.
  literal = Regexp.escape(character)
  trimmed = sub(/^#{literal}*/, "").sub(/#{literal}*$/, "")
  # squeeze takes the raw character, not a regex, so it is passed unescaped.
  trimmed.squeeze(character)
end
-
+
module ClassMethods
# Returns string of random characters with a length matching the specified limit. Excludes 0
# to avoid confusion between 0 and O.
def random(limit)
strong_alphanumerics = %w{