lib/asciidoctor/helpers.rb in asciidoctor-1.5.8 vs lib/asciidoctor/helpers.rb in asciidoctor-2.0.0.rc.1
- old
+ new
@@ -1,7 +1,8 @@
-# encoding: UTF-8
+# frozen_string_literal: true
module Asciidoctor
+# Internal: Except where noted, a module that contains internal helper functions.
module Helpers
# Internal: Require the specified library using Kernel#require.
#
# Attempts to load the library specified in the first argument using the
# Kernel#require. Rescues the LoadError if the library is not available and
@@ -41,89 +42,67 @@
end
end
nil
end
- # Public: Normalize the data to prepare for parsing
+ # Internal: Prepare the source data Array for parsing.
#
- # Delegates to Helpers#normalize_lines_from_string if data is a String.
- # Delegates to Helpers#normalize_lines_array if data is a String Array.
+ # Encodes the data to UTF-8, if necessary, and removes any trailing
+ # whitespace from every line.
#
- # returns a String Array of normalized lines
- def self.normalize_lines data
- ::String === data ? (normalize_lines_from_string data) : (normalize_lines_array data)
- end
-
- # Public: Normalize the array of lines to prepare them for parsing
+ # If a BOM is found at the beginning of the data, a best attempt is made to
+ # encode it to UTF-8 from the specified source encoding.
#
- # Force encodes the data to UTF-8 and removes trailing whitespace from each line.
+ # data - the source data Array to prepare (no nil entries allowed)
#
- # If a BOM is present at the beginning of the data, a best attempt
- # is made to encode from the specified encoding to UTF-8.
- #
- # data - a String Array of lines to normalize
- #
- # returns a String Array of normalized lines
- def self.normalize_lines_array data
- return data if data.empty?
-
- leading_bytes = (first_line = data[0]).unpack 'C3'
- if COERCE_ENCODING
- utf8 = ::Encoding::UTF_8
- if (leading_2_bytes = leading_bytes.slice 0, 2) == BOM_BYTES_UTF_16LE
- # HACK Ruby messes up trailing whitespace on UTF-16LE, so reencode whole document first
- data = data.join
- return (((data.force_encoding ::Encoding::UTF_16LE).slice 1, data.length).encode utf8).each_line.map {|line| line.rstrip }
- elsif leading_2_bytes == BOM_BYTES_UTF_16BE
- data[0] = (first_line.force_encoding ::Encoding::UTF_16BE).slice 1, first_line.length
- return data.map {|line| ((line.force_encoding ::Encoding::UTF_16BE).encode utf8).rstrip }
- elsif leading_bytes == BOM_BYTES_UTF_8
- data[0] = (first_line.force_encoding utf8).slice 1, first_line.length
- end
-
- data.map {|line| line.encoding == utf8 ? line.rstrip : (line.force_encoding utf8).rstrip }
- else
- # Ruby 1.8 has no built-in re-encoding, so no point in removing the UTF-16 BOMs
- data[0] = first_line.slice 3, first_line.length if leading_bytes == BOM_BYTES_UTF_8
+ # returns a String Array of prepared lines
+ def self.prepare_source_array data
+ return [] if data.empty?
+ if (leading_2_bytes = (leading_bytes = (first = data[0]).unpack 'C3').slice 0, 2) == BOM_BYTES_UTF_16LE
+ data[0] = first.byteslice 2, first.bytesize
+ # NOTE you can't split a UTF-16LE string using .lines when encoding is UTF-8; doing so will cause this line to fail
+ return data.map {|line| (line.encode UTF_8, ::Encoding::UTF_16LE).rstrip }
+ elsif leading_2_bytes == BOM_BYTES_UTF_16BE
+ data[0] = first.byteslice 2, first.bytesize
+ return data.map {|line| (line.encode UTF_8, ::Encoding::UTF_16BE).rstrip }
+ elsif leading_bytes == BOM_BYTES_UTF_8
+ data[0] = first.byteslice 3, first.bytesize
+ end
+ if first.encoding == UTF_8
data.map {|line| line.rstrip }
+ else
+ data.map {|line| (line.encode UTF_8).rstrip }
end
end
- # Public: Normalize the String and split into lines to prepare them for parsing
+ # Internal: Prepare the source data String for parsing.
#
- # Force encodes the data to UTF-8 and removes trailing whitespace from each line.
- # Converts the data to a String Array.
+ # Encodes the data to UTF-8, if necessary, splits it into an array, and
+ # removes any trailing whitespace from every line.
#
- # If a BOM is present at the beginning of the data, a best attempt
- # is made to encode from the specified encoding to UTF-8.
+ # If a BOM is found at the beginning of the data, a best attempt is made to
+ # encode it to UTF-8 from the specified source encoding.
#
- # data - a String of lines to normalize
+ # data - the source data String to prepare
#
- # returns a String Array of normalized lines
- def self.normalize_lines_from_string data
+ # returns a String Array of prepared lines
+ def self.prepare_source_string data
return [] if data.nil_or_empty?
-
- leading_bytes = data.unpack 'C3'
- if COERCE_ENCODING
- utf8 = ::Encoding::UTF_8
- if (leading_2_bytes = leading_bytes.slice 0, 2) == BOM_BYTES_UTF_16LE
- data = ((data.force_encoding ::Encoding::UTF_16LE).slice 1, data.length).encode utf8
- elsif leading_2_bytes == BOM_BYTES_UTF_16BE
- data = ((data.force_encoding ::Encoding::UTF_16BE).slice 1, data.length).encode utf8
- elsif leading_bytes == BOM_BYTES_UTF_8
- data = data.encoding == utf8 ? (data.slice 1, data.length) : ((data.force_encoding utf8).slice 1, data.length)
- else
- data = data.force_encoding utf8 unless data.encoding == utf8
- end
- else
- # Ruby 1.8 has no built-in re-encoding, so no point in removing the UTF-16 BOMs
- data = data.slice 3, data.length if leading_bytes == BOM_BYTES_UTF_8
+ if (leading_2_bytes = (leading_bytes = data.unpack 'C3').slice 0, 2) == BOM_BYTES_UTF_16LE
+ data = (data.byteslice 2, data.bytesize).encode UTF_8, ::Encoding::UTF_16LE
+ elsif leading_2_bytes == BOM_BYTES_UTF_16BE
+ data = (data.byteslice 2, data.bytesize).encode UTF_8, ::Encoding::UTF_16BE
+ elsif leading_bytes == BOM_BYTES_UTF_8
+ data = data.byteslice 3, data.bytesize
+ data = data.encode UTF_8 unless data.encoding == UTF_8
+ elsif data.encoding != UTF_8
+ data = data.encode UTF_8
end
- data.each_line.map {|line| line.rstrip }
+ [].tap {|lines| data.each_line {|line| lines << line.rstrip } }
end
- # Public: Efficiently checks whether the specified String resembles a URI
+ # Internal: Efficiently checks whether the specified String resembles a URI
#
# Uses the Asciidoctor::UriSniffRx regex to check whether the String begins
# with a URI prefix (e.g., http://). No validation of the URI is performed.
#
# str - the String to check
@@ -131,11 +110,11 @@
# returns true if the String is a URI, false if it is not
def self.uriish? str
(str.include? ':') && (UriSniffRx.match? str)
end
- # Public: Efficiently retrieves the URI prefix of the specified String
+ # Internal: Efficiently retrieves the URI prefix of the specified String
#
# Uses the Asciidoctor::UriSniffRx regex to match the URI prefix in the
# specified String (e.g., http://), if present.
#
# str - the String to check
@@ -143,29 +122,48 @@
# returns the string URI prefix if the string is a URI, otherwise nil
def self.uri_prefix str
(str.include? ':') && UriSniffRx =~ str ? $& : nil
end
- # Matches the characters in a URI to encode
- REGEXP_ENCODE_URI_CHARS = /[^\w\-.!~*';:@=+$,()\[\]]/
+ # Internal: Encode a URI component String for safe inclusion in a URI.
+ #
+ # str - the URI component String to encode
+ #
+ # Returns the String with all reserved URI characters encoded (e.g., /, &, =, space, etc).
+ if RUBY_ENGINE == 'opal'
+ def self.encode_uri_component str
+ # patch necessary to adhere with RFC-3986 (and thus CGI.escape)
+ # see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURIComponent#Description
+ %x(
+ return encodeURIComponent(str).replace(/%20|[!'()*]/g, function (m) {
+ return m === '%20' ? '+' : '%' + m.charCodeAt(0).toString(16)
+ })
+ )
+ end
+ else
+ CGI = ::CGI
+ def self.encode_uri_component str
+ CGI.escape str
+ end
+ end
- # Public: Encode a String for inclusion in a URI.
+ # Internal: Encode a URI String (namely the path portion).
#
- # str - the String to URI encode
+ # str - the String to encode
#
- # Returns the String with all URI reserved characters encoded.
- def self.uri_encode str
- str.gsub(REGEXP_ENCODE_URI_CHARS) { $&.each_byte.map {|c| sprintf '%%%02X', c }.join }
+ # Returns the String with all spaces replaced with %20.
+ def self.encode_uri str
+ (str.include? ' ') ? (str.gsub ' ', '%20') : str
end
# Public: Removes the file extension from filename and returns the result
#
# filename - The String file name to process
#
# Examples
#
- # Helpers.rootname('part1/chapter1.adoc')
+ # Helpers.rootname 'part1/chapter1.adoc'
# # => "part1/chapter1"
#
# Returns the String filename with the file extension removed
def self.rootname filename
filename.slice 0, ((filename.rindex '.') || filename.length)
@@ -177,25 +175,26 @@
# drop_ext - A Boolean flag indicating whether to drop the extension
# or an explicit String extension to drop (default: nil).
#
# Examples
#
- # Helpers.basename('images/tiger.png', true)
+ # Helpers.basename 'images/tiger.png', true
# # => "tiger"
#
- # Helpers.basename('images/tiger.png', '.png')
+ # Helpers.basename 'images/tiger.png', '.png'
# # => "tiger"
#
# Returns the String filename with leading directories removed and, if specified, the extension removed
- def self.basename(filename, drop_ext = nil)
+ def self.basename filename, drop_ext = nil
if drop_ext
::File.basename filename, (drop_ext == true ? (::File.extname filename) : drop_ext)
else
::File.basename filename
end
end
+ # Internal: Make a directory, ensuring all parent directories exist.
def self.mkdir_p dir
unless ::File.directory? dir
unless (parent_dir = ::File.dirname dir) == '.'
mkdir_p parent_dir
end
@@ -210,18 +209,58 @@
ROMAN_NUMERALS = {
'M' => 1000, 'CM' => 900, 'D' => 500, 'CD' => 400, 'C' => 100, 'XC' => 90,
'L' => 50, 'XL' => 40, 'X' => 10, 'IX' => 9, 'V' => 5, 'IV' => 4, 'I' => 1
}
- # Converts an integer to a Roman numeral.
+ # Internal: Converts an integer to a Roman numeral.
#
# val - the [Integer] value to convert
#
# Returns the [String] roman numeral for this integer
def self.int_to_roman val
- ROMAN_NUMERALS.map {|l, i|
+ ROMAN_NUMERALS.map do |l, i|
repeat, val = val.divmod i
l * repeat
- }.join
+ end.join
+ end
+
+ # Internal: Get the next value in the sequence.
+ #
+ # Handles both integer and character sequences.
+ #
+ # current - the value to increment as a String or Integer
+ #
+ # returns the next value in the sequence according to the current value's type
+ def self.nextval current
+ if ::Integer === current
+ current + 1
+ else
+ intval = current.to_i
+ if intval.to_s != current.to_s
+ (current[0].ord + 1).chr
+ else
+ intval + 1
+ end
+ end
+ end
+
+ # Internal: Resolve the specified object as a Class
+ #
+ # object - The Object to resolve as a Class
+ #
+ # Returns a Class if the specified object is a Class (but not a Module) or
+ # a String that resolves to a Class; otherwise, nil
+ def self.resolve_class object
+ ::Class === object ? object : (::String === object ? (class_for_name object) : nil)
+ end
+
+ # Internal: Resolves a Class object (not a Module) for the qualified name.
+ #
+ # Returns Class
+ def self.class_for_name qualified_name
+ raise unless ::Class === (resolved = ::Object.const_get qualified_name, false)
+ resolved
+ rescue
+ raise ::NameError, %(Could not resolve class for name: #{qualified_name})
end
end
end