helpers.rb in asciidoctor-2.0.0.rc.1

- old
+ new

@@ -1,7 +1,8 @@
-# encoding: UTF-8
+# frozen_string_literal: true
 module Asciidoctor
+# Internal: Except where noted, a module that contains internal helper functions.
 module Helpers
   # Internal: Require the specified library using Kernel#require.
   #
   # Attempts to load the library specified in the first argument using the
   # Kernel#require. Rescues the LoadError if the library is not available and
@@ -41,89 +42,67 @@
       end
     end
     nil
   end
 
-  # Public: Normalize the data to prepare for parsing
+  # Internal: Prepare the source data Array for parsing.
   #
-  # Delegates to Helpers#normalize_lines_from_string if data is a String.
-  # Delegates to Helpers#normalize_lines_array if data is a String Array.
+  # Encodes the data to UTF-8, if necessary, and removes any trailing
+  # whitespace from every line.
   #
-  # returns a String Array of normalized lines
-  def self.normalize_lines data
-    ::String === data ? (normalize_lines_from_string data) : (normalize_lines_array data)
-  end
-
-  # Public: Normalize the array of lines to prepare them for parsing
+  # If a BOM is found at the beginning of the data, a best attempt is made to
+  # encode it to UTF-8 from the specified source encoding.
   #
-  # Force encodes the data to UTF-8 and removes trailing whitespace from each line.
+  # data - the source data Array to prepare (no nil entries allowed)
   #
-  # If a BOM is present at the beginning of the data, a best attempt
-  # is made to encode from the specified encoding to UTF-8.
-  #
-  # data - a String Array of lines to normalize
-  #
-  # returns a String Array of normalized lines
-  def self.normalize_lines_array data
-    return data if data.empty?
-
-    leading_bytes = (first_line = data[0]).unpack 'C3'
-    if COERCE_ENCODING
-      utf8 = ::Encoding::UTF_8
-      if (leading_2_bytes = leading_bytes.slice 0, 2) == BOM_BYTES_UTF_16LE
-        # HACK Ruby messes up trailing whitespace on UTF-16LE, so reencode whole document first
-        data = data.join
-        return (((data.force_encoding ::Encoding::UTF_16LE).slice 1, data.length).encode utf8).each_line.map {|line| line.rstrip }
-      elsif leading_2_bytes == BOM_BYTES_UTF_16BE
-        data[0] = (first_line.force_encoding ::Encoding::UTF_16BE).slice 1, first_line.length
-        return data.map {|line| ((line.force_encoding ::Encoding::UTF_16BE).encode utf8).rstrip }
-      elsif leading_bytes == BOM_BYTES_UTF_8
-        data[0] = (first_line.force_encoding utf8).slice 1, first_line.length
-      end
-
-      data.map {|line| line.encoding == utf8 ? line.rstrip : (line.force_encoding utf8).rstrip }
-    else
-      # Ruby 1.8 has no built-in re-encoding, so no point in removing the UTF-16 BOMs
-      data[0] = first_line.slice 3, first_line.length if leading_bytes == BOM_BYTES_UTF_8
+  # returns a String Array of prepared lines
+  def self.prepare_source_array data
+    return [] if data.empty?
+    if (leading_2_bytes = (leading_bytes = (first = data[0]).unpack 'C3').slice 0, 2) == BOM_BYTES_UTF_16LE # inspect up to the first 3 bytes of the first entry for a BOM
+      data[0] = first.byteslice 2, first.bytesize # drop the 2-byte BOM; NOTE this replaces the first entry of the caller's Array in place
+      # NOTE you can't split a UTF-16LE string using .lines when encoding is UTF-8; doing so will cause the encode call on the next line to fail
+      return data.map {|line| (line.encode UTF_8, ::Encoding::UTF_16LE).rstrip }
+    elsif leading_2_bytes == BOM_BYTES_UTF_16BE
+      data[0] = first.byteslice 2, first.bytesize # drop the 2-byte BOM (also replaces the first entry in place)
+      return data.map {|line| (line.encode UTF_8, ::Encoding::UTF_16BE).rstrip }
+    elsif leading_bytes == BOM_BYTES_UTF_8
+      data[0] = first.byteslice 3, first.bytesize # drop the 3-byte BOM, then fall through to the encoding check below
+    end
+    if first.encoding == UTF_8 # first still refers to the original first entry; its encoding alone decides whether every line is transcoded
       data.map {|line| line.rstrip }
+    else
+      data.map {|line| (line.encode UTF_8).rstrip } # transcode each entry from its own current encoding
     end
   end
 
-  # Public: Normalize the String and split into lines to prepare them for parsing
+  # Internal: Prepare the source data String for parsing.
   #
-  # Force encodes the data to UTF-8 and removes trailing whitespace from each line.
-  # Converts the data to a String Array.
+  # Encodes the data to UTF-8, if necessary, splits it into an array, and
+  # removes any trailing whitespace from every line.
   #
-  # If a BOM is present at the beginning of the data, a best attempt
-  # is made to encode from the specified encoding to UTF-8.
+  # If a BOM is found at the beginning of the data, a best attempt is made to
+  # encode it to UTF-8 from the specified source encoding.
   #
-  # data - a String of lines to normalize
+  # data - the source data String to prepare
   #
-  # returns a String Array of normalized lines
-  def self.normalize_lines_from_string data
+  # returns a String Array of prepared lines
+  def self.prepare_source_string data
     return [] if data.nil_or_empty?
-
-    leading_bytes = data.unpack 'C3'
-    if COERCE_ENCODING
-      utf8 = ::Encoding::UTF_8
-      if (leading_2_bytes = leading_bytes.slice 0, 2) == BOM_BYTES_UTF_16LE
-        data = ((data.force_encoding ::Encoding::UTF_16LE).slice 1, data.length).encode utf8
-      elsif leading_2_bytes == BOM_BYTES_UTF_16BE
-        data = ((data.force_encoding ::Encoding::UTF_16BE).slice 1, data.length).encode utf8
-      elsif leading_bytes == BOM_BYTES_UTF_8
-        data = data.encoding == utf8 ? (data.slice 1, data.length) : ((data.force_encoding utf8).slice 1, data.length)
-      else
-        data = data.force_encoding utf8 unless data.encoding == utf8
-      end
-    else
-      # Ruby 1.8 has no built-in re-encoding, so no point in removing the UTF-16 BOMs
-      data = data.slice 3, data.length if leading_bytes == BOM_BYTES_UTF_8
+    if (leading_2_bytes = (leading_bytes = data.unpack 'C3').slice 0, 2) == BOM_BYTES_UTF_16LE # inspect up to the first 3 bytes for a BOM
+      data = (data.byteslice 2, data.bytesize).encode UTF_8, ::Encoding::UTF_16LE # drop the 2-byte BOM, then transcode from UTF-16LE
+    elsif leading_2_bytes == BOM_BYTES_UTF_16BE
+      data = (data.byteslice 2, data.bytesize).encode UTF_8, ::Encoding::UTF_16BE # drop the 2-byte BOM, then transcode from UTF-16BE
+    elsif leading_bytes == BOM_BYTES_UTF_8
+      data = data.byteslice 3, data.bytesize # drop the 3-byte BOM
+      data = data.encode UTF_8 unless data.encoding == UTF_8 # transcode only when the String is not already tagged as UTF-8
+    elsif data.encoding != UTF_8
+      data = data.encode UTF_8 # no BOM found; transcode from the String's current encoding
     end
-    data.each_line.map {|line| line.rstrip }
+    [].tap {|lines| data.each_line {|line| lines << line.rstrip } } # split into lines, stripping trailing whitespace (including the line ending) from each
   end
 
-  # Public: Efficiently checks whether the specified String resembles a URI
+  # Internal: Efficiently checks whether the specified String resembles a URI
   #
   # Uses the Asciidoctor::UriSniffRx regex to check whether the String begins
   # with a URI prefix (e.g., http://). No validation of the URI is performed.
   #
   # str - the String to check
@@ -131,11 +110,11 @@
   # returns true if the String is a URI, false if it is not
   def self.uriish? str
     (str.include? ':') && (UriSniffRx.match? str)
   end
 
-  # Public: Efficiently retrieves the URI prefix of the specified String
+  # Internal: Efficiently retrieves the URI prefix of the specified String
   #
   # Uses the Asciidoctor::UriSniffRx regex to match the URI prefix in the
   # specified String (e.g., http://), if present.
   #
   # str - the String to check
@@ -143,29 +122,48 @@
   # returns the string URI prefix if the string is a URI, otherwise nil
   def self.uri_prefix str
     (str.include? ':') && UriSniffRx =~ str ? $& : nil
   end
 
-  # Matches the characters in a URI to encode
-  REGEXP_ENCODE_URI_CHARS = /[^\w\-.!~*';:@=+$,()\[\]]/
+  # Internal: Encode a URI component String for safe inclusion in a URI.
+  #
+  # str - the URI component String to encode
+  #
+  # Returns the String with all reserved URI characters percent-encoded (e.g., /, &, and =) and each space replaced with +.
+  if RUBY_ENGINE == 'opal'
+    def self.encode_uri_component str
+      # patch necessary to adhere to RFC-3986 (and thus CGI.escape); hex digits are uppercased to match encodeURIComponent's own output
+      # see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURIComponent#Description
+      %x(
+        return encodeURIComponent(str).replace(/%20|[!'()*]/g, function (m) {
+          return m === '%20' ? '+' : '%' + m.charCodeAt(0).toString(16).toUpperCase()
+        })
+      )
+    end
+  else
+    CGI = ::CGI # assumes the cgi library has already been required elsewhere; TODO confirm
+    def self.encode_uri_component str
+      CGI.escape str
+    end
+  end
 
-  # Public: Encode a String for inclusion in a URI.
+  # Internal: Encode a URI String (namely the path portion).
   #
-  # str - the String to URI encode
+  # str - the String to encode
   #
-  # Returns the String with all URI reserved characters encoded.
-  def self.uri_encode str
-    str.gsub(REGEXP_ENCODE_URI_CHARS) { $&.each_byte.map {|c| sprintf '%%%02X', c }.join }
+  # Returns the String with all spaces replaced with %20.
+  def self.encode_uri str
+    (str.include? ' ') ? (str.gsub ' ', '%20') : str # returns str itself (no copy) when it contains no spaces
   end
 
   # Public: Removes the file extension from filename and returns the result
   #
   # filename - The String file name to process
   #
   # Examples
   #
-  #   Helpers.rootname('part1/chapter1.adoc')
+  #   Helpers.rootname 'part1/chapter1.adoc'
   #   # => "part1/chapter1"
   #
   # Returns the String filename with the file extension removed
   def self.rootname filename
     filename.slice 0, ((filename.rindex '.') || filename.length)
@@ -177,25 +175,26 @@
   # drop_ext - A Boolean flag indicating whether to drop the extension
   #            or an explicit String extension to drop (default: nil).
   #
   # Examples
   #
-  #   Helpers.basename('images/tiger.png', true)
+  #   Helpers.basename 'images/tiger.png', true
   #   # => "tiger"
   #
-  #   Helpers.basename('images/tiger.png', '.png')
+  #   Helpers.basename 'images/tiger.png', '.png'
   #   # => "tiger"
   #
   # Returns the String filename with leading directories removed and, if specified, the extension removed
-  def self.basename(filename, drop_ext = nil)
+  def self.basename filename, drop_ext = nil
     if drop_ext
       ::File.basename filename, (drop_ext == true ? (::File.extname filename) : drop_ext)
     else
       ::File.basename filename
     end
   end
 
+  # Internal: Make a directory, ensuring all parent directories exist.
   def self.mkdir_p dir
     unless ::File.directory? dir
       unless (parent_dir = ::File.dirname dir) == '.'
         mkdir_p parent_dir
       end
@@ -210,18 +209,58 @@
   ROMAN_NUMERALS = {
     'M' => 1000, 'CM' => 900, 'D' => 500, 'CD' => 400, 'C' => 100, 'XC' => 90,
     'L' => 50, 'XL' => 40, 'X' => 10, 'IX' => 9, 'V' => 5, 'IV' => 4, 'I' => 1
   }
 
-  # Converts an integer to a Roman numeral.
+  # Internal: Converts an integer to a Roman numeral.
   #
   # val - the [Integer] value to convert
   #
   # Returns the [String] roman numeral for this integer
   def self.int_to_roman val
-    ROMAN_NUMERALS.map {|l, i|
+    ROMAN_NUMERALS.map do |l, i| # entries are visited in the order they are declared in ROMAN_NUMERALS, largest value first
       repeat, val = val.divmod i
       l * repeat
-    }.join
+    end.join
+  end
+
+  # Internal: Get the next value in the sequence.
+  #
+  # Handles both integer and character sequences (a String of digits is treated as an integer).
+  #
+  # current - the value to increment as a String or Integer
+  #
+  # Returns the next Integer, or a one-character String that follows the first character of current
+  def self.nextval current
+    if ::Integer === current
+      current + 1
+    else
+      intval = current.to_i
+      if intval.to_s != current.to_s
+        (current[0].ord + 1).chr ::Encoding::UTF_8 # explicit encoding; chr without one raises RangeError for code points above 255
+      else
+        intval + 1
+      end
+    end
+  end
+
+  # Internal: Resolve the specified object as a Class.
+  #
+  # object - The Object to resolve; either a Class (but not a Module) or a String naming a Class
+  #
+  # Returns the Class, or nil if object is neither a Class nor a String (class_for_name raises if a String cannot be resolved)
+  def self.resolve_class object
+    return object if ::Class === object
+    class_for_name object if ::String === object
+  end
+
+  # Internal: Resolves a Class object (not a Module) for the qualified name.
+  #
+  # Returns the Class; raises ::NameError if the name cannot be resolved or does not refer to a Class
+  def self.class_for_name qualified_name
+    raise unless ::Class === (resolved = ::Object.const_get qualified_name, false) # the bare raise is converted to NameError by the rescue below
+    resolved
+  rescue # catches any StandardError, including the bare raise above
+    raise ::NameError, %(Could not resolve class for name: #{qualified_name})
   end
 end
 end