lib/asciidoctor/helpers.rb in asciidoctor-1.5.8 vs lib/asciidoctor/helpers.rb in asciidoctor-2.0.0.rc.1

- old
+ new

@@ -1,7 +1,8 @@ -# encoding: UTF-8 +# frozen_string_literal: true module Asciidoctor +# Internal: Except where noted, a module that contains internal helper functions. module Helpers # Internal: Require the specified library using Kernel#require. # # Attempts to load the library specified in the first argument using the # Kernel#require. Rescues the LoadError if the library is not available and @@ -41,89 +42,67 @@ end end nil end - # Public: Normalize the data to prepare for parsing + # Internal: Prepare the source data Array for parsing. # - # Delegates to Helpers#normalize_lines_from_string if data is a String. - # Delegates to Helpers#normalize_lines_array if data is a String Array. + # Encodes the data to UTF-8, if necessary, and removes any trailing + # whitespace from every line. # - # returns a String Array of normalized lines - def self.normalize_lines data - ::String === data ? (normalize_lines_from_string data) : (normalize_lines_array data) - end - - # Public: Normalize the array of lines to prepare them for parsing + # If a BOM is found at the beginning of the data, a best attempt is made to + # encode it to UTF-8 from the specified source encoding. # - # Force encodes the data to UTF-8 and removes trailing whitespace from each line. + # data - the source data Array to prepare (no nil entries allowed) # - # If a BOM is present at the beginning of the data, a best attempt - # is made to encode from the specified encoding to UTF-8. - # - # data - a String Array of lines to normalize - # - # returns a String Array of normalized lines - def self.normalize_lines_array data - return data if data.empty? - - leading_bytes = (first_line = data[0]).unpack 'C3' - if COERCE_ENCODING - utf8 = ::Encoding::UTF_8 - if (leading_2_bytes = leading_bytes.slice 0, 2) == BOM_BYTES_UTF_16LE - # HACK Ruby messes up trailing whitespace on UTF-16LE, so reencode whole document first - data = data.join - return (((data.force_encoding ::Encoding::UTF_16LE).slice 1, data.length).encode utf8).each_line.map {|line| line.rstrip } - elsif leading_2_bytes == BOM_BYTES_UTF_16BE - data[0] = (first_line.force_encoding ::Encoding::UTF_16BE).slice 1, first_line.length - return data.map {|line| ((line.force_encoding ::Encoding::UTF_16BE).encode utf8).rstrip } - elsif leading_bytes == BOM_BYTES_UTF_8 - data[0] = (first_line.force_encoding utf8).slice 1, first_line.length - end - - data.map {|line| line.encoding == utf8 ? line.rstrip : (line.force_encoding utf8).rstrip } - else - # Ruby 1.8 has no built-in re-encoding, so no point in removing the UTF-16 BOMs - data[0] = first_line.slice 3, first_line.length if leading_bytes == BOM_BYTES_UTF_8 + # returns a String Array of prepared lines + def self.prepare_source_array data + return [] if data.empty? + if (leading_2_bytes = (leading_bytes = (first = data[0]).unpack 'C3').slice 0, 2) == BOM_BYTES_UTF_16LE + data[0] = first.byteslice 2, first.bytesize + # NOTE you can't split a UTF-16LE string using .lines when encoding is UTF-8; doing so will cause this line to fail + return data.map {|line| (line.encode UTF_8, ::Encoding::UTF_16LE).rstrip } + elsif leading_2_bytes == BOM_BYTES_UTF_16BE + data[0] = first.byteslice 2, first.bytesize + return data.map {|line| (line.encode UTF_8, ::Encoding::UTF_16BE).rstrip } + elsif leading_bytes == BOM_BYTES_UTF_8 + data[0] = first.byteslice 3, first.bytesize + end + if first.encoding == UTF_8 data.map {|line| line.rstrip } + else + data.map {|line| (line.encode UTF_8).rstrip } end end - # Public: Normalize the String and split into lines to prepare them for parsing + # Internal: Prepare the source data String for parsing. # - # Force encodes the data to UTF-8 and removes trailing whitespace from each line. - # Converts the data to a String Array. + # Encodes the data to UTF-8, if necessary, splits it into an array, and + # removes any trailing whitespace from every line. # - # If a BOM is present at the beginning of the data, a best attempt - # is made to encode from the specified encoding to UTF-8. + # If a BOM is found at the beginning of the data, a best attempt is made to + # encode it to UTF-8 from the specified source encoding. # - # data - a String of lines to normalize + # data - the source data String to prepare # - # returns a String Array of normalized lines - def self.normalize_lines_from_string data + # returns a String Array of prepared lines + def self.prepare_source_string data return [] if data.nil_or_empty? - - leading_bytes = data.unpack 'C3' - if COERCE_ENCODING - utf8 = ::Encoding::UTF_8 - if (leading_2_bytes = leading_bytes.slice 0, 2) == BOM_BYTES_UTF_16LE - data = ((data.force_encoding ::Encoding::UTF_16LE).slice 1, data.length).encode utf8 - elsif leading_2_bytes == BOM_BYTES_UTF_16BE - data = ((data.force_encoding ::Encoding::UTF_16BE).slice 1, data.length).encode utf8 - elsif leading_bytes == BOM_BYTES_UTF_8 - data = data.encoding == utf8 ? (data.slice 1, data.length) : ((data.force_encoding utf8).slice 1, data.length) - else - data = data.force_encoding utf8 unless data.encoding == utf8 - end - else - # Ruby 1.8 has no built-in re-encoding, so no point in removing the UTF-16 BOMs - data = data.slice 3, data.length if leading_bytes == BOM_BYTES_UTF_8 + if (leading_2_bytes = (leading_bytes = data.unpack 'C3').slice 0, 2) == BOM_BYTES_UTF_16LE + data = (data.byteslice 2, data.bytesize).encode UTF_8, ::Encoding::UTF_16LE + elsif leading_2_bytes == BOM_BYTES_UTF_16BE + data = (data.byteslice 2, data.bytesize).encode UTF_8, ::Encoding::UTF_16BE + elsif leading_bytes == BOM_BYTES_UTF_8 + data = data.byteslice 3, data.bytesize + data = data.encode UTF_8 unless data.encoding == UTF_8 + elsif data.encoding != UTF_8 + data = data.encode UTF_8 end - data.each_line.map {|line| line.rstrip } + [].tap {|lines| data.each_line {|line| lines << line.rstrip } } end - # Public: Efficiently checks whether the specified String resembles a URI + # Internal: Efficiently checks whether the specified String resembles a URI # # Uses the Asciidoctor::UriSniffRx regex to check whether the String begins # with a URI prefix (e.g., http://). No validation of the URI is performed. # # str - the String to check @@ -131,11 +110,11 @@ # returns true if the String is a URI, false if it is not def self.uriish? str (str.include? ':') && (UriSniffRx.match? str) end - # Public: Efficiently retrieves the URI prefix of the specified String + # Internal: Efficiently retrieves the URI prefix of the specified String # # Uses the Asciidoctor::UriSniffRx regex to match the URI prefix in the # specified String (e.g., http://), if present. # # str - the String to check @@ -143,29 +122,48 @@ # returns the string URI prefix if the string is a URI, otherwise nil def self.uri_prefix str (str.include? ':') && UriSniffRx =~ str ? $& : nil end - # Matches the characters in a URI to encode - REGEXP_ENCODE_URI_CHARS = /[^\w\-.!~*';:@=+$,()\[\]]/ + # Internal: Encode a URI component String for safe inclusion in a URI. + # + # str - the URI component String to encode + # + # Returns the String with all reserved URI characters encoded (e.g., /, &, =, space, etc). + if RUBY_ENGINE == 'opal' + def self.encode_uri_component str + # patch necessary to adhere with RFC-3986 (and thus CGI.escape) + # see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURIComponent#Description + %x( + return encodeURIComponent(str).replace(/%20|[!'()*]/g, function (m) { + return m === '%20' ? '+' : '%' + m.charCodeAt(0).toString(16) + }) + ) + end + else + CGI = ::CGI + def self.encode_uri_component str + CGI.escape str + end + end - # Public: Encode a String for inclusion in a URI. + # Internal: Encode a URI String (namely the path portion). # - # str - the String to URI encode + # str - the String to encode # - # Returns the String with all URI reserved characters encoded. - def self.uri_encode str - str.gsub(REGEXP_ENCODE_URI_CHARS) { $&.each_byte.map {|c| sprintf '%%%02X', c }.join } + # Returns the String with all spaces replaced with %20. + def self.encode_uri str + (str.include? ' ') ? (str.gsub ' ', '%20') : str end # Public: Removes the file extension from filename and returns the result # # filename - The String file name to process # # Examples # - # Helpers.rootname('part1/chapter1.adoc') + # Helpers.rootname 'part1/chapter1.adoc' # # => "part1/chapter1" # # Returns the String filename with the file extension removed def self.rootname filename filename.slice 0, ((filename.rindex '.') || filename.length) @@ -177,25 +175,26 @@ # drop_ext - A Boolean flag indicating whether to drop the extension # or an explicit String extension to drop (default: nil). # # Examples # - # Helpers.basename('images/tiger.png', true) + # Helpers.basename 'images/tiger.png', true # # => "tiger" # - # Helpers.basename('images/tiger.png', '.png') + # Helpers.basename 'images/tiger.png', '.png' # # => "tiger" # # Returns the String filename with leading directories removed and, if specified, the extension removed - def self.basename(filename, drop_ext = nil) + def self.basename filename, drop_ext = nil if drop_ext ::File.basename filename, (drop_ext == true ? (::File.extname filename) : drop_ext) else ::File.basename filename end end + # Internal: Make a directory, ensuring all parent directories exist. def self.mkdir_p dir unless ::File.directory? dir unless (parent_dir = ::File.dirname dir) == '.' mkdir_p parent_dir end @@ -210,18 +209,58 @@ ROMAN_NUMERALS = { 'M' => 1000, 'CM' => 900, 'D' => 500, 'CD' => 400, 'C' => 100, 'XC' => 90, 'L' => 50, 'XL' => 40, 'X' => 10, 'IX' => 9, 'V' => 5, 'IV' => 4, 'I' => 1 } - # Converts an integer to a Roman numeral. + # Internal: Converts an integer to a Roman numeral. # # val - the [Integer] value to convert # # Returns the [String] roman numeral for this integer def self.int_to_roman val - ROMAN_NUMERALS.map {|l, i| + ROMAN_NUMERALS.map do |l, i| repeat, val = val.divmod i l * repeat - }.join + end.join + end + + # Internal: Get the next value in the sequence. + # + # Handles both integer and character sequences. + # + # current - the value to increment as a String or Integer + # + # returns the next value in the sequence according to the current value's type + def self.nextval current + if ::Integer === current + current + 1 + else + intval = current.to_i + if intval.to_s != current.to_s + (current[0].ord + 1).chr + else + intval + 1 + end + end + end + + # Internal: Resolve the specified object as a Class + # + # object - The Object to resolve as a Class + # + # Returns a Class if the specified object is a Class (but not a Module) or + # a String that resolves to a Class; otherwise, nil + def self.resolve_class object + ::Class === object ? object : (::String === object ? (class_for_name object) : nil) + end + + # Internal: Resolves a Class object (not a Module) for the qualified name. + # + # Returns Class + def self.class_for_name qualified_name + raise unless ::Class === (resolved = ::Object.const_get qualified_name, false) + resolved + rescue + raise ::NameError, %(Could not resolve class for name: #{qualified_name}) end end end