utils.rb in jekyll-3.7.0

- old
+ new

@@ -1,19 +1,20 @@
-
 # frozen_string_literal: true
 
 module Jekyll
   module Utils
     extend self
     autoload :Ansi, "jekyll/utils/ansi"
     autoload :Exec, "jekyll/utils/exec"
+    autoload :Internet, "jekyll/utils/internet"
     autoload :Platforms, "jekyll/utils/platforms"
     autoload :Rouge, "jekyll/utils/rouge"
+    autoload :ThreadEvent, "jekyll/utils/thread_event"
     autoload :WinTZ, "jekyll/utils/win_tz"
 
     # Constants for use in #slugify
-    SLUGIFY_MODES = %w(raw default pretty ascii).freeze
+    SLUGIFY_MODES = %w(raw default pretty ascii latin).freeze
     SLUGIFY_RAW_REGEXP = Regexp.new('\\s+').freeze
     SLUGIFY_DEFAULT_REGEXP = Regexp.new("[^[:alnum:]]+").freeze
     SLUGIFY_PRETTY_REGEXP = Regexp.new("[^[:alnum:]._~!$&'()+,;=@]+").freeze
     SLUGIFY_ASCII_REGEXP = Regexp.new("[^[A-Za-z0-9]]+").freeze
 
@@ -167,10 +168,14 @@
     # are not replaced with hyphen.
     #
     # When mode is "ascii", everything except the ASCII characters
     # a-z (lowercase), A-Z (uppercase) and 0-9 (numbers) is replaced with hyphen.
     #
+    # When mode is "latin", the input string is first preprocessed so that
+    # any letters with accents are replaced with the plain letter. Afterwards,
+    # it follows the "default" mode of operation.
+    #
     # If cased is true, all uppercase letters in the result string are
     # replaced with their lowercase counterparts.
     #
     # Examples:
     #   slugify("The _config.yml file")
@@ -181,41 +186,28 @@
     #
     #   slugify("The _config.yml file", mode: "pretty", cased: true)
     #   # => "The-_config.yml-file"
     #
     #   slugify("The _config.yml file", mode: "ascii")
-    #   # => "the-config.yml-file"
+    #   # => "the-config-yml-file"
     #
+    #   slugify("The _config.yml file", mode: "latin")
+    #   # => "the-config-yml-file"
+    #
     # Returns the slugified string.
     def slugify(string, mode: nil, cased: false)
       mode ||= "default"
       return nil if string.nil?
 
       unless SLUGIFY_MODES.include?(mode)
         return cased ? string : string.downcase
       end
 
-      # Replace each character sequence with a hyphen
-      re =
-        case mode
-        when "raw"
-          SLUGIFY_RAW_REGEXP
-        when "default"
-          SLUGIFY_DEFAULT_REGEXP
-        when "pretty"
-          # "._~!$&'()+,;=@" is human readable (not URI-escaped) in URL
-          # and is allowed in both extN and NTFS.
-          SLUGIFY_PRETTY_REGEXP
-        when "ascii"
-          # For web servers not being able to handle Unicode, the safe
-          # method is to ditch anything else but latin letters and numeric
-          # digits.
-          SLUGIFY_ASCII_REGEXP
-        end
+      # Drop accent marks from latin characters. Everything else turns to ?
+      string = ::I18n.transliterate(string) if mode == "latin"
 
-      # Strip according to the mode
-      slug = string.gsub(re, "-")
+      slug = replace_character_sequence_with_hyphen(string, :mode => mode)
 
       # Remove leading/trailing hyphen
       slug.gsub!(%r!^\-|\-$!i, "")
 
       slug.downcase! unless cased
@@ -333,8 +325,35 @@
     private
     # Replace every frozen, duplicable value in target with a `dup` of
     # itself, modifying target in place.
     #
     # target - the collection to update; it is iterated as key/value pairs
     #          and written back with []= (presumably a Hash - confirm
     #          against callers).
     #
     # Returns the result of target.each.
     def duplicate_frozen_values(target)
       target.each do |key, val|
         # Only swap values that are frozen and that duplicable? accepts.
         target[key] = val.dup if val.frozen? && duplicable?(val)
       end
+    end
+
+    # Replace each run of characters that the given mode does not keep with
+    # a single hyphen.
+    #
+    # See Utils#slugify for a description of the characters kept by each mode.
+    private
+    def replace_character_sequence_with_hyphen(string, mode: "default")
+      replaceable_char =
+        case mode
+        when "raw"
+          SLUGIFY_RAW_REGEXP
+        when "pretty"
+          # The characters "._~!$&'()+,;=@" are human readable (not
+          # URI-escaped) in URLs and are allowed in both extN and NTFS.
+          SLUGIFY_PRETTY_REGEXP
+        when "ascii"
+          # For web servers that cannot handle Unicode, the safe approach
+          # is to replace everything except unaccented Latin letters
+          # (A-Z, a-z) and numeric digits.
+          SLUGIFY_ASCII_REGEXP
+        else # "default", "latin" and any other mode value
+          SLUGIFY_DEFAULT_REGEXP
+        end
+
+      # Replace each run of matching characters with a single hyphen
+      string.gsub(replaceable_char, "-")
     end
   end
 end