lib/jekyll/utils.rb in jekyll-3.6.3 vs lib/jekyll/utils.rb in jekyll-3.7.0

- old
+ new

@@ -1,19 +1,20 @@ - # frozen_string_literal: true module Jekyll module Utils extend self autoload :Ansi, "jekyll/utils/ansi" autoload :Exec, "jekyll/utils/exec" + autoload :Internet, "jekyll/utils/internet" autoload :Platforms, "jekyll/utils/platforms" autoload :Rouge, "jekyll/utils/rouge" + autoload :ThreadEvent, "jekyll/utils/thread_event" autoload :WinTZ, "jekyll/utils/win_tz" # Constants for use in #slugify - SLUGIFY_MODES = %w(raw default pretty ascii).freeze + SLUGIFY_MODES = %w(raw default pretty ascii latin).freeze SLUGIFY_RAW_REGEXP = Regexp.new('\\s+').freeze SLUGIFY_DEFAULT_REGEXP = Regexp.new("[^[:alnum:]]+").freeze SLUGIFY_PRETTY_REGEXP = Regexp.new("[^[:alnum:]._~!$&'()+,;=@]+").freeze SLUGIFY_ASCII_REGEXP = Regexp.new("[^[A-Za-z0-9]]+").freeze @@ -167,10 +168,14 @@ # are not replaced with hyphen. # # When mode is "ascii", some everything else except ASCII characters # a-z (lowercase), A-Z (uppercase) and 0-9 (numbers) are not replaced with hyphen. # + # When mode is "latin", the input string is first preprocessed so that + # any letters with accents are replaced with the plain letter. Afterwards, + # it follows the "default" mode of operation. + # # If cased is true, all uppercase letters in the result string are # replaced with their lowercase counterparts. # # Examples: # slugify("The _config.yml file") @@ -181,41 +186,28 @@ # # slugify("The _config.yml file", "pretty", true) # # => "The-_config.yml file" # # slugify("The _config.yml file", "ascii") - # # => "the-config.yml-file" + # # => "the-config-yml-file" # + # slugify("The _config.yml file", "latin") + # # => "the-config-yml-file" + # # Returns the slugified string. def slugify(string, mode: nil, cased: false) mode ||= "default" return nil if string.nil? unless SLUGIFY_MODES.include?(mode) return cased ? string : string.downcase end - # Replace each character sequence with a hyphen - re = - case mode - when "raw" - SLUGIFY_RAW_REGEXP - when "default" - SLUGIFY_DEFAULT_REGEXP - when "pretty" - # "._~!$&'()+,;=@" is human readable (not URI-escaped) in URL - # and is allowed in both extN and NTFS. - SLUGIFY_PRETTY_REGEXP - when "ascii" - # For web servers not being able to handle Unicode, the safe - # method is to ditch anything else but latin letters and numeric - # digits. - SLUGIFY_ASCII_REGEXP - end + # Drop accent marks from latin characters. Everything else turns to ? + string = ::I18n.transliterate(string) if mode == "latin" - # Strip according to the mode - slug = string.gsub(re, "-") + slug = replace_character_sequence_with_hyphen(string, :mode => mode) # Remove leading/trailing hyphen slug.gsub!(%r!^\-|\-$!i, "") slug.downcase! unless cased @@ -333,8 +325,35 @@ private def duplicate_frozen_values(target) target.each do |key, val| target[key] = val.dup if val.frozen? && duplicable?(val) end + end + + # Replace each character sequence with a hyphen. + # + # See Utils#slugify for a description of the character sequence specified + # by each mode. + private + def replace_character_sequence_with_hyphen(string, mode: "default") + replaceable_char = + case mode + when "raw" + SLUGIFY_RAW_REGEXP + when "pretty" + # "._~!$&'()+,;=@" is human readable (not URI-escaped) in URL + # and is allowed in both extN and NTFS. + SLUGIFY_PRETTY_REGEXP + when "ascii" + # For web servers not being able to handle Unicode, the safe + # method is to ditch anything else but latin letters and numeric + # digits. + SLUGIFY_ASCII_REGEXP + else + SLUGIFY_DEFAULT_REGEXP + end + + # Strip according to the mode + string.gsub(replaceable_char, "-") end end end