lib/url_regex.rb in url_regex-0.0.2 vs lib/url_regex.rb in url_regex-0.0.3
- old
+ new
@@ -1,60 +1,76 @@
require 'url_regex/version'
# Provides the best known regex for validating and extracting URLs.
# It uses amazing job done by [Diego Perini](https://gist.github.com/dperini/729294)
# and [Mathias Bynens](https://mathiasbynens.be/demo/url-regex).
-
module UrlRegex
# Returns the regex for URLs parsing or validating.
#
# @param scheme_required [Boolean] will the regex require scheme presence, defaults to true
# @param mode [Symbol] purpose of the regex, `:validation` or `parsing`, defaults to `:validation`
# @return [Regex] regex for parsing or validating
def self.get(scheme_required: true, mode: :validation)
raise ArgumentError, "wrong mode: #{mode}" if MODES.index(mode).nil?
+
scheme = scheme_required ? PROTOCOL_IDENTIFIER : PROTOCOL_IDENTIFIER_OPTIONAL
- mode == :validation ? /\A#{scheme} #{BASE}\z/xi : /#{scheme} #{BASE}/xi
+
+ case mode
+ when :validation
+ /\A#{scheme} #{BASE}\z/xi
+ when :parsing
+ /#{scheme} #{BASE}/xi
+ when :javascript
+ /^#{scheme}#{JAVASCRIPT_BASE}$/
+ end
end
BASE = '
- # user:pass authentication
- (?:\S+(?::\S*)?@)?
+ # user:pass authentication
+ (?:\S+(?::\S*)?@)?
- (?:
- # IP address exclusion
- # private & local networks
- (?!(?:10|127)(?:\.\d{1,3}){3})
- (?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})
- (?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})
- # IP address dotted notation octets
- # excludes loopback network 0.0.0.0
- # excludes reserved space >= 224.0.0.0
- # excludes network & broadcast addresses
- # (first & last IP address of each class)
- (?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])
- (?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}
- (?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))
- |
- # host name
- (?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)
- # domain name
- (?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*
- # TLD identifier
- (?:\.(?:[a-z\u00a1-\uffff]{2,}))
- # TLD may end with dot
- \.?
- )
+ (?:
+ # IP address exclusion
+ # private & local networks
+ (?!(?:10|127)(?:\.\d{1,3}){3})
+ (?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})
+ (?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})
+ # IP address dotted notation octets
+ # excludes loopback network 0.0.0.0
+ # excludes reserved space >= 224.0.0.0
+ # excludes network & broadcast addresses
+ # (first & last IP address of each class)
+ (?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])
+ (?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}
+ (?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))
+ |
+ # host name
+ (?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)
+ # domain name
+ (?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*
+ # TLD identifier
+ (?:\.(?:[a-z\u00a1-\uffff]{2,}))
+ # TLD may end with dot
+ \.?
+ )
- # port number
- (?::\d{2,5})?
+ # port number
+ (?::\d{2,5})?
- # resource path
- (?:[/?#]\S*)?
- '.freeze
+ # resource path
+ (?:[/?#]\S*)?
+'.freeze
+ JAVASCRIPT_BASE = '
+ (?:\S+(?::\S*)?@)?
+ (?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})
+ (?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])
+ (?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|
+ (?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*
+ (?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[/?#]\S*)?'.gsub(/\s+/, '').freeze
+
PROTOCOL_IDENTIFIER = '(?:(?:https?|ftp)://)'.freeze
PROTOCOL_IDENTIFIER_OPTIONAL = '(?:(?:https?|ftp)://)?'.freeze
- MODES = [:validation, :parsing].freeze
+ MODES = [:validation, :parsing, :javascript].freeze
private_constant :BASE, :PROTOCOL_IDENTIFIER, :PROTOCOL_IDENTIFIER_OPTIONAL, :MODES
end