lib/public_suffix/rule.rb in public_suffix-1.5.3 vs lib/public_suffix/rule.rb in public_suffix-2.0.0
- old
+ new
@@ -1,65 +1,55 @@
+# = Public Suffix
#
-# Public Suffix
-#
# Domain name parser based on the Public Suffix List.
#
-# Copyright (c) 2009-2015 Simone Carletti <weppos@weppos.net>
-#
+# Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
module PublicSuffix
# A Rule is a special object which holds a single definition
# of the Public Suffix List.
#
- # There are 3 types of ruleas, each one represented by a specific
+ # There are 3 types of rules, each one represented by a specific
# subclass within the +PublicSuffix::Rule+ namespace.
#
# To create a new Rule, use the {PublicSuffix::Rule.factory} method.
#
# PublicSuffix::Rule.factory("ar")
# # => #<PublicSuffix::Rule::Normal>
#
module Rule
- #
# = Abstract rule class
#
# This represents the base class for a Rule definition
- # in the {Public Suffix List}[http://publicsuffix.org].
- #
+ # in the {Public Suffix List}[https://publicsuffix.org].
+ #
# This is intended to be an Abstract class
# and you shouldn't create a direct instance. The only purpose
# of this class is to expose a common interface
# for all the available subclasses.
#
# * {PublicSuffix::Rule::Normal}
# * {PublicSuffix::Rule::Exception}
# * {PublicSuffix::Rule::Wildcard}
#
- # == Properties
+ # ## Properties
#
# A rule is composed of 4 properties:
#
- # name - The name of the rule, corresponding to the rule definition
- # in the public suffix list
- # value - The value, a normalized version of the rule name.
+ # value - A normalized version of the rule name.
# The normalization process depends on rule type.
- # type - The rule type (:normal, :wildcard, :exception)
- # labels - The canonicalized rule name
#
# Here's an example
#
# PublicSuffix::Rule.factory("*.google.com")
# #<PublicSuffix::Rule::Wildcard:0x1015c14b0
- # @labels=["com", "google"],
- # @name="*.google.com",
- # @type=:wildcard,
# @value="google.com"
# >
#
- # == Rule Creation
+ # ## Rule Creation
#
# The best way to create a new rule is passing the rule name
# to the <tt>PublicSuffix::Rule.factory</tt> method.
#
# PublicSuffix::Rule.factory("com")
@@ -69,304 +59,273 @@
# # => PublicSuffix::Rule::Wildcard
#
# This method will detect the rule type and create an instance
# from the proper rule class.
#
- # == Rule Usage
+ # ## Rule Usage
#
- # A rule describes the composition of a domain name
- # and explains how to tokenize the domain name
- # into tld, sld and trd.
+ # A rule describes the composition of a domain name and explains how to tokenize
+ # the name into tld, sld and trd.
#
- # To use a rule, you first need to be sure the domain you want to tokenize
+ # To use a rule, you first need to be sure the name you want to tokenize
# can be handled by the current rule.
# You can use the <tt>#match?</tt> method.
#
# rule = PublicSuffix::Rule.factory("com")
- #
+ #
# rule.match?("google.com")
# # => true
- #
+ #
# rule.match?("google.net")
# # => false
#
- # Rule order is significant. A domain can match more than one rule.
+ # Rule order is significant. A name can match more than one rule.
# See the {Public Suffix Documentation}[http://publicsuffix.org/format/]
# to learn more about rule priority.
#
# When you have the right rule, you can use it to tokenize the domain name.
- #
+ #
# rule = PublicSuffix::Rule.factory("com")
- #
+ #
# rule.decompose("google.com")
# # => ["google", "com"]
- #
+ #
# rule.decompose("www.google.com")
# # => ["www.google", "com"]
#
# @abstract
#
class Base
- attr_reader :name, :value, :labels
+ # @return [String] the rule definition
+ attr_reader :value
+ # @return [Boolean] true if the rule is a private domain
+ attr_reader :private
+
+
# Initializes a new rule with name and value.
# If value is +nil+, name also becomes the value for this rule.
#
- # @param [String] name
- # The name of the rule
- # @param [String] value
- # The value of the rule. If nil, defaults to +name+.
- #
- def initialize(name, value = nil)
- @name = name.to_s
- @value = value || @name
- @labels = Domain.domain_to_labels(@value)
+ # @param value [String] the value of the rule
+ def initialize(value, private: false)
+ @value = value.to_s
+ @private = private
end
- #
- # The rule type name.
- #
- # @return [Symbol]
- #
- def self.type
- @type ||= self.name.split("::").last.downcase.to_sym
- end
-
- #
- # @see {type}
- #
- def type
- self.class.type
- end
-
# Checks whether this rule is equal to <tt>other</tt>.
#
- # @param [PublicSuffix::Rule::*] other
- # The rule to compare.
- #
+ # @param [PublicSuffix::Rule::*] other The rule to compare
# @return [Boolean]
# Returns true if this rule and other are instances of the same class
# and have the same value, false otherwise.
def ==(other)
- return false unless other.is_a?(self.class)
- self.equal?(other) ||
- self.name == other.name
+ equal?(other) || (self.class == other.class && value == other.value)
end
- alias :eql? :==
+ alias eql? ==
- # Checks if this rule matches +domain+.
+ # Checks if this rule matches +name+.
#
- # @param [String, #to_s] domain
- # The domain name to check.
+ # A domain name is said to match a rule if and only if
+ # all of the following conditions are met:
#
- # @return [Boolean]
+ # - When the domain and rule are split into corresponding labels,
+ # that the domain contains as many or more labels than the rule.
+ # - Beginning with the right-most labels of both the domain and the rule,
+ # and continuing for all labels in the rule, one finds that for every pair,
+ # either they are identical, or that the label from the rule is "*".
#
+ # @see https://publicsuffix.org/list/
+ #
# @example
- # rule = Rule.factory("com")
- # # #<PublicSuffix::Rule::Normal>
- # rule.match?("example.com")
+ # Rule.factory("com").match?("example.com")
# # => true
- # rule.match?("example.net")
+ # Rule.factory("com").match?("example.net")
# # => false
#
- def match?(domain)
- l1 = labels
- l2 = Domain.domain_to_labels(domain)
- odiff(l1, l2).empty?
- end
-
- # Checks if this rule allows +domain+.
- #
- # @param [String, #to_s] domain
- # The domain name to check.
- #
+ # @param name [String, #to_s] The domain name to check.
# @return [Boolean]
- #
- # @example
- # rule = Rule.factory("*.do")
- # # => #<PublicSuffix::Rule::Wildcard>
- # rule.allow?("example.do")
- # # => false
- # rule.allow?("www.example.do")
- # # => true
- #
- def allow?(domain)
- !decompose(domain).last.nil?
+ def match?(name)
+ # Note: it works because of the assumption there are no
+ # rules like foo.*.com. If the assumption is incorrect,
+ # we need to properly walk the input and skip parts according
+ # to wildcard component.
+ diff = name.chomp(value)
+ diff.empty? || diff[-1] == "."
end
- # Gets the length of this rule for comparison.
- # The length usually matches the number of rule +parts+.
- #
- # Subclasses might actually override this method.
- #
- # @return [Integer] The number of parts.
- def length
- parts.length
- end
-
- #
- # @raise [NotImplementedError]
# @abstract
def parts
- raise(NotImplementedError,"#{self.class}##{__method__} is not implemented")
+ raise NotImplementedError
end
- #
- # @param [String, #to_s] domain
- # The domain name to decompose.
- #
- # @return [Array<String, nil>]
- #
- # @raise [NotImplementedError]
# @abstract
- def decompose(domain)
- raise(NotImplementedError,"#{self.class}##{__method__} is not implemented")
+ def length
+ raise NotImplementedError
end
- private
-
- def odiff(one, two)
- ii = 0
-
- while(ii < one.size && one[ii] == two[ii])
- ii += 1
- end
-
- one[ii..one.length]
+ # @abstract
+ # @param [String, #to_s] name The domain name to decompose
+ # @return [Array<String, nil>]
+ def decompose(*)
+ raise NotImplementedError
end
end
+ # Normal represents a standard rule (e.g. com).
class Normal < Base
- # Initializes a new rule with +name+.
+ # Initializes a new rule from +definition+.
#
- # @param [String] name
- # The name of this rule.
+ # @param definition [String] the rule as defined in the PSL
+ def initialize(definition, **options)
+ super(definition, **options)
+ end
+
+ # Gets the original rule definition.
#
- def initialize(name)
- super(name, name)
+ # @return [String] The rule definition.
+ def rule
+ value
end
+ # Decomposes the domain name according to rule properties.
+ #
+ # @param [String, #to_s] domain The domain name to decompose
+ # @return [Array<String>] The array with [trd + sld, tld].
+ def decompose(domain)
+ suffix = parts.join('\.')
+ matches = domain.to_s.match(/^(.*)\.(#{suffix})$/)
+ matches ? matches[1..2] : [nil, nil]
+ end
+
# dot-split rule value and returns all rule parts
# in the order they appear in the value.
#
# @return [Array<String>]
def parts
- @parts ||= @value.split(".")
+ @value.split(DOT)
end
- # Decomposes the domain according to rule properties.
+ # Gets the length of this rule for comparison,
+ # represented by the number of dot-separated parts in the rule.
#
- # @param [String, #to_s] domain
- # The domain name to decompose.
- #
- # @return [Array<String>]
- # The array with [trd + sld, tld].
- #
- def decompose(domain)
- domain.to_s.chomp(".") =~ /^(.*)\.(#{parts.join('\.')})$/
- [$1, $2]
+ # @return [Integer] The length of the rule.
+ def length
+ @length ||= parts.length
end
end
+ # Wildcard represents a wildcard rule (e.g. *.co.uk).
class Wildcard < Base
- # Initializes a new rule with +name+.
+ # Initializes a new rule from +definition+.
#
- # @param [String] name
- # The name of this rule.
+ # The wildcard "*" is removed from the value, as it's common
+ # for each wildcard rule.
#
- def initialize(name)
- super(name, name.to_s[2..-1])
+ # @param definition [String] the rule as defined in the PSL
+ def initialize(definition, **options)
+ super(definition.to_s[2..-1], **options)
end
+ # Gets the original rule definition.
+ #
+ # @return [String] The rule definition.
+ def rule
+ value == "" ? STAR : STAR + DOT + value
+ end
+
+ # Decomposes the domain name according to rule properties.
+ #
+ # @param [String, #to_s] domain The domain name to decompose
+ # @return [Array<String>] The array with [trd + sld, tld].
+ def decompose(domain)
+ suffix = ([".*?"] + parts).join('\.')
+ matches = domain.to_s.match(/^(.*)\.(#{suffix})$/)
+ matches ? matches[1..2] : [nil, nil]
+ end
+
# dot-split rule value and returns all rule parts
# in the order they appear in the value.
#
# @return [Array<String>]
def parts
- @parts ||= @value.split(".")
+ @value.split(DOT)
end
- # Overwrites the default implementation to cope with
- # the +*+ char.
+ # Gets the length of this rule for comparison,
+ # represented by the number of dot-separated parts in the rule
+ # plus 1 for the *.
#
- # @return [Integer] The number of parts.
+ # @return [Integer] The length of the rule.
def length
- parts.length + 1 # * counts as 1
+ @length ||= parts.length + 1 # * counts as 1
end
- # Decomposes the domain according to rule properties.
- #
- # @param [String, #to_s] domain
- # The domain name to decompose.
- #
- # @return [Array<String>]
- # The array with [trd + sld, tld].
- #
- def decompose(domain)
- domain.to_s.chomp(".") =~ /^(.*)\.(.*?\.#{parts.join('\.')})$/
- [$1, $2]
- end
-
end
+ # Exception represents an exception rule (e.g. !parliament.uk).
class Exception < Base
- # Initializes a new rule with +name+.
+ # Initializes a new rule from +definition+.
#
- # @param [String] name The name of this rule.
+ # The bang ! is removed from the value, as it's common
+ # for each exception rule.
#
- def initialize(name)
- super(name, name.to_s[1..-1])
+ # @param definition [String] the rule as defined in the PSL
+ def initialize(definition, **options)
+ super(definition.to_s[1..-1], **options)
end
+ # Gets the original rule definition.
+ #
+ # @return [String] The rule definition.
+ def rule
+ BANG + value
+ end
+
+ # Decomposes the domain name according to rule properties.
+ #
+ # @param [String, #to_s] domain The domain name to decompose
+ # @return [Array<String>] The array with [trd + sld, tld].
+ def decompose(domain)
+ suffix = parts.join('\.')
+ matches = domain.to_s.match(/^(.*)\.(#{suffix})$/)
+ matches ? matches[1..2] : [nil, nil]
+ end
+
# dot-split rule value and returns all rule parts
# in the order they appear in the value.
# The leftmost label is not considered a label.
#
# See http://publicsuffix.org/format/:
# If the prevailing rule is a exception rule,
- # modify it by removing the leftmost label.
+ # modify it by removing the leftmost label.
#
# @return [Array<String>]
def parts
- @parts ||= @value.split(".")[1..-1]
+ @value.split(DOT)[1..-1]
end
- # Decomposes the domain according to rule properties.
+ # Gets the length of this rule for comparison,
+ # represented by the number of dot-separated parts in the rule.
#
- # @param [String, #to_s] domain
- # The domain name to decompose.
- #
- # @return [Array<String>]
- # The array with [trd + sld, tld].
- #
- def decompose(domain)
- domain.to_s.chomp(".") =~ /^(.*)\.(#{parts.join('\.')})$/
- [$1, $2]
+ # @return [Integer] The length of the rule.
+ def length
+ @length ||= parts.length
end
end
- RULES = {
- '*' => Wildcard,
- '!' => Exception
- }
- RULES.default = Normal
# Takes the +name+ of the rule, detects the specific rule class
# and creates a new instance of that class.
# The +name+ becomes the rule +value+.
#
- # @param [String] name The rule definition.
- #
- # @return [PublicSuffix::Rule::*] A rule instance.
- #
# @example Creates a Normal rule
# PublicSuffix::Rule.factory("ar")
# # => #<PublicSuffix::Rule::Normal>
#
# @example Creates a Wildcard rule
@@ -375,11 +334,31 @@
#
# @example Creates an Exception rule
# PublicSuffix::Rule.factory("!congresodelalengua3.ar")
# # => #<PublicSuffix::Rule::Exception>
#
- def self.factory(name)
- RULES[name.to_s[0,1]].new(name)
+ # @param [String] content The rule content.
+ # @return [PublicSuffix::Rule::*] A rule instance.
+ def self.factory(content, **options)
+ case content.to_s[0, 1]
+ when STAR
+ Wildcard
+ when BANG
+ Exception
+ else
+ Normal
+ end.new(content, **options)
+ end
+
+ # The default rule to use if no rule matches.
+ #
+ # The default rule is "*". From https://publicsuffix.org/list/:
+ #
+ # > If no rules match, the prevailing rule is "*".
+ #
+ # @return [PublicSuffix::Rule::Wildcard] The default rule.
+ def self.default
+ factory(STAR)
end
end
end