lib/defender/document.rb in defender-0.2.0 vs lib/defender/document.rb in defender-1.0.0beta1
- old
+ new
@@ -1,423 +1,89 @@
module Defender
- ##
- # A document contains content to be analyzed by Defensio, or that has been
- # analyzed.
- #
- # Most of the Defensio API revolves around documents, including the detection
- # of unwanted content.
class Document
##
- # Whether the document should be published by your Web site or not. For
- # example, spam and malicious content are not allowed.
+ # Whether the document should be published on your Web site or not.
#
- # This is the only attribute that can be updated after the initial saving.
- # Use this for retraining purposes.
+ # For example, spam and malicious content are not allowed.
#
# @return [Boolean]
attr_accessor :allow
- alias :allow? :allow
+ alias_method :allow?, :allow
##
- # The type of content in the document.
+ # The information about the document. This hash accepts so many parameters
+ # I won't list them here. Go look at the [Defensio API docs]
+ # (http://defensio.com/api) instead.
#
- # @return [String] The possible values are innocent, spam and malicious.
- attr_reader :classification
-
- ##
- # Whether the document matches profanity or other words defined by the
- # user. For example, this is useful to detect obscene comments posted
- # to your Web site. When true, you can obtain a filtered version of the
- # document by calling {#filter!}.
+ # Defender will replace all underscores in keys with dashes, so you can use
+ # `:author_email` instead of `'author-email'`.
#
- # @return [Boolean]
- attr_reader :profane
- alias :profane? :profane
+ # @return [Hash{#to_s => #to_s}]
+ attr_accessor :data
##
- # A unique identifier for the document. You need this value to perform new
- # requests on the same document. Signatures should be kept private and never
- # be shared with your users.
+ # A unique identifier for the document.
#
+ # This is needed to retrieve the status back from Defensio and to submit
+ # false negatives/positives to Defensio. Signatures should be kept private
+ # and never shared with your users.
+ #
# @return [String]
attr_reader :signature
##
- # A numeric value indicating how strongly the document resembles spam. For
- # example, a document containing many links to pharmaceutical sites is
- # likely to have a very high spaminess value. This value should only be used
- # for sorting, and should never be used to determine if a document should be
- # allowed or not. Spaminess should be kept private and never be shared with
- # your users.
+ # Retrieves the status of a document back from Defensio.
#
- # @return [Float<0..1>] A float value between 0 and 1, whith 1 being
- # extremely spammy. For example, 0.89 (89%).
- attr_reader :spaminess
-
- ##
- # The string containing the body of the document. This field is required.
+ # Please note that this only retrieves the status of the document (like
+ # it's spaminess, whether it should be allowed or not, etc.) and not the
+ # content of the request (all of the data in the {#data} hash).
#
- # @return [String]
- attr_accessor :content
-
- ##
- # The platform which the document is submitted on.
- #
- # One word, lower case. Spaces should be converted to underscores.
- #
- # *Examples:*
- # wordpress, pixelpost, drupal, phpbb, movable_type
- #
- # The default is 'ruby'.
- #
- # @return [String]
- attr_accessor :platform
-
- ##
- # Identified the type of content to be analyzed.
- #
- # Use *test* only for testing purposes.
- #
- # When *type* is set to *test*, Defensio (not Defender) parses content for
- # classification and spaminess. For example, if you want the API to return
- # *malicious* as the classification and a spaminess of *0.99*, insert the
- # following in content:
- # [malicious,0.99]
- #
- # There are three possible classifications:
- #
- # * innocent
- # * spam
- # * malicious
- #
- # Spaminess should be a decimal value between 0 and 1 (see
- # {#spaminess})
- #
- # *IMPORTANT*
- #
- # Do *NOT* leave type set to *test* in production. This could represent a
- # significant security breach.
- attr_accessor :type
-
- ##
- # The email address of the author of the document.
- #
- # @return [String]
- attr_accessor :author_email
-
- ##
- # The IP address of the author of the document.
- #
- # For example, this could be the IP address of the person posting a comment
- # on a blog.
- #
- # @return [String]
- attr_accessor :author_ip
-
- ##
- # Whether or not the user posting the document is logged in onto your Web
- # site, either through your own authentication mechanism or through OpenID.
- #
- # @see Document#author_openid
- # @see Document#author_trusted
- # @return [Boolean]
- attr_accessor :author_logged_in
-
- ##
- # The name of the author of the document.
- #
- # @return [Boolean]
- attr_accessor :author_name
-
- ##
- # The OpenID URL of the logged-on user. Must be used in conjunction with
- # {Document#author_logged_in} = true.
- #
- # OpenID authentication must be taken care of by your application. Only send
- # this parameter if you have successfully authenticated the user with
- # OpenID.
- #
- # @return [String]
- attr_accessor :author_openid
-
- ##
- # Whether or not the user is an administrator, moderator or editor of your
- # Web site. Pass `true` only if you can guarantee that the user has been
- # authenticated, has a role of responsibility, and can be trusted as a good
- # Web citizen.
- #
- # @return [Boolean]
- attr_accessor :author_trusted
-
- ##
- # The URL of the person posting the document.
- #
- # @return [String]
- attr_accessor :author_url
-
- ##
- # Whether or not the Web browser used to post the document (i.e., the
- # comment) has cookies enabled. If no such detection has been made, leave
- # this value empty.
- #
- # @return [Boolean]
- attr_accessor :browser_cookies
-
- ##
- # Whether or not the Web browser used to post the document (i.e., the
- # comment) has JavaScript enabled. If no such detection has been made, leave
- # this value empty.
- #
- # @return [Boolean]
- attr_accessor :browser_javascript
-
- ##
- # The URL of the document being posted.
- #
- # *Examples*
- #
- # For a comment on a blog, the permalink URL might be:
- #
- # 'http://yourdomain.com/article#comment-51'
- #
- # For an article, it might be:
- #
- # 'http://yourdomain.com/article'
- #
- # @return [String]
- attr_accessor :document_permalink
-
- ##
- # Contains the HTTP headers sent with the request. You can send a few values
- # or all values. Because this information helps Defensio determine if a
- # document is innocent or not, the more headers you send, the better.
- #
- # @see #referrer
- # @return [Hash{String => String}, Array<String>] You can pass a hash with
- # key => values, or an array where each entry has the format `"HEADER:
- # value"`
- attr_accessor :http_headers
-
- ##
- # The date the parent document was posted. For example, on a blog, this
- # would be the date the article related to the comment (document) was
- # posted.
- #
- # If you are using threaded comments, send the date the article was posted,
- # *not* the date the parent comment was posted.
- #
- # @return [Time, Date, DateTime, "yyyy-mm-dd"] If a Time or DateTime is passed, only the
- # date part will be saved.
- attr_accessor :parent_document_date
-
- ##
- # The URL of the parent document. For example, on a blog, this would be the
- # URL of the article on which the comment (document) was posted.
- #
- # @see #document_permalink
- # @return [String]
- attr_accessor :parent_document_permalink
-
- ##
- # Provide the value of the HTTP_REFERER (note the spelling) in this field.
- #
- # @see #http_headers
- # @return [String]
- attr_accessor :referrer
-
- ##
- # Provide the title of the document being sent. For example, this might be
- # the title of a blog article.
- #
- # Do not send this information if no title has been provided.
- attr_accessor :title
-
- ##
- # Is the document still pending?
- #
- # @return [Boolean]
- attr_reader :pending
- alias :pending? :pending
-
- ##
- # Set the pending attribute to true. Only to be used by {find} and similar
- # methods.
- #
- # @private
- def pending!; @pending = true; end
-
- ##
- # Retrieves a document from the Defensio server.
- #
- # This can be called up to 30 days after the initial posting of a document
- # to Defensio.
- #
- # @return [Document]
+ # @param [String] signature The signature of the document to retrieve
+ # @return [Document] The document to retrieve
def self.find(signature)
- document = new()
- response = Defender.get("/#{Defender.api_key}/documents/#{signature}.json")['defensio-result']
- if response['status'] == 'success' || response['status'] == 'pending'
- document.set_attributes(response)
- document.pending! if response['status'] == 'pending'
- else
- raise StandardError, response['message']
- end
+ document = new
+ _code, data = Defender.defensio.get_document(signature)
+ document.instance_variable_set(:@saved, true)
+ document.instance_variable_set(:@allow, data['allow'])
+ document.instance_variable_set(:@signature, signature)
+
document
end
##
- # Create a new document.
- def initialize()
+ # Initializes a new document
+ def initialize
+ @data = {}
+ @saved = false
end
##
- # Re-retrieves the document from the Defensio server
- #
- # This can be called up to 30 days after the initial posting of the document
- # to Defensio
- #
- # @return [true] The document was updated.
- # @return [false] The document was not updated (still pending).
- def refresh!
- response = Defender.get("/#{Defender.api_key}/documents/#{signature}.json")['defensio-result']
- if response['status'] == 'success'
- document.set_attributes(response)
- return true
- elsif response['status'] == 'pending'
- pending!
- return false
- else
- raise StandardError, response['message']
- end
+ # @return [Boolean] Has the document been submitted to Defensio?
+ def saved?
+ @saved
end
##
- # Creates an attributes hash to be sent to Defensio. This method will make
- # sure that the required attributess are in, and the names of the attributes
- # are correct.
+ # Submit the document to Defensio.
#
- # @return [Hash{String => String}]
- def attributes_hash
- options = {
- 'client' => "Defender | #{Defender::VERSION} | Henrik Hodne | henrik.hodne@binaryhex.com",
- 'platform' => platform || "ruby",
- 'content' => content,
- 'type' => type
- }
- [
- :author_email, :author_ip, :author_logged_in, :author_name, :author_openid,
- :author_trusted, :author_url, :browser_cookies, :browser_javascript,
- :document_permalink, :referrer, :title, :parent_document_permalink
- ].each do |symbol|
- options[symbol.to_s.gsub("_", "-")] = self.send(symbol)
- end
-
- headers = http_headers
- unless headers.nil?
- options['http-headers'] = headers.to_a.map do |kv|
- kv.respond_to?(:join) ? kv.join(": ") : kv
- end.join("\n")
- end
-
- pddate = parent_document_date
- options['parent-document-date'] = pddate.respond_to?(:strftime) ?
- pddate.strftime("%Y-%m-%d") : pddate
-
- formatted_options = {}
-
- options.each do |key, value|
- formatted_options[key] = value.to_s unless value.nil?
- end
-
- formatted_options
- end
-
- ##
- # Post the document to Defensio to be analyzed for spam and malicious
- # content.
+ # This will send all of the {#data} if the document hasn't been saved
+ # before. If it has been saved, it will submit whether the document was a
+ # false positive/negative (set the {#allow} param before saving to do
+ # this).
#
- # @param [Boolean] async Whether or not the document analysis should be done
- # asynchronously. With asynchronous document analysis you will obtain
- # better accuracy. Do not poll the servers more than once every 30 seconds
- # for each document. To avoid polling, set the callback URL with
- # {Defender.async_callback}. You can get the information from the server
- # using the {#refresh!} method or calling {Document.find} with the
- # signature.
- #
- # @see #pending?
- #
- # @raise ArgumentError if a required field is not set.
- # @return [Boolean] Whether the record was saved or not.
- def save(async=false)
- if sig = signature # The document is submitted to Defensio
- response = Defender.put("/#{Defender.api_key}/documents/#{sig}.json",
- :allow => allow?)['defensio-result']
+ # @see #saved?
+ def save
+ if saved?
+ _code, data = Defender.defensio.put_document(@signature, {:allow => @allow})
else
- hsh = attributes_hash
- if attributes_hash['content'].nil?
- raise ArgumentError, 'The content field is required'
- end
- if attributes_hash['type'].nil?
- raise ArgumentError, 'The type field is required'
- end
-
- if async
- hsh['async'] = 'true'
- hsh['async-callback'] = Defender.async_callback if Defender.async_callback
- end
- response = Defender.post("/#{Defender.api_key}/documents.json", hsh)['defensio-result']
- end
- if response['status'] == 'success'
- set_attributes(response)
- return true
- elsif response['status'] == 'pending'
- set_attributes(response) # Some fields are blank
- @pending = true
- return true
- else
- return false
- end
- end
-
- def set_attributes(attributes)
- [:classification, :signature, :spaminess, :allow].each do |symbol|
- self.instance_variable_set(:"@#{symbol}", attributes[symbol.to_s])
- end
- @profane = attributes['profanity-match']
- undefine_setters
- end
-
- ##
- # Filters the provided fields. The filtering is based on a default
- # dictionary and one previously configured by the user.
- #
- # @param [Array<Symbol>] *args The fields to filter (like `:content`,
- # `:author_name`, etc.)
- def filter!(*args)
- filter = {}
- args.each {|arg| filter[arg] = __send__(arg) }
- response = Defender.post("/#{Defender.api_key}/profanity-filter.json", filter)['defensio-result']
- if response['status'] == 'success'
- response['filtered'].each do |key, value|
- self.instance_variable_set(:"@#{key}", value)
- end
- else
- raise StandardError, response['message']
- end
- end
-
- private
-
- def undefine_setters
- [
- :content=, :platform=, :type=, :author_email=, :author_ip=,
- :author_logged_in=, :author_name=, :author_openid=,
- :author_trusted=, :author_url=, :browser_cookies=,
- :browser_javascript=, :document_permalink=, :http_headers=,
- :parent_document_date=, :referrer=, :title=
- ].each do |method|
- # TODO: Fix hack.
- instance_eval "def self.#{method}(*args)\nmethod_missing(#{method.inspect}, *args)\nend"
+ data = {}
+ @data.each { |k,v|
+ data[k.to_s.gsub('_','-')] = v.to_s
+ }
+ _code, data = Defender.defensio.post_document(@data)
+ @allow = data['allow']
+ @signature = data['signature']
+ @saved = true
end
end
end
end