require 'net/http'
require 'uri'
require 'yaml'

# = Defensio
# Adapted from code originally by Technoweenie. Updated to the 1.2 API, and
# refactored.
#
# = License
# Technoweenie fails to mention the license of his original code, so I assume
# that it is either under MIT or public domain. As such, I release this code
# under the MIT license.
#
# Copyright (c) 2008, James Herdman
#
# = Important Note
# * most documentation below is adapted from the Defensio API (v 1.2) manual
# * unless otherwise stated, all arguments are expected to be Strings
module Viking
  class Defensio < Base
    class << self
      attr_accessor :host, :port, :api_version, :standard_headers, :service_type
    end

    attr_accessor :proxy_port, :proxy_host
    attr_reader   :last_response

    self.service_type     = :blog
    self.host             = 'api.defensio.com'
    self.api_version      = '1.2'
    self.standard_headers = {
      'User-Agent'   => "Viking (Ruby Gem) v#{Viking::VERSION::STRING}",
      'Content-Type' => 'application/x-www-form-urlencoded'
    }

    # Create a new instance of the Defensio class
    #
    # ==== Arguments
    # Arguments are provided in the form of a Hash with the following keys
    # (as Symbols) available:
    #
    # +api_key+:: your Defensio API key
    # +blog+:: the blog associated with your api key
    #
    # The following keys are available and are entirely optional. They are
    # available in case communication with Defensio's servers requires a
    # proxy port and/or host:
    #
    # * +proxy_port+
    # * +proxy_host+
    def initialize(options)
      super
      # No validate-key response has been cached yet (see #verified?).
      self.verify_options = false
    end

    # This action verifies that the key is valid for the owner calling the
    # service. A user must have a valid API key in order to use Defensio.
    #
    # The response of the first 'validate-key' call is memoized on the
    # instance, so subsequent calls reuse it instead of issuing a new request.
    #
    # ==== Returns
    # true, false
    def verified?
      return false if invalid_options?
      (@verify_options ||= call_defensio('validate-key'))[:status] == 'success'
    end

    # This action should be invoked upon the publication of an article to
    # announce its existence. The actual content of the article is sent to
    # Defensio for analysis.
    #
    # ==== Arguments
    # Provided in a Hash with the following keys:
    #
    # +owner_url+ (*required*)::
    #   the URL of the owner using Defensio. Note that this value should be
    #   provided in your default options for Viking and will be automatically
    #   inserted into your call.
    # +article_author+ (*required*)::
    #   the name of the author of the article
    # +article_author_email+ (*required*)::
    #   the email address of the article's author
    # +article_title+ (*required*)::
    #   the title of the article
    # +article_content+ (*required*)::
    #   the contents of the article
    # +permalink+ (*required*)::
    #   the permalink of the article
    #
    # ==== Returns
    # Hash::
    #   contains server response. Should things go awry, the full response
    #   will be provided.
    # false::
    #   when +invalid_options?+ is true; no request is made.
    #
    # ===== Response structure
    # The following are the fields returned from the server and their possible
    # values:
    #
    # +status+::
    #   indicates whether or not the key is valid for this blog. Either
    #   'success' or 'fail'.
    # +message+::
    #   the message provided by the action, if applicable
    # +api_version+::
    #   the API version used to process the request
    def check_article(options={})
      return false if invalid_options?
      call_defensio 'announce-article', options
    end

    # This central action determines not only whether Defensio thinks a
    # comment is spam or not, but also a measure of its "spaminess", i.e. its
    # relative likelihood of being spam.
    #
    # It should be noted that one of Defensio's key features is its ability to
    # rank spam according to how "spammy" it appears to be. In order to make
    # the most of the Defensio system in their applications, developers should
    # take advantage of the spaminess value returned by this function, to
    # build interfaces that make it easy for the user to quickly sort through
    # and manage their spamboxes.
    #
    # ==== Arguments
    # Provide arguments in a Hash. The following keys are available:
    #
    # +owner_url+ (*required*)::
    #   the URL of the owner using Defensio. Note that this value should be
    #   provided in your default options for Viking and will be automatically
    #   inserted into your call.
    # +user_ip+ (*required*)::
    #   the IP address of whomever is posting the comment
    # +article_date+ <~strftime> (*required*)::
    #   the date the original blog article was posted. Values responding to
    #   +strftime+ are sent as "YYYY/MM/DD"; any other value is sent as given.
    # +comment_author+ (*required*)::
    #   the name of the author of the comment
    # +comment_type+ (*required*)::
    #   the type of comment being posted to the article. This is expected to
    #   be any of the following: 'comment', 'trackback', 'pingback', 'other'
    # +comment_content+::
    #   the content of the comment in question
    # +comment_author_email+::
    #   the email address of the comment's author
    # +permalink+::
    #   the permalink of the blog post to which the comment is being posted
    # +referrer+::
    #   the URL of the site that brought the commenter to this page
    # +user_logged_in+::
    #   whether or not the user leaving the comment is logged into the client
    #   platform. Expected to be either +true+ or +false+.
    # +trusted_user+::
    #   whether or not the user is an administrator or moderator or editor of
    #   the blog. This should only ever be true if the blogging platform can
    #   guarantee that the user has been authenticated and authorized for this
    #   role. This is expected to be either +true+ or +false+.
    # +openid+::
    #   the OpenID URL of the currently logged in user. Must be used in
    #   conjunction with +user_logged_in+ as +true+. OpenID authentication
    #   must be taken care of by your application.
    # +test_force+::
    #   FOR TESTING PURPOSES ONLY: use this parameter to force the
    #   outcome of +audit_comment+. Optionally affix (with a comma) a desired
    #   +spaminess+ return value (in the range 0 to 1) (e.g. "spam,x.xxxx"
    #   "ham,x.xxxx" )
    #
    # ==== Returns
    # Hash::
    #   contains server response. Should things go awry, the full response
    #   will be provided.
    # false::
    #   when +invalid_options?+ is true; no request is made.
    #
    # ===== Response structure
    # The following are the fields returned from the server and their possible
    # values:
    #
    # +status+::
    #   indicates whether or not the key is valid for this blog. Either
    #   'success' or 'fail'.
    # +message+::
    #   the message provided by the action, if applicable
    # +api_version+::
    #   the API version used to process the request
    # +signature+::
    #   this uniquely identifies a message in the Defensio system. This should
    #   be retained by the client for retraining purposes.
    # +spam+::
    #   whether or not Defensio believes the comment to be spam. This will be
    #   either +true+ or +false+
    # +spaminess+::
    #   a value indicating the relative likelihood that a comment is spam.
    #   This should be retained to aid in building spam sorting interfaces.
    def check_comment(options={})
      return false if invalid_options?

      article_date = options[:article_date]
      if article_date.respond_to?(:strftime)
        # Format on a copy: rewriting the caller's Hash in place would replace
        # the date with a String and make a second call with the same Hash
        # blow up on String#strftime.
        options = options.merge(:article_date => article_date.strftime("%Y/%m/%d")) # e.g. 2007/05/16
      end

      call_defensio 'audit-comment', options
    end

    # This action is used to retrain false negatives. That is to say, to
    # indicate to the filter that comments originally tagged as "ham" (i.e.
    # legitimate) were in fact spam.
    #
    # Retraining the filter in this manner contributes to a personalized
    # learning effect on the filtering algorithm that will improve accuracy
    # for each user over time.
    #
    # ==== Arguments
    # Provide arguments in a Hash. The following keys are available:
    #
    # +owner_url+ (*required*)::
    #   the URL of the owner using Defensio. Note that this value should be
    #   provided in your default options for Viking and will be automatically
    #   inserted into your call.
    # +signatures+ (comma separated Strings)(*required*)::
    #   a comma separated list of signatures (or single entry) to be submitted
    #   for retraining. The signatures were provided by Defensio when a
    #   comment was first audited.
    #
    # ==== Returns
    # Hash::
    #   contains server response. Should things go awry, the full response
    #   will be provided.
    # false::
    #   when +invalid_options?+ is true; no request is made.
    #
    # ===== Response structure
    # The following are the fields returned from the server and their possible
    # values:
    #
    # +status+::
    #   indicates whether or not the key is valid for this blog. Either
    #   'success' or 'fail'.
    # +message+::
    #   the message provided by the action, if applicable
    # +api_version+::
    #   the API version used to process the request
    def mark_as_spam(options={})
      return false if invalid_options?
      call_defensio 'report-false-negatives', options
    end

    # This action is used to retrain false positives. That is to say, to
    # indicate to the filter that comments originally tagged as spam were in
    # fact "ham" (i.e. legitimate comments).
    #
    # Retraining the filter in this manner contributes to a personalized
    # learning effect on the filtering algorithm that will improve accuracy
    # for each user over time.
    #
    # ==== Arguments
    # Provide arguments in a Hash. The following keys are available:
    #
    # +owner_url+ (*required*)::
    #   the URL of the owner using Defensio. Note that this value should be
    #   provided in your default options for Viking and will be automatically
    #   inserted into your call.
    # +signatures+ (comma separated Strings)(*required*)::
    #   a comma separated list of signatures (or single entry) to be submitted
    #   for retraining. The signatures were provided by Defensio when a
    #   comment was first audited.
    #
    # ==== Returns
    # Hash::
    #   contains server response. Should things go awry, the full response
    #   will be provided.
    # false::
    #   when +invalid_options?+ is true; no request is made.
    #
    # ===== Response structure
    # The following are the fields returned from the server and their possible
    # values:
    #
    # +status+::
    #   indicates whether or not the key is valid for this blog. Either
    #   'success' or 'fail'.
    # +message+::
    #   the message provided by the action, if applicable
    # +api_version+::
    #   the API version used to process the request
    def mark_as_ham(options={})
      return false if invalid_options?
      call_defensio 'report-false-positives', options
    end

    # This action returns basic statistics regarding the performance of
    # Defensio since activation
    #
    # ==== Returns
    # Hash::
    #   contains server response. Should things go awry, the full response
    #   will be provided.
    # false::
    #   when +invalid_options?+ is true; no request is made.
    #
    # ===== Response structure
    # The following are the fields returned from the server and their possible
    # values:
    #
    # +status+::
    #   indicates whether or not the key is valid for this blog. Either
    #   'success' or 'fail'.
    # +message+::
    #   the message provided by the action, if applicable
    # +api_version+::
    #   the API version used to process the request
    # +accuracy+::
    #   a value between 0 and 1 representing the percentage of comments
    #   correctly identified as spam or ham by Defensio on this blog
    # +spam+::
    #   the number of spam comments caught by the filter
    # +ham+::
    #   the number of legitimate comments caught by the filter
    # +false_positives+::
    #   the number of times legitimate messages have been retrained (i.e.
    #   "de-spammed") by the user
    # +false_negatives+::
    #   the number of times a comment had to be marked as spam by the user
    # +learning+::
    #   whether or not Defensio is still in its initial learning phase (either
    #   +true+ or +false+)
    # +learning_status+::
    #   more reasons on why Defensio is still learning
    def stats
      return false if invalid_options?
      call_defensio 'get-stats'
    end

    # Formats a URL for use with the Defensio service.
    #
    # ==== Arguments
    # +action+ :: the action you wish to call
    #
    # ==== Returns
    # String
    #
    # ==== Example
    #   > defensio.url('get-stats')
    #   => '/blog/1.2/get-stats/1234abc.yaml'
    def url(action)
      path = [
        '', # ensures opening /
        self.class.service_type,
        self.class.api_version,
        action,
        options[:api_key]
      ].join('/')

      escape_path(path) + '.yaml'
    end

    protected

      # POSTs +params+ to the given Defensio +action+, logs the exchange and
      # returns the parsed response (see #process_response_body).
      def call_defensio(action, params={})
        # Build the path and body once so that exactly what was sent is also
        # what gets logged.
        path = url(action)
        body = data(params)

        resp = defensio_http.post(path, body, self.class.standard_headers)

        log_request(path, body, resp)
        process_response_body(resp.body)
      end

      # Builds the Net::HTTP connection from the class-level host/port and the
      # optional +proxy_host+ / +proxy_port+ entries of the instance options.
      def defensio_http
        Net::HTTP.new(
          self.class.host,
          self.class.port,
          options[:proxy_host],
          options[:proxy_port]
        )
      end

      # Serializes +params+ into the request body, adding the owner URL taken
      # from the instance options (+blog+, falling back to +owner_url+).
      #
      # +merge+ is used rather than +update+ so the Hash handed in by the
      # caller is never modified.
      def data(params={})
        params.
          merge('owner-url' => options[:blog] || options[:owner_url]).
          dasherize_keys.
          to_query
      end

    private

      attr_accessor :verify_options

      # Parses the YAML body returned by Defensio.
      #
      # ==== Returns
      # Hash::
      #   the 'defensio-result' entry with symbolized keys on success.
      #   Otherwise <tt>{ :data => ..., :status => 'fail' }</tt>, where +:data+
      #   is the parsed document, or the raw body if it could not be parsed.
      def process_response_body(response_body)
        # Until parsing succeeds, the raw body is the best available payload.
        data = response_body
        # NOTE(review): YAML.load is applied to data received over the network.
        # On Psych versions where +load+ is not restricted by default this can
        # instantiate arbitrary objects; consider YAML.safe_load once the
        # supported Ruby versions allow it.
        data = YAML.load(response_body)
        data['defensio-result'].symbolize_keys
      rescue StandardError
        { :data => data, :status => 'fail' }
      end

      # Percent-escapes unsafe characters in +path+.
      #
      # URI.escape was removed in Ruby 3.0, so prefer a parser object that
      # still implements +escape+ and fall back to the URI module itself only
      # when no parser constant is defined.
      def escape_path(path)
        parser =
          if URI.const_defined?(:RFC2396_PARSER)
            URI::RFC2396_PARSER
          elsif URI.const_defined?(:DEFAULT_PARSER)
            URI::DEFAULT_PARSER
          else
            URI
          end

        parser.escape(path)
      end
  end
end