Sha256: d8c18ab857bdac41b0627681fecf50606727264d4c8b696d26befa7c9aea2ebe

Contents?: true

Size: 1.28 KB

Versions: 12

Compression:

Stored size: 1.28 KB

Contents

module Gluttonberg
  module Content
    require 'despamilator/filter'

    module DespamilatorFilter

      class UnusualCharacters < Despamilator::Filter

        # Display name of this filter.
        #
        # @return [String] the fixed label "Unusual Characters"
        def name
          "Unusual Characters"
        end

        # One-sentence summary of what this filter looks for.
        #
        # @return [String] the fixed description text
        def description
          summary = 'Detects and scores each occurrence of an unusual 2 or 3 character combination'
          summary
        end

        # Scans +subject+ for unusual character combinations.
        #
        # Makes sure the combination table is loaded, tokenizes the subject's
        # text (as returned by +subject.text.without_uris+) and registers one
        # match with a score of 0.05 on the subject for every token that is
        # present in the table.
        #
        # @param subject the object under inspection; must respond to
        #   +text+ and +register_match!+
        def parse subject
          initialize_combos
          candidates = tokenize(subject.text.without_uris)
          candidates.each do |candidate|
            # Only tokens listed in the combination table count as a hit.
            next unless @@combos[candidate.to_sym]

            subject.register_match!({:score => 0.05, :filter => self})
          end
        end

        private

        # Breaks +text+ into every 2- and 3-letter sequence found inside its words.
        #
        # The text is lower-cased and split on any character outside a-z, so a
        # sequence never spans whitespace, digits or punctuation. For each
        # position in a word the trigram starting there is emitted first (when
        # three letters remain), followed by the bigram (when two remain).
        #
        # @param text [String] the text to tokenize
        # @return [Array<Symbol>] the combinations, in order of appearance
        def tokenize text
          tokens = []
          text.downcase.split(/[^a-z]/).each do |word|
            word.length.times do |i|
              # String#[] takes (start, length), so the window must be a fixed
              # 3 characters. Using i + 3 as the length made the window grow
              # with the offset and skipped most trigrams after position 0.
              substr = word[i, 3]
              tokens << substr.to_sym if substr.length == 3
              tokens << substr[0, 2].to_sym if substr.length > 1
            end
          end
          tokens
        end

        # Loads the table of unusual character combinations, once.
        #
        # The table lives in the class variable @@combos as Symbol => true and
        # is read from ../conf/unusual_characters.txt relative to this file,
        # one combination per line. Surrounding whitespace is stripped and
        # blank lines are ignored. Subsequent calls return the already
        # populated table without touching the file.
        #
        # @return [Hash] the combination lookup table
        def initialize_combos
          @@combos ||= {}
          return @@combos unless @@combos.empty?

          path = File.join(File.dirname(__FILE__), %w{.. conf unusual_characters.txt})
          # File.foreach closes the file once iteration ends; the former
          # File.open(...).each left the handle open until garbage collection.
          File.foreach(path) do |line|
            combo = line.strip
            @@combos[combo.to_sym] = true unless combo.empty?
          end
          @@combos
        end

      end

    end
  end #Content
end #Gluttonberg

Version data entries

12 entries across 12 versions & 1 rubygems

Version Path
gluttonberg-core-3.0.2 lib/gluttonberg/content/despamilator/filter/unusual_characters.rb
gluttonberg-core-3.0.1 lib/gluttonberg/content/despamilator/filter/unusual_characters.rb
gluttonberg-core-3.0.0 lib/gluttonberg/content/despamilator/filter/unusual_characters.rb
gluttonberg-core-2.6.4 lib/gluttonberg/content/despamilator/filter/unusual_characters.rb
gluttonberg-core-2.6.3 lib/gluttonberg/content/despamilator/filter/unusual_characters.rb
gluttonberg-core-2.6.2 lib/gluttonberg/content/despamilator/filter/unusual_characters.rb
gluttonberg-core-2.6.1 lib/gluttonberg/content/despamilator/filter/unusual_characters.rb
gluttonberg-core-2.6.0 lib/gluttonberg/content/despamilator/filter/unusual_characters.rb
gluttonberg-core-2.5.9 lib/gluttonberg/content/despamilator/filter/unusual_characters.rb
gluttonberg-core-2.5.8 lib/gluttonberg/content/despamilator/filter/unusual_characters.rb
gluttonberg-core-2.5.7 lib/gluttonberg/content/despamilator/filter/unusual_characters.rb
gluttonberg-core-2.5.6 lib/gluttonberg/content/despamilator/filter/unusual_characters.rb