Sha256: 911f16c88fb5f469bfaa744ce9f6cc521d7837e332d8d8823135a0caf1e5cc04

Contents?: true

Size: 915 Bytes

Versions: 3

Compression:

Stored size: 915 Bytes

Contents

module Plagiarism
  module Strategies
    # Base class for search-backed uniqueness checks.
    #
    # The content is split into segments, each segment is submitted as a
    # quoted query through {.fetch}, and {.exists?} decides whether that
    # segment counts as valid. Subclasses must implement both hooks.
    class Engine
      # Minimum ratio of valid segments to total segments for {#unique?}
      # to return true.
      THRESHOLD = 0.8

      attr_accessor :content, :params

      class << self
        # Performs the lookup for a single query string.
        #
        # Abstract hook: the base implementation always raises.
        #
        # @param content [String] the query (a segment wrapped in double quotes)
        # @param params [Object] passed through untouched from the instance
        # @return [#response_body] an object whose +response_body+ is handed
        #   to {.exists?} (presumably a Typhoeus response - verify in subclasses)
        # @raise [RuntimeError] when not overridden
        def fetch(content, params)
          raise "#{name}.fetch must be implemented by a subclass"
        end

        # Counts the segments for which {.exists?} returns a truthy value.
        #
        # @param ps [#segment] segmenter whose +segment+ returns the list of
        #   segments to check
        # @param params [Object] forwarded to {.fetch}
        # @return [Integer] number of segments judged valid
        def valid_segments(ps, params)
          ps.segment.count do |sentence|
            # Wrap the segment in double quotes before handing it to fetch.
            response = fetch("\"#{sentence}\"", params)
            exists?(response.response_body)
          end
        end

        # Decides whether a response body marks its segment as valid.
        #
        # Abstract hook: the base implementation always raises.
        #
        # @param response [Object] the +response_body+ of a {.fetch} result
        # @return [Boolean]
        # @raise [RuntimeError] when not overridden
        def exists?(response)
          raise "#{name}.exists? must be implemented by a subclass"
        end

        # Builds a single regexp that matches any configured whitelist pattern.
        #
        # @return [Regexp] union of every entry in +Config.whitelists+
        def whitelists_regex
          Regexp.union(Config.whitelists.map { |pattern| Regexp.new(pattern) })
        end
      end

      # @param c [String] the text to check
      # @param p [Object] parameters forwarded to {.fetch}
      def initialize(c, p)
        @content = c
        @params = p
      end

      # Reports whether the share of valid segments reaches {THRESHOLD}.
      #
      # Content that yields no segments is treated as unique: there is
      # nothing to look up, so no lookup is performed.
      #
      # @return [Boolean]
      def unique?
        segmenter = PragmaticSegmenter::Segmenter.new(text: content)
        total = segmenter.segment.size
        # Guard against dividing by zero on empty content.
        return true if total.zero?

        # fdiv keeps the ratio fractional; Integer#/ would truncate it to
        # 0 or 1 and make the threshold meaningless.
        self.class.valid_segments(segmenter, params).fdiv(total) >= THRESHOLD
      end
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygem

Version Path
plagiarism2-0.0.3 lib/plagiarism/strategries/engine.rb
plagiarism2-0.0.2 lib/plagiarism/strategries/engine.rb
plagiarism2-0.0.1 lib/plagiarism/strategries/engine.rb