lib/plagiarism/strategries/engine.rb in plagiarism2-0.0.5 vs lib/plagiarism/strategries/engine.rb in plagiarism2-0.0.6
- old
+ new
@@ -8,34 +8,52 @@
class << self
def fetch(content, params)
raise
end
+ def iterate(r)
+ raise
+ end
+
+ def exists?(response)
+ iterate(response) { |uri| uri.host =~ whitelists_regex }
+ end
+
def valid_segments(ps, params)
ps.segment.count do |sentence|
typhoeus = fetch("\"#{sentence}\"", params)
typhoeus.success? && exists?(typhoeus.response_body)
end
end
- def exists?(response)
- raise
- end
-
def whitelists_regex
whitelists = Config.whitelists.map { |w| Regexp.new w }
Regexp.union whitelists
end
end
def initialize(c, p)
@content, @params = c, p
end
+ def retrieve_link(response)
+ raise
+ end
+
def unique?
ps = PragmaticSegmenter::Segmenter.new(text: content)
valid_segments = self.class.valid_segments(ps, params)
valid_segments / ps.segment.size >= THRESHOLD
end
+
+ def match
+ typhoeus = self.class.fetch("\"#{content}\"", params)
+ typhoeus.success? && retrieve_link(typhoeus.response_body)
+ end
+
+ def retrieve_link(response)
+ self.class.iterate(response) { |uri| uri.host !~ self.class.whitelists_regex and return uri.to_s }
+ end
+
end
end
end