# frozen_string_literal: true

module WhatWeb
  module Matcher
    # Matches a target against a GHDB-style search query string taken from
    # match[:ghdb]. The query may contain the operators intitle:, filetype:
    # and inurl: (each followed by a bare word or a "quoted phrase"), plus
    # free words / quoted phrases that may be prefixed with + or -.
    #
    # NOTE(review): `target` is not defined in this class; it is presumably
    # an accessor provided by Base and is expected to respond to #body, #uri
    # and #text - confirm against Base.
    class GHDB < Base
      attr_reader :query

      # @param target passed through to Base unchanged
      # @param match  [Hash] match definition; match[:ghdb] holds the query
      #   (nil becomes the empty string via #to_s)
      def initialize(target, match)
        super(target, match)
        @query = match[:ghdb].to_s
      end

      # True when at least one intitle: value occurs (case-insensitively) in
      # the text immediately preceding a closing </title> tag of the body.
      #
      # @return [Boolean] false when the query carries no usable intitle: value
      def match_intitle?
        # A value is either a quoted phrase or a single bare word.
        titles = (query.scan(/intitle:"([^"]*)"/i) + query.scan(/intitle:([^"]\w+)/i)).flatten.reject(&:empty?)
        return false if titles.empty?

        # Escape each value on its own and only then join with "|": escaping
        # the joined string would turn the alternation into a literal pipe.
        alternatives = titles.map { |title| Regexp.escape(title) }.join('|')
        target.body.match?(/[^<]*(?:#{alternatives})[^<]*<\/title>/i)
      end

      # True when the URI, with everything from the first "?" removed, ends
      # (case-insensitively) with one of the filetype: values.
      #
      # @return [Boolean] false when the query carries no usable filetype: value
      def match_filetype?
        types = (query.scan(/filetype:"([^"]*)"/i) + query.scan(/filetype:([^"]\w+)/i)).flatten.reject(&:empty?)
        return false if types.empty?

        # split can return an empty array (empty URI), hence the trailing to_s.
        base_uri = target.uri.to_s.split('?').first.to_s
        # The group makes the end anchor apply to every alternative.
        alternatives = types.map { |type| Regexp.escape(type) }.join('|')
        base_uri.match?(/(?:#{alternatives})$/i)
      end

      # True when every inurl: value occurs (case-insensitively) somewhere in
      # the URI. The operator can occur multiple times; all must match.
      #
      # @return [Boolean] false when the query carries no inurl: value
      def match_inurl?
        quoted = query.scan(/inurl:"([^"]*)"/i).flatten
        # The bare form captures word, dots and trailing word separately;
        # join them so "admin.php" is matched as one contiguous string rather
        # than as three independent fragments.
        bare = query.scan(/inurl:([^"]\w+)(\.*)(\w*)/i).map(&:join)
        terms = quoted + bare
        return false if terms.empty?

        uri = target.uri.to_s
        terms.all? { |term| uri.match?(/#{Regexp.escape(term)}/i) }
      end

      # The query with every intitle:, filetype: and inurl: operator (and its
      # value) removed, leaving only the free words and phrases.
      #
      # @return [String]
      def query_for_others
        query
          .gsub(/intitle:"([^"]*)"/i, '').gsub(/intitle:([^"]\w+)/i, '')
          .gsub(/filetype:"([^"]*)"/i, '').gsub(/filetype:([^"]\w+)/i, '')
          .gsub(/inurl:"([^"]*)"/i, '').gsub(/inurl:([^"]\w+)(\.*)(\w*)/i, '')
      end

      # True when every free word/phrase of the query is satisfied by the
      # target text: a term prefixed with "-" must NOT occur, any other term
      # (an optional leading "+" is ignored) must occur. Case-insensitive.
      #
      # @return [Boolean] false when the query has no free words at all
      def match_others?
        words = other_words
        return false if words.empty?

        words.all? do |word|
          if word[0] == '-'
            # negated term: succeed only when the text does not contain it
            !target.text.match?(/#{Regexp.escape(word[1..-1])}/i)
          else
            word = word[1..-1] if word[0] == '+' # "+" carries no extra meaning here
            target.text.match?(/#{Regexp.escape(word)}/i)
          end
        end
      end

      # True when every operator present in the query matches and, if the
      # query also has free words, those match as well. An empty query never
      # matches.
      #
      # @return [Boolean]
      def match?
        matches = []
        matches << match_intitle? if /intitle:/i.match?(query)
        matches << match_filetype? if /filetype:/i.match?(query)
        matches << match_inurl? if /inurl:/i.match?(query)
        # Only consult the free words when there are any; otherwise a query
        # made purely of operators could never succeed.
        matches << match_others? unless other_words.empty?

        # if all matches are true, then true
        !matches.empty? && matches.all?
      end

      private

      # Free words and quoted phrases left in the query, quotes removed,
      # sorted and de-duplicated. A leading + or - stays attached to a quoted
      # phrase (-"foo bar" => "-foo bar"); tokens consisting only of an
      # operator sign are dropped because they carry no term to test.
      def other_words
        query_for_others
          .scan(/[+-]?"[^"]+"|[^ "]+/)
          .map { |word| word.delete('"') }
          .reject { |word| word.sub(/\A[+-]/, '').empty? }
          .sort
          .uniq
      end
    end
  end
end