lib/url_categorise/client.rb in UrlCategorise-0.0.1 vs lib/url_categorise/client.rb in UrlCategorise-0.0.2

- old
+ new

@@ -2,31 +2,23 @@ class Client < ApiPattern::Client include ::UrlCategorise::Constants attr_reader :host_urls, :hosts + # TODO: Save to folder + # TODO: Read from disk the database # TODO: Sanctioned IPs - # TODO: More default lists # TODO: ActiveRecord support # TODO: List of abuse IPs - # TODO: https://github.com/blocklistproject/Lists - # TODO: https://github.com/nickoppen/pihole-blocklists def initialize(host_urls: DEFAULT_HOST_URLS) @host_urls = host_urls - # @hosts = fetch_and_build_host_lists + @hosts = fetch_and_build_host_lists end - def self.compatible_api_version - 'v1' - end - - def self.api_version - 'v2 2023-05-19' - end - def categorise(url) host = (URI.parse(url).host || url).downcase + host = host.gsub("www.", "") @hosts.keys.select do |category| @hosts[category].include?(host) end end @@ -40,30 +32,79 @@ def count_of_categories @hosts.keys.size end def size_of_data + hash_size_in_mb(@hosts) + end + private + + def hash_size_in_mb(hash) + size = 0 + hash.each do |key, value| + size += value.join.length + end + (size / 1.megabyte).round(2) end def fetch_and_build_host_lists @hosts = {} host_urls.keys.each do |category| @hosts[category] = build_host_data(host_urls[category]) end + sub_category_values = categories_with_keys + sub_category_values.keys.each do |category| + original_value = @hosts[category] || [] + + extra_category_values = sub_category_values[category].each do |sub_category| + @hosts[sub_category] + end + + original_value << extra_category_values + @hosts[category] = original_value + end + @hosts end def build_host_data(urls) urls.map do |url| + next unless url_valid?(url) + raw_data = HTTParty.get(url) raw_data.split("\n").reject do |line| - line.include?("#") + line[0] == "#" end.map do |line| - line.gsub("0.0.0.0 ", "") + line.split(' ')[1] # Select the domain name # gsub("0.0.0.0 ", "") end end.flatten.compact.sort + end + + def categories_with_keys + keyed_categories = {} + + host_urls.keys.each do |category| + category_values = host_urls[category].select do |url| + url_not_valid?(url) && url.is_a?(Symbol) + end + + keyed_categories[category] = category_values + end + + keyed_categories + end + + def url_not_valid?(url) + url_valid?(url) + end + + def url_valid?(url) + uri = URI.parse(url) + uri.is_a?(URI::HTTP) && !uri.host.nil? + rescue URI::InvalidURIError + false end end end