lib/url_categorise/client.rb in UrlCategorise-0.0.1 vs lib/url_categorise/client.rb in UrlCategorise-0.0.2
- old
+ new
@@ -2,31 +2,23 @@
class Client < ApiPattern::Client
include ::UrlCategorise::Constants
attr_reader :host_urls, :hosts
+ # TODO: Save to folder
+ # TODO: Read the database from disk
# TODO: Sanctioned IPs
- # TODO: More default lists
# TODO: ActiveRecord support
# TODO: List of abuse IPs
- # TODO: https://github.com/blocklistproject/Lists
- # TODO: https://github.com/nickoppen/pihole-blocklists
# Creates a client and immediately builds the per-category host lists.
#
# @param host_urls [Hash] category => list sources; defaults to
#   DEFAULT_HOST_URLS
def initialize(host_urls: DEFAULT_HOST_URLS)
  @host_urls = host_urls
  # Built eagerly so the query methods can read @hosts straight away.
  @hosts = fetch_and_build_host_lists
end
- def self.compatible_api_version
- 'v1'
- end
-
- def self.api_version
- 'v2 2023-05-19'
- end
-
# Returns the categories whose host list contains the host of +url+.
#
# @param url [String] a full URL or a bare host name
# @return [Array] keys of @hosts whose list includes the normalised host
# @raise [URI::InvalidURIError] if +url+ cannot be parsed
def categorise(url)
  # A bare host name has no URI host component, so fall back to the input.
  host = (URI.parse(url).host || url).downcase
  # Strip only a leading "www." label; removing every occurrence would
  # corrupt hosts such as "awww.example.org".
  host = host.sub(/\Awww\./, "")

  @hosts.keys.select do |category|
    @hosts[category].include?(host)
  end
end
@@ -40,30 +32,79 @@
# Number of categories currently held in @hosts.
#
# @return [Integer]
def count_of_categories
  @hosts.size
end
# Size of the loaded host data, as reported by hash_size_in_mb for @hosts.
def size_of_data
  hash_size_in_mb(@hosts)
end
+ private
+
# Approximates the size of a category => hosts hash in megabytes.
#
# The size is the total character count of all hosts joined together, so
# keys are not included in the figure.
#
# @param hash [Hash] category => Array of host strings
# @return [Float] megabytes, rounded to two decimal places
def hash_size_in_mb(hash)
  total_chars = hash.each_value.sum { |hosts| hosts.join.length }

  # Divide as a Float: integer division reported 0 for anything below 1 MB
  # and made the rounding a no-op.
  # NOTE(review): 1.megabyte is presumably ActiveSupport's numeric helper
  # (1024 * 1024) - confirm it is loaded wherever this runs.
  (total_chars.to_f / 1.megabyte).round(2)
end
# Builds @hosts: one host list per category in host_urls.
#
# First every category gets the hosts returned by build_host_data for its
# sources. Then each category that names other categories (see
# categories_with_keys) also receives the hosts of those categories.
#
# @return [Hash] category => sorted Array of unique hosts
def fetch_and_build_host_lists
  @hosts = {}

  host_urls.each do |category, urls|
    @hosts[category] = build_host_data(urls)
  end

  # Single pass in key order: a referenced category contributes whatever
  # its list holds at the moment it is read.
  categories_with_keys.each do |category, sub_categories|
    # flat_map collects the referenced hosts themselves; Array#each would
    # just hand back the list of sub-category names.
    inherited_hosts = sub_categories.flat_map do |sub_category|
      @hosts[sub_category] || []
    end

    own_hosts = @hosts[category] || []
    # Merge flat (no nested array) and keep the list sorted, as
    # build_host_data returns it.
    @hosts[category] = (own_hosts + inherited_hosts).uniq.sort
  end

  @hosts
end
# Downloads every valid list URL and returns the sorted hosts it contains.
#
# Sources that fail url_valid? are skipped. Within a downloaded list, lines
# whose first character is "#" are ignored. For the remaining lines the
# second whitespace-separated field is kept, so a line with fewer than two
# fields contributes nothing.
#
# @param urls [Array] list sources for one category
# @return [Array] sorted hosts with nils removed
def build_host_data(urls)
  fetchable_urls = urls.select { |url| url_valid?(url) }

  hosts = fetchable_urls.flat_map do |url|
    raw_data = HTTParty.get(url)
    entries = raw_data.split("\n").reject { |line| line[0] == "#" }
    # Field 0 is the address column of a hosts-style line; field 1 is the domain.
    entries.map { |line| line.split(' ')[1] }
  end

  hosts.compact.sort
end
+
# Maps each category to the Symbol entries in its source list, i.e. the
# other categories it refers to.
#
# The Symbol check is applied directly: filtering through url_not_valid?
# as well left every list empty while that helper was inverted.
#
# @return [Hash] category => Array of Symbols (empty when none are listed)
def categories_with_keys
  host_urls.transform_values do |sources|
    sources.grep(Symbol)
  end
end
+
# Logical negation of url_valid?.
#
# @param url [Object] candidate list source
# @return [Boolean] true when +url+ is NOT a fetchable URL
def url_not_valid?(url)
  !url_valid?(url)
end
+
# Whether +url+ is an HTTP(S) URL with a non-empty host.
#
# Anything URI.parse rejects with URI::InvalidURIError counts as invalid.
#
# @param url [Object] candidate list source
# @return [Boolean]
def url_valid?(url)
  uri = URI.parse(url)
  # URI::HTTPS subclasses URI::HTTP, so both schemes pass the type check.
  # The host must be non-empty as well as non-nil: "http:///path" parses
  # but has no usable host.
  uri.is_a?(URI::HTTP) && !uri.host.to_s.empty?
rescue URI::InvalidURIError
  false
end
end
end