Sha256: 06d36b82e3ef0fccf7795cb72336ead54933e7564ed653d207cfb189a602496e
Contents?: true
Size: 1.07 KB
Versions: 2
Compression:
Stored size: 1.07 KB
Contents
$LOAD_PATH.unshift(File.dirname(__FILE__)) unless $LOAD_PATH.include?(File.dirname(__FILE__))

require 'addressable/uri'
require 'domainatrix/domain_parser'
require 'domainatrix/url'
require 'uri'

# Entry points for parsing a single URL and for scanning free text for
# http/https URLs. Parsing is delegated to a shared DomainParser built from
# the effective_tld_names.dat file that sits next to this file.
module Domainatrix
  VERSION = "0.0.11"
  DOMAIN_PARSER = DomainParser.new("#{File.dirname(__FILE__)}/effective_tld_names.dat")

  # Parses +url+ with DOMAIN_PARSER and wraps the result in a Url.
  #
  # @param url [String] the URL to parse
  # @return [Url]
  def self.parse(url)
    Url.new(DOMAIN_PARSER.parse(url))
  end

  # Extracts every http/https URL from +text+, trims trailing punctuation
  # that is most likely sentence clutter, and parses each one.
  #
  # URLs that raise Addressable::URI::InvalidURIError while being parsed are
  # skipped. When a block is given, each parsed URL is mapped through it and
  # the block results are returned instead.
  #
  # @param text [String, nil] text to scan; nil yields an empty array
  # @return [Array] parsed Url objects, or the block's results
  def self.scan(text, &block)
    return [] unless text

    @schemes ||= %w(http https)

    # Anchored with \z so only the very end of the candidate is trimmed.
    all_trailing_clutter = /[.,:);]+\z/
    # Same set minus ")": a URL containing "(" may legitimately end with ")",
    # e.g. http://en.wikipedia.org/wiki/Ruby_(language)
    clutter_without_parens = /[.,:;]+\z/

    candidate_urls = ::URI.extract(text, @schemes).map do |url|
      # If the URL has an open paren, allow closing parens.
      if url.include?("(")
        url.sub(clutter_without_parens, '')
      else
        url.sub(all_trailing_clutter, '')
      end
    end

    urls = candidate_urls.map do |url|
      begin
        parse(url)
      rescue Addressable::URI::InvalidURIError
        nil # unparseable candidate: dropped by compact below
      end
    end.compact

    urls.map!(&block) if block
    urls
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
shadowbq-domainatrix-0.0.12 | lib/domainatrix.rb |
shadowbq-domainatrix-0.0.11 | lib/domainatrix.rb |