module Wovnrb
# URL utility ported from html-swapper
class URL
module FileExtension
IMG_FILES = 'jpe|jpe?g|bmp|gif|png|btif|tiff?|psd|djvu?|xif|wbmp|webp|p(n|b|g|p)m|rgb|tga|x(b|p)m|xwd|pic|ico|fh(c|4|5|7)?|xif|f(bs|px|st)'.freeze
AUDIO_FILES = 'mp(3|2)|m(p?2|3|p?4|pg)a|midi?|kar|rmi|web(m|a)|aif(f?|c)|w(ma|av|ax)|m(ka|3u)|sil|s3m|og(a|g)|uvv?a'.freeze
VIDEO_FILES = 'm(x|4)u|fl(i|v)|3g(p|2)|jp(gv|g?m)|mp(4v?|g4|(?!$)e?g?)|m(1|2)v|ogv|m(ov|ng)|qt|uvv?(h|m|p|s|v)|dvb|mk(v|3d|s)|f4v|as(x|f)|w(m(v|x)|vx)|xvid'.freeze
DOC_FILES = '(7|g)?zip|tar|rar|7z|gz|ez|aw|atom(cat|svc)?|(cc)?xa?ml|cdmi(a|c|d|o|q)?|epub|g(ml|px|xf)|jar|js|ser|class|json(ml)?|do(c|t)(m|x)?|xls(m|x)?|xps|pp(a|tx?|s)m?|potm?|sldm|mp(p|t)|bin|dms|lrf|mar|so|dist|distz|m?pkg|bpk|dump|rtf|tfi|pdf|pgp|apk|o(t|d)(b|c|ft?|g|h|i|p|s|t)'.freeze
end
# TODO: Maybe this should be applied to all get_attribute calls rather than just href
def self.normalize_url(href)
return nil unless href
href.delete("\u200b").strip
end
def self.absolute_url?(href)
href =~ %r{^(https?:)?//}i
end
def self.absolute_path?(href)
href.match?(%r{^/})
end
def self.relative_path?(href)
!absolute_url?(href) && !absolute_path?(href)
end
# @param parsed_uri [Addressable::URI]
def self.path_and_query(parsed_uri)
parsed_uri.path + (parsed_uri.query ? "?#{parsed_uri.query}" : '')
end
def self.path_and_query_and_hash(parsed_uri)
uri = parsed_uri.path
uri += "?#{parsed_uri.query}" if parsed_uri.query
uri += "##{parsed_uri.fragment}" if parsed_uri.fragment
uri
end
def self.host_with_port(parsed_uri)
if parsed_uri.port
"#{parsed_uri.host}:#{parsed_uri.port}"
else
parsed_uri.host.to_s
end
end
def self.resolve_absolute_uri(base_url, href)
# This resolves ./../ and also handles href already being absolute
Addressable::URI.join(base_url, href)
rescue Addressable::URI::InvalidURIError, ArgumentError => e
Rollbar.warning('Failed to resolve absolute URI', original_error: e, base_url: base_url, href: href)
raise
end
def self.resolve_absolute_path(base_url, href)
normalized_uri = resolve_absolute_uri(base_url, href)
path = normalized_uri.path
query = normalized_uri.query ? "?#{normalized_uri.query}" : ''
fragment = normalized_uri.fragment ? "##{normalized_uri.fragment}" : ''
path + query + fragment
end
# Set the path lang to
def self.prepend_path(url, dir)
url.sub(%r{(.+\.[^/]+)(/|$)}, "\\1/#{dir}\\2")
end
def self.trim_slashes(path)
path.gsub(%r{^/|/$}, '')
end
def self.prepend_path_slash(path)
path ||= ''
return path if path.starts_with?('/')
"/#{path}"
end
def self.join_paths(*paths)
paths.inject('') do |left, right|
case [left.end_with?('/'), right.start_with?('/')]
when [true, true]
left + right[1..-1]
when [false, false]
left + (right.blank? ? right : "/#{right}")
else
left + right
end
end
end
# @param uri [Addressable::URI]
# @param new_protocol [String | nil]
# @return copy of uri [Addressable::URI]
def self.change_protocol(uri, new_protocol)
result = uri.dup
result.scheme = new_protocol
result
end
def self.valid_protocol?(href)
scheme_matches = /^\s*(?[a-zA-Z]+):/.match(href)
scheme = scheme_matches ? scheme_matches[:scheme] : nil
scheme.nil? || %w[http https].include?(scheme)
end
def self.file?(href_with_query_and_hash)
href = remove_query_and_hash(href_with_query_and_hash)
img_files = %r{^(https?://)?.*(\.(#{FileExtension::IMG_FILES}))((\?|#).*)?$}io
audio_files = %r{^(https?://)?.*(\.(#{FileExtension::AUDIO_FILES}))((\?|#).*)?$}io
video_files = %r{^(https?://)?.*(\.(#{FileExtension::VIDEO_FILES}))((\?|#).*)?$}io
doc_files = %r{^(https?://)?.*(\.(#{FileExtension::DOC_FILES}))((\?|#).*)?$}io
href.match?(img_files) || href.match?(audio_files) || href.match?(video_files) || href.match?(doc_files)
end
def self.remove_query_and_hash(href)
href.gsub(/[#?].*/, '')
end
# if original path does not end in slash, remove it from new path
# if original path ends in slash, add it to new path
def self.normalize_path_slash(original_path, new_path)
if !original_path.end_with?('/') && new_path.end_with?('/')
new_path = new_path.chomp('/')
elsif original_path.end_with?('/') && !new_path.end_with?('/')
new_path += '/'
end
new_path
end
end
end