Sha256: 1499c2067cd55366e4a90a8db23fcb7084f6afed61f996be6a8a57747a9f9dde

Contents?: true

Size: 943 Bytes

Versions: 1

Compression:

Stored size: 943 Bytes

Contents


module IMW
  # Mixin that adds a +scrubbed+ reader to whatever object includes it.
  #
  # The including object must respond to +to_dirpath+; that method is not
  # defined in this file. Scrub::SimplifiedURL#sanitize (same file) calls
  # +scrubbed+ on a normalized Addressable::URI, so presumably this module is
  # mixed into Addressable::URI elsewhere in IMW -- TODO confirm.
  module URIScrubber

    # Returns the receiver's +to_dirpath+ value unchanged.
    def scrubbed
      to_dirpath
    end
  end
end

module Scrub
  #
  # Scrubber that reduces an arbitrary URL-ish string to the project's
  # "simplified URL" form. The intended shape of that form is spelled out in
  # the complaint message below.
  #
  # NOTE(review): #valid? is overridden here and never consults +validator+;
  # whether Scrub::Generic uses +validator+ / +replacer+ elsewhere cannot be
  # seen from this file.
  #
  class SimplifiedURL < Scrub::Generic
    self.complaint = "should follow our zany simplified URL rules: com.host.dot-reversed:schemeifnothttp/path/seg_men-ts/stuff.ext-SHA1ifweird"
    # NOTE(review): the two Addressable constants are interpolated back to back
    # with no surrounding [...] -- confirm this is the intended pattern.
    self.validator = %r{#{Addressable::URI::SAFE_CHARS}#{Addressable::URI::RESERVED_CHARS}}u
    self.replacer = ''
    include Scrub::Lowercased
    # Nothing in this file assigns +uri+; #sanitize works on a local instead.
    attr_accessor :uri

    # A string is valid when its lowercased text already equals its
    # sanitized form.
    #
    # @param str [#to_s] candidate value
    # @return [Boolean] result of comparing the lowercased input with #sanitize
    def valid?(str)
      lowercased = str.to_s.downcase
      lowercased == sanitize(str)
    end

    # Parses +str+ heuristically as a URI, normalizes it, and returns its
    # scrubbed form when the host is valid, or its +uuid_path+ otherwise.
    #
    # +host_valid?+, +scrubbed+ and +uuid_path+ are not stock Addressable
    # methods visible from here -- presumably project extensions mixed into
    # Addressable::URI; verify against the rest of IMW.
    #
    # @param str [#to_s] raw URL-like input
    # @return [Object] whatever +scrubbed+ / +uuid_path+ return
    def sanitize(str)
      # If this fails, just normalize once, or don't set $KCODE: http://bit.ly/1664vp
      parsed = Addressable::URI.heuristic_parse(str.to_s).normalize
      # Debugging aid:
      # print [parsed.host, parsed.host_valid?, parsed.path, parsed.path_valid?].inspect
      return parsed.scrubbed if parsed.host_valid?

      parsed.uuid_path
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
imw-0.1.0 lib/imw/dataset/scrub/scrub_simple_url.rb