1 #--
2 # Addressable, Copyright (c) 2006-2007 Bob Aman
3 #
4 # Permission is hereby granted, free of charge, to any person obtaining
5 # a copy of this software and associated documentation files (the
6 # "Software"), to deal in the Software without restriction, including
7 # without limitation the rights to use, copy, modify, merge, publish,
8 # distribute, sublicense, and/or sell copies of the Software, and to
9 # permit persons to whom the Software is furnished to do so, subject to
10 # the following conditions:
11 #
12 # The above copyright notice and this permission notice shall be
13 # included in all copies or substantial portions of the Software.
14 #
22 #++
24 $:.unshift(File.expand_path(File.join(File.dirname(__FILE__), '/..')))
25 $:.uniq!
27 require 'addressable/version'
29 module Addressable
30 # This is an implementation of a URI parser based on RFC 3986, 3987.
31 class URI
# Error raised when a value that cannot be treated as a URI is supplied
# (for example, an invalid port number passed to the constructor).
class InvalidURIError < StandardError; end
# Error raised when a method receives an option key it does not recognise.
class InvalidOptionError < StandardError; end
# Error raised during template expansion when a processor's validate
# method rejects the value supplied for a template variable.
class InvalidTemplateValue < StandardError; end
# Regular expression character class fragments (without the surrounding
# square brackets) for the character sets named in RFC 3986. They are
# meant to be interpolated into a Regexp, e.g. /[^#{PATH}]/.
module CharacterClasses
  ALPHA = "a-zA-Z"
  DIGIT = "0-9"
  GEN_DELIMS = "\\:\\/\\?\\#\\[\\]\\@"
  SUB_DELIMS = "\\!\\$\\&\\'\\(\\)\\*\\+\\,\\;\\="
  # reserved = gen-delims / sub-delims (RFC 3986, section 2.2)
  RESERVED = GEN_DELIMS + SUB_DELIMS
  UNRESERVED = ALPHA + DIGIT + "\\-\\.\\_\\~"
  # pchar = unreserved / sub-delims / ":" / "@" (RFC 3986, section 3.3).
  # Must be defined before PATH, QUERY and FRAGMENT, which build on it.
  PCHAR = UNRESERVED + SUB_DELIMS + "\\:\\@"
  SCHEME = ALPHA + DIGIT + "\\-\\+\\."
  # Characters left unescaped inside the user, password and host segments.
  AUTHORITY = PCHAR
  PATH = PCHAR + "\\/"
  QUERY = PCHAR + "\\/\\?"
  FRAGMENT = PCHAR + "\\/\\?"
end
# Builds an Addressable::URI from its string form.
#
# nil yields nil and an existing Addressable::URI is handed back as-is.
# Objects whose class name starts with "URI::" (the standard library
# classes) are converted to a string first. The string is split into
# scheme, authority, path, query and fragment; the authority is then
# broken down further into user, password, host and port.
def self.parse(uri_string)
  return nil if uri_string.nil?
  # Already one of ours; nothing to do.
  return uri_string if uri_string.kind_of?(self)

  # Standard library URI objects are parsed from their string form.
  uri_string = uri_string.to_s if uri_string.class.name =~ /^URI::/

  components = uri_string.scan(
    /^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/)[0]
  return nil if components.nil?

  authority = components[3]
  user = password = host = port = nil
  unless authority.nil?
    userinfo = authority.scan(/^([^\[\]]*)@/).flatten[0]
    unless userinfo.nil?
      credentials = userinfo.strip
      user = credentials.scan(/^([^:]*):?/).flatten[0]
      password = credentials.scan(/:(.*)$/).flatten[0]
    end
    # The port is whatever follows the final colon outside any brackets.
    port_pattern = /:([^:@\[\]]*?)$/
    host = authority.gsub(/^([^\[\]]*)@/, "").gsub(port_pattern, "")
    port = authority.scan(port_pattern).flatten[0]
  end
  # A trailing colon with nothing after it means "no port".
  port = nil if port == ""

  Addressable::URI.new(
    components[1], user, password, host, port,
    components[4], components[6], components[8])
end
# Guesses which URI a piece of (possibly sloppy) user input was meant to
# be. This is deliberately forgiving rather than standards compliant.
#
# hints may carry a :scheme entry (default "http") that is used whenever
# the input does not supply a usable scheme of its own.
def self.heuristic_parse(input, hints={})
  input = input.dup
  hints = { :scheme => "http" }.merge(hints)

  # Repair the number of slashes after a few well-known schemes. Only the
  # first matching rule is applied.
  slash_fixes = [
    [/^http:\/+/, "http://"],
    [/^feed:\/+http:\/+/, "feed:http://"],
    [/^feed:\/+/, "feed://"],
    [/^file:\/+/, "file:///"]
  ]
  fix = slash_fixes.find { |pattern, _replacement| pattern === input }
  input.gsub!(fix[0], fix[1]) if fix

  parsed = self.parse(input)
  # Something like "example.com:8080/x" parses with "example.com" as the
  # scheme; retry with the hinted scheme in front.
  if parsed.scheme =~ /^[^\/?#\.]+\.[^\/?#]+$/
    parsed = self.parse(hints[:scheme] + "://" + input)
  end

  # No authority but a path that starts with a dotted name: treat that
  # leading name as the host.
  if parsed.authority.nil? && parsed.path =~ /^[^\/]+\./
    new_host = parsed.path.scan(/^([^\/]+\.[^\/]*)/).flatten[0]
    if new_host
      new_path = parsed.path.gsub(
        Regexp.new("^" + Regexp.escape(new_host)), "")
      parsed.host = new_host
      parsed.path = new_path
      parsed.scheme = hints[:scheme]
    end
  end
  parsed
end
# Converts a filesystem path into a file: URI.
#
# Absolute Unix paths ("/...") and Windows drive paths ("C:\...") become
# normalized file:/// URIs; when the resulting path names an existing
# directory it is given exactly one trailing slash. Anything else
# (relative paths, or strings that already are URIs of another scheme)
# is simply parsed. Returns nil when path is nil.
def self.convert_path(path)
  return nil if path.nil?

  converted_uri = path.strip
  if converted_uri.length > 0 && converted_uri[0..0] == "/"
    converted_uri = "file://" + converted_uri
  end
  if converted_uri.length > 0 &&
      converted_uri.scan(/^[a-zA-Z]:[\\\/]/).size > 0
    converted_uri = "file:///" + converted_uri
  end
  # Collapse whatever number of slashes followed "file:" into three.
  converted_uri.gsub!(/^file:\/*/i, "file:///")

  return self.parse(converted_uri) unless converted_uri =~ /^file:/i

  # Adjust windows-style uris ("C|" drive notation and backslashes).
  converted_uri.gsub!(/^file:\/\/\/([a-zA-Z])\|/i, 'file:///\1:')
  converted_uri.gsub!(/\\/, '/')
  converted_uri = self.parse(converted_uri).normalize
  # File.directory? is false for missing paths, so no separate existence
  # check is needed (File.exists? no longer exists in current Ruby).
  if File.directory?(converted_uri.path)
    converted_uri.path = converted_uri.path.gsub(/\/$/, "") + "/"
  end
  converted_uri
end
# Expands a URI template into a full URI.
#
# Each "{name}" in pattern is replaced by the percent-encoded value that
# mapping holds for name; variables left without a value are removed.
#
# An optional processor object may be supplied. If it responds to
# :validate, validate(name, value) is called for each pair and a falsy
# result raises InvalidTemplateValue. If it responds to :transform,
# transform(name, value) supplies the string that is encoded and
# substituted instead of the raw value.
#
# An example:
#
#  class ExampleProcessor
#    def self.validate(name, value)
#      return !!(value =~ /^[\w ]+$/) if name == "query"
#      return true
#    end
#
#    def self.transform(name, value)
#      return value.gsub(/ /, "+") if name == "query"
#      return value
#    end
#  end
#
#  Addressable::URI.expand_template(
#    "http://example.com/search/{query}/",
#    {"query" => "an example search query"},
#    ExampleProcessor).to_s
#  => "http://example.com/search/an+example+search+query/"
def self.expand_template(pattern, mapping, processor=nil)
  result = pattern.dup
  mapping.each do |name, value|
    if processor && processor.respond_to?(:validate) &&
        !processor.validate(name, value)
      raise InvalidTemplateValue,
        "(#{name}, #{value}) is an invalid template value."
    end

    transformed_value = value
    if processor && processor.respond_to?(:transform)
      transformed_value = processor.transform(name, value)
    end

    # Handle percent escaping
    transformed_value = self.encode_segment(transformed_value,
      Addressable::URI::CharacterClasses::RESERVED +
      Addressable::URI::CharacterClasses::UNRESERVED)

    result.gsub!(/\{#{Regexp.escape(name)}\}/, transformed_value)
  end
  # Drop any variables the mapping did not cover.
  result.gsub!(
    /\{[#{Addressable::URI::CharacterClasses::UNRESERVED}]+\}/, "")
  Addressable::URI.parse(result)
end
# Recovers the variable values that a URI Template pattern would need in
# order to produce this URI. Returns a Hash of variable name => value, an
# empty Hash when a variable-free pattern equals the URI exactly, and
# nil when the pattern does not match.
#
# An optional processor object may be supplied. If it responds to
# :match, match(name) must return the body of a regular expression
# capture group used for that variable (the default is ".*"). If it
# responds to :restore, restore(name, value) is applied to each captured
# value, and should undo whatever transformation was applied when the
# template was expanded.
#
# An example:
#
#  class ExampleProcessor
#    def self.restore(name, value)
#      return value.gsub(/\+/, " ") if name == "query"
#      return value
#    end
#
#    def self.match(name)
#      return ".*?" if name == "first"
#      return ".*"
#    end
#  end
#
#  uri = Addressable::URI.parse(
#    "http://example.com/search/an+example+search+query/")
#  uri.extract_mapping("http://example.com/search/{query}/",
#    ExampleProcessor)
#  => {"query" => "an example search query"}
#
#  uri = Addressable::URI.parse(
#    "http://example.com/a/b/c/")
#  uri.extract_mapping("http://example.com/{first}/{second}/",
#    ExampleProcessor)
#  => {"first" => "a", "second" => "b/c"}
def extract_mapping(pattern, processor=nil)
  variable_regexp =
    /\{([#{Addressable::URI::CharacterClasses::UNRESERVED}]+)\}/

  # Every variable named in the pattern starts out as the empty string.
  variables = pattern.scan(variable_regexp).flatten
  mapping = {}
  variables.each { |variable| mapping[variable] = "" }

  # Escape the pattern, but keep the braces that delimit variables.
  escaped_pattern =
    Regexp.escape(pattern).gsub(/\\\{/, "{").gsub(/\\\}/, "}")

  # Swap each variable for a capture group.
  regexp_string = escaped_pattern.gsub(variable_regexp) do |placeholder|
    if processor != nil && processor.respond_to?(:match)
      variable = placeholder.scan(variable_regexp).flatten[0]
      "(#{processor.match(variable)})"
    else
      "(.*)"
    end
  end

  # Anchor both ends so the whole URI has to match.
  regexp = Regexp.new("^#{regexp_string}$")
  values = self.to_s.scan(regexp).flatten

  if variables.size == values.size && variables.size > 0
    # We have a match.
    variables.each_with_index do |name, index|
      value = values[index]
      if processor != nil && processor.respond_to?(:restore)
        value = processor.restore(name, value)
      end
      mapping[name] = value
    end
    mapping
  elsif self.to_s == pattern
    # The pattern contained no variables but still matched.
    mapping
  else
    # Pattern failed to match URI.
    nil
  end
end
# Joins several uris together, left to right. Arguments that are not
# already Addressable::URI objects are parsed from their string form;
# a copy of the first URI is then merged with each following URI in turn.
def self.join(*uris)
  uri_objects = uris.map do |uri|
    uri.kind_of?(self) ? uri : self.parse(uri.to_s)
  end
  result = uri_objects.shift.dup
  uri_objects.each { |uri| result.merge!(uri) }
  result
end
# Percent encodes a URI segment. Returns a string, or nil when segment
# is nil.
#
# character_class is a string holding the inside of a regular expression
# character class (without the surrounding square brackets) that lists
# the characters to leave untouched. It defaults to the reserved plus
# unreserved character classes specified in RFC 3986. Usage of the
# constants within the CharacterClasses module is highly recommended
# when using this method.
#
# Every byte of a character outside the class is written as "%" followed
# by exactly two uppercase hexadecimal digits.
#
# An example:
#
#  Addressable::URI.encode_segment("simple-example", "b-zB-Z0-9")
#  => "simple%2Dex%61mple"
def self.encode_segment(segment, character_class=
    Addressable::URI::CharacterClasses::RESERVED +
    Addressable::URI::CharacterClasses::UNRESERVED)
  return nil if segment.nil?
  segment.to_s.gsub(/[^#{character_class}]/) do |sequence|
    # A matched character may span several bytes, and small byte values
    # need zero padding, so format each byte individually.
    sequence.unpack('C*').map { |byte| "%%%02X" % byte }.join
  end
end
# Unencodes any percent encoded characters within a URI segment.
# Returns a string, or nil when segment is nil. The segment is converted
# with to_s first, and the result keeps the encoding of that string so
# that escaped multibyte characters decode to the characters they
# represent.
def self.unencode_segment(segment)
  return nil if segment.nil?
  encoded = segment.to_s
  encoded.gsub(/%[0-9a-f]{2}/i) do |sequence|
    byte = sequence[1..3].to_i(16).chr
    # Integer#chr returns a binary string for values of 0x80 and above;
    # re-tag it so it can be combined with the surrounding text.
    byte.force_encoding(encoded.encoding) if byte.respond_to?(:force_encoding)
    byte
  end
end
# Percent encodes any special characters in the URI and returns the
# result as a string. This method does not take IRIs or IDNs into
# account.
#
# uri may be an Addressable::URI or anything whose to_s can be parsed.
# Each component is encoded against the character class that applies to
# it. The port is passed through as-is: specified_port yields an Integer
# (or nil), which cannot be handed to encode_segment and never needs
# escaping.
def self.encode(uri)
  uri_object = uri.kind_of?(self) ? uri : self.parse(uri.to_s)
  classes = Addressable::URI::CharacterClasses
  return Addressable::URI.new(
    self.encode_segment(uri_object.scheme, classes::SCHEME),
    self.encode_segment(uri_object.user, classes::AUTHORITY),
    self.encode_segment(uri_object.password, classes::AUTHORITY),
    self.encode_segment(uri_object.host, classes::AUTHORITY),
    uri_object.specified_port,
    self.encode_segment(uri_object.path, classes::PATH),
    self.encode_segment(uri_object.query, classes::QUERY),
    self.encode_segment(uri_object.fragment, classes::FRAGMENT)
  ).to_s
end
# Makes the class method "escape" available as another name for "encode".
class << self
  alias escape encode
end
# Normalizes the encoding of a URI and returns the result as a string.
#
# Every component is first percent-decoded. If libidn is usable, each
# decoded component is additionally NFKC normalized. The scheme, user,
# password, path, query and fragment are then re-encoded against their
# character classes. The host and port are left as decoded, so that
# internationalized domain names survive.
def self.normalized_encode(uri)
  uri_object = uri.kind_of?(self) ? uri : self.parse(uri.to_s)

  raw_components = [
    [:scheme, uri_object.scheme],
    [:user, uri_object.user],
    [:password, uri_object.password],
    [:host, uri_object.host],
    [:port, uri_object.specified_port],
    [:path, uri_object.path],
    [:query, uri_object.query],
    [:fragment, uri_object.fragment]
  ]
  segments = {}
  raw_components.each do |key, raw|
    segments[key] = self.unencode_segment(raw)
  end

  if URI::IDNA.send(:use_libidn?)
    segments.each do |key, value|
      next if value == nil
      segments[key] = IDN::Stringprep.nfkc_normalize(value.to_s)
    end
  end

  classes = Addressable::URI::CharacterClasses
  Addressable::URI.new(
    self.encode_segment(segments[:scheme], classes::SCHEME),
    self.encode_segment(segments[:user], classes::AUTHORITY),
    self.encode_segment(segments[:password], classes::AUTHORITY),
    segments[:host],
    segments[:port],
    self.encode_segment(segments[:path], classes::PATH),
    self.encode_segment(segments[:query], classes::QUERY),
    self.encode_segment(segments[:fragment], classes::FRAGMENT)
  ).to_s
end
# Extracts uris from an arbitrary body of text.
#
# Three sources are scanned: bare "scheme:..." runs, href="..."
# attributes inside tags, and Textile style "label":target links.
#
# Options:
#   :base  - a URI (object or string) that each extracted URI is resolved
#            against; nil (the default) leaves them as found.
#   :parse - when true, Addressable::URI objects are returned; otherwise
#            (the default) strings are.
#
# Any other option key raises InvalidOptionError. Candidates that fail to
# parse or resolve are silently skipped, and a fixed list of schemes that
# tend to be false positives is filtered out.
def self.extract(text, options={})
  defaults = {:base => nil, :parse => false}
  options = defaults.merge(options)
  raise InvalidOptionError unless (options.keys - defaults.keys).empty?

  # This regular expression needs to be less forgiving or else it would
  # match virtually all text. Which isn't exactly what we're going for.
  extracted_uris =
    text.scan(/((([a-z\+]+):)[^ \n\<\>\"\\]+[\w\/])/).collect { |m| m[0] }
  # SGML href attributes first, then Textile links; skip duplicates.
  [
    /<[^>]+href=\"([^\"]+?)\"[^>]*>/,
    /\".+?\":([^ ]+\/[^ ]+)[ \,\.\;\:\?\!\<\>\"]/i
  ].each do |pattern|
    found = text.scan(pattern).collect { |m| m[0] }
    extracted_uris.concat(found - extracted_uris)
  end

  base = options[:base]
  base_uri =
    if base == nil
      nil
    elsif base.kind_of?(self)
      base
    else
      self.parse(base.to_s)
    end

  parsed_uris = []
  extracted_uris.each do |uri_string|
    begin
      candidate = self.parse(uri_string)
      parsed_uris << (base_uri.nil? ? candidate : (base_uri + candidate))
    rescue Exception
      nil
    end
  end

  ignored_schemes =
    ["xmlns", "xml", "thr", "this", "float", "user", "username", "out"]
  parsed_uris.reject! do |uri|
    uri.scheme =~ /T\d+/ || ignored_schemes.include?(uri.scheme)
  end

  options[:parse] ? parsed_uris : parsed_uris.collect { |uri| uri.to_s }
end
# Creates a new uri object from component parts. Passing nil for
# any of these parameters is acceptable.
#
# A blank scheme is stored as nil and a nil path as "". The port may be
# an Integer or a string of digits; anything else raises InvalidURIError.
# A port of 0 is treated as "no port". The URI is validated once all
# components have been stored.
def initialize(scheme, user, password, host, port, path, query, fragment)
  @scheme = scheme
  @scheme = nil if @scheme.to_s.strip == ""
  @user = user
  @password = password
  @host = host
  @specified_port = port.to_s
  # Integer covers what used to be Fixnum (and Bignum), and is available
  # on every Ruby version.
  @port = port.kind_of?(Integer) ? port.to_s : port
  if @port != nil && !(@port =~ /^\d+$/)
    raise InvalidURIError,
      "Invalid port number: #{@port.inspect}"
  end
  @port = @port.to_i
  @port = nil if @port == 0
  @path = (path || "")
  @query = query
  @fragment = fragment

  validate()
end
# The scheme (protocol) component of this URI, or nil if there is none.
def scheme
  @scheme
end
# Replaces the scheme (protocol) component of this URI.
def scheme=(new_scheme)
  @scheme = new_scheme
end
# The user component of this URI, or nil if there is none.
def user
  @user
end
# Replaces the user component of this URI, then clears the cached
# userinfo and authority strings and re-validates the URI.
def user=(new_user)
  @user = new_user
  # You can't have a nil user with a non-nil password, so fall back to
  # an empty user name in that case.
  @user = "" if @user.nil? && @password != nil

  # Reset dependant values
  @userinfo = nil
  @authority = nil

  # Ensure we haven't created an invalid URI
  validate
end
# The password component of this URI, or nil if there is none.
def password
  @password
end
# Replaces the password component of this URI, then clears the cached
# userinfo and authority strings and re-validates the URI.
def password=(new_password)
  @password = new_password
  # You can't have a nil user with a non-nil password, so supply an
  # empty user name when a password is set without one.
  @user = "" if @password != nil && @user.nil?

  # Reset dependant values
  @userinfo = nil
  @authority = nil

  # Ensure we haven't created an invalid URI
  validate
end
# The "user:password" segment of this URI. Just the user when there is no
# password, and nil when there is no user. The string is built on first
# use and cached until a setter clears it.
def userinfo
  return @userinfo if defined?(@userinfo) && @userinfo != nil

  current_user = self.user
  current_password = self.password
  if current_user
    @userinfo =
      current_password ? "#{current_user}:#{current_password}" : "#{current_user}"
  elsif !current_password
    @userinfo = nil
  end
  @userinfo
end
# Sets the username and password segment of this URI from a single
# "user:password" string.
#
# Everything before the first colon is the user and everything after it
# is the password; a string with no colon is a user without a password.
# nil or a blank string clears both. The cached authority is reset and
# the URI re-validated.
def userinfo=(new_userinfo)
  new_user = nil
  new_password = nil
  credentials = new_userinfo.to_s.strip
  unless credentials.empty?
    # Same patterns the parser applies to the userinfo part of an
    # authority, so both routes split credentials identically.
    new_user = credentials.scan(/^([^:]*):?/).flatten[0]
    new_password = credentials.scan(/:(.*)$/).flatten[0]
  end

  # Password assigned first to ensure validity in case of nil
  self.password = new_password
  self.user = new_user

  # Reset dependant values
  @authority = nil

  # Ensure we haven't created an invalid URI
  validate()
end
# The host component of this URI, or nil if there is none.
def host
  @host
end
# Replaces the host component of this URI, clears the cached authority
# string and re-validates the URI.
def host=(new_host)
  @host = new_host
  # Reset dependant values
  @authority = nil
  # Ensure we haven't created an invalid URI
  validate
end
# The authority segment of this URI in the form
# "userinfo@host:port", with the userinfo and port parts left out when
# absent. Returns nil when there is no host. The string is built on first
# use and cached until a setter clears it.
def authority
  return @authority if defined?(@authority) && !@authority.nil?
  return nil if self.host.nil?

  @authority = ""
  current_userinfo = self.userinfo
  @authority << "#{current_userinfo}@" unless current_userinfo == nil
  @authority << self.host
  explicit_port = self.specified_port
  @authority << ":#{explicit_port}" unless explicit_port == nil
  @authority
end
# Replaces the whole authority segment ("user:password@host:port") of
# this URI. The string is split with the same patterns the parser uses;
# passing nil clears user, password, host and port.
def authority=(new_authority)
  new_user = new_password = new_host = new_port = nil
  if new_authority
    userinfo_pattern = /^([^\[\]]*)@/
    port_pattern = /:([^:@\[\]]*?)$/

    new_userinfo = new_authority.scan(userinfo_pattern).flatten[0]
    if new_userinfo
      credentials = new_userinfo.strip
      new_user = credentials.scan(/^([^:]*):?/).flatten[0]
      new_password = credentials.scan(/:(.*)$/).flatten[0]
    end
    new_host =
      new_authority.gsub(userinfo_pattern, "").gsub(port_pattern, "")
    new_port = new_authority.scan(port_pattern).flatten[0]
  end

  # Password assigned first to ensure validity in case of nil
  self.password = new_password
  self.user = new_user
  self.host = new_host

  # Port reset to allow port normalization
  @port = nil
  @specified_port = new_port

  # Ensure we haven't created an invalid URI
  validate
end
# Lists the known ip-based schemes, i.e. the keys of scheme_mapping.
# These schemes typically use a similar URI form:
# //<user>:<password>@<host>:<port>/<url-path>
def self.ip_based_schemes
  self.scheme_mapping.keys
end
# A hash mapping common IP-based schemes to their default port numbers.
# The hash is created once and the same object is returned on every
# call, so adding new schemes to it, as necessary, will allow for better
# URI normalization.
def self.scheme_mapping
  if defined?(@protocol_mapping) && !@protocol_mapping.nil?
    return @protocol_mapping
  end
  @protocol_mapping = {
    "http" => 80,
    "https" => 443,
    "ftp" => 21,
    "tftp" => 69,
    "ssh" => 22,
    "svn+ssh" => 22,
    "telnet" => 23,
    "nntp" => 119,
    "gopher" => 70,
    "wais" => 210,
    "ldap" => 389,
    "prospero" => 1525
  }
end
# The port number for this URI as an Integer. When no port (or port 0)
# is stored, the default port for the URI's scheme is looked up in
# scheme_mapping; the result is nil when there is no scheme or the
# scheme has no known default.
def port
  return @port = @port.to_i unless @port.to_i == 0

  @port =
    if self.scheme
      self.class.scheme_mapping[self.scheme.strip.downcase]
    else
      nil
    end
end
# Replaces the port of this URI. The value is converted to an Integer
# via its string form and stored as both the effective and the specified
# port; the cached authority string is cleared.
def port=(new_port)
  @specified_port = @port = new_port.to_s.to_i
  @authority = nil
end
# The port number that was actually specified for this URI, as an
# Integer, or nil when none was given (or it was 0).
def specified_port
  explicit = @specified_port.to_s.to_i
  explicit == 0 ? nil : explicit
end
# The path component of this URI.
def path
  @path
end
# Replaces the path component of this URI. nil is stored as "".
def path=(new_path)
  @path = new_path || ""
end
# The basename of the file at the path being referenced, with any
# trailing ";parameters" removed.
def basename
  # Path cannot be nil
  File.basename(self.path).gsub(/;[^\/]*$/, "")
end
# The extension (including the leading dot) of the file at the path
# being referenced. Returns "" if there is no extension or nil if there
# is no path component.
def extname
  return nil unless self.path
  File.extname(self.basename)
end
# The query string of this URI, or nil if there is none.
def query
  @query
end
# Replaces the query string of this URI.
def query=(new_query)
  @query = new_query
end
# The fragment of this URI, or nil if there is none.
def fragment
  @fragment
end
# Replaces the fragment of this URI.
def fragment=(new_fragment)
  @fragment = new_fragment
end
# True if the URI's scheme (ignoring case and surrounding whitespace) is
# one of the known IP-based schemes; false otherwise, including when
# there is no scheme.
def ip_based?
  return false unless self.scheme
  self.class.ip_based_schemes.include?(self.scheme.strip.downcase)
end
# True if this URI is known to be relative, i.e. it has no scheme.
def relative?
  self.scheme.nil?
end
# True if this URI is known to be absolute, i.e. it is not relative.
def absolute?
  !relative?
end
# Joins two URIs together: resolves the reference uri against this URI
# (the base) following section 5.2.2 of RFC 3986, and returns the result
# as a new URI. uri may be a URI object or anything whose to_s can be
# parsed. An empty reference yields a copy of the base.
def +(uri)
  uri = URI.parse(uri.to_s) unless uri.kind_of?(self.class)
  return self.dup if uri.to_s == ""

  if uri.scheme != nil || uri.authority != nil
    # The reference brings its own authority (and possibly scheme), so
    # only a missing scheme is inherited from the base.
    joined_scheme = uri.scheme != nil ? uri.scheme : self.scheme
    joined_user = uri.user
    joined_password = uri.password
    joined_host = uri.host
    joined_port = uri.specified_port
    joined_path = self.class.normalize_path(uri.path)
    joined_query = uri.query
  else
    joined_scheme = self.scheme
    joined_user = self.user
    joined_password = self.password
    joined_host = self.host
    joined_port = self.specified_port

    if uri.path == nil || uri.path == ""
      # Nothing but a query and/or fragment: keep the base path.
      joined_path = self.path
      joined_query = uri.query != nil ? uri.query : self.query
    else
      if uri.path[0..0] == "/"
        joined_path = self.class.normalize_path(uri.path)
      else
        base_path = self.path.dup
        base_path = "" if base_path == nil
        base_path = self.class.normalize_path(base_path)

        # Section 5.2.3 of RFC 3986
        #
        # Removes the right-most path segment from the base path.
        if base_path =~ /\//
          base_path.gsub!(/\/[^\/]+$/, "/")
        else
          base_path = ""
        end

        # If the base path is empty and an authority segment has been
        # defined, use a base path of "/"
        base_path = "/" if base_path == "" && self.authority != nil

        joined_path = self.class.normalize_path(base_path + uri.path)
      end
      joined_query = uri.query
    end
  end

  Addressable::URI.new(
    joined_scheme,
    joined_user,
    joined_password,
    joined_host,
    joined_port,
    joined_path,
    joined_query,
    uri.fragment
  )
end
# Merges two URIs together: resolves uri against this URI and returns the
# result as a new URI. Also available under the name join.
def merge(uri)
  self + uri
end
alias_method :join, :merge
# Destructive form of merge: the merged result is handed to replace_self
# so that it takes the place of this URI's own state. Also available
# under the name join!.
def merge!(uri)
  merged = self.merge(uri)
  replace_self(merged)
end
alias_method :join!, :merge!
# Returns the shortest normalized relative form of this URI that uses the
# supplied URI as a base for resolution. Returns an absolute URI if
# necessary.
#
# Both URIs are normalized first. Raises ArgumentError if either one is
# relative. When the two are equal, only the fragment reference is
# returned.
def route_from(uri)
  uri = self.class.parse(uri).normalize
  normalized_self = self.normalize
  if normalized_self.relative?
    raise ArgumentError, "Expected absolute URI, got: #{self.to_s}"
  end
  raise ArgumentError, "Expected absolute URI, got: #{uri.to_s}" if uri.relative?
  if normalized_self == uri
    return Addressable::URI.parse("##{normalized_self.fragment}")
  end

  # Blank out every leading component the two URIs have in common.
  segments = normalized_self.to_hash
  if normalized_self.scheme == uri.scheme
    segments[:scheme] = nil
    if normalized_self.authority == uri.authority
      [:user, :password, :host, :port].each { |key| segments[key] = nil }
      if normalized_self.path == uri.path
        segments[:path] = nil
        segments[:query] = nil if normalized_self.query == uri.query
      elsif uri.path != "/"
        segments[:path].gsub!(
          Regexp.new("^" + Regexp.escape(uri.path)), "")
      end
    end
  end
  # Avoid network-path references.
  segments[:scheme] = normalized_self.scheme if segments[:host] != nil

  Addressable::URI.new(*segments.values_at(
    :scheme, :user, :password, :host, :port, :path, :query, :fragment))
end
# Returns the shortest normalized relative form of the supplied URI that
# uses this URI as a base for resolution. Returns an absolute URI if
# necessary. This is route_from with the two roles swapped.
def route_to(uri)
  target = self.class.parse(uri)
  target.route_from(self)
end
# Returns a normalized copy of this URI.
#
# NOTE: This method does not attempt to fully conform to specifications.
# It exists largely to correct other people's failures to read the
# specifications, and also to deal with caching issues since several
# different URIs may represent the same resource and should not be
# cached multiple times.
def normalize
  normalized_scheme = self.scheme.nil? ? nil : self.scheme.strip.downcase
  normalized_scheme = "svn+ssh" if normalized_scheme == "ssh+svn"
  # "feed:http://..." is normalized as the http URI it wraps.
  if normalized_scheme == "feed" && self.to_s =~ /^feed:\/*http:\/*/
    return self.class.parse(
      self.to_s.scan(/^feed:\/*(http:\/*.*)/).flatten[0]).normalize
  end

  normalized_user = self.user.nil? ? nil : self.user.strip
  normalized_password = self.password.nil? ? nil : self.password.strip

  # If we are using http or https and user/password are blank,
  # then we remove them
  if normalized_scheme =~ /https?/ && normalized_user == "" &&
      (!normalized_password || normalized_password == "")
    normalized_user = nil
    normalized_password = nil
  end

  normalized_host = nil
  unless self.host.nil?
    normalized_host = self.host.strip.downcase
    begin
      normalized_host = URI::IDNA.to_ascii(normalized_host)
    rescue Exception
      # Keep the host unchanged when IDNA conversion is unavailable or
      # fails.
      nil
    end
    # Drop a single trailing dot.
    if normalized_host[-1..-1] == "."
      normalized_host = normalized_host[0...-1]
    end
  end

  # A port equal to the scheme's default is left out.
  normalized_port = self.port
  if self.class.scheme_mapping[normalized_scheme] == normalized_port
    normalized_port = nil
  end

  normalized_path = nil
  unless self.path.nil?
    normalized_path = self.class.normalize_path(self.path.strip)
  end
  if normalized_path == "" &&
      ["http", "https", "ftp", "tftp"].include?(normalized_scheme)
    normalized_path = "/"
  end

  normalized_query = self.query.nil? ? nil : self.query.strip
  normalized_fragment = self.fragment.nil? ? nil : self.fragment.strip

  Addressable::URI.parse(
    Addressable::URI.normalized_encode(Addressable::URI.new(
      normalized_scheme,
      normalized_user,
      normalized_password,
      normalized_host,
      normalized_port,
      normalized_path,
      normalized_query,
      normalized_fragment
    )))
end
# Destructive form of normalize: the normalized URI is handed to
# replace_self so that it takes the place of this URI's own state.
def normalize!
  normalized = self.normalize
  replace_self(normalized)
end
# Creates a URI suitable for display to users: the normalized URI with
# its host converted to unicode where possible. If semantic attacks are
# likely, the application should try to detect these and warn the user.
# See RFC 3986 section 7.6 for more information.
def display_uri
  normalized = self.normalize
  begin
    normalized.instance_variable_set("@host",
      URI::IDNA.to_unicode(normalized.host))
  rescue Exception
    # Leave the host as normalized when conversion is unavailable or fails.
    nil
  end
  normalized
end
# Returns true if the URI objects are equal. This method normalizes
# both URIs before doing the comparison, and allows comparison against
# strings (or any object whose to_s can be parsed). Returns false when
# the other value cannot be turned into a valid URI.
def ===(uri)
  unless uri.respond_to?(:normalize)
    begin
      uri = URI.parse(uri.to_s)
    rescue InvalidURIError
      return false
    end
  end
  self.normalize.to_s == begin
    uri.normalize.to_s
  rescue InvalidURIError
    raise if uri.respond_to?(:normalize) && !uri.kind_of?(self.class)
    return false
  end
end
# Returns true if the other object is a URI of this class whose
# normalized string form equals this URI's normalized string form.
def ==(uri)
  uri.kind_of?(self.class) && self.normalize.to_s == uri.normalize.to_s
end
# Returns true if the other object is a URI of this class with exactly
# the same string form. This method does NOT normalize either URI before
# doing the comparison.
def eql?(uri)
  uri.kind_of?(self.class) && self.to_s == uri.to_s
end
# Returns a hash value derived from the normalized string form, so that
# a URI and its normalized form hash identically.
def hash
  -(self.normalize.to_s.hash)
end
# Clones the URI object. Every string component is itself duplicated, so
# the copy shares no mutable state with the original.
def dup
  copy = lambda { |component| component ? component.dup : nil }
  Addressable::URI.new(
    copy.call(self.scheme),
    copy.call(self.user),
    copy.call(self.password),
    copy.call(self.host),
    self.specified_port,
    copy.call(self.path),
    copy.call(self.query),
    copy.call(self.fragment)
  )
end
# Assembles the URI into a string. Scheme, authority, query and fragment
# are emitted, with their delimiters, only when they are not nil; the
# path is always appended (a nil path contributes an empty string).
def to_s
  assembled = ""

  scheme_part = self.scheme
  assembled << "#{scheme_part}:" unless scheme_part.nil?

  authority_part = self.authority
  assembled << "//#{authority_part}" unless authority_part.nil?

  assembled << self.path.to_s

  query_part = self.query
  assembled << "?#{query_part}" unless query_part.nil?

  fragment_part = self.fragment
  assembled << "##{fragment_part}" unless fragment_part.nil?

  assembled
end
# Returns the URI's components as a Hash keyed by :scheme, :user,
# :password, :host, :port, :path, :query and :fragment. The :port entry
# holds the specified port.
def to_hash
  segments = {}
  segments[:scheme] = self.scheme
  segments[:user] = self.user
  segments[:password] = self.password
  segments[:host] = self.host
  segments[:port] = self.specified_port
  segments[:path] = self.path
  segments[:query] = self.query
  segments[:fragment] = self.fragment
  segments
end
# Debug representation: the class name, the object id in hexadecimal and
# the assembled URI string.
def inspect
  "#<%s:%#0x URI:%s>" % [self.class.to_s, self.object_id, self.to_s]
end
# Internationalized domain name support. The actual work is delegated to
# the libidn bindings (IDN::Idna) when they can be loaded. There is no
# pure-ruby nameprep/stringprep/punycode fallback here, so without the
# bindings the conversion methods raise NotImplementedError.
module IDNA
  # Converts the label to its ascii representation.
  # Returns nil for a nil label; raises NotImplementedError when the
  # libidn bindings are unavailable.
  def self.to_ascii(label)
    return nil if label.nil?
    unless self.use_libidn?
      raise NotImplementedError,
        "There is no available pure-ruby implementation.  " +
        "Install libidn bindings."
    end
    IDN::Idna.toASCII(label)
  end

  # Converts the label to its unicode representation.
  # Returns nil for a nil label; raises NotImplementedError when the
  # libidn bindings are unavailable.
  def self.to_unicode(label)
    return nil if label.nil?
    unless self.use_libidn?
      raise NotImplementedError,
        "There is no available pure-ruby implementation.  " +
        "Install libidn bindings."
    end
    IDN::Idna.toUnicode(label)
  end

private
  # Reports whether the libidn bindings can be used. The answer is worked
  # out once (attempting to load rubygems and then idn, ignoring load
  # failures) and cached in @use_libidn for later calls.
  #
  # NOTE: a bare +private+ does not apply to methods defined with
  # +def self.+, so this method is still callable from outside the module.
  def self.use_libidn?
    return @use_libidn if defined?(@use_libidn) && !@use_libidn.nil?

    %w[rubygems idn].each do |library|
      begin
        require library
      rescue LoadError
        next
      end
    end
    @use_libidn = defined?(IDN::Idna) ? true : false
  end
end
1182 private
# Resolves a path to its simplest form by removing "." segments and
# collapsing "<segment>/.." pairs, following section 5.2.4 of RFC 3986.
# The rewrite rules are applied repeatedly until the path stops changing.
#
# Returns nil for a nil path; otherwise returns a new String and leaves
# the argument unmodified.
def self.normalize_path(path)
  return nil if path.nil?

  normalized_path = path.dup
  loop do
    previous_state = normalized_path.dup

    # Remove "." segments, both in the middle and at the end of the path.
    normalized_path.gsub!(/\/\.\//, "/")
    normalized_path.gsub!(/\/\.$/, "/")

    # Collapse "/<parent>/../" for the first parent segment found. The
    # segment is escaped because it is arbitrary path text and may contain
    # regular expression metacharacters. A nil parent becomes an empty
    # string, which keeps the existing collapsing of "//../".
    parent = normalized_path[/\/([^\/]+)\/\.\.\//, 1]
    if parent != "." && parent != ".."
      normalized_path.gsub!(
        /\/#{Regexp.escape(parent.to_s)}\/\.\.\//, "/")
    end

    # Same as above, for a "/<parent>/.." pair at the end of the path.
    parent = normalized_path[/\/([^\/]+)\/\.\.$/, 1]
    if parent != "." && parent != ".."
      normalized_path.gsub!(
        /\/#{Regexp.escape(parent.to_s)}\/\.\.$/, "/")
    end

    # Strip dot segments that lead the path and have nothing to collapse.
    normalized_path.gsub!(/^\.\.?\/?/, "")
    normalized_path.gsub!(/^\/\.\.?\//, "/")

    break if previous_state == normalized_path
  end
  normalized_path
end
# Checks the URI's components for consistency and raises InvalidURIError
# when they cannot form a valid URI:
#
# * a scheme is present but both host and path are nil or empty
# * there is no host, yet a port, user or password was supplied
def validate
  blank = lambda { |segment| segment.nil? || segment == "" }

  if !self.scheme.nil? && blank.call(self.host) && blank.call(self.path)
    raise InvalidURIError,
      "Absolute URI missing hierarchical segment."
  end

  # The remaining check only concerns URIs without a host.
  return unless self.host.nil?

  unless self.specified_port.nil? && self.user.nil? && self.password.nil?
    raise InvalidURIError, "Hostname not supplied."
  end
end
# Overwrites this object's internal state with the state of the given
# URI and returns self. Destructive operations use this so they do not
# each have to repeat the component-by-component copy.
def replace_self(uri)
  # Values derived from other components are cleared rather than copied.
  @userinfo = @authority = nil

  @scheme, @user, @password, @host =
    uri.scheme, uri.user, uri.password, uri.host

  # The specified port is read from the instance variable directly, and
  # @port is derived from it (a nil specified port yields 0).
  @specified_port = uri.instance_variable_get("@specified_port")
  @port = @specified_port.to_s.to_i

  @path, @query, @fragment = uri.path, uri.query, uri.fragment
  self
end
1243 end
1244 end
