url.rb in ronin-1.3.0

- old
+ new

@@ -16,17 +16,19 @@
 # You should have received a copy of the GNU General Public License
 # along with Ronin.  If not, see <http://www.gnu.org/licenses/>.
 #
 
 require 'ronin/model'
+require 'ronin/model/importable'
 require 'ronin/url_scheme'
 require 'ronin/url_query_param'
 require 'ronin/host_name'
 require 'ronin/tcp_port'
 require 'ronin/web_credential'
 
 require 'dm-timestamps'
+require 'uri/generic'
 require 'uri/http'
 require 'uri/https'
 require 'uri/ftp'
 require 'uri/query_params'
 
@@ -35,10 +37,11 @@
   # Represents URLs that can be stored in the {Database}.
   #
   class URL
 
     include Model
+    include Model::Importable
     include DataMapper::Timestamps
 
     # Mapping of URL Schemes and URI classes
     SCHEMES = {
       'https' => ::URI::HTTPS,
@@ -75,10 +78,49 @@
 
     # Defines the created_at timestamp
     timestamps :created_at
 
     #
+    # Extracts URLs from the given text.
+    #
+    # @param [String] text
+    #   The text to parse.
+    #
+    # @yield [url]
+    #   The given block will be passed each extracted URL.
+    #
+    # @yieldparam [URL] url
+    #   An extracted URL from the text.
+    #
+    # @return [Array<URL>]
+    #   If no block is given, an Array of the extracted URLs is returned.
+    #
+    # @see http://rubydoc.info/stdlib/uri/URI#extract-class_method
+    # @see URL.parse
+    #
+    # @since 1.3.0
+    #
+    # @api public
+    #
+    def self.extract(text)
+      return enum_for(:extract,text).to_a unless block_given?
+
+      ::URI.extract(text) do |uri|
+        uri = begin
+                ::URI.parse(uri)
+              rescue URI::InvalidURIError
+                # URI.extract can parse URIs that URI.parse cannot handle
+                next
+              end
+
+        yield from(uri)
+      end
+
+      return nil
+    end
+
+    #
     # Searches for all URLs using HTTP.
     #
     # @return [Array<URL>]
     #   The matching URLs.
     #
@@ -401,10 +443,10 @@
     #
     # @api public
     #
     def to_uri
       # map the URL scheme to a URI class
-      url_class = (SCHEMES[self.scheme.name] || ::URI::Generic)
+      url_class = SCHEMES.fetch(self.scheme.name,::URI::Generic)
 
       host = if self.host_name
                self.host_name.address
              end
       port = if self.port