lib/ronin/url.rb in ronin-1.2.0 vs lib/ronin/url.rb in ronin-1.3.0
- old
+ new
@@ -16,17 +16,19 @@
# You should have received a copy of the GNU General Public License
# along with Ronin. If not, see <http://www.gnu.org/licenses/>.
#
require 'ronin/model'
+require 'ronin/model/importable'
require 'ronin/url_scheme'
require 'ronin/url_query_param'
require 'ronin/host_name'
require 'ronin/tcp_port'
require 'ronin/web_credential'
require 'dm-timestamps'
+require 'uri/generic'
require 'uri/http'
require 'uri/https'
require 'uri/ftp'
require 'uri/query_params'
@@ -35,10 +37,11 @@
# Represents URLs that can be stored in the {Database}.
#
class URL
include Model
+ include Model::Importable
include DataMapper::Timestamps
# Mapping of URL Schemes and URI classes
SCHEMES = {
'https' => ::URI::HTTPS,
@@ -75,10 +78,49 @@
# Defines the created_at timestamp
timestamps :created_at
#
+ # Extracts URLs from the given text.
+ #
+ # @param [String] text
+ # The text to parse.
+ #
+ # @yield [url]
+ # The given block will be passed each extracted URL.
+ #
+ # @yieldparam [URL] url
+ # An extracted URL from the text.
+ #
+ # @return [Array<URL>, nil]
+ # If no block is given, an Array of the extracted URLs is returned.
+ # If a block is given, `nil` is returned.
+ #
+ # @see http://rubydoc.info/stdlib/uri/URI#extract-class_method
+ # @see URL.parse
+ #
+ # @since 1.3.0
+ #
+ # @api public
+ #
+ def self.extract(text) # yields each URL found in text, or returns them as an Array when no block is given
+ return enum_for(:extract,text).to_a unless block_given? # no block: re-enter via an Enumerator and collect every yielded URL
+
+ ::URI.extract(text) do |uri|
+ uri = begin
+ ::URI.parse(uri)
+ rescue URI::InvalidURIError
+ # URI.extract can match strings that URI.parse rejects; skip those instead of raising
+ next
+ end
+
+ yield from(uri) # `from` is defined outside this hunk; presumably builds a URL from the parsed URI -- TODO confirm
+ end
+
+ return nil # with a block the URLs are only yielded, never collected
+ end
+
+ #
# Searches for all URLs using HTTP.
#
# @return [Array<URL>]
# The matching URLs.
#
@@ -401,10 +443,10 @@
#
# @api public
#
def to_uri
# map the URL scheme to a URI class
- url_class = (SCHEMES[self.scheme.name] || ::URI::Generic)
+ url_class = SCHEMES.fetch(self.scheme.name,::URI::Generic)
host = if self.host_name
self.host_name.address
end
port = if self.port