lib/ronin/url.rb in ronin-1.2.0 vs lib/ronin/url.rb in ronin-1.3.0

- old
+ new

@@ -16,17 +16,19 @@
 # You should have received a copy of the GNU General Public License
 # along with Ronin. If not, see <http://www.gnu.org/licenses/>.
 #
 
 require 'ronin/model'
+require 'ronin/model/importable'
 require 'ronin/url_scheme'
 require 'ronin/url_query_param'
 require 'ronin/host_name'
 require 'ronin/tcp_port'
 require 'ronin/web_credential'
 
 require 'dm-timestamps'
+require 'uri/generic'
 require 'uri/http'
 require 'uri/https'
 require 'uri/ftp'
 require 'uri/query_params'
 
@@ -35,10 +37,11 @@
   # Represents URLs that can be stored in the {Database}.
   #
   class URL
 
     include Model
+    include Model::Importable
     include DataMapper::Timestamps
 
     # Mapping of URL Schemes and URI classes
     SCHEMES = {
       'https' => ::URI::HTTPS,
@@ -75,10 +78,49 @@
 
     # Defines the created_at timestamp
     timestamps :created_at
 
     #
+    # Extracts URLs from the given text.
+    #
+    # @param [String] text
+    #   The text to parse.
+    #
+    # @yield [url]
+    #   The given block will be passed each extracted URL.
+    #
+    # @yieldparam [URL] url
+    #   An extracted URL from the text.
+    #
+    # @return [Array<URL>]
+    #   If no block is given, an Array of the extracted URLs is returned.
+    #
+    # @see http://rubydoc.info/stdlib/uri/URI#extract-class_method
+    # @see URL.parse
+    #
+    # @since 1.3.0
+    #
+    # @api public
+    #
+    def self.extract(text)
+      return enum_for(:extract,text).to_a unless block_given?
+
+      ::URI.extract(text) do |uri|
+        uri = begin
+                ::URI.parse(uri)
+              rescue URI::InvalidURIError
+                # URI.extract can parse URIs that URI.parse cannot handle
+                next
+              end
+
+        yield from(uri)
+      end
+
+      return nil
+    end
+
+    #
     # Searches for all URLs using HTTP.
     #
     # @return [Array<URL>]
     #   The matching URLs.
     #
@@ -401,10 +443,10 @@
     #
     # @api public
     #
     def to_uri
       # map the URL scheme to a URI class
-      url_class = (SCHEMES[self.scheme.name] || ::URI::Generic)
+      url_class = SCHEMES.fetch(self.scheme.name,::URI::Generic)
 
       host = if self.host_name
                self.host_name.address
              end
       port = if self.port