lib/ronin/web.rb in ronin-web-0.3.0.rc1 vs lib/ronin/web.rb in ronin-web-1.0.0.beta1
- old
+ new
@@ -1,33 +1,392 @@
#
-# Ronin Web - A Ruby library for Ronin that provides support for web
-# scraping and spidering functionality.
+# ronin-web - A collection of useful web helper methods and commands.
#
-# Copyright (c) 2006-2011 Hal Brodigan (postmodern.mod3 at gmail.com)
+# Copyright (c) 2006-2022 Hal Brodigan (postmodern.mod3 at gmail.com)
#
-# This file is part of Ronin Web.
-#
-# Ronin is free software: you can redistribute it and/or modify
+# ronin-web is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
-# Ronin is distributed in the hope that it will be useful,
+# ronin-web is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with Ronin. If not, see <http://www.gnu.org/licenses/>.
+# along with ronin-web. If not, see <https://www.gnu.org/licenses/>.
#
-require 'ronin/web/extensions'
-require 'ronin/web/web'
+require 'ronin/web/html'
+require 'ronin/web/xml'
require 'ronin/web/spider'
require 'ronin/web/server'
-require 'ronin/web/proxy'
+require 'ronin/web/user_agents'
+require 'ronin/web/mechanize'
require 'ronin/web/version'
-require 'ronin/config'
+require 'ronin/support/network/http'
+require 'uri'
+require 'open-uri'
+require 'nokogiri'
+require 'nokogiri/ext'
+require 'nokogiri/diff'
+require 'open_namespace'
+
module Ronin
- Config.load :web
+ module Web
+ include OpenNamespace
+
+ #
+ # Parses the body of a document into a HTML document object.
+ #
+ # @param [String, IO] body
+ # The body of the document to parse.
+ #
+ # @yield [doc]
+ # If a block is given, it will be passed the newly created document
+ # object.
+ #
+ # @yieldparam [Nokogiri::HTML::Document] doc
+ # The new HTML document object.
+ #
+ # @return [Nokogiri::HTML::Document]
+ # The new HTML document object.
+ #
+ # @see http://rubydoc.info/gems/nokogiri/Nokogiri/HTML/Document
+ #
+ # @api public
+ #
+ def self.html(body,&block)
+ HTML.parse(body,&block)
+ end
+
+ #
+ # Creates a new Nokogiri::HTML::Builder.
+ #
+ # @yield []
+ # The block that will be used to construct the HTML document.
+ #
+ # @return [Nokogiri::HTML::Builder]
+ # The new HTML builder object.
+ #
+ # @example
+ # Web.build_html do
+ # html {
+ # body {
+ # div(style: 'display:none;') {
+ # object(classid: 'blabla')
+ # }
+ # }
+ # }
+ # end
+ #
+ # @see http://rubydoc.info/gems/nokogiri/Nokogiri/HTML/Builder
+ #
+ # @api public
+ #
+ def self.build_html(&block)
+ HTML.build(&block)
+ end
+
+ #
+ # Parses the body of a document into a XML document object.
+ #
+ # @param [String, IO] body
+ # The body of the document to parse.
+ #
+ # @yield [doc]
+ # If a block is given, it will be passed the newly created document
+ # object.
+ #
+ # @yieldparam [Nokogiri::XML::Document] doc
+ # The new XML document object.
+ #
+ # @return [Nokogiri::XML::Document]
+ # The new XML document object.
+ #
+ # @see http://rubydoc.info/gems/nokogiri/Nokogiri/XML/Document
+ #
+ # @api public
+ #
+ def self.xml(body,&block)
+ XML.parse(body,&block)
+ end
+
+ #
+ # Creates a new Nokogiri::XML::Builder.
+ #
+ # @yield []
+ # The block that will be used to construct the XML document.
+ #
+ # @return [Nokogiri::XML::Builder]
+ # The new XML builder object.
+ #
+ # @example
+ # Web.build_xml do
+ # post(id: 2) {
+ # title { text('some example') }
+ # body { text('this is one contrived example.') }
+ # }
+ # end
+ #
+ # @see http://rubydoc.info/gems/nokogiri/Nokogiri/XML/Builder
+ #
+ # @api public
+ #
+ def self.build_xml(&block)
+ XML.build(&block)
+ end
+
+ #
+ # Opens a URL as a temporary file.
+ #
+ # @param [String, :random, :chrome, :chrome_linux, :chrome_macos,
+ # :chrome_windows, :chrome_iphone, :chrome_ipad,
+ # :chrome_android, :firefox, :firefox_linux, :firefox_macos,
+ # :firefox_windows, :firefox_iphone, :firefox_ipad,
+ # :firefox_android, :safari, :safari_macos, :safari_iphone,
+ # :safari_ipad, :edge, :linux, :macos, :windows, :iphone,
+ # :ipad, :android, nil] user_agent
+ # The `User-Agent` string to use.
+ #
+ # @param [String, URI::HTTP, nil] proxy
+ # The proxy URI to use.
+ #
+ # @param [String, URI::HTTP, nil] referer
+ # The optional `Referer` header to send.
+ #
+ # @param [String, Ronin::Support::Network::HTTP::Cookie, nil] cookie
+ # The optional `Cookie` header to send.
+ #
+ # @param [Hash{Symbol => Object}] kwargs
+ # Additional keyword arguments.
+ #
+ # @option kwargs [String] :user
+ # The HTTP Basic Authentication user name.
+ #
+ # @option kwargs [String] :password
+ # The HTTP Basic Authentication password.
+ #
+ # @option kwargs [Proc] :content_length_proc
+ # A callback which will be passed the content-length of the HTTP
+ # response.
+ #
+ # @option kwargs [Proc] :progress_proc
+ # A callback which will be passed the size of each fragment, once
+ # received from the server.
+ #
+ # @return [File]
+ # The contents of the URL.
+ #
+ # @example Open a given URL.
+ # Web.open('https://www.example.com/')
+ #
+ # @example Open a given URL, using a built-in User-Agent:
+ # Web.open('https://www.example.com/', user_agent: :linux)
+ #
+ # @example Open a given URL, using a custom User-Agent string:
+ # Web.open('https://www.example.com/', user_agent: '...')
+ #
+ # @example Open a given URL, using a custom User-Agent string.
+ # Web.open('https://www.example.com/', user_agent: 'the future')
+ #
+ # @see http://rubydoc.info/stdlib/open-uri
+ #
+ # @api public
+ #
+ def self.open(url, proxy: Web.proxy,
+ user_agent: Web.user_agent,
+ referer: nil,
+ cookie: nil,
+ **kwargs)
+ headers = {}
+
+ if user_agent
+ headers['User-Agent'] = case user_agent
+ when Symbol
+ Support::Network::HTTP::UserAgents[user_agent]
+ else
+ user_agent
+ end
+ end
+
+ headers['Referer'] = referer if referer
+ headers['Cookie'] = cookie if cookie
+
+ return URI.open(url, headers, proxy: proxy, **kwargs)
+ end
+
+ #
+ # A persistent Mechanize Agent.
+ #
+ # @return [Mechanize]
+ # The persistent Mechanize Agent.
+ #
+ # @see Mechanize
+ #
+ # @api public
+ #
+ def self.agent
+ @agent ||= Mechanize.new
+ end
+
+ #
+ # Creates a Mechanize Page for the contents at a given URL.
+ #
+ # @param [URI::Generic, String] url
+ # The URL to request.
+ #
+ # @param [Array, Hash] parameters
+ # Additional parameters for the GET request.
+ #
+ # param [Hash] headers
+ # Additional headers for the GET request.
+ #
+ # @yield [page]
+ # If a block is given, it will be passed the page for the requested
+ # URL.
+ #
+ # @yieldparam [Mechanize::Page] page
+ # The requested page.
+ #
+ # @return [Mechanize::Page]
+ # The requested page.
+ #
+ # @example
+ # Web.get('http://www.rubyinside.com')
+ # # => Mechanize::Page
+ #
+ # @example
+ # Web.get('http://www.rubyinside.com') do |page|
+ # page.search('div.post/h2/a').each do |title|
+ # puts title.inner_text
+ # end
+ # end
+ #
+ # @see http://rubydoc.info/gems/mechanize/Mechanize/Page
+ #
+ # @api public
+ #
+ def self.get(url,parameters={},headers={},&block)
+ agent.get(url,parameters,nil,headers,&block)
+ end
+
+ #
+ # Requests the body of the Mechanize Page created from the response
+ # of the given URL.
+ #
+ # @param [URI::Generic, String] url
+ # The URL to request.
+ #
+ # @param [Array, Hash] parameters
+ # Additional parameters for the GET request.
+ #
+ # param [Hash] headers
+ # Additional headers for the GET request.
+ #
+ # @yield [body]
+ # If a block is given, it will be passed the body of the page.
+ #
+ # @yieldparam [String] body
+ # The requested body of the page.
+ #
+ # @return [String]
+ # The requested body of the page.
+ #
+ # @example
+ # Web.get_body('http://www.rubyinside.com') # => String
+ #
+ # @example
+ # Web.get_body('http://www.rubyinside.com') do |body|
+ # puts body
+ # end
+ #
+ # @see get
+ #
+ # @api public
+ #
+ def self.get_body(url,parameters={},headers={})
+ body = get(url,parameters,headers).body
+
+ yield body if block_given?
+ return body
+ end
+
+ #
+ # Posts to a given URL and creates a Mechanize Page from the response.
+ #
+ # @param [URI::Generic, String] url
+ # The URL to request.
+ #
+ # @param [Hash] query
+ # Additional query parameters for the POST request.
+ #
+ # @param [Hash] headers
+ # Additional headers for the POST request.
+ #
+ # @yield [page]
+ # If a block is given, it will be passed the page for the requested
+ # URL.
+ #
+ # @yieldparam [Mechanize::Page] page
+ # The requested page.
+ #
+ # @return [Mechanize::Page]
+ # The requested page.
+ #
+ # @example
+ # Web.post('http://www.rubyinside.com')
+ # # => Mechanize::Page
+ #
+ # @see http://rubydoc.info/gems/mechanize/Mechanize/Page
+ #
+ # @api public
+ #
+ def self.post(url,query={},headers={},&block)
+ agent.post(url,query,headers={},&block)
+ end
+
+ #
+ # Posts to a given URL and returns the body of the Mechanize Page
+ # created from the response.
+ #
+ # @param [URI::Generic, String] url
+ # The URL to request.
+ #
+ # @param [Hash] query
+ # Additional query parameters for the POST request.
+ #
+ # @param [Hash] headers
+ # Additional headers for the POST request.
+ #
+ # @yield [body]
+ # If a block is given, it will be passed the body of the page.
+ #
+ # @yieldparam [Mechanize::Page] page
+ # The body of the requested page.
+ #
+ # @return [Mechanize::Page]
+ # The body of the requested page.
+ #
+ # @example
+ # Web.post_body('http://www.rubyinside.com')
+ # # => String
+ #
+ # @example
+ # Web.post_body('http://www.rubyinside.com') do |body|
+ # puts body
+ # end
+ #
+ # @see post
+ #
+ # @api public
+ #
+ def self.post_body(url,query={},headers={})
+ body = post(url,query,headers).body
+
+ yield body if block_given?
+ return body
+ end
+ end
end