lib/ronin/web.rb in ronin-web-0.3.0.rc1 vs lib/ronin/web.rb in ronin-web-1.0.0.beta1

- old
+ new

#
# ronin-web - A collection of useful web helper methods and commands.
#
# Copyright (c) 2006-2022 Hal Brodigan (postmodern.mod3 at gmail.com)
#
# ronin-web is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ronin-web is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ronin-web. If not, see <https://www.gnu.org/licenses/>.
#

require 'ronin/web/html'
require 'ronin/web/xml'
require 'ronin/web/spider'
require 'ronin/web/server'
require 'ronin/web/user_agents'
require 'ronin/web/mechanize'
require 'ronin/web/version'

require 'ronin/support/network/http'
require 'uri'
require 'open-uri'
require 'nokogiri'
require 'nokogiri/ext'
require 'nokogiri/diff'
require 'open_namespace'

module Ronin
  #
  # Top-level namespace for the web helper methods: HTML/XML parsing and
  # building, opening URLs, and GET/POST requests through a persistent
  # Mechanize agent.
  #
  module Web
    include OpenNamespace

    #
    # Parses the body of a document into a HTML document object.
    #
    # @param [String, IO] body
    #   The body of the document to parse.
    #
    # @yield [doc]
    #   If a block is given, it will be passed the newly created document
    #   object.
    #
    # @yieldparam [Nokogiri::HTML::Document] doc
    #   The new HTML document object.
    #
    # @return [Nokogiri::HTML::Document]
    #   The new HTML document object.
    #
    # @see http://rubydoc.info/gems/nokogiri/Nokogiri/HTML/Document
    #
    # @api public
    #
    def self.html(body,&block)
      HTML.parse(body,&block)
    end

    #
    # Creates a new Nokogiri::HTML::Builder.
    #
    # @yield []
    #   The block that will be used to construct the HTML document.
    #
    # @return [Nokogiri::HTML::Builder]
    #   The new HTML builder object.
    #
    # @example
    #   Web.build_html do
    #     html {
    #       body {
    #         div(style: 'display:none;') {
    #           object(classid: 'blabla')
    #         }
    #       }
    #     }
    #   end
    #
    # @see http://rubydoc.info/gems/nokogiri/Nokogiri/HTML/Builder
    #
    # @api public
    #
    def self.build_html(&block)
      HTML.build(&block)
    end

    #
    # Parses the body of a document into a XML document object.
    #
    # @param [String, IO] body
    #   The body of the document to parse.
    #
    # @yield [doc]
    #   If a block is given, it will be passed the newly created document
    #   object.
    #
    # @yieldparam [Nokogiri::XML::Document] doc
    #   The new XML document object.
    #
    # @return [Nokogiri::XML::Document]
    #   The new XML document object.
    #
    # @see http://rubydoc.info/gems/nokogiri/Nokogiri/XML/Document
    #
    # @api public
    #
    def self.xml(body,&block)
      XML.parse(body,&block)
    end

    #
    # Creates a new Nokogiri::XML::Builder.
    #
    # @yield []
    #   The block that will be used to construct the XML document.
    #
    # @return [Nokogiri::XML::Builder]
    #   The new XML builder object.
    #
    # @example
    #   Web.build_xml do
    #     post(id: 2) {
    #       title { text('some example') }
    #       body  { text('this is one contrived example.') }
    #     }
    #   end
    #
    # @see http://rubydoc.info/gems/nokogiri/Nokogiri/XML/Builder
    #
    # @api public
    #
    def self.build_xml(&block)
      XML.build(&block)
    end

    #
    # Opens a URL as a temporary file.
    #
    # @param [String, URI::HTTP] url
    #   The URL to open.
    #
    # @param [String, :random, :chrome, :chrome_linux, :chrome_macos,
    #         :chrome_windows, :chrome_iphone, :chrome_ipad,
    #         :chrome_android, :firefox, :firefox_linux, :firefox_macos,
    #         :firefox_windows, :firefox_iphone, :firefox_ipad,
    #         :firefox_android, :safari, :safari_macos, :safari_iphone,
    #         :safari_ipad, :edge, :linux, :macos, :windows, :iphone,
    #         :ipad, :android, nil] user_agent
    #   The `User-Agent` string to use. A Symbol is looked up in
    #   `Ronin::Support::Network::HTTP::UserAgents`.
    #
    # @param [String, URI::HTTP, nil] proxy
    #   The proxy URI to use.
    #
    # @param [String, URI::HTTP, nil] referer
    #   The optional `Referer` header to send.
    #
    # @param [String, Ronin::Support::Network::HTTP::Cookie, nil] cookie
    #   The optional `Cookie` header to send.
    #
    # @param [Hash{Symbol => Object}] kwargs
    #   Additional keyword arguments.
    #
    # @option kwargs [String] :user
    #   The HTTP Basic Authentication user name.
    #
    # @option kwargs [String] :password
    #   The HTTP Basic Authentication password.
    #
    # @option kwargs [Proc] :content_length_proc
    #   A callback which will be passed the content-length of the HTTP
    #   response.
    #
    # @option kwargs [Proc] :progress_proc
    #   A callback which will be passed the size of each fragment, once
    #   received from the server.
    #
    # @return [File]
    #   The contents of the URL.
    #
    # @example Open a given URL.
    #   Web.open('https://www.example.com/')
    #
    # @example Open a given URL, using a built-in User-Agent:
    #   Web.open('https://www.example.com/', user_agent: :linux)
    #
    # @example Open a given URL, using a custom User-Agent string:
    #   Web.open('https://www.example.com/', user_agent: 'the future')
    #
    # @see http://rubydoc.info/stdlib/open-uri
    #
    # @api public
    #
    # NOTE(review): `Web.proxy` and `Web.user_agent` are not defined in this
    # file; presumably they are provided elsewhere -- confirm.
    #
    def self.open(url, proxy:      Web.proxy,
                       user_agent: Web.user_agent,
                       referer:    nil,
                       cookie:     nil,
                       **kwargs)
      # open-uri accepts exactly ONE options Hash: Symbol keys are options,
      # String keys are request headers. Passing the headers Hash and the
      # keyword options separately makes it raise "extra arguments", so
      # everything is collected into a single Hash.
      options = {proxy: proxy, **kwargs}

      if user_agent
        options['User-Agent'] = case user_agent
                                when Symbol
                                  Support::Network::HTTP::UserAgents[user_agent]
                                else
                                  user_agent
                                end
      end

      options['Referer'] = referer if referer
      # header values must be Strings; #to_s is a no-op for a String and
      # serializes a Cookie object
      options['Cookie']  = cookie.to_s if cookie

      return URI.open(url,options)
    end

    #
    # A persistent Mechanize Agent.
    #
    # @return [Mechanize]
    #   The persistent Mechanize Agent.
    #
    # @see Mechanize
    #
    # @api public
    #
    def self.agent
      @agent ||= Mechanize.new
    end

    #
    # Creates a Mechanize Page for the contents at a given URL.
    #
    # @param [URI::Generic, String] url
    #   The URL to request.
    #
    # @param [Array, Hash] parameters
    #   Additional parameters for the GET request.
    #
    # @param [Hash] headers
    #   Additional headers for the GET request.
    #
    # @yield [page]
    #   If a block is given, it will be passed the page for the requested
    #   URL.
    #
    # @yieldparam [Mechanize::Page] page
    #   The requested page.
    #
    # @return [Mechanize::Page]
    #   The requested page.
    #
    # @example
    #   Web.get('http://www.rubyinside.com')
    #   # => Mechanize::Page
    #
    # @example
    #   Web.get('http://www.rubyinside.com') do |page|
    #     page.search('div.post/h2/a').each do |title|
    #       puts title.inner_text
    #     end
    #   end
    #
    # @see http://rubydoc.info/gems/mechanize/Mechanize/Page
    #
    # @api public
    #
    def self.get(url,parameters={},headers={},&block)
      # the third argument is passed as nil, ahead of the headers
      agent.get(url,parameters,nil,headers,&block)
    end

    #
    # Requests the body of the Mechanize Page created from the response
    # of the given URL.
    #
    # @param [URI::Generic, String] url
    #   The URL to request.
    #
    # @param [Array, Hash] parameters
    #   Additional parameters for the GET request.
    #
    # @param [Hash] headers
    #   Additional headers for the GET request.
    #
    # @yield [body]
    #   If a block is given, it will be passed the body of the page.
    #
    # @yieldparam [String] body
    #   The requested body of the page.
    #
    # @return [String]
    #   The requested body of the page.
    #
    # @example
    #   Web.get_body('http://www.rubyinside.com') # => String
    #
    # @example
    #   Web.get_body('http://www.rubyinside.com') do |body|
    #     puts body
    #   end
    #
    # @see get
    #
    # @api public
    #
    def self.get_body(url,parameters={},headers={})
      body = get(url,parameters,headers).body

      yield body if block_given?
      return body
    end

    #
    # Posts to a given URL and creates a Mechanize Page from the response.
    #
    # @param [URI::Generic, String] url
    #   The URL to request.
    #
    # @param [Hash] query
    #   Additional query parameters for the POST request.
    #
    # @param [Hash] headers
    #   Additional headers for the POST request.
    #
    # @yield [page]
    #   If a block is given, it will be passed the page for the requested
    #   URL.
    #
    # @yieldparam [Mechanize::Page] page
    #   The requested page.
    #
    # @return [Mechanize::Page]
    #   The requested page.
    #
    # @example
    #   Web.post('http://www.rubyinside.com')
    #   # => Mechanize::Page
    #
    # @see http://rubydoc.info/gems/mechanize/Mechanize/Page
    #
    # @api public
    #
    def self.post(url,query={},headers={},&block)
      # pass the caller's headers through; writing `headers={}` here would
      # re-assign the local to an empty Hash and silently drop them
      agent.post(url,query,headers,&block)
    end

    #
    # Posts to a given URL and returns the body of the Mechanize Page
    # created from the response.
    #
    # @param [URI::Generic, String] url
    #   The URL to request.
    #
    # @param [Hash] query
    #   Additional query parameters for the POST request.
    #
    # @param [Hash] headers
    #   Additional headers for the POST request.
    #
    # @yield [body]
    #   If a block is given, it will be passed the body of the page.
    #
    # @yieldparam [String] body
    #   The body of the requested page.
    #
    # @return [String]
    #   The body of the requested page.
    #
    # @example
    #   Web.post_body('http://www.rubyinside.com')
    #   # => String
    #
    # @example
    #   Web.post_body('http://www.rubyinside.com') do |body|
    #     puts body
    #   end
    #
    # @see post
    #
    # @api public
    #
    def self.post_body(url,query={},headers={})
      body = post(url,query,headers).body

      yield body if block_given?
      return body
    end
  end
end