#
# ronin-web-spider - A collection of common web spidering routines.
#
# Copyright (c) 2022 Hal Brodigan (postmodern.mod3 at gmail.com)
#
# ronin-web-spider is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ronin-web-spider is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with ronin-web-spider. If not, see <https://www.gnu.org/licenses/>.
#
require 'ronin/web/spider/archive'
require 'ronin/web/spider/exceptions'
module Ronin
module Web
module Spider
#
# Represents a web archive directory that is backed by Git.
#
# ## Example
#
# Spider a host and archive every web page to a Git repository:
#
#     require 'ronin/web/spider'
#     require 'ronin/web/spider/git_archive'
#     require 'date'
#
#     Ronin::Web::Spider::GitArchive.open('path/to/root') do |archive|
#       archive.commit("Updated #{Date.today}") do
#         Ronin::Web::Spider.every_page(host: 'example.com') do |page|
#           archive.write(page.url,page.body)
#         end
#       end
#     end
#
class GitArchive < Archive

  #
  # Opens the archive via {Archive.open} and, inside the block handed to
  # it, initializes the Git repository (see {#init}) when {#git?} reports
  # that it does not exist yet.
  #
  # @param [String] root
  #   The path to the Git archive.
  #
  # @yield [archive]
  #   If a block is given, it will be passed the Git archive after the
  #   repository check has been performed.
  #
  # @yieldparam [GitArchive] archive
  #   The opened Git archive.
  #
  # @return [GitArchive]
  #   The Git archive, as returned by {Archive.open}.
  #
  def self.open(root)
    super(root) do |git_archive|
      git_archive.init unless git_archive.git?

      yield(git_archive) if block_given?
    end
  end

  #
  # Tests whether the archive root already contains a `.git` directory.
  #
  # @return [Boolean]
  #   `true` if `.git` exists as a directory inside the archive root,
  #   `false` otherwise.
  #
  def git?
    dot_git = File.join(@root,'.git')

    File.directory?(dot_git)
  end

  #
  # Runs `git init` for the archive root directory.
  #
  # @return [true]
  #   Indicates the Git repository was successfully initialized.
  #
  # @raise [GitError]
  #   Indicates that the `git` command exited with an error.
  #
  # @raise [GitNotInstalled]
  #   Indicates that `git` was not installed or could not be found in the
  #   `$PATH` environment variable.
  #
  def init
    git('init')
  end

  #
  # Writes a webpage using {Archive#write} and then stages the resulting
  # file with `git add`.
  #
  # @param [URI::HTTP] url
  #   The URL of the response.
  #
  # @param [String] body
  #   The response body to save.
  #
  # @return [String]
  #   The path of the archived page, exactly as returned by
  #   {Archive#write}.
  #
  # @raise [GitError]
  #   Indicates that the `git` command exited with an error.
  #
  # @raise [GitNotInstalled]
  #   Indicates that `git` was not installed or could not be found in the
  #   `$PATH` environment variable.
  #
  def write(url,body)
    # stage the file, but keep returning the path that super produced
    super(url,body).tap { |archived_path| git('add',archived_path) }
  end

  #
  # Runs `git commit -m` with the given message.
  #
  # @param [#to_s] message
  #   The commit message. It is converted to a String before being passed
  #   to `git`.
  #
  # @yield [self]
  #   If a block is given, it will be called before `git commit` is run.
  #
  # @yieldparam [GitArchive] self
  #   The Git archive itself.
  #
  # @return [true]
  #   Indicates the changes were successfully committed.
  #
  # @raise [GitError]
  #   Indicates the `git` command exited with an error.
  #
  # @raise [GitNotInstalled]
  #   Indicates that `git` was not installed or could not be found in the
  #   `$PATH` environment variable.
  #
  # @example
  #   archive.write(url,response.body)
  #   archive.commit "Updated #{Date.today}"
  #
  # @example with a block:
  #   archive.commit("Updated #{Date.today}") do
  #     Ronin::Web::Spider.every_page(host: 'example.com') do |page|
  #       archive.write(page.url,page.body)
  #     end
  #   end
  #
  def commit(message)
    yield(self) if block_given?

    commit_message = message.to_s
    git('commit','-m',commit_message)
  end

  private

  #
  # Executes a `git` command against the archive root directory by
  # passing `-C <root>` to `git`.
  #
  # @param [Array<String>] args
  #   Additional arguments for the `git` command.
  #
  # @return [true]
  #   Indicates that the `git` command executed successfully.
  #
  # @raise [GitError]
  #   Indicates that the `git` command exited with an error.
  #
  # @raise [GitNotInstalled]
  #   Indicates that `git` was not installed or could not be found in the
  #   `$PATH` environment variable.
  #
  def git(*args)
    command = ['git', '-C', @root, *args]
    status  = system(*command)

    # Kernel#system returns nil when the command could not be executed at
    # all, and false when it ran but exited with a non-zero status.
    if status.nil?
      raise(GitNotInstalled,"the git command was not found")
    elsif !status
      raise(GitError,"git command failed: #{command.join(' ')}")
    end

    true
  end

end
end
end
end