Sha256: 35df5ee5185a1613dd61609064e15d461cd92d140a38eaea5340c1116c141b15

Contents?: true

Size: 1.73 KB

Versions: 14

Compression:

Stored size: 1.73 KB

Contents

require 'fileutils'
require 'open-uri'

module Distil

  # Walks remote directory listings and mirrors them below a local working
  # directory: links ending in "/" are treated as directories and recursed
  # into, every other link is downloaded as a file.
  class RecursiveHTTPFetcher
    # When true (the default), #download does not print the path it writes.
    attr_accessor :quiet

    # urls_to_fetch - a String (one URL per line) or an Array of such values.
    # level         - starting depth counter; #fetch_dir only creates a local
    #                 directory while the incremented counter is above zero.
    # cwd           - local directory that downloads are written beneath.
    def initialize(urls_to_fetch, level = 1, cwd = ".")
      @level = level
      @cwd = cwd
      # Split on newlines and strip each entry so no URL carries a trailing
      # "\n" (String#lines keeps it); blank lines are dropped.
      @urls_to_fetch = Array(urls_to_fetch).
        map { |entry| entry.to_s.split("\n") }.
        flatten.
        map { |url| url.strip }.
        reject { |url| url.empty? }
      @quiet = true
    end

    # Lists the entries found at each top-level URL.
    #
    # URLs that start with svn:// or svn+ssh://, or that contain "/svn/", are
    # listed with `svn ls`; all others are fetched and scanned with #links
    # using an empty base URL. Returns one flat Array; a URL whose listing
    # raised a StandardError contributes a single nil (best effort).
    def ls
      @urls_to_fetch.collect do |url|
        begin
          if url =~ /^svn(\+ssh)?:\/\/.*/ || url =~ /\/svn\//
            # Array form of IO.popen runs svn directly, so the URL is never
            # interpreted by a shell.
            listing = IO.popen(["svn", "ls", url]) { |io| io.read }
            listing.split("\n").map { |entry| "/#{entry}" }
          else
            open_url(url) { |stream| links("", stream.read) }
          end
        rescue StandardError
          nil
        end
      end.flatten
    end

    # Descends into +dir+ below the current working directory, creating it
    # (and any missing parents) on disk.
    def push_d(dir)
      @cwd = File.join(@cwd, dir)
      FileUtils.mkdir_p(@cwd)
    end

    # Moves the working directory back up one level.
    def pop_d
      @cwd = File.dirname(@cwd)
    end

    # Extracts href targets from +contents+ and joins each onto +base_url+.
    #
    # Accepts double-quoted, single-quoted and unquoted attribute values, with
    # optional whitespace around "=". Skipped targets: empty ones, those
    # ending in "svnindex.xsl", those starting with "scheme://" or "//", and
    # those starting with "." (such as "../").
    #
    # Returns an Array of Strings.
    def links(base_url, contents)
      found = []
      contents.scan(/href\s*=\s*(?:"([^">]*)|'([^'>]*)|([^\s"'>]*))/i) do |captures|
        link = captures.compact.first.to_s
        # An empty target would join to base_url itself and be re-fetched
        # as a directory forever.
        next if link.empty?
        next if link =~ /svnindex\.xsl\z/
        next if link =~ /\A(\w*:|)\/\// || link =~ /\A\./
        found << File.join(base_url, link)
      end
      found
    end

    # Downloads +link+ into the current working directory, naming the file
    # after the link's basename. Prints the target path unless #quiet is set.
    def download(link)
      target = File.join(@cwd, File.basename(link))
      puts "+ #{target}" unless @quiet
      open_url(link) do |stream|
        File.open(target, "wb") { |file| file.write(stream.read) }
      end
    end

    # Fetches every entry of +links+. Entries ending in "/" are recursed into
    # with #fetch_dir; when +links+ equals the top-level URL list every entry
    # is treated as a directory. Everything else goes to #download.
    def fetch(links = @urls_to_fetch)
      top_level = links == @urls_to_fetch
      links.each do |link|
        if top_level || link =~ /\/$/
          fetch_dir(link)
        else
          download(link)
        end
      end
    end

    # Fetches the listing at +url+ and recursively fetches everything it
    # links to, inside a local directory named after the URL's basename
    # (created only while the depth counter is above zero).
    #
    # The working directory and depth counter are restored even when a
    # request or write raises, so the fetcher stays usable after a failure.
    # Returns the depth counter after it has been restored.
    def fetch_dir(url)
      @level += 1
      entered = @level > 0
      begin
        push_d(File.basename(url)) if entered
        open_url(url) do |stream|
          fetch(links(url, stream.read))
        end
      ensure
        pop_d if entered
        @level -= 1
      end
      @level
    end

    private

    # Opens +url+ and yields the readable stream to the block.
    #
    # Kernel#open no longer handles URLs as of Ruby 3.0, so URI.open from
    # open-uri is used when it exists; older Rubies fall back to Kernel#open.
    def open_url(url, &block)
      if URI.respond_to?(:open)
        URI.open(url, &block)
      else
        open(url, &block)
      end
    end
  end

end

Version data entries

14 entries across 14 versions & 1 rubygem

Version Path
distil-0.14.5.a lib/distil/recursive-http-fetcher.rb
distil-0.14.4 lib/distil/recursive-http-fetcher.rb
distil-0.14.3 lib/distil/recursive-http-fetcher.rb
distil-0.14.2 lib/distil/recursive-http-fetcher.rb
distil-0.14.2.a lib/distil/recursive-http-fetcher.rb
distil-0.14.1 lib/distil/recursive-http-fetcher.rb
distil-0.14.1.a lib/distil/recursive-http-fetcher.rb
distil-0.14.0 lib/distil/recursive-http-fetcher.rb
distil-0.14.0.i lib/distil/recursive-http-fetcher.rb
distil-0.14.0.h lib/distil/recursive-http-fetcher.rb
distil-0.14.0.g lib/distil/recursive-http-fetcher.rb
distil-0.14.0.d lib/distil/recursive-http-fetcher.rb
distil-0.14.0.c lib/distil/recursive-http-fetcher.rb
distil-0.14.0.b lib/distil/recursive-http-fetcher.rb