Sha256: ed3f9756ad3692fd6f51b4932d3c69416e95895580a500a22285a0d098335f5e

Contents?: true

Size: 776 Bytes

Versions: 11

Compression:

Stored size: 776 Bytes

Contents

require 'nokogiri'
module Trackman
  module Urls
    # Extracts the asset URLs (images, icons, scripts, stylesheets) referenced
    # by an HTML document.
    #
    # NOTE(review): this module calls +parse_css+ and the String predicates
    # +internal_path?+ and +embedded?+, none of which are defined in this file.
    # +parse_css+ presumably comes from the included CssParser; the String
    # predicates are presumably project extensions -- confirm both.
    module HtmlParser
      include CssParser

      # Returns the de-duplicated list of asset paths found in +html+.
      #
      # @param html [String] raw HTML markup
      # @return [Array<String>] paths gathered from <img>, icon <link>,
      #   <script> and stylesheet <link> elements, plus whatever +parse_css+
      #   returns for the same markup
      def parse(html)
        doc = Nokogiri::HTML(html)
        (img(doc) + js(doc) + css(doc) + parse_css(html)).uniq
      end

      # Returns image sources followed by favicon hrefs.
      #
      # @param doc [Nokogiri::HTML::Document] parsed document
      # @return [Array<String>]
      def img(doc)
        imgs = refine(doc.css('img'), 'src')
        # "shortcut icon" is the legacy spelling of rel="icon" and is still
        # common in real pages, so both forms are accepted.
        icons = refine(doc.xpath('//link[@rel="icon" or @rel="shortcut icon"]'), 'href')

        imgs + icons
      end

      # Returns the +src+ of every <script> element that has a usable one.
      #
      # @param doc [Nokogiri::HTML::Document] parsed document
      # @return [Array<String>]
      def js(doc)
        refine(doc.xpath('//script'), 'src')
      end

      # Returns the +href+ of every stylesheet <link>.
      #
      # @param doc [Nokogiri::HTML::Document] parsed document
      # @return [Array<String>]
      def css(doc)
        # The type attribute is optional on stylesheet links, so a link is
        # also accepted when it only declares rel="stylesheet".
        refine(doc.xpath('//link[@type="text/css" or @rel="stylesheet"]'), 'href')
      end

      # Reads attribute +node+ from each element in +paths+, drops the query
      # string, and keeps only the values that look like usable local paths.
      #
      # @param paths [Enumerable] elements responding to +[]+ with an
      #   attribute name (Nokogiri nodes in the callers above)
      # @param node [String] name of the attribute to read, e.g. 'src'
      # @return [Array<String>]
      def refine(paths, node)
        # A missing attribute yields nil; to_s turns it into '' which the
        # /\w/ check below then rejects. Everything from the first '?' to the
        # end of the string is the query and is removed.
        temp = paths.map { |n| n[node].to_s.sub(/\?.*\z/m, '') }
        temp.select { |n| n =~ /\w/ && n.internal_path? && !n.embedded? }
      end
    end
  end
end

Version data entries

11 entries across 11 versions & 1 rubygem

Version Path
trackman-0.6.18 lib/trackman/urls/html_parser.rb
trackman-0.6.17 lib/trackman/urls/html_parser.rb
trackman-0.6.16 lib/trackman/urls/html_parser.rb
trackman-0.6.15 lib/trackman/urls/html_parser.rb
trackman-0.6.14 lib/trackman/urls/html_parser.rb
trackman-0.6.13 lib/trackman/urls/html_parser.rb
trackman-0.6.12 lib/trackman/urls/html_parser.rb
trackman-0.6.11 lib/trackman/urls/html_parser.rb
trackman-0.6.10 lib/trackman/urls/html_parser.rb
trackman-0.6.9 lib/trackman/urls/html_parser.rb
trackman-0.6.8 lib/trackman/urls/html_parser.rb