Sha256: 4ee16b2c0207d1d800e0c22c914ff49a4f648c86ff38ac06034d9438aafe50fb

Contents?: true

Size: 1.16 KB

Versions: 1

Compression:

Stored size: 1.16 KB

Contents

# frozen_string_literal: true

require 'nokogiri'
require 'net/http'
require 'uri'

require_relative './errors'

module HtmlKit
  ##
  # === HtmlKit::Document
  #
  # Fetches an HTML document from an `http` or `https` URL and reports the
  # errors found while parsing it. Parsing is done with Nokogiri.
  # Warning: Currently doesn't support HTML5 tags
  #
  # The page is downloaded and parsed lazily, on the first call to #valid?,
  # #errors or #html5?, and the parsed result is reused afterwards, so all
  # three methods describe the same response.
  #
  # For example (from irb):
  #
  #  irb(main):001:0> require 'html_kit'
  #  => true
  #  irb(main):002:0> doc = HtmlKit::Document.new('http://www.nokogiri.org/index.html')
  #  => #<HtmlKit::Document:0x007fbb5408cfe8 @url="http://www.nokogiri.org/index.html">
  #  irb(main):003:0> doc.valid?
  #  => false
  ##
  class Document
    # URL prefixes accepted by the constructor.
    SUPPORTED_SCHEME_PREFIXES = %w[http: https:].freeze

    ##
    # Builds a document for +url+. Surrounding whitespace is stripped.
    #
    # Raises HtmlKit::Errors::InvalidUrlError when the URL (including +nil+)
    # does not start with `http:` or `https:`. No network access happens here.
    def initialize(url)
      # to_s so that nil is rejected with InvalidUrlError, not NoMethodError.
      @url = url.to_s.strip

      raise HtmlKit::Errors::InvalidUrlError unless supported_scheme?
    end

    ##
    # Returns true when the parser reported no errors for the document.
    def valid?
      errors.empty?
    end

    ##
    # Returns the list of errors collected by the parser.
    def errors
      document.errors
    end

    ##
    # Returns true when the document's DTD is the HTML5 one, false otherwise
    # (including when the parsed document exposes no DTD at all).
    def html5?
      dtd = document.internal_subset
      !dtd.nil? && dtd.html5_dtd?
    end

    private

    # The check is anchored at the start of the whole string, so a
    # multi-line value cannot smuggle an accepted scheme on a later line.
    def supported_scheme?
      @url.start_with?(*SUPPORTED_SCHEME_PREFIXES)
    end

    # Parsed document, memoized so the page is fetched and parsed only once.
    def document
      @document ||= Nokogiri::HTML(html_content)
    end

    # Raw response body for the URL, fetched with a plain GET.
    def html_content
      Net::HTTP.get(URI.parse(@url))
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
html_kit-0.1.1 lib/html_kit/document.rb