require_relative '../../test_helper' describe DocParser::Document do before do Log4r::Logger['docparser'].level = Log4r::INFO $output = DocParser::NilOutput.new @parser = Class.new do define_method(:outputs) { [$output] } end.new @test_doc_path = File.join($SUPPORT_DIR, 'test_html.html') @test_doc = DocParser::Document.new(filename: @test_doc_path, parser: @parser) end it 'should read HTML contents' do file = File.join($SUPPORT_DIR, 'test_html.html') doc = DocParser::Document.new(filename: file, parser: @parser) doc.doc.must_be_instance_of Nokogiri::HTML::Document doc.html.must_equal(open(file).read) end it 'should read XML contents' do file = File.join($SUPPORT_DIR, 'test_xml.xml') doc = DocParser::Document.new(filename: file, parser: @parser) doc.doc.must_be_instance_of Nokogiri::XML::Document doc.html.must_equal(open(file).read) doc.xpath_content('xmltest > title').must_equal('Test XML') doc.xpath_content('xmltest > test').must_equal('Character Data') end it 'should read remote contents' do url = 'https://gist.github.com/jurriaan/3f2750aa546e3e6719cf/raw' doc = DocParser::Document.new(filename: url, parser: @parser) doc.html.must_equal(open(url).read) end it 'should use the correct encoding' do file = File.join($SUPPORT_DIR, 'test_encoding.html') file2 = File.join($SUPPORT_DIR, 'test_encoding2.html') doc = DocParser::Document.new(filename: file, parser: @parser) doc2 = DocParser::Document.new(filename: file2, parser: @parser, encoding: 'iso-8859-1') doc.html.must_equal(doc2.html) doc.css_content('#encoding').must_equal(doc2.css_content('#encoding')) end it 'should specify filename and encoding in #inspect' do @test_doc.inspect.must_include(@test_doc.filename) @test_doc.inspect.must_include(@test_doc.encoding) end it 'should get the title of a document' do @test_doc.title.must_equal('Test HTML') end it 'should store the path to the document' do @test_doc.filename.must_equal(@test_doc_path) end it 'should be possible to use css queries' do css = 'article > h1 + p' css_content = @test_doc.css_content(css) css_element = @test_doc.css(css) css_content.must_equal('Great article it is') css_content.must_equal(css_element.first.content) end it 'should be possible to use xpath queries' do xpath = '//li/ancestor::article/h1' xpath_content = @test_doc.xpath_content(xpath) xpath_element = @test_doc.xpath(xpath) xpath_content.must_equal('This is an article') xpath_content.must_equal(xpath_element.first.content) end it 'should be possible to use regular expressions' do regex = @test_doc.regexp(/\