example.rb in docparser-0.0.1 vs example.rb in docparser-0.1.0
- old
+ new
@@ -1,22 +1,24 @@
#
-
-# An example of parsing a popular dutch website..
+# An example of parsing hackaday.com
# (C) 2013 Jurriaan Pruis
#
+$LOAD_PATH.unshift __dir__
+require File.expand_path('lib/docparser.rb', __dir__)
+require 'tmpdir'
-require 'docparser'
include DocParser
-output = HTMLOutput.new filename: 'hackaday.html'
+output = MultiOutput.new(filename: 'hackaday')
output.header = 'Title', 'Author', 'Publication date', 'URL', 'Summary'
-parser = Parser.new(files: (1..20).map {|i| "http://hackaday.com/page/#{i}/"}, parallel: false, output: output)
+files = Dir[File.join(__dir__, 'test/support/hackaday/*.html')]
+parser = Parser.new(files: files, parallel: false, output: output)
parser.parse! do
css('#content .post') do |post|
title_el = post.search('.entry-title a').first
title = title_el.content
- author =post.search('.post-info .author .fn a').first.content
+ author = post.search('.post-info .author .fn a').first.content
published_time = post.search('.post-info .date.published').first.content
- url = title_el.attributes['href']
+ url = title_el.attributes['href'].value
summary = post.search('.entry-content').first.content.strip
add_row title, author, published_time, url, summary
end
end
\ No newline at end of file