Sha256: 09e41f858208c15db521b94d1d91a30460343d491278c0a635ed8334de41c065
Contents?: true
Size: 795 Bytes
Versions: 1
Compression:
Stored size: 795 Bytes
Contents
#
# An example of scraping post metadata from the Hackaday blog (hackaday.com).
# (C) 2013 Jurriaan Pruis
#
# Requests archive pages 1..20 and emits one row per post (title, author,
# publication date, URL, summary) through an HTMLOutput bound to
# 'hackaday.html'.
#
require 'docparser'
include DocParser

output = HTMLOutput.new filename: 'hackaday.html'
output.header = 'Title', 'Author', 'Publication date', 'URL', 'Summary'

# One archive page URL per page number.
page_urls = (1..20).map { |i| "http://hackaday.com/page/#{i}/" }
parser = Parser.new(files: page_urls, parallel: false, output: output)

parser.parse! do
  css('#content .post') do |post|
    # NOTE(review): `post` is presumably a Nokogiri node (the original code
    # called #search/#content/#attributes on it) -- confirm against docparser.

    # Text content of the first element matching +selector+ inside this post,
    # or '' when nothing matches, so a post that lacks one field does not
    # abort the whole run with a NoMethodError on nil.
    text_of = lambda do |selector|
      node = post.at(selector)
      node ? node.content : ''
    end

    title_el = post.at('.entry-title a')
    # Without a title link there is neither a title nor a URL to record;
    # skip the element instead of crashing.
    next if title_el.nil?

    title          = title_el.content
    author         = text_of.call('.post-info .author .fn a')
    published_time = text_of.call('.post-info .date.published')
    # Node#[] yields the attribute value itself (nil when absent) rather than
    # an attribute object; to_s normalises a missing href to ''.
    url            = title_el['href'].to_s
    summary        = text_of.call('.entry-content').strip

    add_row title, author, published_time, url, summary
  end
end
Version data entries
1 entry across 1 version & 1 rubygem
Version | Path |
---|---|
docparser-0.0.1 | example.rb |