Sha256: f59b4542da46548e430628d171ebc183e5f685dddefc655e43b3bd52f0114466
Contents?: true
Size: 866 Bytes
Versions: 4
Compression:
Stored size: 866 Bytes
Contents
#
# Example script: parse saved hackaday.com pages with DocParser.
# (C) 2013 Jurriaan Pruis
#
# For every element matching '#content .post' in the HTML files under
# test/support/hackaday, one row is added to the output with the post's
# title, author, publication date, URL and summary.
#
$LOAD_PATH.unshift __dir__
require File.expand_path('lib/docparser.rb', __dir__)
include DocParser

# Output target for the rows. The five header labels are listed in the same
# order as the values handed to add_row below.
sink = MultiOutput.new(filename: 'hackaday')
sink.header = ['Title', 'Author', 'Publication date', 'URL', 'Summary']

# Input documents: every .html file in the hackaday support directory.
html_pages = Dir[File.join(__dir__, 'test/support/hackaday/*.html')]

scraper = Parser.new(files: html_pages, parallel: false, output: sink)

# Text content of the first node under +node+ that matches +selector+.
# Raises NoMethodError when nothing matches, because +first+ is then nil.
first_text = ->(node, selector) { node.search(selector).first.content }

scraper.parse! do
  css('#content .post') do |post|
    # The title link provides both the title text and the post URL.
    link     = post.search('.entry-title a').first
    heading  = link.content
    byline   = first_text.call(post, '.post-info .author .fn a')
    posted   = first_text.call(post, '.post-info .date.published')
    href     = link.attributes['href'].value
    abstract = first_text.call(post, '.entry-content').strip

    add_row heading, byline, posted, href, abstract
  end
end
Version data entries
4 entries across 4 versions & 1 rubygems
Version | Path |
---|---|
docparser-0.2.3 | example.rb |
docparser-0.2.2 | example.rb |
docparser-0.2.0 | example.rb |
docparser-0.1.6 | example.rb |