Sha256: f59b4542da46548e430628d171ebc183e5f685dddefc655e43b3bd52f0114466

Contents?: true

Size: 866 Bytes

Versions: 4

Compression:

Stored size: 866 Bytes

Contents

#
# An example of parsing hackaday.com
# (C) 2013 Jurriaan Pruis
#
$LOAD_PATH.unshift __dir__
require File.expand_path('lib/docparser.rb', __dir__)

include DocParser

# Column titles, listed in the same order as the values handed to add_row below.
columns = ['Title', 'Author', 'Publication date', 'URL', 'Summary']

sink = MultiOutput.new(filename: 'hackaday')
sink.header = columns

# Saved hackaday pages that ship with the test fixtures.
pages = Dir.glob(File.join(__dir__, 'test/support/hackaday/*.html'))

scraper = Parser.new(files: pages, parallel: false, output: sink)

scraper.parse! do
  # NOTE(review): css and add_row are called without a receiver, so the block
  # is presumably evaluated in the context of a DocParser object - confirm.
  css('#content .post') do |article|
    # First node under this post that matches +selector+.
    first_match = ->(selector) { article.search(selector).first }

    # The title link supplies both the title text and the post URL.
    heading_link = first_match.call('.entry-title a')

    add_row(
      heading_link.content,
      first_match.call('.post-info .author .fn a').content,
      first_match.call('.post-info .date.published').content,
      heading_link.attributes['href'].value,
      first_match.call('.entry-content').content.strip
    )
  end
end

Version data entries

4 entries across 4 versions & 1 rubygem

Version Path
docparser-0.2.3 example.rb
docparser-0.2.2 example.rb
docparser-0.2.0 example.rb
docparser-0.1.6 example.rb