Sha256: 9b304d58cd68b9a6f36ef1b413a1732ba7629647b00cdf351278f07ac990ce20

Contents?: true

Size: 1.04 KB

Versions: 5

Compression:

Stored size: 1.04 KB

Contents

require 'article'
require 'contracts'
require 'nokogiri'
require 'rss'
require 'tracker'

# Class for handling RSS feed to grab posts
# Pulls the RSS feed at AWL_RSS_URL and turns its items into Article objects.
class Scraper
  include Contracts::Core

  # Array of Article objects built by #retrieve_posts (nil until one of the
  # methods below has run)
  attr_reader :articles

  # URL to pull the initial feed
  AWL_RSS_URL = 'http://feeds2.feedburner.com/TheAwl'
  # Shortcut for contracts
  C = Contracts

  Contract C::None => C::ArrayOf[Article]
  # Retrieve the feed, build one Article per item, and fetch each one's tags.
  #
  # Replaces @articles with a fresh list on every call. Each Article is
  # constructed from the content of the item's guid, which the feed uses as
  # the shortened link.
  #
  # @return [Array] the results of calling retrieve_tags on every article.
  #   NOTE(review): the contract declares an array of Articles, so
  #   retrieve_tags presumably returns its receiver -- confirm against Article.
  def retrieve_posts
    # Get posts
    rss = RSS::Parser.parse(AWL_RSS_URL)

    # Grab shortened URLs and wrap each one in an Article
    @articles = rss.items.map { |item| Article.new(item.guid.content) }

    # TODO: Only grab the tags for articles that haven't already been tweeted
    @articles.map(&:retrieve_tags)
  end

  Contract C::None => C::ArrayOf[Article]
  # Subtract saved articles from the list of articles.
  #
  # Removes, in place, every article whose link is already listed by the
  # Tracker. If #retrieve_posts has not run yet the list is treated as empty
  # instead of raising NoMethodError on nil.
  #
  # @return [Array<Article>] the remaining articles (the same array object
  #   that #articles exposes)
  def subtract_cache
    tracker = Tracker.new
    tracker.read_articles
    # Read the saved links once rather than once per article
    saved_links = tracker.articles

    @articles ||= []
    @articles.delete_if { |article| saved_links.include?(article.link) }
  end
end

Version data entries

5 entries across 5 versions & 1 rubygems

Version Path
awl_tags_twitter-1.0.2 lib/scraper.rb
awl_tags_twitter-1.0.1 lib/scraper.rb
awl_tags_twitter-1.0.0 lib/scraper.rb
awl_tags_twitter-0.0.4 lib/scraper.rb
awl_tags_twitter-0.0.3 lib/scraper.rb