require 'curb-fu'
require 'hpricot'

module Govfeed

  # Truncates +text+ to at most +length+ whitespace-separated words.
  #
  # text       - the String to shorten; nil is passed through as nil.
  # length     - maximum number of words to keep (default 30). Zero or a
  #              negative value keeps no words at all.
  # end_string - suffix appended only when words were actually dropped.
  #
  # Returns the shortened String (words re-joined with single spaces),
  # or nil when +text+ is nil.
  def self.truncate_words(text, length = 30, end_string = ' ...')
    return if text.nil?

    words = text.split
    # A plain range (0..length-1) turns into 0..-1 when length is 0 and would
    # return every word, so clamp the count and use Array#first instead.
    kept = words.first([length, 0].max)
    kept.join(' ') + (words.length > length ? end_string : '')
  end

  # Returns a Hash mapping feed keys (Symbols) to feed URLs (Strings).
  #
  # The entries go Federal feeds, State\Province feeds, Think Tank\Institution feeds.
  # Some states\provinces\territories do not yet have RSS feeds or feeds that work (Idaho).
  # They will be added to this hash when they are created or fixed.
  #
  # A fresh Hash is built on every call, so callers may modify the result.
  def self.getFeedList
    {
      :us_federal => "http://www.usa.gov/rss/updates.xml",
      :us_al => "http://media.alabama.gov/rss/rss.aspx",
      :us_az => "http://www.azdps.gov/RSS/News/",
      :us_ca => "http://news.ca.gov/news/feed",
      :us_de => "http://portal.delaware.gov/delaware-rss.xml",
      :us_ga => "http://georgia.gov/rss/ga-agency-news.rss",
      :us_hi => "http://www.oha.org/index2.php?option=ds-syndicate&version=1&feed_id=1",
      :us_il => "http://www.illinois.gov/PressReleases/RSS/Main_State_Page.xml",
      :us_in => "http://www.in.gov/portal/news_events/39832.xml",
      :us_ks => "http://www.kansas.gov/feed/",
      :us_ky => "http://migration.kentucky.gov/g2p/KII.G2P.Portal.CMS.Templates/G2PPortalRssPage.aspx?application=NEWSROOM",
      :us_la => "http://wwwprd.doa.louisiana.gov/LaNews/PublicPages/RSSFeed.xml",
      :us_me => "http://www.maine.gov/tools/whatsnew/rss.php?tid=27",
      :us_md => "http://choosemaryland.org/RSS/DBEDNewsFeed.aspx",
      :us_ma => "http://innovation.blog.state.ma.us/blog/atom.xml",
      :us_mi => "http://www.michigan.gov/rss/0,2348,7-124-53818--62431-,00.xml",
      :us_mo => "http://www.mo.gov/news/?xml=all",
      :us_nj => "http://www.state.nj.us/nj/home/features/news/approved/rss.xml",
      :us_nm => "http://newmexico.sks.com/government/CalendarRssService.ashx?Id=4ae1958b9f094127bcb6e6580e29cf85&type=c&uri=%2fgovernment%2fopen_meetings.aspx",
      :us_ny => "http://www.nysenate.gov/rss",
      :us_nc => "http://www.dornc.com/rss/headlines.xml",
      :us_nd => "http://www.commerce.nd.gov/news/newsFeed.asp",
      :us_oh => "http://development.ohio.gov/rss/Feeds/BusinessInvestmentsIncentives.xml",
      # Uses the feed:// pseudo-scheme; getFeed rewrites it to http:// before fetching.
      :us_ok => "feed://www.ok.gov/genthree/rss.php?agency_id=0",
      :us_pa => "http://www.state.pa.us/portal/server.pt/gateway/PTARGS_0_2_134550_3013_803012_43/SearchXml/SnapshotQueryRss.axd?pubdate=104&id=2067",
      :us_ri => "http://rigov.tumblr.com/rss",
      :us_sd => "http://www.sd.gov/rss/",
      :us_tn => "http://news.tn.gov/rss.xml",
      :us_tx => "http://txapps.texas.gov/portal/tol/en/rss",
      :us_va => "http://www.governor.virginia.gov/News/rss/index.cfm",
      :us_vt => "http://www.vermont.gov/portal/rss/feeds/news.php",
      :us_wa => "http://access.wa.gov/news/thismonth.xml",
      :us_wv => "http://www.wv.gov/_layouts/feed.aspx?xsl=1&web=%2Fnews&page=fa057236-2db7-4147-b6d0-d95f3025f5a2&wp=38869cd8-fc30-4334-b410-c84c890ab9a5",
      :us_wi => "http://www.dhs.wisconsin.gov/news/pressreleases/rss.xml",
      :cato => "http://feeds.cato.org/CatoDispatch.xml",
      :heritage => "http://blog.heritage.org/feed/",
      :mises => "http://feeds.mises.org/MisesDailyArticles?format=xml",
      :ca_federal => "http://news.gc.ca/web/rss-eng.do",
      :ca_ab => "http://www.gov.ab.ca/acn/RSS_FEEDS/RSS_ALL_News.xml",
      :ca_bc => "http://www2.news.gov.bc.ca/nrm_rss_news/govwide.xml",
      # The URL points at news.gov.mb.ca; the :ca_mn key is kept as-is for existing callers.
      :ca_mn => "http://news.gov.mb.ca/news/index.rss",
      :ca_nb => "http://www2.gnb.ca/content/gnb/en/news/local_government/_jcr_content/mainContent_par/newslist.rss1.html",
      :ca_ns => "http://www.gov.ns.ca/news/rss/rss.asp",
      :ca_on => "http://news.ontario.ca/newsroom/en/rss/allnews.rss",
      :ca_pe => "http://www.gov.pe.ca/index.php3?number=rss",
      :ca_sk => "http://www.gov.sk.ca/Common/PageTemplates/rss.aspx"
    }
  end

  # Fetches a feed and renders it as an HTML fragment.
  #
  # feed            - a key from getFeedList (e.g. :us_federal).
  # numberOfStories - maximum number of items to render. Zero (the default),
  #                   nil or a negative value renders every item.
  #
  # Returns a String holding an <h1> heading, an optional description <div>,
  # an optional image <div>, and one linked heading plus summary <div> per item.
  # Raises ArgumentError when +feed+ is not a key of getFeedList.
  #
  # NOTE(review): text taken from the feed is interpolated into the markup
  # without HTML escaping, exactly as before. Consider escaping it if the
  # output is shown to users and the feeds are not fully trusted.
  def self.getFeed(feed, numberOfStories = 0)
    feeds = getFeedList
    url = feeds[feed]
    raise ArgumentError, "Unknown feed: #{feed.inspect}" if url.nil?

    # Curb-Fu fetches the feed. feed:// is only a subscription hint, not a
    # transport protocol, so it is fetched over plain HTTP.
    response = CurbFu.get(url.sub(%r{\Afeed://}, 'http://'))

    # The hpricot gem parses the body while the HTML is being built.
    doc = Hpricot(response.body.to_s)

    html = []

    # Feed heading; a feed without a <title> gets an empty heading instead of a crash.
    html << "<h1 id=\"govfeedTitle\">#{first_text(doc / 'title')}</h1>\n"

    # Only render the description when the feed actually has one.
    description = first_text(doc / 'description')
    unless description.nil? || description.empty?
      html << "<div class=\"govfeedDiv\">#{description}</div>\n"
    end

    # Only render the image when the feed provides an image URL.
    image_url = first_text(doc / 'image' / 'url')
    unless image_url.nil?
      html << "<div class=\"govfeedImage\"><img src=\"#{image_url}\" alt=\"\" /></div>\n"
    end

    # Individual feed items, optionally capped at numberOfStories.
    items = (doc / 'item').to_a
    limit = numberOfStories.to_i
    items = items.first(limit) if limit > 0
    items.each { |item| html << render_item(item) }

    html.join
  end

  # Returns the inner text of the first element in +elements+, or nil when
  # the collection is empty.
  def self.first_text(elements)
    node = elements[0]
    node.nil? ? nil : node.inner_text
  end

  # Renders one feed item as a linked heading followed by a <div> holding the
  # publication date and the description truncated to 40 words.
  def self.render_item(item)
    link = (item / 'link').inner_text
    title = (item / 'title').inner_text
    published = (item / 'pubDate').inner_text
    summary = truncate_words((item / 'description').inner_text, 40)

    "<a href=\"#{link}\"><h2 class=\"govfeedHeading\">#{title}</h2></a>\n" \
      "<div class=\"govfeedDiv\">#{published}<br />#{summary}</div>\n"
  end

  private_class_method :first_text, :render_item

end