require 'curb-fu' require 'hpricot' module Govfeed # A helper function to truncate down to the specified number of words, default is 30 def self.truncate_words(text, length = 30, end_string = ' ...') if text == nil return end words = text.split() words[0..(length-1)].join(' ') + (words.length > length ? end_string : '') end # returns a list of government feeds def self.getFeedList { :us_federal => "http://www.usa.gov/rss/updates.xml", :us_al => "http://media.alabama.gov/rss/rss.aspx", :us_az => "http://www.azdps.gov/RSS/News/", :us_ca => "http://news.ca.gov/news/feed", :us_de => "http://portal.delaware.gov/delaware-rss.xml", :us_ga => "http://georgia.gov/rss/ga-agency-news.rss", :us_hi => "http://www.oha.org/index2.php?option=ds-syndicate&version=1&feed_id=1", :us_il => "http://www.illinois.gov/PressReleases/RSS/Main_State_Page.xml", :us_in => "http://www.in.gov/portal/news_events/39832.xml", :us_ks => "http://www.kansas.gov/feed/", :us_ky => "http://migration.kentucky.gov/g2p/KII.G2P.Portal.CMS.Templates/G2PPortalRssPage.aspx?application=NEWSROOM", :us_la => "http://wwwprd.doa.louisiana.gov/LaNews/PublicPages/RSSFeed.xml", :us_me => "http://www.maine.gov/tools/whatsnew/rss.php?tid=27", :us_md => "http://choosemaryland.org/RSS/DBEDNewsFeed.aspx", :us_ma => "http://innovation.blog.state.ma.us/blog/atom.xml", :us_mi => "http://www.michigan.gov/rss/0,2348,7-124-53818--62431-,00.xml", :us_mo => "http://www.mo.gov/news/?xml=all", :us_nj => "http://www.state.nj.us/nj/home/features/news/approved/rss.xml", :us_nm => "http://newmexico.sks.com/government/CalendarRssService.ashx?Id=4ae1958b9f094127bcb6e6580e29cf85&type=c&uri=%2fgovernment%2fopen_meetings.aspx", :us_ny => "http://www.nysenate.gov/rss", :us_nc => "http://www.dornc.com/rss/headlines.xml", :us_nd => "http://www.commerce.nd.gov/news/newsFeed.asp", :us_oh => "http://development.ohio.gov/rss/Feeds/BusinessInvestmentsIncentives.xml", :us_ok => "feed://www.ok.gov/genthree/rss.php?agency_id=0", :us_pa => "http://www.state.pa.us/portal/server.pt/gateway/PTARGS_0_2_134550_3013_803012_43/SearchXml/SnapshotQueryRss.axd?pubdate=104&id=2067", :us_ri => "http://rigov.tumblr.com/rss", :us_sd => "http://www.sd.gov/rss/", :us_tn => "http://news.tn.gov/rss.xml", :us_tx => "http://txapps.texas.gov/portal/tol/en/rss", :us_va => "http://www.governor.virginia.gov/News/rss/index.cfm", :us_vt => "http://www.vermont.gov/portal/rss/feeds/news.php", :us_wa => "http://access.wa.gov/news/thismonth.xml", :us_wv => "http://www.wv.gov/_layouts/feed.aspx?xsl=1&web=%2Fnews&page=fa057236-2db7-4147-b6d0-d95f3025f5a2&wp=38869cd8-fc30-4334-b410-c84c890ab9a5", :us_wi => "http://www.dhs.wisconsin.gov/news/pressreleases/rss.xml", :cato => "http://feeds.cato.org/CatoDispatch.xml", :heritage => "http://blog.heritage.org/feed/", :mises => "http://feeds.mises.org/MisesDailyArticles?format=xml", :ca_federal => "http://news.gc.ca/web/rss-eng.do", :ca_ab => "http://www.gov.ab.ca/acn/RSS_FEEDS/RSS_ALL_News.xml", :ca_bc => "http://www2.news.gov.bc.ca/nrm_rss_news/govwide.xml", :ca_mn => "http://news.gov.mb.ca/news/index.rss", :ca_nb => "http://www2.gnb.ca/content/gnb/en/news/local_government/_jcr_content/mainContent_par/newslist.rss1.html", :ca_ns => "http://www.gov.ns.ca/news/rss/rss.asp", :ca_on => "http://news.ontario.ca/newsroom/en/rss/allnews.rss", :ca_pe => "http://www.gov.pe.ca/index.php3?number=rss", :ca_sk => "http://www.gov.sk.ca/Common/PageTemplates/rss.aspx" } end def self.getFeed(feed, numberOfStories = 0) # This hash contains the Feed URLs # The format goes Federal feeds, State\Province feeds, Think Tank\Institution feeds # Some states\provinces\territories do not yet have RSS feeds or feeds that work (Idaho). # They will be added to this hash when they are created or fixed. feed_url = self.getFeedList # Curb-Fu will be fetching the feed. feed = CurbFu.get(feed_url[feed]) # the hpricot gem will be used to parse it as we build the HTML doc = Hpricot(feed.body.to_s) # RSS feed heading rss = "