# -*- coding: utf-8 -*-
require 'net/http'
require 'rss'
require 'time'
require 'uri'
require 'rubygems'
require 'hpricot' # As of 2009/12/26 nokogiri cannot be used on GAE, so hpricot is used instead.
require 'dm-core'
require 'dm-types'
require 'dm-validations'

module Consadole
  # Scraper and persistence model for Consadole articles on nikkansports.com.
  module NikkanSports
    # A single news article, stored through DataMapper.
    #
    # NOTE(review): the property is spelled `auther` (sic). The name is part of
    # the public interface / schema, so it is intentionally left unchanged here.
    class Entry
      # Mixin that adds the class-level `parse` constructor to the including class.
      module Parser
        # Hook run on `include Parser`; extends the including class with
        # ClassMethods so that `parse` is callable on the class itself.
        def self.included(mod)
          mod.extend ClassMethods
        end

        module ClassMethods
          # Builds a new entry from the HTML of an article page.
          #
          # The entry is only populated and returned; this method does not
          # save it.
          #
          # @param uri value assigned to the entry's `uri` attribute
          # @param doc HTML source of the article page, handed to Hpricot
          # @return a new instance of the including class
          # @raise [NoMethodError] presumably when the page has no `h1`
          #   (assumes Hpricot's `at` returns nil on no match - confirm)
          # @raise [ArgumentError] from Time.strptime when no timestamp of the
          #   form "YYYY年MM月DD日HH時MM分" is found in `p.timeStamp`
          def parse(uri, doc)
            target = Hpricot(doc)

            # Every paragraph of the news body except the last one, as HTML.
            # Computed once; both the text and the author are derived from it.
            body_html = target.search('div#news p')[0...-1].to_html

            # Text inside `p.timeStamp` matching the Japanese date pattern;
            # an empty string when there is no match.
            stamp = target.search('p.timeStamp').inner_text.match(/\d+年\d+月\d+日\d+時\d+分/).to_s

            entry = new
            entry.uri = uri
            entry.title = target.at('h1').inner_text
            # The trailing "【...】</p>" (the author credit) is replaced by two
            # newlines.
            # NOTE(review): this also drops the closing </p> tag - confirm that
            # the replacement string is what was intended.
            entry.text = body_html.gsub(/【.+】<\/p>$/, "\n\n")
            # `to_a[1]` yields nil instead of raising when there is no credit.
            entry.auther = body_html.match(/【(.+)】<\/p>$/).to_a[1]
            entry.post_date = Time.strptime(stamp, '%Y年%m月%d日%H時%M分')

            # The photo is optional; image attributes are set only when present.
            img = target.at('dl.photo img')
            if img
              # Point at the "-big" variant of the JPEG referenced by the page.
              source = IMAGE_BASE_URI + img['src'].sub(/\.jpg$/, '-big.jpg')
              entry.image_src = source
              entry.image_binary = Net::HTTP.get(source)
              entry.image_alt = img['alt']
            end

            entry
          end
        end
      end

      # Base URI against which the `src` of an article image is resolved.
      IMAGE_BASE_URI = URI.parse('http://cache2.nipc.jp/soccer/news/')

      include Parser
      include DataMapper::Resource

      property :id, Serial
      property :uri, URI, :unique => true
      property :title, String
      property :text, Text
      property :auther, String
      property :post_date, Time
      property :image_src, URI
      property :image_alt, String
      property :image_binary, Object
    end

    # RSS (RDF) feed that is read to discover article links.
    FEED_URI = URI.parse('http://www.nikkansports.com/rss/soccer/jleague/consadole.rdf')

    # Reads FEED_URI and returns the `link` of every item in the feed.
    #
    # The feed is parsed with validation disabled (second argument `false`).
    #
    # @return [Array] one link per feed item
    def self.uris
      rss = RSS::Parser.parse(FEED_URI, false)
      rss.items.map(&:link)
    end
  end
end