Sha256: 1032c53d1a49599ec5ac5b0abefe76c6d3b50126356d02c997bd491c7124c713
Contents?: true
Size: 1003 Bytes
Versions: 3
Compression:
Stored size: 1003 Bytes
Contents
# Parser for news articles served from tvbs.com.tw.
#
# NOTE(review): this class depends on names that are not defined in this
# file -- the parent class TaiwaneseNewsParser::Parser (presumably supplying
# +url+, +@article+ and +clean_up+), Nokogiri, open-uri and Time.parse
# (stdlib 'time'). Confirm they are loaded by the gem's entry point.
class TaiwaneseNewsParser::Parser::Tvbs < TaiwaneseNewsParser::Parser
  # @return [String] the web domain handled by this parser
  def self.domain
    'tvbs.com.tw'
  end

  # @return [Array<String>] names of the news outlet
  def self.names
    ['TVBS']
  end

  # @param url [String] candidate article URL
  # @return [MatchData, nil] truthy when the URL is a TVBS "entry" page
  def self.applicable?(url)
    url.match(%r{tvbs\.com\.tw/entry})
  end

  # Extracts the numeric article id from an entry URL.
  #
  # @param url [String]
  # @return [String, nil] the digits following "/entry/", or nil when absent
  def self.parse_url_id(url)
    url[%r{/entry/(\d+)}, 1]
  end

  # Downloads the article and parses it as HTML.
  #
  # The document is memoized: #parse calls this method several times and the
  # page must not be downloaded again for every field.
  #
  # URI.parse(url).read (provided by open-uri) is used instead of Kernel#open
  # because Kernel#open no longer accepts URLs on Ruby 3.0+, would spawn a
  # subprocess for strings starting with "|", and leaves the stream unclosed.
  #
  # @return [Nokogiri::HTML::Document]
  def doc
    @doc ||= begin
      @raw = URI.parse(url).read
      Nokogiri::HTML(@raw)
    end
  end

  # url = 'http://news.tvbs.com.tw/entry/519673'
  #
  # Fills @article with title, company name, content, publication time and
  # reporter name, then calls clean_up.
  #
  # @return [Hash] the populated @article
  # @raise [ArgumentError] from Time.parse when the dateline holds no
  #   recognizable timestamp
  def parse
    @article[:title] = doc.at_css('article h1').text
    @article[:company_name] = parse_company_name
    @article[:content] = doc.css('article .content').text

    dateline = doc.at_css('article .meta-data .dateline').text
    # Capture only the timestamp so the "時間:" label is not handed to Time.parse.
    time = dateline[%r{時間:(\d{4}/\d{1,2}/\d{1,2} \d{2}:\d{2})}, 1]
    # The page shows minute precision; append zero seconds before parsing.
    @article[:published_at] = Time.parse("#{time}:00")

    @article[:reporter_name] = parse_reporter_name

    clean_up

    @article
  end

  # @return [String, nil] the text after the "記者:" label, or nil when the
  #   reporter element or the label is missing
  def parse_reporter_name
    reporter = doc.at_css('article .meta-data .reporter')
    reporter && reporter.text[%r{記者:(.+)}, 1]
  end

  # @return [String] the first entry of .names
  def parse_company_name
    self.class.names.first
  end
end
Version data entries
3 entries across 3 versions & 1 rubygems