require 'feedparser/html2text-parser'
require 'feedparser/filesizes'
class String
  # Convert an HTML text to plain text.
  #
  # The receiver is run through FeedParser::HTML2TextParser and the parser's
  # output is then normalized:
  # * leading and trailing whitespace is removed,
  # * spaces and tabs next to a line break are removed,
  # * runs of blank lines are reduced to a single blank line,
  # * remaining runs of spaces/tabs are reduced to one space.
  #
  # wrapto:: false (default) for no wrapping, otherwise the maximum line
  #          width handed to #wrap_text.
  #
  # Returns the plain text. When wrapping is requested the result is
  # newline-terminated (see #wrap_text).
  def html2text(wrapto = false)
    # parse HTML; the parser gets an unfrozen copy so the receiver is never
    # handed out directly.
    # NOTE(review): the meaning of the +true+ constructor argument is defined
    # by HTML2TextParser, which is not visible here.
    parser = FeedParser::HTML2TextParser.new(true)
    parser.feed(dup)
    parser.close
    text = parser.savedata
    # remove leading and trailing whitespace
    text.sub!(/\A\s+/, '')
    text.sub!(/\s+\z/, '')
    # remove whitespace around \n. Tabs are included: otherwise the final
    # collapsing step would turn a tab next to a line break into a stray
    # space, and a tab-only line would defeat the blank-line collapsing.
    text.gsub!(/[ \t]*\n/, "\n")
    text.gsub!(/\n[ \t]*/, "\n")
    # and duplicated \n
    text.gsub!(/\n\n+/, "\n\n")
    # and remove duplicated whitespace
    text.gsub!(/[ \t]+/, ' ')
    # finally, wrap the text if requested
    return wrap_text(text, wrapto) if wrapto

    text
  end

  # Break +text+ into lines of at most +wrapto+ characters, splitting at
  # runs of spaces.
  #
  # The spaces at which a line is broken stay at the end of that line, and
  # every produced line (including the last one) ends with "\n". A chunk
  # that is not followed by a space or an end of line is left unsplit.
  def wrap_text(text, wrapto = 72)
    text.gsub(/(.{1,#{wrapto}})( +|$)\n?/, "\\1\\2\n")
  end
end
module FeedParser
class Feed
  # Render the feed as plain text: a block of "Name: value" header lines
  # (type, encoding, title, link, description, creator), an empty line, and
  # then every item, each preceded by a line of 40 asterisks.
  #
  # localtime:: forwarded unchanged to each item's to_text.
  # wrapto::    forwarded unchanged to each item's to_text; the feed
  #             description itself is converted without wrapping.
  #
  # Returns the rendered String.
  def to_text(localtime = true, wrapto = false)
    out = ''.dup
    out << "Type: #{@type}\n"
    out << "Encoding: #{@encoding}\n"
    out << "Title: #{@title}\n"
    out << "Link: #{@link}\n"
    out << (@description ? "Description: #{@description.html2text}\n" : "Description:\n")
    out << "Creator: #{@creator}\n"
    out << "\n"
    separator = '*' * 40 + "\n"
    @items.each do |item|
      out << separator
      out << item.to_text(localtime, wrapto)
    end
    out
  end
end
class FeedItem
  # Render the item as plain text.
  #
  # localtime:: when true the date is printed via @date.to_s, otherwise via
  #             @date.getutc.to_s.
  # wrapto::    forwarded to String#html2text when converting the content.
  # header::    selects the layout.
  #             true:  item title/link/date, content, files, feed, metadata.
  #             false: item link, content, files, a "-- " marker, feed,
  #                    then item title/link/date and metadata.
  #
  # Every optional piece (title, link, date, content, enclosures, feed,
  # creator, subject, categories) is skipped when absent; a missing feed or
  # missing categories no longer raises.
  #
  # Returns the rendered String, always terminated by a newline.
  def to_text(localtime = true, wrapto = false, header = true)
    s = ''.dup
    if header
      s << text_item_header(localtime) << "\n"
    elsif link
      s << "<#{link}>\n\n"
    end
    s << "#{@content.html2text(wrapto).chomp}\n" if @content
    if @enclosures && !@enclosures.empty?
      s << "\nFiles:"
      # each enclosure is indexed as [0] location, [1] size, [2] type label;
      # the size is converted with to_i and formatted by to_human_readable
      @enclosures.each do |e|
        s << "\n #{e[0]} (#{e[1].to_i.to_human_readable}, #{e[2]})"
      end
    end
    # marker separating the content from the trailing metadata
    s << "-- " unless header
    s << "\nFeed: "
    if @feed
      s << @feed.title if @feed.title
      s << "\n<#{@feed.link}>" if @feed.link
    end
    s << "\n" << text_item_header(localtime) unless header
    s << "\nAuthor: #{creator}" if creator
    s << "\nSubject: #{@subject}" if @subject
    s << "\nFiled under: #{@categories.join(', ')}" if @categories && !@categories.empty?
    s << "\n" # final newline, for compat with history
  end

  private

  # Build the "Item: <title>" line followed by the optional "<link>" and
  # "Date: ..." lines, without a trailing newline. Shared by both layouts
  # of to_text.
  def text_item_header(localtime)
    out = 'Item: '.dup
    out << @title if @title
    out << "\n<#{link}>" if link
    out << "\nDate: #{localtime ? @date.to_s : @date.getutc.to_s}" if @date
    out
  end
end
end