Sha256: 3efdeb23231be6e1d3f1ea22658e2956348f54ab474644d2837f6a0a65640747

Contents?: true

Size: 1.98 KB

Versions: 2

Compression:

Stored size: 1.98 KB

Contents

require 'feedparser'
require 'feedparser/html2text-parser'
require 'feedparser/filesizes'

class String
  # Convert an HTML text to plain text.
  #
  # A copy of the receiver is fed through FeedParser::HTML2TextParser and the
  # parser's saved output is then normalised:
  #
  # * leading and trailing whitespace is removed,
  # * every run of spaces and/or tabs becomes a single space,
  # * spaces directly before or after a newline are dropped,
  # * two or more consecutive newlines become exactly one blank line.
  #
  # Returns the normalised String. The receiver itself is not modified by
  # this method (the parser is given a clone).
  def html2text
    # parse HTML
    # NOTE(review): the meaning of the +true+ constructor argument is defined
    # by HTML2TextParser, which is not visible here; it is passed unchanged.
    parser = FeedParser::HTML2TextParser::new(true)
    parser.feed(clone)
    parser.close
    text = parser.savedata
    # remove leading and trailing whitespace
    text.gsub!(/\A\s*/m, '')
    text.gsub!(/\s*\Z/m, '')
    # collapse runs of spaces/tabs *before* trimming around newlines, so that
    # tabs adjacent to a newline are trimmed too and tab-only lines count as
    # blank lines for the squeeze below
    text.gsub!(/[ \t]+/, ' ')
    # remove whitespace around \n
    text.gsub!(/ *\n/m, "\n")
    text.gsub!(/\n */m, "\n")
    # and squeeze duplicated \n down to a single blank line
    text.gsub!(/\n\n+/m, "\n\n")
    text
  end
end

module FeedParser
  class Feed
    # Render the feed header and all of its items as plain text.
    #
    # The header has one line each for type, encoding, title, link,
    # description (converted with String#html2text when present) and creator,
    # followed by an empty line. Every item is then appended, each preceded by
    # a separator line of 40 asterisks.
    #
    # localtime:: forwarded to each item's +to_text+.
    #
    # Returns the text as a String.
    def to_text(localtime = true)
      # ''.dup is a mutable buffer; << appends in place instead of building a
      # new String for every line as += would.
      s = ''.dup
      s << "Type: #{@type}\n"
      s << "Encoding: #{@encoding}\n"
      s << "Title: #{@title}\n"
      s << "Link: #{@link}\n"
      if @description
        s << "Description: #{@description.html2text}\n"
      else
        s << "Description:\n"
      end
      s << "Creator: #{@creator}\n"
      s << "\n"
      @items.each do |item|
        s << ('*' * 40) << "\n"
        s << item.to_text(localtime)
      end
      s
    end
  end

  class FeedItem
    # Render this item as plain text.
    #
    # Output layout: a "Feed:" line (feed title and link), an "Item:" line
    # (item title and link), then optional Date/Author/Subject/Category
    # lines, an empty line, the content converted with String#html2text, and
    # finally a "Files:" list with one line per enclosure. Every field that is
    # nil is simply left out.
    #
    # localtime:: when true the date is printed as stored; when false it is
    #             converted with +getutc+ first.
    #
    # Returns the text as a String.
    def to_text(localtime = true)
      s = ''.dup
      s << 'Feed: '
      # Guard against an item that has no feed attached, in the same way every
      # other field below is nil-guarded, instead of raising NoMethodError.
      if @feed
        s << "#{@feed.title} " if @feed.title
        s << "<#{@feed.link}>" if @feed.link
      end
      s << "\n"
      s << 'Item: '
      s << "#{@title} " if @title
      s << "<#{@link}>" if @link
      s << "\n"
      if @date
        shown = localtime ? @date : @date.getutc
        s << "\nDate: #{shown}"
      end
      s << "\nAuthor: #{@creator}" if @creator
      s << "\nSubject: #{@subject}" if @subject
      s << "\nCategory: #{@category}" if @category
      s << "\n\n"
      s << "#{@content.html2text}\n" if @content
      if @enclosures && @enclosures.length > 0
        s << "Files:\n"
        # Each enclosure is indexed positionally; element 1 is converted with
        # to_i and formatted via to_human_readable.
        # NOTE(review): presumably [url, length, type] -- confirm with parser.
        @enclosures.each do |e|
          s << " #{e[0]} (#{e[1].to_i.to_human_readable}, #{e[2]})\n"
        end
      end
      s
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
whistle-0.1 vendor/ruby-feedparser-0.5-stripped/lib/feedparser/text-output.rb
whistle-0.1.1 vendor/ruby-feedparser-0.5-stripped/lib/feedparser/text-output.rb