# encoding: UTF-8
module Publishr
class HtmlProcessor
# Sets up the per-document state used while processing markup line by line.
#
# @param markup [String] the markup to process; also split on "\n" into @lines
# @param inpath [String] directory that is checked for an optional
#   'html_postprocessing.rb' file
# @param metadata [Hash] stored unchanged in @metadata
# @param rails_resources_url [String] stored unchanged in @rails_resources_url
def initialize(markup='',inpath='',metadata={},rails_resources_url='')
  @markup = markup
  @lines = markup.split("\n")
  @line = ''
  @inpath = inpath
  @metadata = metadata
  @rails_resources_url = rails_resources_url

  # Contents of the optional html_postprocessing.rb found in inpath, or nil
  # when there is no such file.
  # File.exist? is used because File.exists? was removed in Ruby 3.2, and
  # File.read closes the file handle, which File.open(...).read did not.
  # NOTE(review): how @custom_fixes is consumed is not visible here — confirm
  # against the rest of the class.
  custom_fixes_path = File.join(@inpath, 'html_postprocessing.rb')
  @custom_fixes = File.exist?(custom_fixes_path) ? File.read(custom_fixes_path) : nil

  # Line-processing state, reset for every new processor.
  @depth = 0
  @quotetype = nil
  @add_footnote = false
  @process_footnotes = false
  @footnote_number = 0
  @footnote_reference = ''
end
def self.sanitize(html)
modified_lines = []
quote_enabled = false
html.split("\n").each do |line|
line.gsub!('
', '
') if quote_enabled == false and (line.include?('MsoQuote') or line.include?('BodyTextQuote') or line.include?('BodyTextTranscript') or line.include?('BodyTextEmail')) modified_lines << "
\n" quote_enabled = true end if quote_enabled == true and (line.include?('MsoBodyText') or line.include?('BodyTextMod')) modified_lines << "\n" quote_enabled = false end modified_lines << line end modified_lines = modified_lines.join("\n") sanitized_html = Sanitize.clean(modified_lines, :elements => ['b','i','em','strong','code','br','var','p','blockquote','img'], :attributes => { 'img' => ['src', 'alt'] }) sanitized_html.gsub! /\[HK.*?\]/, '' return sanitized_html end def degrade processed_lines = [] @lines.each do |l| @line = l process_line processed_lines << @line end processed_lines.join("\n") end def process_line @process_footnotes = true if @line.include?('
/){ "
" } @footnote_reference = /
[#{ @footnote_number }]: " @add_footnote = false end def make_footnote_paragraph @line.gsub! /
' @line.gsub! '=======', '
' @line.gsub! /»»».*$/, '
' end def add_image_captions # @line = "\3
\3
<\/p>/, '
\2