require 'cgi'
require 'hpricot'
require 'html2haml/html/erb'
# Haml monkeypatches various Hpricot classes
# to add methods for conversion to Haml.
# @private
module Hpricot
# @see Hpricot
module Node
# Whether this node has already been converted to Haml.
# Only used for text nodes and elements.
#
# @return [Boolean]
attr_accessor :converted_to_haml
# Returns the Haml representation of the given node.
#
# @param tabs [Fixnum] The indentation level of the resulting Haml.
# @option options (see Haml::HTML#initialize)
def to_haml(tabs, options)
return "" if converted_to_haml || to_s.strip.empty?
text = uninterp(self.to_s)
node = next_node
while node.is_a?(::Hpricot::Elem) && node.name == "haml:loud"
node.converted_to_haml = true
text << '#{' <<
CGI.unescapeHTML(node.inner_text).gsub(/\n\s*/, ' ').strip << '}'
if node.next_node.is_a?(::Hpricot::Text)
node = node.next_node
text << uninterp(node.to_s)
node.converted_to_haml = true
end
node = node.next_node
end
return parse_text_with_interpolation(text, tabs)
end
private
def erb_to_interpolation(text, options)
return text unless options[:erb]
text = CGI.escapeHTML(uninterp(text))
%w[ ].each {|str| text.gsub!(CGI.escapeHTML(str), str)}
::Hpricot::XML(text).children.inject("") do |str, elem|
if elem.is_a?(::Hpricot::Text)
str + CGI.unescapeHTML(elem.to_s)
else # element
str + '#{' + CGI.unescapeHTML(elem.innerText.strip) + '}'
end
end
end
def tabulate(tabs)
' ' * tabs
end
def uninterp(text)
text.gsub('#{', '\#{') #'
end
def attr_hash
attributes.to_hash
end
def parse_text(text, tabs)
parse_text_with_interpolation(uninterp(text), tabs)
end
def parse_text_with_interpolation(text, tabs)
text.strip!
return "" if text.empty?
text.split("\n").map do |line|
line.strip!
"#{tabulate(tabs)}#{'\\' if Haml::Parser::SPECIAL_CHARACTERS.include?(line[0])}#{line}\n"
end.join
end
end
end
# @private
HAML_TAGS = %w[haml:block haml:loud haml:silent]
HAML_TAGS.each do |t|
Hpricot::ElementContent[t] = {}
Hpricot::ElementContent.keys.each do |key|
Hpricot::ElementContent[t][key.hash] = true
end
end
Hpricot::ElementContent.keys.each do |k|
HAML_TAGS.each do |el|
val = Hpricot::ElementContent[k]
val[el.hash] = true if val.is_a?(Hash)
end
end
module Haml
# Converts HTML documents into Haml templates.
# Depends on [Hpricot](http://github.com/whymirror/hpricot) for HTML parsing.
# If ERB conversion is being used, also depends on
# [Erubis](http://www.kuwata-lab.com/erubis) to parse the ERB
# and [ruby_parser](http://parsetree.rubyforge.org/) to parse the Ruby code.
#
# Example usage:
#
# Haml::HTML.new("Blat").render
# #=> "%a{:href => 'http://google.com'} Blat"
class HTML
# @param template [String, Hpricot::Node] The HTML template to convert
# @option options :erb [Boolean] (false) Whether or not to parse
# ERB's `<%= %>` and `<% %>` into Haml's `=` and `-`
# @option options :xhtml [Boolean] (false) Whether or not to parse
# the HTML strictly as XHTML
def initialize(template, options = {})
@options = options
if template.is_a? Hpricot::Node
@template = template
else
if template.is_a? IO
template = template.read
end
template = Haml::Util.check_encoding(template) {|msg, line| raise Haml::Error.new(msg, line)}
if @options[:erb]
require 'html2haml/html/erb'
template = ERB.compile(template)
end
method = @options[:xhtml] ? Hpricot.method(:XML) : method(:Hpricot)
@template = method.call(template.gsub('&', '&'))
end
end
# Processes the document and returns the result as a string
# containing the Haml template.
def render
@template.to_haml(0, @options)
end
alias_method :to_haml, :render
TEXT_REGEXP = /^(\s*).*$/
# @see Hpricot
# @private
class ::Hpricot::Doc
# @see Haml::HTML::Node#to_haml
def to_haml(tabs, options)
(children || []).inject('') {|s, c| s << c.to_haml(0, options)}
end
end
# @see Hpricot
# @private
class ::Hpricot::XMLDecl
# @see Haml::HTML::Node#to_haml
def to_haml(tabs, options)
"#{tabulate(tabs)}!!! XML\n"
end
end
# @see Hpricot
# @private
class ::Hpricot::CData
# @see Haml::HTML::Node#to_haml
def to_haml(tabs, options)
content = parse_text_with_interpolation(
erb_to_interpolation(self.content, options), tabs + 1)
"#{tabulate(tabs)}:cdata\n#{content}"
end
end
# @see Hpricot
# @private
class ::Hpricot::DocType
# @see Haml::HTML::Node#to_haml
def to_haml(tabs, options)
attrs = public_id.nil? ? ["", "", ""] :
public_id.scan(/DTD\s+([^\s]+)\s*([^\s]*)\s*([^\s]*)\s*\/\//)[0]
raise Haml::SyntaxError.new("Invalid doctype") if attrs == nil
type, version, strictness = attrs.map { |a| a.downcase }
if type == "html"
version = ""
strictness = "strict" if strictness == ""
end
if version == "1.0" || version.empty?
version = nil
end
if strictness == 'transitional' || strictness.empty?
strictness = nil
end
version = " #{version.capitalize}" if version
strictness = " #{strictness.capitalize}" if strictness
"#{tabulate(tabs)}!!!#{version}#{strictness}\n"
end
end
# @see Hpricot
# @private
class ::Hpricot::Comment
# @see Haml::HTML::Node#to_haml
def to_haml(tabs, options)
content = self.content
if content =~ /\A(\[[^\]]+\])>(.*) 1 # Multiline script block
# Normalize the indentation so that the last line is the base
indent_str = lines.last[/^[ \t]*/]
indent_re = /^[ \t]{0,#{indent_str.count(" ") + 8 * indent_str.count("\t")}}/
lines.map! {|s| s.gsub!(indent_re, '')}
# Add an extra " " to make it indented relative to "= "
lines[1..-1].each {|s| s.gsub!(/^/, " ")}
# Add | at the end, properly aligned
length = lines.map {|s| s.size}.max + 1
lines.map! {|s| "%#{-length}s|" % s}
if next_sibling && next_sibling.is_a?(Hpricot::Elem) && next_sibling.name == "haml:loud" &&
next_sibling.inner_text.split("\n").reject {|s| s.strip.empty?}.size > 1
lines << "-#"
end
end
return lines.map {|s| output + s + "\n"}.join
when "silent"
return CGI.unescapeHTML(inner_text).split("\n").map do |line|
next "" if line.strip.empty?
"#{output}- #{line.strip}\n"
end.join
when "block"
return render_children("", tabs, options)
end
end
if self.next && self.next.text? && self.next.content =~ /\A[^\s]/
if self.previous.nil? || self.previous.text? &&
(self.previous.content =~ /[^\s]\Z/ ||
self.previous.content =~ /\A\s*\Z/ && self.previous.previous.nil?)
nuke_outer_whitespace = true
else
output << "= succeed #{self.next.content.slice!(/\A[^\s]+/).dump} do\n"
tabs += 1
output << tabulate(tabs)
end
end
output << "%#{name}" unless name == 'div' &&
(static_id?(options) ||
static_classname?(options) &&
attr_hash['class'].split(' ').any?(&method(:haml_css_attr?)))
if attr_hash
if static_id?(options)
output << "##{attr_hash['id']}"
remove_attribute('id')
end
if static_classname?(options)
leftover = attr_hash['class'].split(' ').reject do |c|
next unless haml_css_attr?(c)
output << ".#{c}"
end
remove_attribute('class')
set_attribute('class', leftover.join(' ')) unless leftover.empty?
end
output << haml_attributes(options) if attr_hash.length > 0
end
output << ">" if nuke_outer_whitespace
output << "/" if empty? && !etag
if children && children.size == 1
child = children.first
if child.is_a?(::Hpricot::Text)
if !child.to_s.include?("\n")
text = child.to_haml(tabs + 1, options)
return output + " " + text.lstrip.gsub(/^\\/, '') unless text.chomp.include?("\n") || text.empty?
return output + "\n" + text
elsif ["pre", "textarea"].include?(name) ||
(name == "code" && parent.is_a?(::Hpricot::Elem) && parent.name == "pre")
return output + "\n#{tabulate(tabs + 1)}:preserve\n" +
innerText.gsub(/^/, tabulate(tabs + 2))
end
elsif child.is_a?(::Hpricot::Elem) && child.name == "haml:loud"
return output + child.to_haml(tabs + 1, options).lstrip
end
end
render_children(output + "\n", tabs, options)
end
private
def render_children(so_far, tabs, options)
(self.children || []).inject(so_far) do |output, child|
output + child.to_haml(tabs + 1, options)
end
end
def dynamic_attributes
@dynamic_attributes ||= begin
Hash[attr_hash.map do |name, value|
next if value.empty?
full_match = nil
ruby_value = value.gsub(%r{\s*(.+?)\s*}) do
full_match = $`.empty? && $'.empty?
CGI.unescapeHTML(full_match ? $1: "\#{#{$1}}")
end
next if ruby_value == value
[name, full_match ? ruby_value : %("#{ruby_value}")]
end]
end
end
def to_haml_filter(filter, tabs, options)
content =
if children.first.is_a?(::Hpricot::CData)
children.first.content
else
CGI.unescapeHTML(self.innerText)
end
content = erb_to_interpolation(content, options)
content.gsub!(/\A\s*\n(\s*)/, '\1')
original_indent = content[/\A(\s*)/, 1]
if content.split("\n").all? {|l| l.strip.empty? || l =~ /^#{original_indent}/}
content.gsub!(/^#{original_indent}/, tabulate(tabs + 1))
else
# Indentation is inconsistent. Strip whitespace from start and indent all
# to ensure valid Haml.
content.lstrip!
content.gsub!(/^/, tabulate(tabs + 1))
end
content.rstrip!
content << "\n"
"#{tabulate(tabs)}:#{filter}\n#{content}"
end
def static_attribute?(name, options)
attr_hash[name] && !dynamic_attribute?(name, options)
end
def dynamic_attribute?(name, options)
options[:erb] and dynamic_attributes.key?(name)
end
def static_id?(options)
static_attribute?('id', options) && haml_css_attr?(attr_hash['id'])
end
def static_classname?(options)
static_attribute?('class', options)
end
def haml_css_attr?(attr)
attr =~ /^[-:\w]+$/
end
# Returns a string representation of an attributes hash
# that's prettier than that produced by Hash#inspect
def haml_attributes(options)
attrs = attr_hash.sort.map do |name, value|
haml_attribute_pair(name, value, options)
end
if options[:html_style_attributes]
"(#{attrs.join(' ')})"
else
"{#{attrs.join(', ')}}"
end
end
# Returns the string representation of a single attribute key value pair
def haml_attribute_pair(name, value, options)
value = dynamic_attribute?(name, options) ? dynamic_attributes[name] : value.inspect
if options[:html_style_attributes]
"#{name}=#{value}"
else
name = name.index(/\W/) ? name.inspect : ":#{name}"
"#{name} => #{value}"
end
end
end
end
end