require "sablon/html/ast"
require "sablon/html/visitor"
module Sablon
class HTMLConverter
class ASTBuilder
Layer = Struct.new(:items, :ilvl)
def initialize(nodes)
@layers = [Layer.new(nodes, false)]
@root = Root.new([])
end
def to_ast
@root
end
def new_layer(ilvl: false)
@layers.push Layer.new([], ilvl)
end
def next
current_layer.items.shift
end
def push(node)
@layers.last.items.push node
end
def push_all(nodes)
nodes.each(&method(:push))
end
def done?
!current_layer.items.any?
end
def nested?
ilvl > 0
end
def ilvl
@layers.select { |layer| layer.ilvl }.size - 1
end
def emit(node)
@root.nodes << node
end
private
def current_layer
if @layers.any?
last_layer = @layers.last
if last_layer.items.any?
last_layer
else
@layers.pop
current_layer
end
else
Layer.new([], false)
end
end
end
def process(input)
processed_ast(input).to_docx
end
def processed_ast(input)
ast = build_ast(input)
ast
end
def build_ast(input)
doc = Nokogiri::HTML.fragment(input)
@builder = ASTBuilder.new(doc.children)
while !@builder.done?
ast_next_paragraph
end
@builder.to_ast
end
private
def ast_next_paragraph
node = @builder.next
if node.name == 'div' || node.name == 'p'
@builder.new_layer
@builder.emit Paragraph.new('Paragraph', text(node.children))
elsif node.name == 'ul'
@builder.new_layer ilvl: true
unless @builder.nested?
@definition = Sablon::Numbering.instance.register('ListBullet')
end
@builder.push_all(node.children)
elsif node.name == 'ol'
@builder.new_layer ilvl: true
unless @builder.nested?
@definition = Sablon::Numbering.instance.register('ListNumber')
end
@builder.push_all(node.children)
elsif node.name == 'li'
@builder.new_layer
@builder.emit ListParagraph.new(@definition.style, text(node.children), @definition.numid, @builder.ilvl)
elsif node.text?
# SKIP?
else
raise ArgumentError, "Don't know how to handle node: #{node.inspect}"
end
end
def text(nodes)
runs = nodes.map do |node|
if node.text?
Text.new(node.text)
elsif node.name == 'br'
Newline.new
elsif node.name == 'strong'
Bold.new(node.text)
elsif node.name == 'em'
Italic.new(node.text)
elsif ['ul', 'ol', 'p', 'div'].include?(node.name)
@builder.push(node)
nil
else
raise ArgumentError, "Don't know how to handle node: #{node.inspect}"
end
end
Collection.new(runs.compact)
end
end
end