module Panda
  module CMS
    # Converts an HTML string into an EditorJS document hash with the keys
    # "time", "blocks" and "version".
    #
    # Top-level headings, paragraphs, divs, lists and blockquotes become
    # blocks. Loose top-level text and <br> nodes are buffered and emitted as
    # paragraph blocks. Inline formatting inside paragraphs, list items and
    # quotes is kept as HTML inside the block text.
    class HtmlToEditorJsConverter
      # Raised by {.convert} for any failure that happens during conversion.
      class ConversionError < StandardError; end

      HEADER_TAGS = %w[h1 h2 h3 h4 h5 h6].freeze
      LIST_TAGS = %w[ul ol].freeze
      # Two consecutive line breaks inside inline HTML separate paragraphs.
      INLINE_PARAGRAPH_BREAK = /<br>\s*<br>/
      # Top-level <br> nodes are buffered as "\n\n"; see .extract_blocks.
      TEXT_PARAGRAPH_BREAK = /\n\n+/
      EDITOR_JS_VERSION = "2.28.2"
      private_constant :HEADER_TAGS, :LIST_TAGS, :INLINE_PARAGRAPH_BREAK,
        :TEXT_PARAGRAPH_BREAK, :EDITOR_JS_VERSION

      # Converts +html+ to an EditorJS document.
      #
      # @param html [String, Hash, nil] HTML markup, or a hash that already
      #   holds EditorJS data
      # @return [Hash] +{}+ when +html+ is blank; +html+ itself when it is a
      #   Hash with a non-blank "blocks"/:blocks entry; otherwise a new hash
      #   with "time" (epoch milliseconds), "blocks" and "version"
      # @raise [ConversionError] when parsing fails, when no block could be
      #   produced, or when any other StandardError occurs while converting
      def self.convert(html)
        return {} if html.blank?

        # Already EditorJS data: hand it back untouched.
        return html if html.is_a?(Hash) && (html["blocks"].present? || html[:blocks].present?)

        begin
          doc = Nokogiri::HTML.fragment(html.to_s)
          raise ConversionError, "Failed to parse HTML content" unless doc

          blocks = extract_blocks(doc)
          raise ConversionError, "No valid content blocks found" if blocks.empty?

          {
            "time" => Time.current.to_i * 1000,
            "blocks" => blocks,
            "version" => EDITOR_JS_VERSION
          }
        rescue Nokogiri::SyntaxError => e
          raise ConversionError, "Invalid HTML syntax: #{e.message}"
        rescue => e
          # This also catches the ConversionErrors raised above, so their
          # messages reach the caller prefixed with "Conversion failed: ".
          raise ConversionError, "Conversion failed: #{e.message}"
        end
      end

      # Builds a paragraph block.
      #
      # @param text [String] paragraph content (may contain inline HTML)
      # @return [Hash] EditorJS paragraph block with +text+ stripped
      def self.create_paragraph_block(text)
        {
          "type" => "paragraph",
          "data" => {
            "text" => text.strip
          }
        }
      end

      # Renders the children of +node+ as one inline-HTML string.
      #
      # <br> becomes "<br>", strong/b become <b>, em/i become <i>, and links
      # keep their href. Any other element is passed through via +to_html+.
      #
      # @param node [#children] a parsed element (a Nokogiri node in practice)
      # @return [String] the concatenated inline HTML, stripped
      def self.process_inline_elements(node)
        node.children.map { |child| inline_html(child) }.join.strip
      end

      # Walks the top-level nodes of +doc+ and returns the resulting blocks.
      def self.extract_blocks(doc)
        blocks = []
        pending_text = ""

        doc.children.each do |node|
          case node.name
          when "br"
            pending_text += "\n\n"
          when "text"
            pending_text = append_text(pending_text, node)
          when "div"
            pending_text = flush_pending_text(blocks, pending_text)
            node.children.each do |child|
              if child.name == "text"
                # Loose text inside a div joins the buffer and is emitted at
                # the next flush, i.e. after the div's own blocks.
                pending_text = append_text(pending_text, child)
              else
                blocks.concat(blocks_for(child))
              end
            end
          when "p", "blockquote", *HEADER_TAGS, *LIST_TAGS
            pending_text = flush_pending_text(blocks, pending_text)
            blocks.concat(blocks_for(node))
          end
        end

        # Only this final flush splits the buffer into separate paragraphs.
        blocks.concat(paragraph_blocks(pending_text, TEXT_PARAGRAPH_BREAK))
      end

      # Returns the blocks for one block-level element; [] for anything else.
      def self.blocks_for(node)
        case node.name
        when *HEADER_TAGS then [header_block(node)]
        when "p" then paragraph_blocks(process_inline_elements(node), INLINE_PARAGRAPH_BREAK)
        when *LIST_TAGS then [list_block(node)].compact
        when "blockquote" then [quote_block(node)]
        else []
        end
      end

      # Splits +text+ on +separator+ and builds a paragraph block per
      # non-blank piece.
      def self.paragraph_blocks(text, separator)
        text.split(separator)
          .map(&:strip)
          .select(&:present?)
          .map { |paragraph| create_paragraph_block(paragraph) }
      end

      # Emits +pending+ as a single paragraph block when it is not blank.
      # Returns the buffer to keep using: "" after a flush, else +pending+.
      def self.flush_pending_text(blocks, pending)
        return pending unless pending.present?

        blocks << create_paragraph_block(pending)
        ""
      end

      # Returns +pending+ with the stripped text of +node+ appended, unless
      # that text is blank.
      def self.append_text(pending, node)
        text = node.text.strip
        text.present? ? pending + text : pending
      end

      # Builds a header block; the level is the digit in the tag name.
      def self.header_block(node)
        {
          "type" => "header",
          "data" => {
            "text" => node.text.strip,
            "level" => node.name[1].to_i
          }
        }
      end

      # Builds a list block from the <li> descendants of +node+, or returns
      # nil when there are none.
      def self.list_block(node)
        items = node.css("li").map { |li| process_inline_elements(li) }
        return if items.empty?

        {
          "type" => "list",
          "data" => {
            "style" => (node.name == "ul") ? "unordered" : "ordered",
            "items" => items
          }
        }
      end

      # Builds a quote block with an empty caption and left alignment.
      def self.quote_block(node)
        {
          "type" => "quote",
          "data" => {
            "text" => process_inline_elements(node),
            "caption" => "",
            "alignment" => "left"
          }
        }
      end

      # Renders a single inline child node as HTML.
      # NOTE(review): text and href are interpolated without HTML escaping;
      # confirm that is acceptable for the content this converter receives.
      def self.inline_html(child)
        case child.name
        when "br" then "<br>"
        when "text" then child.text
        when "strong", "b" then "<b>#{child.text}</b>"
        when "em", "i" then "<i>#{child.text}</i>"
        when "a"
          # mailto: links need no special casing: the href is kept verbatim.
          %(<a href="#{child["href"]}">#{child.text.strip}</a>)
        else
          child.text? ? child.text : child.to_html
        end
      end

      private_class_method :extract_blocks, :blocks_for, :paragraph_blocks,
        :flush_pending_text, :append_text, :header_block, :list_block,
        :quote_block, :inline_html
    end
  end
end