# A quick and dirty HTML parser that converts a markup string into a tree of
# plain hashes.
#
# Every element is represented as
#
#   {tag: "name", attrs: {...}}
#
# plus either a :children array (nested elements) or a :text string.
#
# The parser keeps its working state in instance variables (@chunk, @top,
# @stack, @node), which are reset at the start of every #parse call.
#
# NOTE(review): in the reviewed text the end of #parse and the beginning of
# #open_tag were missing (everything between `.gsub(/\s+` and `]*)>/)`).
# Those parts are reconstructed here from the way the remaining methods use
# @chunk, @top and @stack -- please confirm against version control.
class UnderOs::Parser::HTML
  # Parses an HTML string.
  #
  # @param html [String] the markup to parse
  # @return [Array<Hash>] the top level nodes (reconstructed return value,
  #   see the class level note)
  def parse(html)
    # strip comments first, then the whitespace that surrounds the tags
    html = html.strip.gsub(/<\!--[\s\S]*?-->/, '').gsub(/>\s+/, '>').gsub(/\s+</, '<')

    @top   = [] # nodes that have no parent
    @stack = [] # currently open nodes, innermost last
    @node  = nil

    offset = 0
    while offset < html.size
      @chunk = html[offset..-1]

      # every matcher returns the number of consumed characters or nil;
      # an unrecognized character is skipped so the loop always advances
      offset += open_tag || close_tag || plain_text || 1
    end

    @top
  end

  # Tries to consume an opening tag at the beginning of @chunk.
  #
  # The new node is attached to the innermost open node (or to the top level
  # list when nothing is open) and becomes the innermost open node itself.
  #
  # @return [Integer, nil] consumed length, or nil when @chunk does not start
  #   with an opening tag
  def open_tag
    m = @chunk.match(/\A<([a-z][a-z\d]*)([^>]*)>/)
    return unless m

    @node = {tag: m[1], attrs: parse_attrs_in(m[2])}

    if parent = @stack.last
      parent[:children] ||= []
      parent[:children] << @node
      parent.delete(:text) # it can have either text or children
    else
      @top << @node
    end

    @stack << @node
    m[0].size
  end

  # Tries to consume a closing tag at the beginning of @chunk.
  #
  # Open nodes are popped until the one with the matching tag name is found.
  # Every node popped on the way was never closed explicitly, so it is treated
  # as an empty element: its children are handed over to its parent and its
  # own :children / :text entries are dropped.
  #
  # @return [Integer, nil] consumed length, or nil when @chunk does not start
  #   with a closing tag
  def close_tag
    # digits are allowed after the first letter, so `</h1>` is recognized
    m = @chunk.match(/\A<\/([a-z][a-z\d]*)>/)
    return unless m

    while node = @stack.pop
      break if node[:tag] == m[1]
      next  if @stack.empty? # a top level node keeps what it has

      @stack.last[:children] += node[:children] || []
      node.delete(:children)
      node.delete(:text)
    end

    @node = @stack.last
    m[0].size
  end

  # Tries to consume a run of plain text (everything up to the next `<`).
  #
  # The text is stored on the innermost open node; text outside of any
  # element is consumed and discarded.
  #
  # @return [Integer, nil] consumed length, or nil when @chunk starts with `<`
  def plain_text
    m = @chunk.match(/\A([^<]+)/)
    return unless m

    @stack.last[:text] = m[1] if @stack.last
    m[0].size
  end

  # Converts the raw attributes part of an opening tag into a hash.
  #
  # Only quoted `name="value"` / `name='value'` pairs are recognized. Values
  # are normalized as follows:
  #
  # * a value equal to the attribute name (`checked="checked"`) becomes true
  # * the strings 'true' / 'false' become the corresponding booleans
  # * anything else, including an empty string, is kept as is
  #
  # @param string [String] everything between the tag name and the closing `>`
  # @return [Hash{Symbol => Object}] attributes with `data-*` entries merged
  #   by #merge_data_attrs
  def parse_attrs_in(string)
    attrs = {}

    # `*` quantifiers: one letter names and empty values are valid. With `+`
    # an empty value (`alt=""`) swallowed the text up to the next quote.
    string.scan(/([a-z][a-z_\-\d]*)=('|")(.*?)\2/) do |name, _quote, raw|
      attrs[name.to_sym] =
        case raw
        when name, 'true' then true
        when 'false'      then false
        else raw
        end
    end

    merge_data_attrs(attrs)
  end

  # Moves all `data-*` attributes into a nested hash under the :data key.
  #
  # The `data-` prefix is removed and the rest of the name is passed through
  # String#camelize, which is not core Ruby and is expected to be provided by
  # the surrounding project.
  #
  # @param hash [Hash{Symbol => Object}] attributes, modified in place
  # @return [Hash{Symbol => Object}] the same hash
  def merge_data_attrs(hash)
    # Hash#keys returns a copy, so the hash can be changed while iterating
    hash.keys.each do |key|
      name = key.to_s
      next unless name.start_with?('data-')

      hash[:data] ||= {}
      value = hash.delete(key)
      hash[:data][name.sub(/\Adata-/, '').camelize.to_sym] = value
    end

    hash
  end
end