require 'kramdown'
require 'sanitize'

module CrossOrigen
  # This is the base class of all doc formats that are
  # XML based
  class XMLDoc
    # Simple value holders for the metadata of the document being handled.
    CreationInfo = Struct.new(:author, :date, :revision, :source)
    ImportInfo = Struct.new(:name, :date)

    attr_accessor :creation_info, :import_info

    # These (in many cases illegal) tags will be forced to their valid equivalents.
    # Keys are XPath expressions (evaluated relative to the fragment), values are
    # the tag name that matching nodes are renamed to.
    # These will be executed in the defined order, so for later xfrms you can for example
    # assume that all 'rows' have already been converted to 'tr'
    HTML_TRANSFORMS = {
      'table/title'  => 'caption',
      'table//row'   => 'tr',
      'thead//entry' => 'th',
      'table//entry' => 'td',
      'td/p'         => 'span',
      'th/p'         => 'span'
    }

    # This can be used to perform additional by-node transformation if required, normally
    # this should be used if transform of a node attribute is required.
    #
    # For 'td' and 'th' nodes that carry both a 'namest' and a 'nameend' attribute
    # (e.g. 'col2' and 'col4') a 'colspan' attribute is added holding the number of
    # columns covered (3 in that example).
    HTML_TRANSFORMER = lambda do |env|
      next unless env[:node_name] == 'td' || env[:node_name] == 'th'

      node = env[:node]
      span_start = node.attr('namest')
      span_end = node.attr('nameend')
      # Both ends are needed to work out a span. A cell that only declares one of
      # them is left untouched rather than raising and aborting the whole conversion.
      next unless span_start && span_end

      first = span_start.sub('col', '').to_i
      last = span_end.sub('col', '').to_i
      node.set_attribute('colspan', (last - first + 1).to_s)
    end

    # Defines the rules for sanitization of any HTML strings that will be converted
    # to markdown for representation within Origen
    HTML_SANITIZATION_CONFIG = {
      # Only these tags will be allowed through, everything else will be stripped
      # Note that this is applied after the transforms listed above
      elements:     %w(b em i strong u p ul ol li table tr td th tbody thead),
      attributes:   {
        'td' => ['colspan'],
        'th' => ['colspan']
      },
      # Not planning to allow any of these right now, but keeping around
      # as an example of how to do so
      #:protocols => {
      #  'a' => {'href' => ['http', 'https', 'mailto']}
      # }
      transformers: HTML_TRANSFORMER
    }

    # Returns the object that included the CrossOrigen module
    attr_reader :owner

    # Stores the owner and starts with empty creation and import info records.
    def initialize(owner)
      @owner = owner
      @creation_info = CreationInfo.new
      @import_info = ImportInfo.new
    end

    # Tries the given methods on the owner and returns the first one to return a value,
    # ultimately returns nil if no value is found.
    #
    # Methods that the object does not respond to are skipped, and a nil or false
    # result does not count as a value.
    #
    # To test an object other than the owner pass it as the first argument.
    def try(*methods)
      if methods.first.is_a?(Symbol)
        obj = owner
      else
        obj = methods.shift
      end
      methods.each do |method|
        if obj.respond_to?(method)
          val = obj.send(method)
          return val if val
        end
      end
      nil
    end

    # This returns the doc wrapped by a Nokogiri doc
    #
    # The file at the given path is opened, parsed as XML and the parsed document
    # is yielded to the caller's block. The file is closed when the block returns.
    def doc(path, _options = {})
      # Loaded on demand so that Nokogiri is only required when a file is parsed
      require 'nokogiri'

      File.open(path) do |f|
        yield Nokogiri::XML(f)
      end
    end

    # Looks up the first node matching the given XPath (relative to element) and
    # returns its content, or options[:default] if there is no such node.
    #
    # Options:
    #   :format   - :string (default) or :integer, any other value raises an error
    #   :hex      - when true, integer content without a '0x' prefix is read as base 16
    #   :default  - value returned when no node matches the path (default nil)
    #   :downcase - when true, string content is downcased
    #   :return   - the method called on the node to get its content (default :text)
    #   :nil_on   - see the comment in the defaults below
    def extract(element, path, options = {})
      options = {
        format:   :string,
        hex:      false,
        default:  nil,
        downcase: false,
        return:   :text,
        # A value or array or values which are considered to be nil, if this is the value
        # to be returned then nil will be returned instead
        nil_on:   false
      }.merge(options)
      node = element.at_xpath(path)
      if node
        if options[:format] == :string
          str = node.send(options[:return]).strip
          str = str.downcase if options[:downcase]
          if options[:nil_on] && [options[:nil_on]].flatten.include?(str)
            nil
          else
            str
          end
        elsif options[:format] == :integer
          val = node.send(options[:return])
          if val =~ /^0x(.*)/
            # An explicit 0x prefix always wins over the :hex option
            Regexp.last_match[1].to_i(16)
          elsif options[:hex]
            val.to_i(16)
          else
            val.to_i(10)
          end
        else
          fail "Unknown format: #{options[:format]}"
        end
      else
        options[:default]
      end
    end

    # Freescale register descriptions are like the wild west, need to do some pre-screening
    # to approach valid HTML before handing off to other off the shelf sanitizers
    #
    # Parses the given string as an HTML fragment, renames the nodes matched by each
    # HTML_TRANSFORMS entry (in order) and returns the result as an HTML string.
    def pre_sanitize(html)
      html = Nokogiri::HTML.fragment(html)
      HTML_TRANSFORMS.each do |orig, new|
        html.xpath(".//#{orig}").each { |node| node.name = new }
      end
      html.to_html
    end

    # Does its best to convert the given html fragment to markdown
    #
    # The final markdown may still contain some HTML tags, but any weird
    # markup which may break a future markdown -> html conversion will
    # be removed
    #
    # This is deliberately best-effort: if any step raises, a fixed explanatory
    # message is returned in place of the converted text.
    def to_markdown(html, _options = {})
      cleaned = html.scrub
      cleaned = pre_sanitize(cleaned)
      cleaned = Sanitize.fragment(cleaned, HTML_SANITIZATION_CONFIG)
      Kramdown::Document.new(cleaned, input: :html).to_kramdown.strip

    rescue
      'The description could not be imported, the most likely cause of this is that it contained illegal HTML markup'
    end

    # Convert the given markdown string to HTML
    #
    # Every " and ' that is not already preceded by a backslash is backslash-escaped
    # before the string is handed to Kramdown.
    # NOTE(review): presumably this stops Kramdown turning them into typographic
    # quotes - confirm.
    #
    # The given string is not modified.
    def to_html(string, _options = {})
      # The lookbehind inspects the preceding character without consuming it, so
      # a quote at the very start of the string and runs of adjacent quotes are
      # escaped as well. The block form keeps the backslash in the replacement literal.
      escaped = string.gsub(/(?<!\\)["']/) { |quote| "\\#{quote}" }
      Kramdown::Document.new(escaped, input: :kramdown).to_html
    end

    # fetch an XML snippet passed and extract and format the data
    #
    # xml may be a String or an XML node. Returns nil (after logging a debug
    # message) if xml is nil, if it is not of the expected :type, or if it is a
    # String containing white space while :whitespace is false. The given object
    # is not modified.
    #
    # Options (an option is only applied when it is exactly true, unless noted):
    #   :type          - the class the data must be once extracted (default String)
    #   :get_text      - replace xml with xml.text before anything else
    #   :to_html       - convert a non-String xml to an HTML string
    #   :children      - with :to_html, render the node's children rather than the
    #                    node itself (only when it has children)
    #   :whitespace    - allow white space in String data
    #   :downcase, :underscore, :strip, :squeeze, :squeeze_lines, :rm_specials
    #                  - String clean-ups, applied in that order
    #   :symbolize     - return the String as a Symbol (:to_sym is an alias)
    #   :to_i, :to_dec, :to_f, :to_bool
    #                  - convert the String before returning it
    #
    # Raises if :get_text and :to_html are combined, if :symbolize is combined with
    # a numeric conversion, or if more than one numeric conversion is requested.
    #
    # underscore, squeeze_lines, to_dec and to_bool are String methods that are not
    # defined in this file, they are expected to be provided by the environment.
    def fetch(xml, options = {})
      options = {
        type:          String,
        downcase:      false,
        symbolize:     false,
        strip:         false,
        squeeze:       false,
        squeeze_lines: false,
        rm_specials:   false,
        whitespace:    false,
        get_text:      false,
        to_i:          false,
        to_html:       false,
        to_bool:       false,
        children:      false,
        to_dec:        false,
        to_f:          false,
        underscore:    false
      }.update(options)
      options[:symbolize] = options[:to_sym] if options[:to_sym]

      # Check for incompatible options
      if options[:get_text] == true && options[:to_html] == true
        fail 'Cannot use :get_text and :to_html options at the same time, exiting...'
      end
      numeric_requests = [:to_i, :to_f, :to_dec].count { |arg| options[arg] == true }
      if options[:symbolize] == true && numeric_requests > 0
        fail 'Cannot convert to a number of any type and symbolize at the same time'
      end
      fail 'Cannot select multiple numeric conversion args at the same time' if numeric_requests > 1

      if xml.nil?
        Origen.log.debug 'XML data is nil!'
        return nil
      end
      xml = xml.text if options[:get_text] == true
      # Sometimes XML snippets get sent as nodes or as Strings
      # Must skip this code if a String as it is designed to change
      # the XML node into a string
      if !xml.is_a?(String) && options[:to_html] == true && xml.children
        # If there are children to this XMl node then grab the content there
        if xml.children.empty? || options[:children] == false
          xml = xml.to_html
        else
          xml = xml.children.to_html
        end
      end
      unless xml.is_a? options[:type]
        Origen.log.debug "XML data is not of correct type '#{options[:type]}'"
        Origen.log.debug "xml is \n#{xml}"
        return nil
      end

      # No real examples yet of non-string content
      return xml unless options[:type] == String

      # Note that this check runs before :strip is applied
      if xml.match(/\s+/) && options[:whitespace] == false
        Origen.log.debug "XML data '#{xml}' cannot have white space"
        return nil
      end
      # Non-mutating variants are used so that a String handed in by the caller
      # is never changed (and may be frozen)
      xml = xml.downcase if options[:downcase] == true
      xml = xml.underscore if options[:underscore] == true
      xml = xml.strip if options[:strip] == true
      xml = xml.squeeze(' ') if options[:squeeze] == true
      xml = xml.squeeze_lines if options[:squeeze_lines] == true
      xml = xml.gsub(/[^0-9A-Za-z]/, '_') if options[:rm_specials] == true

      if options[:symbolize] == true
        xml.to_sym
      elsif options[:to_i] == true
        xml.to_i
      elsif options[:to_dec] == true
        xml.to_dec
      elsif options[:to_f] == true
        xml.to_f
      elsif [true, false].include?(xml.to_bool) && options[:to_bool] == true
        # If the string can convert to Boolean then return TrueClass or FalseClass
        xml.to_bool
      else
        xml
      end
    end
  end
end