lib/right_develop/ci/util.rb in right_develop-2.1.3 vs lib/right_develop/ci/util.rb in right_develop-2.1.5

- old
+ new

@@ -1,5 +1,9 @@
+if RUBY_VERSION =~ /^1\.8/
+  require 'iconv'
+end
+
 module RightDevelop::CI
   module Util
     module_function

     # Regular expression used to determine which characters of a string are allowed
@@ -12,10 +16,15 @@
     JAVA_PACKAGE_SEPARATOR = '.'

     # Replacement codepoint that looks a bit like a period
     JAVE_PACKAGE_SEPARATOR_HOMOGLYPH = '&#xb7;'

+    # Regular expression that matches characters that need to be escaped inside CDATA
+    # c.f. http://www.w3.org/TR/xml11/#charsets
+    # RestrictedChar ::= [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]
+    INVALID_CDATA_CHARACTER = Regexp.new '[\x01-\x08\x0b-\x0c\x0e-\x1f\x7f-\x84\x86-\x9f]', nil, 'n' # Ruby 1.8-2.1 compatible
+
     # Make a string suitable for parsing by Jenkins JUnit display plugin by escaping any non-valid
     # Java class name characters as an XML entity. This prevents Jenkins from interpreting "hi1.2"
     # as a package-and-class name.
     #
     # @param [String] name
@@ -33,8 +42,30 @@
           result << "&#x#{chr};"
         end
       end

       result
+    end
+
+    # Strip invalid UTF-8 sequences from a string and entity-escape any character that can't legally
+    # appear inside XML CDATA. If test output contains weird data, we could end up generating
+    # invalid JUnit XML which will choke Java. Preserve the purity of essence of our precious XML
+    # fluids!
+    #
+    # @return [String] the input with all invalid UTF-8 replaced by the empty string
+    # @param [String] untrusted a string (of any encoding) that might contain invalid UTF-8 sequences
+    def purify(untrusted)
+      # First pass: strip bad UTF-8 characters
+      if RUBY_VERSION =~ /^1\.8/
+        iconv = Iconv.new('UTF-8//IGNORE', 'UTF-8')
+        result = iconv.iconv(untrusted)
+      else
+        result = untrusted.force_encoding(Encoding::BINARY).encode('UTF-8', :undef=>:replace, :replace=>'')
+      end
+
+      # Second pass: entity escape characters that can't appear in XML CDATA.
+      result.gsub(INVALID_CDATA_CHARACTER) do |ch|
+        "&#x%s;" % [ch.unpack('H*').first]
+      end
     end
   end
 end
\ No newline at end of file