lib/right_develop/ci/util.rb in right_develop-2.1.3 vs lib/right_develop/ci/util.rb in right_develop-2.1.5
- old
+ new
@@ -1,5 +1,9 @@
+if RUBY_VERSION =~ /^1\.8/
+ require 'iconv'
+end
+
module RightDevelop::CI
module Util
module_function
# Regular expression used to determine which characters of a string are allowed
@@ -12,10 +16,15 @@
JAVA_PACKAGE_SEPARATOR = '.'
# Replacement codepoint that looks a bit like a period
JAVE_PACKAGE_SEPARATOR_HOMOGLYPH = '·'
+ # Regular expression that matches characters that need to be escaped inside CDATA
+ # c.f. http://www.w3.org/TR/xml11/#charsets
+ # RestrictedChar ::= [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]
+ INVALID_CDATA_CHARACTER = Regexp.new '[\x01-\x08\x0b-\x0c\x0e-\x1f\x7f-\x84\x86-\x9f]', nil, 'n' # Ruby 1.8-2.1 compatible
+
# Make a string suitable for parsing by Jenkins JUnit display plugin by escaping any non-valid
# Java class name characters as an XML entity. This prevents Jenkins from interpreting "hi1.2"
# as a package-and-class name.
#
# @param [String] name
@@ -33,8 +42,30 @@
result << "&#x#{chr};"
end
end
result
+ end
+
+ # Strip invalid UTF-8 sequences from a string and entity-escape any character that can't legally
+ # appear inside XML CDATA. If test output contains weird data, we could end up generating
+ # invalid JUnit XML which will choke Java. Preserve the purity of essence of our precious XML
+ # fluids!
+ #
+ # @return [String] the input with all invalid UTF-8 replaced by the empty string
+ # @param [String] untrusted a string (of any encoding) that might contain invalid UTF-8 sequences
+ def purify(untrusted)
+ # First pass: strip bad UTF-8 characters
+ if RUBY_VERSION =~ /^1\.8/
+ iconv = Iconv.new('UTF-8//IGNORE', 'UTF-8')
+ result = iconv.iconv(untrusted)
+ else
+ result = untrusted.force_encoding(Encoding::BINARY).encode('UTF-8', :undef=>:replace, :replace=>'')
+ end
+
+ # Second pass: entity escape characters that can't appear in XML CDATA.
+ result.gsub(INVALID_CDATA_CHARACTER) do |ch|
+ "&#x%s;" % [ch.unpack('H*').first]
+ end
end
end
end
\ No newline at end of file