/*! * Aloha Editor * Author & Copyright (c) 2010 Gentics Software GmbH * aloha-sales@gentics.com * Licensed unter the terms of http://www.aloha-editor.com/license.html */ /** * Provides public utility methods to convert DOM nodes to XHTML. */ define( ['aloha', 'aloha/jquery', 'aloha/console'], function( Aloha, $, console) { /** * Gets the attributes of the given element. * * @param element * An element to get the attributes for. * @return * An array of consisting of [name, value] tuples for each attribute. * Attribute values may be strings, booleans or undefined. */ function getAttrs(element) { var attrs = element.attributes; var cleanAttrs = []; for ( var i = 0; i < attrs.length; i++ ) { var attr = attrs[ i ]; if ( typeof attr.specified === "undefined" || attr.specified ) { var name = attr.nodeName; // Use jQuery to get a corrected style attribute on IE. // Otherwise prefer getAttribute() over attr.nodeValue as the // latter stringifies the attribute value. // There seems to be a jQuery bug that returns undefined // for the "checked" attribute on IE7, otherwise we // could always use jquery. var value = ( "style" === name ? $.attr(element, name) : element.getAttribute(name) ); cleanAttrs.push( [ name, value ] ); } } return cleanAttrs; } /** * Elements that are to be serialized like and not like */ var emptyElements = [ "area", "base", "basefont", "br", "col", "frame", "hr", "img", "input", "isindex", "link", "meta", "param", "embed" ]; /** * Attributes that are to be serialized like checked="checked" for any true attribute value. */ var booleanAttrs = [ "checked", "compact", "declare", "defer", "disabled", "ismap", "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly", "selected" ]; /** * Encodes a string meant to be used wherever parsable character data occurs in XML. * @param str * An unencoded piece of character data * @return * The given string with & and < characters replaced with the corresponding HTML entity references. */ function encodePcdata(str) { return str.replace(/&/g, '&').replace(/{content} * into * {content} * This seems to occur with any element IE doesn't recognize. * * @param element * An element node. * @return * true if the given element isn't recognized by IE and * causes a broken DOM structure as outlined above. */ function isUnrecognized(element) { var closingName = "/" + element.nodeName; var sibling = element.nextSibling; while (null != sibling) { if (closingName == sibling.nodeName) { return true; } sibling = sibling.nextSibling; } return false; } /** * Serializes the children of the given element into an XHTML string. * * The same as serializeElement() except it only serializes the children. * The start and end tag of the given element will not appear in the resulting XHTML. * * @see serializeElement() */ function serializeChildren(element, child, unrecognized, xhtml) { while (null != child) { if (1 === child.nodeType && unrecognized && "/" + element.nodeName == child.nodeName) { child = child.nextSibling; break; } else if (1 === child.nodeType && isUnrecognized(child)) { child = serializeElement(child, child.nextSibling, true, xhtml); } else { serialize(child, xhtml); child = child.nextSibling; } } return child; } /** * Serializes an element into an XHTML string. * * @param element * An element to serialize. * @param child * The first child of the given element. This will usually be * element.firstChild. On IE this may be element.nextSibling because * of the broken DOM structure IE sometimes generates. * @param unrecognized * Whether the given element is unrecognized on IE. If IE doesn't * recognize the element, it will create a broken DOM structure * which has to be compensated for. See isUnrecognized() for more. * @param xhtml * An array which receives the serialized element and whic, if joined, * will yield the XHTML string. * @return * null if all siblings of the given child have been processed as children * of the given element, or otherwise the first sibling of child that is not considered * a child of the given element. */ function serializeElement(element, child, unrecognized, xhtml) { // TODO: we should only lowercase element names if they are in an HTML namespace var elementName = element.nodeName.toLowerCase(); // This is a hack around an IE bug which strips the namespace prefix // of element.nodeName if it occurs inside an contentEditable=true. if (element.scopeName && 'HTML' != element.scopeName && -1 === elementName.indexOf(':')) { elementName = element.scopeName.toLowerCase() + ':' + elementName; } if ( ! unrecognized && null == child && -1 !== $.inArray(elementName, emptyElements) ) { xhtml.push('<' + elementName + makeAttrString(element) + '/>'); } else { xhtml.push('<' + elementName + makeAttrString(element) + '>'); child = serializeChildren(element, child, unrecognized, xhtml); xhtml.push(''); } return child; } /** * Serializes a DOM node into a XHTML string. * * @param node * A DOM node to serialize. * @param xhtml * An array that will receive snippets of XHTML, * which if joined will yield the XHTML string. */ function serialize(node, xhtml) { var nodeType = node.nodeType; if (1 === nodeType) { serializeElement(node, node.firstChild, isUnrecognized(node), xhtml); } else if (3 === node.nodeType) { xhtml.push(encodePcdata(node.nodeValue)); } else if (8 === node.nodeType) { xhtml.push('<' + '!--' + node.nodeValue + '-->'); } else { console.log('Unknown node type encountered during serialization, ignoring it:' + ' type=' + node.nodeType + ' name=' + node.nodeName + ' value=' + node.nodeValue); } } return { /** * Serializes a number of DOM nodes in an array-like object to an XHTML string. * * The XHTML of the nodes in the given array-like object will be concatenated. * * @param nodes * An array or jQuery object or another array-like object to serialize. * @return * The serialized XHTML String representing the given DOM nodes in the given array-like object. * The result may look like an XML fragment with multiple top-level elements and text nodes. * @see nodeToXhtml() */ contentsToXhtml: function(element) { var xhtml = []; serializeChildren(element, element.firstChild, false, xhtml); return xhtml.join(""); }, /** * Serializes a DOM node to an XHTML string. * * Beware that the serialization method will generate XHTML as * close as possible to the DOM tree represented by the given * node. The result will only be valid XHTML if the DOM tree * doesn't violate any contained-in rules. * * Element attributes with an empty string as value will not * appear in the serialized output. * * When iterating over the DOM, CDATA sections are comment nodes * on some browsers (Chrome) and not there at all on others (IE). * This is the same as what comes out from element.innerHTML. * * IE8 bug: comments will sometimes be silently stripped inside * contentEditable=true. Conditional includes don't work inside * contentEditable=true. See the tests for more information. * * IE8 bug: a title element will not be serialized correctly * unless it occurs in the head of a HTML document, even if it occurs * in a non-HTML namespace (maybe it works with a prefix). * This will probably also apply for other HTML elements that * occur in the header. * * IE8 bug: unrecognized elements in the HTML scope will cause * broken DOM structure (some HTML5 elements that are not yet * implemented in IE for example). Some effort was made to fix a * broken DOM structure, if it is encountered. There is one case * which results in an unrecoverably broken DOM structure, which * is an unrecognized element not preceded by some text. See the * tests for further information. * * IE8 bug: whitespace is not reliably preserved when the style * white-space:pre (or similar) is used. See the tests for * further information. Whitespace inside
 elements will
		 * be preserved, but \n characters will become \r characters.
		 *
		 * IE7 bug: URLs in href and src attributes of a and img
		 * elements will be absolutized (including hostname and
		 * protocol) if they are given as a relative path.
		 *
		 * IE bug: Namespace support inside contentEditable=true is a
		 * bit shaky on IE. Don't use it if possible. See the tests to
		 * get an idea of what seems to work. Make namespace prefixes
		 * and element names all lower-case, as they are always
		 * lower-cased, even if the element doesn't occur in an HTML
		 * namespace. Don't use default namespaces, use prefixes (except
		 * for an HTML namespace).
		 *
		 * @param node
		 *        A DOM node to serialize
		 * @return
		 *        The serialized XHTML string represnting the given DOM node.
		 */
		nodeToXhtml: function(node) {
			var xhtml = [];
			serialize(node, xhtml);
			return xhtml.join("");
		}
	};
});