/* dom-to-xhtml.js is part of Aloha Editor project http://aloha-editor.org * * Aloha Editor is a WYSIWYG HTML5 inline editing library and editor. * Copyright (c) 2010-2012 Gentics Software GmbH, Vienna, Austria. * Contributors http://aloha-editor.org/contribution.php * * Aloha Editor is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or any later version. * * Aloha Editor is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * As an additional permission to the GNU GPL version 2, you may distribute * non-source (e.g., minimized or compacted) forms of the Aloha-Editor * source code without the copy of the GNU GPL normally required, * provided you include this license notice and a URL through which * recipients can access the Corresponding Source. */ /** * Provides public utility methods to convert DOM nodes to XHTML. */ define([ 'jquery', 'util/dom2', 'util/misc', 'util/browser', 'aloha/ephemera', 'aloha/console' ], function( $, Dom, Misc, Browser, Ephemera, console ) { "use strict"; /** * Elements that are to be serialized like and not like */ var emptyElements = { "area": true, "base": true, "basefont": true, "br": true, "col": true, "frame": true, "hr": true, "img": true, "input": true, "isindex": true, "link": true, "meta": true, "param": true, "embed": true }; /** * Attributes that are to be serialized like checked="checked" for any attribute value. 
*/ var booleanAttrs = { "checked": true, "compact": true, "declare": true, "defer": true, "disabled": true, "ismap": true, "multiple": true, "nohref": true, "noresize": true, "noshade": true, "nowrap": true, "readonly": true, "selected": true }; /** * Maps element names to a boolean that indicates whether IE7/IE8 doesn't recognize the element. * This is necessary to repair the broken DOM structure caused by unrecognized elements. * Contains some intial values to cover most common cases. If an * element is serialized that is not present here, it will be * examined (which may be costly) and added dynamically. * See isUnrecognized(). */ var isUnrecognizedMap = { "DIV": false, "SPAN": false, "UL": false, "OL": false, "LI": false, "TABLE": false, "TR": false, "TD": false, "TH": false, "I": false, "B": false, "EM": false, "STRONG": false, "A": false, "P": false }; /** * Encodes a string meant to be used wherever parsable character data occurs in XML. * @param str * An unencoded piece of character data * @return * The given string with & and < characters replaced with the corresponding HTML entity references. */ function encodePcdata(str) { return str.replace(/&/g, '&').replace(/{content} * into * {content} * This seems to occur with any element IE doesn't recognize. * * @param element * An element node. * @return * true if the given element isn't recognized by IE and * causes a broken DOM structure as outlined above. */ function isUnrecognized(element) { var name = element.nodeName; var unrecognized = isUnrecognizedMap[name]; if (null != unrecognized) { return unrecognized; } var closingName = "/" + element.nodeName; var sibling = element.nextSibling; unrecognized = false; while (null != sibling) { if (closingName == sibling.nodeName) { unrecognized = true; break; } sibling = sibling.nextSibling; } isUnrecognizedMap[name] = unrecognized; return unrecognized; } /** * Serializes the children of the given element into an XHTML string. 
* * The same as serializeElement() except it only serializes the children. * The start and end tag of the given element will not appear in the resulting XHTML. * * @see serializeElement() */ function serializeChildren(element, child, unrecognized, ephemera, xhtml) { while (null != child) { if (1 === child.nodeType && unrecognized && "/" + element.nodeName == child.nodeName) { child = child.nextSibling; break; } else if (1 === child.nodeType && isUnrecognized(child)) { child = serializeElement(child, child.nextSibling, true, ephemera, xhtml); } else { serialize(child, ephemera, xhtml); child = child.nextSibling; } } return child; } /** * Serializes an element into an XHTML string. * * @param element * An element to serialize. * @param child * The first child of the given element. This will usually be * element.firstChild. On IE this may be element.nextSibling because * of the broken DOM structure IE sometimes generates. * @param unrecognized * Whether the given element is unrecognized on IE. If IE doesn't * recognize the element, it will create a broken DOM structure * which has to be compensated for. See isUnrecognized() for more. * @param ephemera * Describes content that should not be serialized. * Only attrMap and attrRxs are supported at the moment. * See Ephemera.ephemera(). * @param xhtml * An array which receives the serialized element and whic, if joined, * will yield the XHTML string. * @return * null if all siblings of the given child have been processed as children * of the given element, or otherwise the first sibling of child that is not considered * a child of the given element. */ function serializeElement(element, child, unrecognized, ephemera, xhtml) { // TODO: we should only lowercase element names if they are in an HTML namespace var elementName = element.nodeName.toLowerCase(); // This is a hack around an IE bug which strips the namespace prefix // of element.nodeName if it occurs inside an contentEditable=true. 
if (element.scopeName && 'HTML' != element.scopeName && -1 === elementName.indexOf(':')) { elementName = element.scopeName.toLowerCase() + ':' + elementName; } if (!unrecognized && null == child && emptyElements[elementName]) { xhtml.push('<' + elementName + makeAttrString(element, ephemera) + '/>'); } else { xhtml.push('<' + elementName + makeAttrString(element, ephemera) + '>'); child = serializeChildren(element, child, unrecognized, ephemera, xhtml); xhtml.push(''); } return child; } /** * Serializes a DOM node into a XHTML string. * * @param node * A DOM node to serialize. * @param ephemera * Describes content that should not be serialized. * Only attrMap and attrRxs are supported at the moment. * See Ephemera.ephemera(). * @param xhtml * An array that will receive snippets of XHTML, * which if joined will yield the XHTML string. */ function serialize(node, ephemera, xhtml) { var nodeType = node.nodeType; if (1 === nodeType) { serializeElement(node, node.firstChild, isUnrecognized(node), ephemera, xhtml); } else if (3 === node.nodeType) { xhtml.push(encodePcdata(node.nodeValue)); } else if (8 === node.nodeType) { xhtml.push('<' + '!--' + node.nodeValue + '-->'); } else { console.warn('Unknown node type encountered during serialization, ignoring it:' + ' type=' + node.nodeType + ' name=' + node.nodeName + ' value=' + node.nodeValue); } } return { /** * Serializes a number of DOM nodes in an array-like object to an XHTML string. * * The XHTML of the nodes in the given array-like object will be concatenated. * * @param nodes * An array or jQuery object or another array-like object to serialize. * @param ephemera * Describes content that should not be serialized. * Only attrMap and attrRxs are supported at the moment. * See Ephemera.ephemera(). * @return * The serialized XHTML String representing the given DOM nodes in the given array-like object. * The result may look like an XML fragment with multiple top-level elements and text nodes. 
* @see nodeToXhtml() */ contentsToXhtml: function(element, ephemera) { var xhtml = []; serializeChildren(element, element.firstChild, false, ephemera, xhtml); return xhtml.join(""); }, /** * Serializes a DOM node to an XHTML string. * * Beware that the serialization method will generate XHTML as * close as possible to the DOM tree represented by the given * node. The result will only be valid XHTML if the DOM tree * doesn't violate any contained-in rules. * * Element attributes with an empty string as value will not * appear in the serialized output. * * Element attribute names are case-insensitive in HTML5, so * they may come out in mixed-case depending on what the browser * provides. * * When iterating over the DOM, CDATA sections are comment nodes * on some browsers (Chrome) and not there at all on others (IE). * This is the same as what comes out from element.innerHTML. * * IE8 bug: comments will sometimes be silently stripped inside * contentEditable=true. Conditional includes don't work inside * contentEditable=true. See the tests for more information. * * IE8 bug: a title element will not be serialized correctly * unless it occurs in the head of a HTML document, even if it occurs * in a non-HTML namespace (maybe it works with a prefix). * This will probably also apply for other HTML elements that * occur in the header. * * IE8 bug: unrecognized elements in the HTML scope will cause * broken DOM structure (some HTML5 elements that are not yet * implemented in IE for example). Some effort was made to fix a * broken DOM structure, if it is encountered. There is one case * which results in an unrecoverably broken DOM structure, which * is an unrecognized element not preceded by some text. See the * tests for further information. * * IE8 bug: whitespace is not reliably preserved when the style * white-space:pre (or similar) is used. See the tests for * further information. Whitespace inside
		 * <pre> elements will
		 * be preserved, but \n characters will become \r characters.
		 *
		 * IE7 bug: URLs in href and src attributes of a and img
		 * elements will be absolutized (including hostname and
		 * protocol) if they are given as a relative path.
		 *
		 * IE bug: Namespace support inside contentEditable=true is a
		 * bit shaky on IE. Don't use it if possible. See the tests to
		 * get an idea of what seems to work. Make namespace prefixes
		 * and element names all lower-case, as they are always
		 * lower-cased, even if the element doesn't occur in an HTML
		 * namespace. Don't use default namespaces, use prefixes (except
		 * for an HTML namespace).
		 *
		 * @param node
		 *        A DOM node to serialize
		 * @param ephemera
		 *        Describes content that should not be serialized.
		 *        Only attrMap and attrRxs are supported at the moment.
		 *        See Ephemera.ephemera().
		 * @return
		 *        The serialized XHTML string representing the given DOM node.
		 */
		nodeToXhtml: function(node, ephemera) {
			var xhtml = [];
			serialize(node, ephemera, xhtml);
			return xhtml.join("");
		}
	};
});