define("dijit/_editor/html", [
	"dojo/_base/lang", // lang.isString
	"dojo/_base/sniff", // has("ie")
	".." // for exporting symbols to dijit._editor (remove for 2.0)
], function(lang, has, dijit){

	// module:
	//		dijit/_editor/html
	// summary:
	//		Utility functions used by the editor to serialize DOM nodes back
	//		into markup text. The functions are attached to dijit._editor,
	//		which is also the module's return value.

	lang.getObject("_editor", true, dijit);

	dijit._editor.escapeXml = function(/*String*/ str, /*Boolean?*/ noSingleQuotes){
		// summary:
		//		Adds escape sequences for special characters in XML: &<>"'
		//		Optionally skips escapes for single quotes.
		// str:
		//		The text to escape.
		// noSingleQuotes:
		//		When truthy, single quotes are left untouched.
		// returns:
		//		The escaped string.

		// The ampersand must be replaced first, otherwise the ampersands
		// introduced by the other entities would be escaped a second time.
		str = str.replace(/&/gm, "&amp;")
			.replace(/</gm, "&lt;")
			.replace(/>/gm, "&gt;")
			.replace(/"/gm, "&quot;");
		if(!noSingleQuotes){
			str = str.replace(/'/gm, "&#39;");
		}
		return str; // string
	};

	dijit._editor.getNodeHtml = function(/* DomNode */ node){
		// summary:
		//		Returns the markup for a single DOM node, including its
		//		descendants.
		// description:
		//		Elements are written with a lower-cased tag name and their
		//		attributes sorted by name. Attributes whose name starts with
		//		"_dj" are omitted, and when a "_djrealurl" attribute is set its
		//		value is written in place of the src/href value. Text and CDATA
		//		values are escaped with escapeXml (single quotes kept). Comment
		//		nodes are written as <!--...--> around the escaped value. Any
		//		other node type produces an empty string.
		// node:
		//		The node to serialize.
		// returns:
		//		The markup string.
		var output;
		switch(node.nodeType){
			case 1: // element node
				var lName = node.nodeName.toLowerCase();
				if(!lName || lName.charAt(0) == "/"){
					// IE does some strange things with malformed HTML input, like
					// treating a close tag </span> without an open tag <span>, as
					// a new tag with tagName of /span. Corrupts output HTML, remove
					// them. Other browsers don't prefix tags that way, so will
					// never show up.
					return "";
				}
				output = '<' + lName;

				// store the list of attributes and sort it to have the
				// attributes appear in the dictionary order
				var attrarray = [];
				var attr;
				if(has("ie") && node.outerHTML){
					// On IE the attribute names are recovered from the opening
					// tag text of outerHTML rather than from node.attributes.
					var s = node.outerHTML;
					s = s.substr(0, s.indexOf('>'))
						.replace(/(['"])[^"']*\1/g, ''); // to make the following regexp safe
					var reg = /(\b\w+)\s?=/g;
					var m, key;
					while((m = reg.exec(s))){
						key = m[1];
						if(key.substr(0, 3) != '_dj'){
							if(key == 'src' || key == 'href'){
								if(node.getAttribute('_djrealurl')){
									attrarray.push([key, node.getAttribute('_djrealurl')]);
									continue;
								}
							}
							var val, match;
							switch(key){
								case 'style':
									val = node.style.cssText.toLowerCase();
									break;
								case 'class':
									val = node.className;
									break;
								case 'width':
									if(lName === "img"){
										// This somehow gets lost on IE for IMG tags and the like
										// and we have to find it in outerHTML, known IE oddity.
										match = /width=(\S+)/i.exec(s);
										if(match){
											val = match[1];
										}
										break;
									}
									// falls through: for non-img elements the generic
									// getAttribute lookup under "default" is used
								case 'height':
									if(lName === "img"){
										// This somehow gets lost on IE for IMG tags and the like
										// and we have to find it in outerHTML, known IE oddity.
										match = /height=(\S+)/i.exec(s);
										if(match){
											val = match[1];
										}
										break;
									}
									// falls through
								default:
									val = node.getAttribute(key);
							}
							if(val != null){
								attrarray.push([key, val.toString()]);
							}
						}
					}
				}else{
					var i = 0;
					while((attr = node.attributes[i++])){
						// ignore all attributes starting with _dj which are
						// internal temporary attributes used by the editor
						var n = attr.name;
						if(n.substr(0, 3) != '_dj'){
							var v = attr.value;
							if(n == 'src' || n == 'href'){
								if(node.getAttribute('_djrealurl')){
									v = node.getAttribute('_djrealurl');
								}
							}
							attrarray.push([n, v]);
						}
					}
				}
				attrarray.sort(function(a, b){
					return a[0] < b[0] ? -1 : (a[0] == b[0] ? 0 : 1);
				});
				var j = 0;
				while((attr = attrarray[j++])){
					// Values are wrapped in double quotes, so single quotes
					// inside them are left unescaped.
					output += ' ' + attr[0] + '="' +
						(lang.isString(attr[1]) ? dijit._editor.escapeXml(attr[1], true) : attr[1]) + '"';
				}
				if(lName === "script"){
					// Browsers handle script tags differently in how you get content,
					// but innerHTML always seems to work, so insert its content that way
					// Yes, it's bad to allow script tags in the editor code, but some people
					// seem to want to do it, so we need to at least return them right.
					// other plugins/filters can strip them.
					output += '>' + node.innerHTML + '</' + lName + '>';
				}else{
					if(node.childNodes.length){
						output += '>' + dijit._editor.getChildrenHtml(node) + '</' + lName + '>';
					}else{
						switch(lName){
							case 'br':
							case 'hr':
							case 'img':
							case 'input':
							case 'base':
							case 'meta':
							case 'area':
							case 'basefont':
								// These should all be singly closed
								output += ' />';
								break;
							default:
								// Assume XML style separate closure for everything else.
								output += '></' + lName + '>';
						}
					}
				}
				break;
			case 4: // cdata
			case 3: // text
				output = dijit._editor.escapeXml(node.nodeValue, true);
				break;
			case 8: // comment
				// The value is escaped, so a ">" inside it cannot end the
				// emitted comment early.
				output = '<!--' + dijit._editor.escapeXml(node.nodeValue, true) + '-->';
				break;
			default:
				// Every other node type contributes nothing to the output.
				output = "";
		}
		return output;
	};

	dijit._editor.getChildrenHtml = function(/* DomNode */ dom){
		// summary:
		//		Returns the html content of a DomNode and children
		// dom:
		//		A node whose childNodes are serialized. If the argument has no
		//		childNodes property it is itself indexed as the list of nodes.
		//		A falsy argument yields an empty string.
		// returns:
		//		The concatenated getNodeHtml output of the accepted nodes.
		var out = "";
		if(!dom){
			return out;
		}
		var nodes = dom["childNodes"] || dom;

		// IE issue.
		// If we have an actual node we can check parent relationships on for IE,
		// We should check, as IE sometimes builds invalid DOMS. If no parent, we can't check
		// And should just process it and hope for the best.
		var checkParent = !has("ie") || nodes !== dom;

		var node, i = 0;
		while((node = nodes[i++])){
			// IE is broken. DOMs are supposed to be a tree. But in the case of malformed HTML, IE generates a graph
			// meaning one node ends up with multiple references (multiple parents). This is totally wrong and invalid, but
			// such is what it is. We have to keep track and check for this because otherwise the source output HTML will have dups.
			// No other browser generates a graph. Leave it to IE to break a fundamental DOM rule. So, we check the parent if we can
			// If we can't, nothing more we can do other than walk it.
			if(!checkParent || node.parentNode == dom){
				out += dijit._editor.getNodeHtml(node);
			}
		}
		return out; // String
	};

	return dijit._editor;
});