//[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] //[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] //[5] Name ::= NameStartChar (NameChar)* var nameStartChar = /[A-Z_a-z\xC0-\xD6\xD8-\xF6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]///\u10000-\uEFFFF var nameChar = new RegExp("[\\-\\.0-9"+nameStartChar.source.slice(1,-1)+"\u00B7\u0300-\u036F\\ux203F-\u2040]"); var tagNamePattern = new RegExp('^'+nameStartChar.source+nameChar.source+'*(?:\:'+nameStartChar.source+nameChar.source+'*)?$'); //var tagNamePattern = /^[a-zA-Z_][\w\-\.]*(?:\:[a-zA-Z_][\w\-\.]*)?$/ //var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',') //S_TAG, S_ATTR, S_EQ, S_V //S_ATTR_S, S_E, S_S, S_C var S_TAG = 0;//tag name offerring var S_ATTR = 1;//attr name offerring var S_ATTR_S=2;//attr name end and space offer var S_EQ = 3;//=space? var S_V = 4;//attr value(no quot value only) var S_E = 5;//attr value end and no space(quot end) var S_S = 6;//(attr value end || tag end ) && (space offer) var S_C = 7;//closed el function XMLReader(){ } XMLReader.prototype = { parse:function(source,defaultNSMap,entityMap){ var domBuilder = this.domBuilder; domBuilder.startDocument(); _copy(defaultNSMap ,defaultNSMap = {}) parse(source,defaultNSMap,entityMap, domBuilder,this.errorHandler); domBuilder.endDocument(); } } function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){ function fixedFromCharCode(code) { // String.prototype.fromCharCode does not supports // > 2 bytes unicode chars directly if (code > 0xffff) { code -= 0x10000; var surrogate1 = 0xd800 + (code >> 10) , surrogate2 = 0xdc00 + (code & 0x3ff); return String.fromCharCode(surrogate1, surrogate2); } else { return String.fromCharCode(code); } } function entityReplacer(a){ var k = a.slice(1,-1); if(k in entityMap){ return entityMap[k]; }else if(k.charAt(0) === '#'){ return fixedFromCharCode(parseInt(k.substr(1).replace('x','0x'))) }else{ errorHandler.error('entity not found:'+a); return a; } } function appendText(end){//has some bugs var xt = source.substring(start,end).replace(/&#?\w+;/g,entityReplacer); locator&&position(start); domBuilder.characters(xt,0,end-start); start = end } function position(start,m){ while(start>=endPos && (m = linePattern.exec(source))){ startPos = m.index; endPos = startPos + m[0].length; locator.lineNumber++; //console.log('line++:',locator,startPos,endPos) } locator.columnNumber = start-startPos+1; } var startPos = 0; var endPos = 0; var linePattern = /.+(?:\r\n?|\n)|.*$/g var locator = domBuilder.locator; var parseStack = [{currentNSMap:defaultNSMapCopy}] var closeMap = {}; var start = 0; while(true){ var i = source.indexOf('<',start); if(i>start){ appendText(i); } switch(source.charAt(i+1)){ case '/': var end = source.indexOf('>',i+3); var tagName = source.substring(i+2,end); var config = parseStack.pop(); var localNSMap = config.localNSMap; if(config.tagName != tagName){ errorHandler.fatalError("end tag name: "+tagName+' is not match the current start tagName:'+config.tagName ); } domBuilder.endElement(config.uri,config.localName,tagName); if(localNSMap){ for(var prefix in localNSMap){ domBuilder.endPrefixMapping(prefix) ; } } end++; break; // end elment case '?':// locator&&position(i); end = parseInstruction(source,i,domBuilder); break; case '!':// 0){ value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer); el.add(attrName,value,start-1); s = S_E; }else{ //fatalError: no end quot match throw new Error('attribute value no end \''+c+'\' match'); } }else if(s == S_V){ value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer); //console.log(attrName,value,start,p) el.add(attrName,value,start); //console.dir(el) errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!'); start = p+1; s = S_E }else{ //fatalError: no equal before throw new Error('attribute value must after "="'); } break; case '/': switch(s){ case S_TAG: el.setTagName(source.slice(start,p)); case S_E: case S_S: case S_C: s = S_C; el.closed = true; case S_V: case S_ATTR: case S_ATTR_S: break; //case S_EQ: default: throw new Error("attribute invalid close char('/')") } break; case '>': switch(s){ case S_TAG: el.setTagName(source.slice(start,p)); case S_E: case S_S: case S_C: break;//normal case S_V://Compatible state case S_ATTR: value = source.slice(start,p); if(value.slice(-1) === '/'){ el.closed = true; value = value.slice(0,-1) } case S_ATTR_S: if(s === S_ATTR_S){ value = attrName; } if(s == S_V){ errorHandler.warning('attribute "'+value+'" missed quot(")!!'); el.add(attrName,value.replace(/&#?\w+;/g,entityReplacer),start) }else{ errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!') el.add(value,value,start) } break; case S_EQ: throw new Error('attribute value missed!!'); } // console.log(tagName,tagNamePattern,tagNamePattern.test(tagName)) return p; /*xml space '\x20' | #x9 | #xD | #xA; */ case '\u0080': c = ' '; default: if(c<= ' '){//space switch(s){ case S_TAG: el.setTagName(source.slice(start,p));//tagName s = S_S; break; case S_ATTR: attrName = source.slice(start,p) s = S_ATTR_S; break; case S_V: var value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer); errorHandler.warning('attribute "'+value+'" missed quot(")!!'); el.add(attrName,value,start) case S_E: s = S_S; break; //case S_S: //case S_EQ: //case S_ATTR_S: // void();break; //case S_C: //ignore warning } }else{//not space //S_TAG, S_ATTR, S_EQ, S_V //S_ATTR_S, S_E, S_S, S_C switch(s){ //case S_TAG:void();break; //case S_ATTR:void();break; //case S_V:void();break; case S_ATTR_S: errorHandler.warning('attribute "'+attrName+'" missed value!! "'+attrName+'" instead!!') el.add(attrName,attrName,start); start = p; s = S_ATTR; break; case S_E: errorHandler.warning('attribute space is required"'+attrName+'"!!') case S_S: s = S_ATTR; start = p; break; case S_EQ: s = S_V; start = p; break; case S_C: throw new Error("elements closed character '/' and '>' must be connected to"); } } } p++; } } /** * @return end of the elementStartPart(end of elementEndPart for selfClosed el) */ function appendElement(el,domBuilder,parseStack){ var tagName = el.tagName; var localNSMap = null; var currentNSMap = parseStack[parseStack.length-1].currentNSMap; var i = el.length; while(i--){ var a = el[i]; var qName = a.qName; var value = a.value; var nsp = qName.indexOf(':'); if(nsp>0){ var prefix = a.prefix = qName.slice(0,nsp); var localName = qName.slice(nsp+1); var nsPrefix = prefix === 'xmlns' && localName }else{ localName = qName; prefix = null nsPrefix = qName === 'xmlns' && '' } //can not set prefix,because prefix !== '' a.localName = localName ; //prefix == null for no ns prefix attribute if(nsPrefix !== false){//hack!! if(localNSMap == null){ localNSMap = {} _copy(currentNSMap,currentNSMap={}) } currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value; a.uri = 'http://www.w3.org/2000/xmlns/' domBuilder.startPrefixMapping(nsPrefix, value) } } var i = el.length; while(i--){ a = el[i]; var prefix = a.prefix; if(prefix){//no prefix attribute has no namespace if(prefix === 'xml'){ a.uri = 'http://www.w3.org/XML/1998/namespace'; }if(prefix !== 'xmlns'){ a.uri = currentNSMap[prefix] } } } var nsp = tagName.indexOf(':'); if(nsp>0){ prefix = el.prefix = tagName.slice(0,nsp); localName = el.localName = tagName.slice(nsp+1); }else{ prefix = null;//important!! localName = el.localName = tagName; } //no prefix element has default namespace var ns = el.uri = currentNSMap[prefix || '']; domBuilder.startElement(ns,localName,tagName,el); //endPrefixMapping and startPrefixMapping have not any help for dom builder //localNSMap = null if(el.closed){ domBuilder.endElement(ns,localName,tagName); if(localNSMap){ for(prefix in localNSMap){ domBuilder.endPrefixMapping(prefix) } } }else{ el.currentNSMap = currentNSMap; el.localNSMap = localNSMap; parseStack.push(el); } } function parseHtmlSpecialContent(source,elStartEnd,tagName,entityReplacer,domBuilder){ if(/^(?:script|textarea)$/i.test(tagName)){ var elEndStart = source.indexOf('',elStartEnd); var text = source.substring(elStartEnd+1,elEndStart); if(/[&<]/.test(text)){ if(/^script$/i.test(tagName)){ //if(!/\]\]>/.test(text)){ //lexHandler.startCDATA(); domBuilder.characters(text,0,text.length); //lexHandler.endCDATA(); return elEndStart; //} }//}else{//text area text = text.replace(/&#?\w+;/g,entityReplacer); domBuilder.characters(text,0,text.length); return elEndStart; //} } } return elStartEnd+1; } function fixSelfClosed(source,elStartEnd,tagName,closeMap){ //if(tagName in closeMap){ var pos = closeMap[tagName]; if(pos == null){ //console.log(tagName) pos = closeMap[tagName] = source.lastIndexOf('') } return pos',start+4); //append comment source.substring(4,end)//