sax.js 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644
  1. var NAMESPACE = require("./conventions").NAMESPACE;
  // XML 1.0 name productions used to validate element and attribute names:
  //   [4]  NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
  //   [4a] NameChar      ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
  //   [5]  Name          ::= NameStartChar (NameChar)*
  // The character class below leaves out ":" (the prefix separator is handled
  // by tagNamePattern) and the supplementary range #x10000-#xEFFFF.
  var nameStartChar = /[A-Z_a-z\xC0-\xD6\xD8-\xF6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/;
  // NameChar reuses the NameStartChar ranges (brackets stripped) plus the extra characters.
  var nameChar = new RegExp([
    '[',
    '\\-\\.0-9',
    nameStartChar.source.slice(1, -1),
    '\\u00B7\\u0300-\\u036F\\u203F-\\u2040',
    ']'
  ].join(''));
  // A whole name: "local" or "prefix:local", each part being NameStartChar NameChar*.
  // Simplified ASCII equivalent: /^[a-zA-Z_][\w\-\.]*(?:\:[a-zA-Z_][\w\-\.]*)?$/
  var tagNamePattern = new RegExp([
    '^',
    nameStartChar.source, nameChar.source, '*',
    '(?::', nameStartChar.source, nameChar.source, '*)?',
    '$'
  ].join(''));

  // States of the start-tag scanner (parseElementStartPart).
  var S_TAG = 0;               // reading the tag name
  var S_ATTR = 1;              // reading an attribute name
  var S_ATTR_SPACE = 2;        // attribute name finished, whitespace seen
  var S_EQ = 3;                // "=" seen (optionally followed by whitespace)
  var S_ATTR_NOQUOT_VALUE = 4; // reading an unquoted attribute value
  var S_ATTR_END = 5;          // quoted attribute value just closed, no whitespace yet
  var S_TAG_SPACE = 6;         // whitespace after the tag name or after an attribute value
  var S_TAG_CLOSE = 7;         // "/" of a self-closing element seen, e.g. <el />
  /**
   * Error type that the parse loop in this file rethrows instead of reporting
   * it through the error handler, so it propagates out of the SAX parser.
   *
   * @param {string} message Human-readable description of the failure.
   * @param {any?} locator Optional details about the location in the source.
   * @constructor
   */
  function ParseError(message, locator) {
    this.message = message;
    this.locator = locator;
    // captureStackTrace is not available in every engine, hence the feature test.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, ParseError);
    }
  }
  // Inherit from Error so `instanceof Error` holds for ParseError instances.
  ParseError.prototype = new Error();
  ParseError.prototype.name = ParseError.name;
  /**
   * SAX-style reader. `domBuilder` and `errorHandler` are read from the
   * instance in `parse`, so they must be assigned before calling it.
   *
   * @constructor
   */
  function XMLReader(){
  }
  XMLReader.prototype = {
    /**
     * Parses `source`, bracketing the run with startDocument/endDocument on
     * the DOM builder.
     *
     * @param {string} source The markup to parse.
     * @param {Object} defaultNSMap Initial prefix -> namespace URI bindings; copied, not mutated.
     * @param {Object} entityMap Entity name -> replacement text.
     */
    parse: function(source, defaultNSMap, entityMap){
      var builder = this.domBuilder;
      builder.startDocument();
      // Work on a private copy so the caller's namespace map is left untouched.
      var nsMapCopy = {};
      _copy(defaultNSMap, nsMapCopy);
      parse(source, nsMapCopy, entityMap, builder, this.errorHandler);
      builder.endDocument();
    }
  }
  /**
   * Main tokenizer loop: walks `source` from '<' to '<', emitting character
   * data between tags and dispatching on the character after each '<'
   * (end tag, processing instruction, '<!' construct, or start tag).
   *
   * @param {string} source The markup to parse.
   * @param {Object} defaultNSMapCopy Prefix -> namespace URI map used as the root scope.
   * @param {Object} entityMap Entity name -> replacement text; only own properties are consulted.
   * @param {Object} domBuilder Receiver of the SAX events (characters, endElement, ...).
   * @param {Object} errorHandler Object exposing warning / error / fatalError.
   * @throws {ParseError} Rethrown untouched; any other exception raised while
   *   handling a tag is reported via errorHandler.error and parsing continues.
   */
  function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){
    // String.fromCharCode only handles UTF-16 code units, so code points
    // above 0xFFFF are split into a surrogate pair by hand.
    function fixedFromCharCode(code) {
      if (code > 0xffff) {
        code -= 0x10000;
        var surrogate1 = 0xd800 + (code >> 10)
          , surrogate2 = 0xdc00 + (code & 0x3ff);
        return String.fromCharCode(surrogate1, surrogate2);
      } else {
        return String.fromCharCode(code);
      }
    }
    // Replacement callback for /&#?\w+;/g matches: named entities come from
    // entityMap, "#..." references are decoded numerically, anything else is
    // reported and left as written.
    function entityReplacer(a){
      var k = a.slice(1,-1);
      // Own-property check: `k in entityMap` would also match inherited members
      // such as "constructor" or "toString" and splice a function into the text.
      if(Object.prototype.hasOwnProperty.call(entityMap, k)){
        return entityMap[k];
      }else if(k.charAt(0) === '#'){
        // "#x41" becomes "0x41" so parseInt reads it as hexadecimal.
        return fixedFromCharCode(parseInt(k.substr(1).replace('x','0x')))
      }else{
        errorHandler.error('entity not found:'+a);
        return a;
      }
    }
    // Emits source[start, end) as character data (entities replaced) and
    // advances `start`. The original author flagged this helper as having
    // known bugs.
    function appendText(end){
      if(end>start){
        var xt = source.substring(start,end).replace(/&#?\w+;/g,entityReplacer);
        locator&&position(start);
        domBuilder.characters(xt,0,end-start);
        start = end
      }
    }
    // Advances the line bookkeeping until offset `p` lies inside the current
    // line, then stores the 1-based column on the locator.
    function position(p,m){
      while(p>=lineEnd && (m = linePattern.exec(source))){
        lineStart = m.index;
        lineEnd = lineStart + m[0].length;
        locator.lineNumber++;
      }
      locator.columnNumber = p-lineStart+1;
    }
    var lineStart = 0;
    var lineEnd = 0;
    // Global regex: each exec() call yields the next line of `source`.
    var linePattern = /.*(?:\r\n?|\n)|.*$/g
    var locator = domBuilder.locator;

    // Stack of open elements; the bottom entry only carries the root namespace scope.
    var parseStack = [{currentNSMap:defaultNSMapCopy}]
    // Cache used by fixSelfClosed (tag name -> position of its last close tag).
    var closeMap = {};
    var start = 0;
    while(true){
      try{
        var tagStart = source.indexOf('<',start);
        if(tagStart<0){
          // No more markup: non-whitespace leftovers become a text node
          // appended directly to the document.
          if(!source.substr(start).match(/^\s*$/)){
            var doc = domBuilder.doc;
            var text = doc.createTextNode(source.substr(start));
            doc.appendChild(text);
            domBuilder.currentElement = text;
          }
          return;
        }
        if(tagStart>start){
          appendText(tagStart);
        }
        switch(source.charAt(tagStart+1)){
        case '/':
          // End tag.
          var end = source.indexOf('>',tagStart+3);
          var tagName = source.substring(tagStart + 2, end).replace(/[ \t\n\r]+$/g, '');
          var config = parseStack.pop();
          if(end<0){

            tagName = source.substring(tagStart+2).replace(/[\s<].*/,'');
            errorHandler.error("end tag name: "+tagName+' is not complete:'+config.tagName);
            end = tagStart+1+tagName.length;
          }else if(tagName.match(/\s</)){
            tagName = tagName.replace(/[\s<].*/,'');
            errorHandler.error("end tag name: "+tagName+' maybe not complete');
            end = tagStart+1+tagName.length;
          }
          var localNSMap = config.localNSMap;
          var endMatch = config.tagName == tagName;
          var endIgnoreCaseMach = endMatch || config.tagName&&config.tagName.toLowerCase() == tagName.toLowerCase()
          if(endIgnoreCaseMach){
            domBuilder.endElement(config.uri,config.localName,tagName);
            if(localNSMap){
              for(var prefix in localNSMap){
                domBuilder.endPrefixMapping(prefix) ;
              }
            }
            if(!endMatch){
              errorHandler.fatalError("end tag name: "+tagName+' is not match the current start tagName:'+config.tagName ); // No known test case
            }
          }else{
            // Not the element on top of the stack: keep that element open.
            parseStack.push(config)
          }

          end++;
          break;
        case '?':// <?...?>
          locator&&position(tagStart);
          end = parseInstruction(source,tagStart,domBuilder);
          break;
        case '!':// <!doctype,<![CDATA,<!--
          locator&&position(tagStart);
          end = parseDCC(source,tagStart,domBuilder,errorHandler);
          break;
        default:
          // Start tag.
          locator&&position(tagStart);
          var el = new ElementAttributes();
          var currentNSMap = parseStack[parseStack.length-1].currentNSMap;
          var end = parseElementStartPart(source,tagStart,el,currentNSMap,entityReplacer,errorHandler);
          var len = el.length;

          if(!el.closed && fixSelfClosed(source,end,el.tagName,closeMap)){
            el.closed = true;
            if(!entityMap.nbsp){
              errorHandler.warning('unclosed xml attribute');
            }
          }
          if(locator && len){
            var locator2 = copyLocator(locator,{});
            // Give every attribute its own position, then hand the builder
            // the element's position (locator2) while the element is appended.
            for(var i = 0;i<len;i++){
              var a = el[i];
              position(a.offset);
              a.locator = copyLocator(locator,{});
            }
            domBuilder.locator = locator2
            if(appendElement(el,domBuilder,currentNSMap)){
              parseStack.push(el)
            }
            domBuilder.locator = locator;
          }else{
            if(appendElement(el,domBuilder,currentNSMap)){
              parseStack.push(el)
            }
          }

          if (NAMESPACE.isHTML(el.uri) && !el.closed) {
            end = parseHtmlSpecialContent(source,end,el.tagName,entityReplacer,domBuilder)
          } else {
            end++;
          }
        }
      }catch(e){
        if (e instanceof ParseError) {
          throw e;
        }
        errorHandler.error('element parse error: '+e)
        end = -1;
      }
      if(end>start){
        start = end;
      }else{
        // No progress was made: emit the '<' (or next character) as text.
        // TODO: the SAX position may move backwards here, so locator
        // positions can be wrong.
        appendText(Math.max(tagStart,start)+1);
      }
    }
  }
  /**
   * Copies the line/column position from one locator onto another.
   *
   * @param {Object} f Locator to read `lineNumber` and `columnNumber` from.
   * @param {Object} t Locator (or plain object) that receives both fields.
   * @return {Object} `t`, to allow `copyLocator(locator, {})`.
   */
  function copyLocator(f, t) {
    t.lineNumber = f.lineNumber;
    t.columnNumber = f.columnNumber;

    return t;
  }
  /**
   * Scans a start tag character by character, beginning at the '<' at
   * `start`, and records the tag name and attributes on `el`. The scanner is
   * a small state machine driven by the S_* constants.
   *
   * @param {string} source The markup being parsed.
   * @param {number} start Offset of the '<' that opens the tag.
   * @param {Object} el Receiver exposing setTagName, addValue, attributeNames and `closed`.
   * @param {Object} currentNSMap Namespace scope; its default namespace decides
   *   whether value-less boolean attributes are reported.
   * @param {function(string): string} entityReplacer Replacement callback for entity references.
   * @param {Object} errorHandler Object exposing warning / error / fatalError.
   * @return {number} Offset of the closing '>' or, if input ended first, of the end of input.
   * @throws {Error} On malformed attribute syntax (misplaced '=', unterminated quote, ...).
   */
  function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,errorHandler){

    /**
     * Stores an attribute on `el`, reporting a fatal error for duplicates.
     * @param {string} qname
     * @param {string} value
     * @param {number} startIndex
     */
    function addAttribute(qname, value, startIndex) {
      // Call through Object.prototype: attributeNames is a plain object keyed by
      // attribute name, so an attribute called "hasOwnProperty" would otherwise
      // shadow the method and make this check throw.
      if (Object.prototype.hasOwnProperty.call(el.attributeNames, qname)) {
        errorHandler.fatalError('Attribute ' + qname + ' redefined')
      }
      el.addValue(qname, value, startIndex)
    }
    var attrName;
    var value;
    var p = ++start; // skip the '<'
    var s = S_TAG; // current scanner state
    while(true){
      var c = source.charAt(p);
      switch(c){
      case '=':
        if(s === S_ATTR){// end of attribute name
          attrName = source.slice(start,p);
          s = S_EQ;
        }else if(s === S_ATTR_SPACE){
          s = S_EQ;
        }else{
          // '=' is only valid after an attribute name (optionally followed by space)
          throw new Error('attribute equal must after attrName'); // No known test case
        }
        break;
      case '\'':
      case '"':
        if(s === S_EQ || s === S_ATTR
          ){
          if(s === S_ATTR){
            // quote directly after the name: tolerate the missing '='
            errorHandler.warning('attribute value must after "="')
            attrName = source.slice(start,p)
          }
          start = p+1;
          p = source.indexOf(c,start) // jump to the matching quote
          if(p>0){
            value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
            addAttribute(attrName, value, start-1);
            s = S_ATTR_END;
          }else{
            // no matching end quote
            throw new Error('attribute value no end \''+c+'\' match');
          }
        }else if(s == S_ATTR_NOQUOT_VALUE){
          // a quote terminating a value that had no opening quote
          value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
          addAttribute(attrName, value, start);
          errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!');
          start = p+1;
          s = S_ATTR_END
        }else{
          // quote without a preceding '='
          throw new Error('attribute value must after "="'); // No known test case
        }
        break;
      case '/':
        switch(s){
        case S_TAG:
          el.setTagName(source.slice(start,p));
          // fall through
        case S_ATTR_END:
        case S_TAG_SPACE:
        case S_TAG_CLOSE:
          s =S_TAG_CLOSE;
          el.closed = true;
          // fall through
        case S_ATTR_NOQUOT_VALUE:
        case S_ATTR:
        case S_ATTR_SPACE:
          // inside a name or unquoted value the '/' is kept as ordinary text
          break;
        default:
          throw new Error("attribute invalid close char('/')") // No known test case
        }
        break;
      case ''://end of input
        errorHandler.error('unexpected end of input');
        if(s == S_TAG){
          el.setTagName(source.slice(start,p));
        }
        return p;
      case '>':
        switch(s){
        case S_TAG:
          el.setTagName(source.slice(start,p));
          // fall through
        case S_ATTR_END:
        case S_TAG_SPACE:
        case S_TAG_CLOSE:
          break;//normal
        case S_ATTR_NOQUOT_VALUE://Compatible state
        case S_ATTR:
          value = source.slice(start,p);
          if(value.slice(-1) === '/'){
            // trailing '/' belongs to a self-closing tag, not to the value
            el.closed = true;
            value = value.slice(0,-1)
          }
          // fall through
        case S_ATTR_SPACE:
          if(s === S_ATTR_SPACE){
            value = attrName;
          }
          if(s == S_ATTR_NOQUOT_VALUE){
            errorHandler.warning('attribute "'+value+'" missed quot(")!');
            addAttribute(attrName, value.replace(/&#?\w+;/g,entityReplacer), start)
          }else{
            // attribute without a value: the name doubles as the value
            if(!NAMESPACE.isHTML(currentNSMap['']) || !value.match(/^(?:disabled|checked|selected)$/i)){
              errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!')
            }
            addAttribute(value, value, start)
          }
          break;
        case S_EQ:
          throw new Error('attribute value missed!!');
        }
        return p;
      /*xml space '\x20' | #x9 | #xD | #xA; */
      case '\u0080':
        // treated like a space; falls through to the whitespace handling
        c = ' ';
      default:
        if(c<= ' '){//space
          switch(s){
          case S_TAG:
            el.setTagName(source.slice(start,p));//tagName
            s = S_TAG_SPACE;
            break;
          case S_ATTR:
            attrName = source.slice(start,p)
            s = S_ATTR_SPACE;
            break;
          case S_ATTR_NOQUOT_VALUE:
            value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
            errorHandler.warning('attribute "'+value+'" missed quot(")!!');
            addAttribute(attrName, value, start)
            // fall through
          case S_ATTR_END:
            s = S_TAG_SPACE;
            break;
          // S_TAG_SPACE, S_EQ, S_ATTR_SPACE and S_TAG_CLOSE: whitespace is ignored
          }
        }else{//not space
          switch(s){
          // S_TAG, S_ATTR and S_ATTR_NOQUOT_VALUE: keep consuming the current token
          case S_ATTR_SPACE:
            // a new name starts, so the previous attribute had no value
            if (!NAMESPACE.isHTML(currentNSMap['']) || !attrName.match(/^(?:disabled|checked|selected)$/i)) {
              errorHandler.warning('attribute "'+attrName+'" missed value!! "'+attrName+'" instead2!!')
            }
            addAttribute(attrName, attrName, start);
            start = p;
            s = S_ATTR;
            break;
          case S_ATTR_END:
            errorHandler.warning('attribute space is required"'+attrName+'"!!')
            // fall through
          case S_TAG_SPACE:
            s = S_ATTR;
            start = p;
            break;
          case S_EQ:
            s = S_ATTR_NOQUOT_VALUE;
            start = p;
            break;
          case S_TAG_CLOSE:
            throw new Error("elements closed character '/' and '>' must be connected to");
          }
        }
      }//end outer switch
      p++;
    }
  }
  /**
   * Resolves namespaces for a parsed start tag and reports it to the builder.
   *
   * Namespace declarations (`xmlns` / `xmlns:prefix`) found on the element
   * open a new scope: `currentNSMap` is copied before being extended, so the
   * caller's map is never mutated. Attribute and element URIs are then filled
   * in and startElement is emitted; for closed elements endElement and the
   * matching endPrefixMapping calls follow immediately.
   *
   * @param {Object} el Array-like attribute list carrying `tagName` and `closed`.
   * @param {Object} domBuilder Receiver of startPrefixMapping / startElement /
   *   endElement / endPrefixMapping.
   * @param {Object} currentNSMap Prefix -> namespace URI bindings in scope ('' is the default namespace).
   * @return {boolean|undefined} true when the element stays open and must be
   *   pushed on the parse stack (its `currentNSMap` and `localNSMap` are set);
   *   undefined when the element was closed.
   */
  function appendElement(el,domBuilder,currentNSMap){
    var tagName = el.tagName;
    var localNSMap = null;
    var a, prefix, localName, nsPrefix;
    var i = el.length;
    // Pass 1: split qualified names and collect namespace declarations.
    while(i--){
      a = el[i];
      var qName = a.qName;
      var value = a.value;
      var nsp = qName.indexOf(':');
      if(nsp>0){
        prefix = a.prefix = qName.slice(0,nsp);
        localName = qName.slice(nsp+1);
        // declared prefix for "xmlns:foo", false for any other prefixed attribute
        nsPrefix = prefix === 'xmlns' && localName
      }else{
        localName = qName;
        prefix = null
        // '' for a default-namespace declaration, false otherwise
        nsPrefix = qName === 'xmlns' && ''
      }
      a.localName = localName ;
      // `false` is the "not a declaration" marker; '' is a valid (default) prefix
      if(nsPrefix !== false){
        if(localNSMap == null){
          localNSMap = {}
          // first declaration on this element: switch to a private copy of the scope
          _copy(currentNSMap,currentNSMap={})
        }
        currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value;
        a.uri = NAMESPACE.XMLNS
        domBuilder.startPrefixMapping(nsPrefix, value)
      }
    }
    // Pass 2: resolve attribute namespaces; unprefixed attributes have none.
    i = el.length;
    while(i--){
      a = el[i];
      prefix = a.prefix;
      if(prefix){
        if(prefix === 'xml'){
          // The "xml" prefix is bound to the XML namespace by definition; do not
          // let a missing or different entry in the scope overwrite it.
          a.uri = NAMESPACE.XML;
        }else if(prefix !== 'xmlns'){
          a.uri = currentNSMap[prefix]
        }
      }
    }
    var tagNsp = tagName.indexOf(':');
    if(tagNsp>0){
      prefix = el.prefix = tagName.slice(0,tagNsp);
      localName = el.localName = tagName.slice(tagNsp+1);
    }else{
      prefix = null;// must be reset: it still holds the last attribute's prefix
      localName = el.localName = tagName;
    }
    // unprefixed elements take the default namespace
    var ns = el.uri = currentNSMap[prefix || ''];
    domBuilder.startElement(ns,localName,tagName,el);
    if(el.closed){
      domBuilder.endElement(ns,localName,tagName);
      if(localNSMap){
        for(prefix in localNSMap){
          domBuilder.endPrefixMapping(prefix)
        }
      }
    }else{
      // remembered so the matching end tag can close the namespace scope
      el.currentNSMap = currentNSMap;
      el.localNSMap = localNSMap;
      return true;
    }
  }
  /**
   * Handles the raw-text content of <script> and <textarea>: when the content
   * contains '&' or '<', everything up to the matching close tag is emitted
   * as one run of character data instead of being parsed as markup. Script
   * content is passed through verbatim; textarea content gets its entity
   * references replaced.
   *
   * @param {string} source The markup being parsed.
   * @param {number} elStartEnd Offset of the '>' that ends the start tag.
   * @param {string} tagName Tag name as written in the start tag.
   * @param {function(string): string} entityReplacer Replacement callback for entity references.
   * @param {Object} domBuilder Receiver of the `characters` event.
   * @return {number} Offset at which parsing resumes: the start of the close
   *   tag when content was consumed, otherwise the offset just after the start tag.
   */
  function parseHtmlSpecialContent(source,elStartEnd,tagName,entityReplacer,domBuilder){
    if(/^(?:script|textarea)$/i.test(tagName)){
      var elEndStart = source.indexOf('</'+tagName+'>',elStartEnd);
      if(elEndStart<0){
        // No close tag: substring() with a negative end would swap its
        // arguments and emit the document prefix, so parse the rest normally.
        return elStartEnd+1;
      }
      var text = source.substring(elStartEnd+1,elEndStart);
      if(/[&<]/.test(text)){
        if(!/^script$/i.test(tagName)){
          // textarea: entity references are still expanded
          text = text.replace(/&#?\w+;/g,entityReplacer);
        }
        domBuilder.characters(text,0,text.length);
        return elEndStart;
      }
    }
    return elStartEnd+1;
  }
  /**
   * Tells whether an element that was not written as self-closing has no
   * close tag after its start tag, in which case the caller treats it as closed.
   *
   * The position of the last close tag for each tag name is cached in
   * `closeMap` so the document is searched only once per name.
   *
   * @param {string} source The markup being parsed.
   * @param {number} elStartEnd Offset where the element's start tag ends.
   * @param {string} tagName Tag name as written in the start tag.
   * @param {Object} closeMap Cache: tag name -> offset of its last close tag (-1 if none).
   * @return {boolean} true when no close tag for `tagName` occurs at or after `elStartEnd`.
   */
  function fixSelfClosed(source,elStartEnd,tagName,closeMap){
    // Own-property lookup: a tag called "constructor" or "toString" must not
    // pick up an inherited Object.prototype member as its cached position.
    var pos = Object.prototype.hasOwnProperty.call(closeMap, tagName) ? closeMap[tagName] : null;
    if(pos == null){
      pos = source.lastIndexOf('</'+tagName+'>')
      if(pos<elStartEnd){// the author may have forgotten the closing '>'
        pos = source.lastIndexOf('</'+tagName)
      }
      closeMap[tagName] = pos
    }
    return pos<elStartEnd;
  }
  /**
   * Shallow-copies every enumerable property of `source` (inherited ones
   * included, since this is a for-in loop) onto `target`.
   *
   * @param {Object} source Object to read from; null/undefined copies nothing.
   * @param {Object} target Object that receives the properties.
   */
  function _copy(source, target) {
    for (var key in source) {
      target[key] = source[key];
    }
  }
  /**
   * Parses a construct starting with '<!': a comment, a CDATA section or a
   * DOCTYPE declaration, and reports it to the builder.
   *
   * @param {string} source The markup being parsed.
   * @param {number} start Offset of the '<' of '<!'.
   * @param {Object} domBuilder Receiver of comment / startCDATA / characters /
   *   endCDATA / startDTD / endDTD.
   * @param {Object} errorHandler Object exposing `error`.
   * @return {number} Offset just past the construct, or -1 when it is not
   *   recognised or not terminated.
   */
  function parseDCC(source,start,domBuilder,errorHandler){//sure start with '<!'
    var next= source.charAt(start+2)
    var end;
    switch(next){
    case '-':
      if(source.charAt(start + 3) === '-'){
        end = source.indexOf('-->',start+4);
        if(end>start){
          // comment text lies between '<!--' and '-->'
          domBuilder.comment(source,start+4,end-start-4);
          return end+3;
        }else{
          errorHandler.error("Unclosed comment");
          return -1;
        }
      }else{
        // '<!-' not followed by a second '-'
        return -1;
      }
    default:
      if(source.substr(start+3,6) == 'CDATA['){
        end = source.indexOf(']]>',start+9);
        if(end<0){
          // Without this guard a negative length would be passed to
          // characters() and a meaningless offset returned.
          errorHandler.error("Unclosed CDATA");
          return -1;
        }
        domBuilder.startCDATA();
        domBuilder.characters(source,start+9,end-start-9);
        domBuilder.endCDATA()
        return end+3;
      }
      // <!DOCTYPE name [PUBLIC pubid [sysid] | SYSTEM sysid] >
      // NOTE(review): split() returns undefined when the declaration is never
      // terminated; the resulting TypeError on `.length` is left to the caller's
      // error handling, as before.
      var matchs = split(source,start);
      var len = matchs.length;
      if(len>1 && /!doctype/i.test(matchs[0][0])){
        var name = matchs[1][0];
        var pubid = false;
        var sysid = false;
        if(len>3){
          if(/^public$/i.test(matchs[2][0])){
            pubid = matchs[3][0];
            sysid = len>4 && matchs[4][0];
          }else if(/^system$/i.test(matchs[2][0])){
            sysid = matchs[3][0];
          }
        }
        var lastMatch = matchs[len-1]
        domBuilder.startDTD(name, pubid, sysid);
        domBuilder.endDTD();

        return lastMatch.index+lastMatch[0].length
      }
    }
    return -1;
  }
  /**
   * Parses a processing instruction (`<?target data?>`) starting at `start`
   * and reports it to the builder.
   *
   * @param {string} source The markup being parsed.
   * @param {number} start Offset of the '<' of '<?'.
   * @param {Object} domBuilder Receiver of `processingInstruction(target, data)`.
   * @return {number} Offset just past the closing '?>', or -1 when the
   *   instruction is not terminated or does not match the expected shape.
   */
  function parseInstruction(source,start,domBuilder){
    var end = source.indexOf('?>',start);
    // indexOf reports "not found" as -1, which is truthy; test it explicitly.
    if(end<0){
      return -1;
    }
    // group 1: target (may be empty), group 2: data with surrounding whitespace trimmed
    var match = source.substring(start,end).match(/^<\?(\S*)\s*([\s\S]*?)\s*$/);
    if(!match){
      return -1;
    }
    domBuilder.processingInstruction(match[1], match[2]) ;
    return end+2;
  }
  /**
   * Array-like collection of the attributes of one start tag. Attribute
   * records live under numeric keys; `attributeNames` maps each qualified
   * name to its index.
   *
   * @constructor
   */
  function ElementAttributes(){
    this.attributeNames = {};
  }
  ElementAttributes.prototype = {
    /**
     * Validates and stores the element's tag name.
     * @param {string} tagName
     * @throws {Error} When the name does not match tagNamePattern.
     */
    setTagName: function(tagName){
      var isValid = tagNamePattern.test(tagName);
      if (!isValid) {
        throw new Error('invalid tagName:'+tagName);
      }
      this.tagName = tagName;
    },
    /**
     * Appends an attribute record `{qName, value, offset}`.
     * @param {string} qName Qualified attribute name.
     * @param {string} value Attribute value.
     * @param {number} offset Source offset associated with the attribute.
     * @throws {Error} When the name does not match tagNamePattern.
     */
    addValue: function(qName, value, offset) {
      var isValid = tagNamePattern.test(qName);
      if (!isValid) {
        throw new Error('invalid attribute:'+qName);
      }
      var index = this.length;
      this.attributeNames[qName] = index;
      this.length = index + 1;
      this[index] = {qName:qName, value:value, offset:offset};
    },
    // Shared initial length; the first addValue creates the instance's own counter.
    length: 0,
    // Accessors for the i-th attribute record.
    getLocalName: function(i){ return this[i].localName; },
    getLocator: function(i){ return this[i].locator; },
    getQName: function(i){ return this[i].qName; },
    getURI: function(i){ return this[i].uri; },
    getValue: function(i){ return this[i].value; }
  }
  /**
   * Tokenizes a declaration such as `<!DOCTYPE ...>` starting at `start`.
   * Tokens are quoted strings, bare words (optionally ending in '=') and the
   * delimiters '>', '/>' and '<'; the first token (the opening '<') is skipped.
   *
   * @param {string} source The markup being parsed.
   * @param {number} start Offset of the opening '<'.
   * @return {Array|undefined} The regex match objects up to and including the
   *   first delimiter token, or undefined when the input ends before one is found.
   */
  function split(source, start){
    var tokens = [];
    var tokenPattern = /'[^']+'|"[^"]+"|[^\s<>\/=]+=?|(\/?\s*>|<)/g;
    tokenPattern.lastIndex = start;
    tokenPattern.exec(source); // consume the opening '<'
    for (var token = tokenPattern.exec(source); token; token = tokenPattern.exec(source)) {
      tokens.push(token);
      // group 1 only captures delimiters, which end the declaration
      if (token[1]) {
        return tokens;
      }
    }
  }
  // CommonJS exports: the two constructors defined in this file.
  exports.XMLReader = XMLReader;
  exports.ParseError = ParseError;