Parser.js 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. "use strict";
  /**
   * TypeScript down-level helper that makes constructor `d` inherit from `b`.
   *
   * Reuses a helper already present on `this` when there is one; otherwise
   * builds a local implementation.
   *
   * @param {Function} d - Derived constructor; receives the statics of `b` and
   *     a fresh prototype object.
   * @param {Function|null} b - Base constructor, or `null` for a class whose
   *     prototype has no parent.
   * @throws {TypeError} When `b` is neither a function nor `null`.
   */
  var __extends = (this && this.__extends) || (function () {
      // Resolved lazily on first use, then replaced by the chosen strategy:
      // Object.setPrototypeOf, then `__proto__` assignment, then a plain copy
      // of the base's own enumerable properties.
      var extendStatics = function (d, b) {
          extendStatics = Object.setPrototypeOf ||
              ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
              function (d, b) {
                  for (var p in b) {
                      // Borrow hasOwnProperty so a base that shadows or lacks
                      // the method cannot break the copy.
                      if (Object.prototype.hasOwnProperty.call(b, p)) {
                          d[p] = b[p];
                      }
                  }
              };
          return extendStatics(d, b);
      };
      return function (d, b) {
          // Fail loudly instead of silently producing a class whose prototype
          // chain does not include the intended base.
          if (typeof b !== "function" && b !== null) {
              throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
          }
          extendStatics(d, b);
          // Intermediate constructor: instances of `__` inherit from
          // `b.prototype` without running `b` itself.
          function __() { this.constructor = d; }
          d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
      };
  })();
  /**
   * TypeScript interop helper for default imports.
   *
   * A module object flagged with `__esModule` is returned unchanged; any other
   * value is wrapped so that it is reachable through the `default` property.
   *
   * @param {*} mod - The value returned by `require`.
   * @returns {*} `mod` itself, or `{ default: mod }`.
   */
  var __importDefault = (this && this.__importDefault) || function (mod) {
      if (mod && mod.__esModule) {
          return mod;
      }
      return { "default": mod };
  };
  18. Object.defineProperty(exports, "__esModule", { value: true });
  19. var Tokenizer_1 = __importDefault(require("./Tokenizer"));
  20. var events_1 = require("events");
  // Form-related tags; shared by every form-control entry in the table below.
  var formTags = new Set([
      "input", "option", "optgroup", "select",
      "button", "datalist", "textarea"
  ]);
  // Shared by every entry whose opening closes an open paragraph.
  var pTag = new Set(["p"]);
  // Maps a tag name to the set of tag names that are closed implicitly when
  // that tag is opened while one of them is the innermost open element.
  var openImpliesClose = {
      // Table rows and cells
      tr: new Set(["tr", "th", "td"]),
      th: new Set(["th"]),
      td: new Set(["thead", "th", "td"]),
      body: new Set(["head", "link", "script"]),
      li: new Set(["li"]),
      // Paragraphs and headings
      p: pTag,
      h1: pTag,
      h2: pTag,
      h3: pTag,
      h4: pTag,
      h5: pTag,
      h6: pTag,
      // Form controls
      select: formTags,
      input: formTags,
      output: formTags,
      button: formTags,
      datalist: formTags,
      textarea: formTags,
      option: new Set(["option"]),
      optgroup: new Set(["optgroup", "option"]),
      // Description lists
      dd: new Set(["dt", "dd"]),
      dt: new Set(["dt", "dd"]),
      // Block-level containers
      address: pTag,
      article: pTag,
      aside: pTag,
      blockquote: pTag,
      details: pTag,
      div: pTag,
      dl: pTag,
      fieldset: pTag,
      figcaption: pTag,
      figure: pTag,
      footer: pTag,
      form: pTag,
      header: pTag,
      hr: pTag,
      main: pTag,
      nav: pTag,
      ol: pTag,
      pre: pTag,
      section: pTag,
      table: pTag,
      ul: pTag,
      // Ruby annotations
      rt: new Set(["rt", "rp"]),
      rp: new Set(["rt", "rp"]),
      // Table sections
      tbody: new Set(["thead", "tbody"]),
      tfoot: new Set(["thead", "tbody"])
  };
  // Tag names that are never pushed onto the open-element stack outside XML
  // mode; the parser reports their close right after the opening tag ends.
  var voidElements = new Set([
      "area", "base", "basefont", "br",
      "col", "command", "embed", "frame",
      "hr", "img", "input", "isindex",
      "keygen", "link", "meta", "param",
      "source", "track", "wbr"
  ]);
  // Opening one of these pushes `true` onto the parser's foreign-context stack.
  var foreignContextElements = new Set(["math", "svg"]);
  // Opening one of these pushes `false` onto the parser's foreign-context stack.
  var htmlIntegrationElements = new Set([
      "mi", "mo", "mn", "ms", "mtext",
      "annotation-xml", "foreignObject", "desc", "title"
  ]);
  // Matches the first whitespace character or slash, i.e. the end of the name
  // at the start of a declaration or processing instruction.
  var reNameEnd = /\s|\//;
  var Parser = /** @class */ (function (_super) {
      __extends(Parser, _super);
      /**
       * Receives tokenizer events and translates them into calls on a
       * user-supplied callback object, tracking the stack of open elements.
       *
       * @param {Object} [cbs] - Callback object. Handlers used here:
       *     onparserinit, onreset, onend, onerror, ontext, onopentagname,
       *     onopentag, onclosetag, onattribute, oncomment, oncommentend,
       *     oncdatastart, oncdataend, onprocessinginstruction. All are optional.
       * @param {Object} [options] - Options read here: xmlMode, lowerCaseTags,
       *     lowerCaseAttributeNames, recognizeSelfClosing, recognizeCDATA and
       *     Tokenizer (a replacement tokenizer constructor). The same object is
       *     also handed to the tokenizer.
       */
      function Parser(cbs, options) {
          var _this = _super.call(this) || this;
          _this._tagname = "";
          _this._attribname = "";
          _this._attribvalue = "";
          _this._attribs = null;
          _this._stack = [];
          _this._foreignContext = [];
          _this.startIndex = 0;
          _this.endIndex = null;
          // Aliases for backwards compatibility
          _this.parseChunk = Parser.prototype.write;
          _this.done = Parser.prototype.end;
          _this._options = options || {};
          _this._cbs = cbs || {};
          // An explicit option wins; otherwise names are lower-cased unless
          // the parser runs in XML mode.
          _this._lowerCaseTagNames =
              "lowerCaseTags" in _this._options
                  ? !!_this._options.lowerCaseTags
                  : !_this._options.xmlMode;
          _this._lowerCaseAttributeNames =
              "lowerCaseAttributeNames" in _this._options
                  ? !!_this._options.lowerCaseAttributeNames
                  : !_this._options.xmlMode;
          _this._tokenizer = new (_this._options.Tokenizer || Tokenizer_1.default)(_this._options, _this);
          if (_this._cbs.onparserinit) {
              _this._cbs.onparserinit(_this);
          }
          return _this;
      }
      /**
       * Moves `startIndex`/`endIndex` to the token that was just reported.
       *
       * For the first token (`endIndex` still `null`) the start is the
       * tokenizer's section start minus `initialOffset`, never below 0.
       * Afterwards the start is one past the previous end. The end is always
       * the tokenizer's current absolute index.
       *
       * @param {number} initialOffset - Amount subtracted from the tokenizer's
       *     section start for the first token.
       */
      Parser.prototype._updatePosition = function (initialOffset) {
          if (this.endIndex === null) {
              var sectionStart = this._tokenizer._sectionStart;
              this.startIndex = sectionStart <= initialOffset ? 0 : sectionStart - initialOffset;
          }
          else {
              this.startIndex = this.endIndex + 1;
          }
          this.endIndex = this._tokenizer.getAbsoluteIndex();
      };
      //Tokenizer event handlers
      /**
       * Tokenizer event: a run of text.
       * @param {string} data - The text content.
       */
      Parser.prototype.ontext = function (data) {
          this._updatePosition(1);
          // The end index is moved back by one for text tokens.
          this.endIndex--;
          if (this._cbs.ontext) {
              this._cbs.ontext(data);
          }
      };
      /**
       * Tokenizer event: the name of an opening tag was read.
       *
       * Outside XML mode this first closes any open elements that the new tag
       * implies closed, then records the element on the stack (void elements
       * are not recorded) and starts attribute collection if an `onopentag`
       * handler exists.
       *
       * @param {string} name - Tag name as written in the input.
       */
      Parser.prototype.onopentagname = function (name) {
          if (this._lowerCaseTagNames) {
              name = name.toLowerCase();
          }
          this._tagname = name;
          if (!this._options.xmlMode &&
              Object.prototype.hasOwnProperty.call(openImpliesClose, name)) {
              // Keep closing the innermost open element for as long as it is
              // one that the new tag implicitly terminates.
              var implied = openImpliesClose[name];
              var top = this._stack[this._stack.length - 1];
              while (implied.has(top)) {
                  this.onclosetag(top);
                  top = this._stack[this._stack.length - 1];
              }
          }
          if (this._options.xmlMode || !voidElements.has(name)) {
              this._stack.push(name);
              if (foreignContextElements.has(name)) {
                  this._foreignContext.push(true);
              }
              else if (htmlIntegrationElements.has(name)) {
                  this._foreignContext.push(false);
              }
          }
          if (this._cbs.onopentagname) {
              this._cbs.onopentagname(name);
          }
          if (this._cbs.onopentag) {
              this._attribs = {};
          }
      };
      /**
       * Tokenizer event: the opening tag is complete. Reports the tag with its
       * collected attributes and, outside XML mode, immediately reports the
       * close of a void element.
       */
      Parser.prototype.onopentagend = function () {
          this._updatePosition(1);
          if (this._attribs) {
              if (this._cbs.onopentag) {
                  this._cbs.onopentag(this._tagname, this._attribs);
              }
              this._attribs = null;
          }
          if (!this._options.xmlMode &&
              this._cbs.onclosetag &&
              voidElements.has(this._tagname)) {
              this._cbs.onclosetag(this._tagname);
          }
          this._tagname = "";
      };
      /**
       * Tokenizer event: a closing tag was read.
       *
       * Closes the matching open element together with everything opened
       * after it. Outside XML mode an unmatched `</p>`, or a `</br>`, is
       * turned into an empty element of that name.
       *
       * @param {string} name - Tag name as written in the input.
       */
      Parser.prototype.onclosetag = function (name) {
          this._updatePosition(1);
          if (this._lowerCaseTagNames) {
              name = name.toLowerCase();
          }
          if (foreignContextElements.has(name) ||
              htmlIntegrationElements.has(name)) {
              this._foreignContext.pop();
          }
          var xmlMode = this._options.xmlMode;
          if (this._stack.length && (xmlMode || !voidElements.has(name))) {
              var pos = this._stack.lastIndexOf(name);
              if (pos !== -1) {
                  if (this._cbs.onclosetag) {
                      // Report every element from the innermost one down to
                      // and including the matched one.
                      var count = this._stack.length - pos;
                      while (count--) {
                          this._cbs.onclosetag(this._stack.pop());
                      }
                  }
                  else {
                      // Nobody is listening: just drop the closed elements.
                      this._stack.length = pos;
                  }
              }
              else if (name === "p" && !xmlMode) {
                  this.onopentagname(name);
                  this._closeCurrentTag();
              }
          }
          else if (!xmlMode && (name === "br" || name === "p")) {
              this.onopentagname(name);
              this._closeCurrentTag();
          }
      };
      /**
       * Tokenizer event: an opening tag ended with `/>`. The element is closed
       * right away in XML mode, when `recognizeSelfClosing` is set, or when
       * the top of the foreign-context stack is truthy; otherwise the slash is
       * ignored and the tag is handled as a normal opening tag.
       */
      Parser.prototype.onselfclosingtag = function () {
          if (this._options.xmlMode ||
              this._options.recognizeSelfClosing ||
              this._foreignContext[this._foreignContext.length - 1]) {
              this._closeCurrentTag();
          }
          else {
              this.onopentagend();
          }
      };
      /**
       * Finishes the current opening tag and closes it again if it is the
       * innermost open element.
       */
      Parser.prototype._closeCurrentTag = function () {
          // Remember the name: onopentagend() clears this._tagname.
          var name = this._tagname;
          this.onopentagend();
          //self-closing tags will be on the top of the stack
          //(cheaper check than in onclosetag)
          if (this._stack[this._stack.length - 1] === name) {
              if (this._cbs.onclosetag) {
                  this._cbs.onclosetag(name);
              }
              this._stack.pop();
          }
      };
      /**
       * Tokenizer event: an attribute name was read.
       * @param {string} name - Attribute name as written in the input.
       */
      Parser.prototype.onattribname = function (name) {
          if (this._lowerCaseAttributeNames) {
              name = name.toLowerCase();
          }
          this._attribname = name;
      };
      /**
       * Tokenizer event: a piece of the current attribute's value; pieces are
       * concatenated until `onattribend`.
       * @param {string} value - Value fragment.
       */
      Parser.prototype.onattribdata = function (value) {
          this._attribvalue += value;
      };
      /**
       * Tokenizer event: the current attribute is complete. Reports it and
       * stores it for `onopentag`; when a name repeats within one tag, the
       * first value is the one that is stored.
       */
      Parser.prototype.onattribend = function () {
          if (this._cbs.onattribute) {
              this._cbs.onattribute(this._attribname, this._attribvalue);
          }
          if (this._attribs &&
              !Object.prototype.hasOwnProperty.call(this._attribs, this._attribname)) {
              this._attribs[this._attribname] = this._attribvalue;
          }
          this._attribname = "";
          this._attribvalue = "";
      };
      /**
       * Extracts the leading name from a declaration or processing
       * instruction: everything before the first whitespace or slash,
       * lower-cased when tag names are lower-cased.
       *
       * @param {string} value - Raw instruction content.
       * @returns {string} The instruction name.
       */
      Parser.prototype._getInstructionName = function (value) {
          var idx = value.search(reNameEnd);
          var name = idx < 0 ? value : value.slice(0, idx);
          if (this._lowerCaseTagNames) {
              name = name.toLowerCase();
          }
          return name;
      };
      /**
       * Tokenizer event: a declaration. Reported through
       * `onprocessinginstruction` with "!" prefixed to both name and content.
       * @param {string} value - Raw declaration content.
       */
      Parser.prototype.ondeclaration = function (value) {
          if (this._cbs.onprocessinginstruction) {
              var name_1 = this._getInstructionName(value);
              this._cbs.onprocessinginstruction("!" + name_1, "!" + value);
          }
      };
      /**
       * Tokenizer event: a processing instruction. Reported with "?" prefixed
       * to both name and content.
       * @param {string} value - Raw instruction content.
       */
      Parser.prototype.onprocessinginstruction = function (value) {
          if (this._cbs.onprocessinginstruction) {
              var name_2 = this._getInstructionName(value);
              this._cbs.onprocessinginstruction("?" + name_2, "?" + value);
          }
      };
      /**
       * Tokenizer event: a comment.
       * @param {string} value - Comment content.
       */
      Parser.prototype.oncomment = function (value) {
          this._updatePosition(4);
          if (this._cbs.oncomment) {
              this._cbs.oncomment(value);
          }
          if (this._cbs.oncommentend) {
              this._cbs.oncommentend();
          }
      };
      /**
       * Tokenizer event: a CDATA section. In XML mode or with `recognizeCDATA`
       * it is reported as text wrapped in oncdatastart/oncdataend; otherwise
       * it is reported as a comment whose content is "[CDATA[" + value + "]]".
       * @param {string} value - CDATA content.
       */
      Parser.prototype.oncdata = function (value) {
          this._updatePosition(1);
          if (this._options.xmlMode || this._options.recognizeCDATA) {
              if (this._cbs.oncdatastart) {
                  this._cbs.oncdatastart();
              }
              if (this._cbs.ontext) {
                  this._cbs.ontext(value);
              }
              if (this._cbs.oncdataend) {
                  this._cbs.oncdataend();
              }
          }
          else {
              // Call the comment callbacks directly. Going through
              // this.oncomment() would update the position a second time and
              // leave startIndex one past endIndex for this token.
              if (this._cbs.oncomment) {
                  this._cbs.oncomment("[CDATA[" + value + "]]");
              }
              if (this._cbs.oncommentend) {
                  this._cbs.oncommentend();
              }
          }
      };
      /**
       * Tokenizer event: an error occurred.
       * @param {*} err - The error, forwarded unchanged to the `onerror` handler.
       */
      Parser.prototype.onerror = function (err) {
          if (this._cbs.onerror) {
              this._cbs.onerror(err);
          }
      };
      /**
       * Tokenizer event: end of input. Reports a close for every element that
       * is still open, innermost first, then signals `onend`.
       */
      Parser.prototype.onend = function () {
          if (this._cbs.onclosetag) {
              var i = this._stack.length;
              while (i > 0) {
                  i -= 1;
                  this._cbs.onclosetag(this._stack[i]);
              }
          }
          if (this._cbs.onend) {
              this._cbs.onend();
          }
      };
      //Resets the parser to a blank state, ready to parse a new HTML document
      Parser.prototype.reset = function () {
          if (this._cbs.onreset) {
              this._cbs.onreset();
          }
          this._tokenizer.reset();
          this._tagname = "";
          this._attribname = "";
          // Also clear a partially collected attribute value, the foreign
          // context and the position counters, so nothing from the previous
          // document leaks into the next one.
          this._attribvalue = "";
          this._attribs = null;
          this._stack = [];
          this._foreignContext = [];
          this.startIndex = 0;
          this.endIndex = null;
          if (this._cbs.onparserinit) {
              this._cbs.onparserinit(this);
          }
      };
      //Parses a complete HTML document and pushes it to the handler
      Parser.prototype.parseComplete = function (data) {
          this.reset();
          this.end(data);
      };
      /**
       * Feeds a chunk of input to the tokenizer.
       * @param {string} chunk - Input to parse.
       */
      Parser.prototype.write = function (chunk) {
          this._tokenizer.write(chunk);
      };
      /**
       * Signals the end of input to the tokenizer.
       * @param {string} [chunk] - Passed through to the tokenizer's `end`.
       */
      Parser.prototype.end = function (chunk) {
          this._tokenizer.end(chunk);
      };
      /** Delegates to the tokenizer's `pause`. */
      Parser.prototype.pause = function () {
          this._tokenizer.pause();
      };
      /** Delegates to the tokenizer's `resume`. */
      Parser.prototype.resume = function () {
          this._tokenizer.resume();
      };
      return Parser;
  }(events_1.EventEmitter));
  371. exports.Parser = Parser;