// parser-feedback-simulator.js
  1. import { Tokenizer, TokenizerMode, Token, foreignContent, html, } from 'parse5';
  2. const $ = html.TAG_ID;
  3. const REPLACEMENT_CHARACTER = '\uFFFD';
  4. const LINE_FEED_CODE_POINT = 0x0a;
  5. /**
  6. * Simulates adjustments of the Tokenizer which are performed by the standard parser during tree construction.
  7. */
  8. export class ParserFeedbackSimulator {
  9. constructor(options, handler) {
  10. this.handler = handler;
  11. this.namespaceStack = [];
  12. this.inForeignContent = false;
  13. this.skipNextNewLine = false;
  14. this.tokenizer = new Tokenizer(options, this);
  15. this._enterNamespace(html.NS.HTML);
  16. }
  17. /** @internal */
  18. onNullCharacter(token) {
  19. this.skipNextNewLine = false;
  20. if (this.inForeignContent) {
  21. this.handler.onCharacter({
  22. type: Token.TokenType.CHARACTER,
  23. chars: REPLACEMENT_CHARACTER,
  24. location: token.location,
  25. });
  26. }
  27. else {
  28. this.handler.onNullCharacter(token);
  29. }
  30. }
  31. /** @internal */
  32. onWhitespaceCharacter(token) {
  33. if (this.skipNextNewLine && token.chars.charCodeAt(0) === LINE_FEED_CODE_POINT) {
  34. this.skipNextNewLine = false;
  35. if (token.chars.length === 1) {
  36. return;
  37. }
  38. token.chars = token.chars.substr(1);
  39. }
  40. this.handler.onWhitespaceCharacter(token);
  41. }
  42. /** @internal */
  43. onCharacter(token) {
  44. this.skipNextNewLine = false;
  45. this.handler.onCharacter(token);
  46. }
  47. /** @internal */
  48. onComment(token) {
  49. this.skipNextNewLine = false;
  50. this.handler.onComment(token);
  51. }
  52. /** @internal */
  53. onDoctype(token) {
  54. this.skipNextNewLine = false;
  55. this.handler.onDoctype(token);
  56. }
  57. /** @internal */
  58. onEof(token) {
  59. this.skipNextNewLine = false;
  60. this.handler.onEof(token);
  61. }
  62. //Namespace stack mutations
  63. _enterNamespace(namespace) {
  64. this.namespaceStack.unshift(namespace);
  65. this.inForeignContent = namespace !== html.NS.HTML;
  66. this.tokenizer.inForeignNode = this.inForeignContent;
  67. }
  68. _leaveCurrentNamespace() {
  69. this.namespaceStack.shift();
  70. this.inForeignContent = this.namespaceStack[0] !== html.NS.HTML;
  71. this.tokenizer.inForeignNode = this.inForeignContent;
  72. }
  73. //Token handlers
  74. _ensureTokenizerMode(tn) {
  75. switch (tn) {
  76. case $.TEXTAREA:
  77. case $.TITLE: {
  78. this.tokenizer.state = TokenizerMode.RCDATA;
  79. break;
  80. }
  81. case $.PLAINTEXT: {
  82. this.tokenizer.state = TokenizerMode.PLAINTEXT;
  83. break;
  84. }
  85. case $.SCRIPT: {
  86. this.tokenizer.state = TokenizerMode.SCRIPT_DATA;
  87. break;
  88. }
  89. case $.STYLE:
  90. case $.IFRAME:
  91. case $.XMP:
  92. case $.NOEMBED:
  93. case $.NOFRAMES:
  94. case $.NOSCRIPT: {
  95. this.tokenizer.state = TokenizerMode.RAWTEXT;
  96. break;
  97. }
  98. default:
  99. // Do nothing
  100. }
  101. }
  102. /** @internal */
  103. onStartTag(token) {
  104. let tn = token.tagID;
  105. switch (tn) {
  106. case $.SVG: {
  107. this._enterNamespace(html.NS.SVG);
  108. break;
  109. }
  110. case $.MATH: {
  111. this._enterNamespace(html.NS.MATHML);
  112. break;
  113. }
  114. default:
  115. // Do nothing
  116. }
  117. if (this.inForeignContent) {
  118. if (foreignContent.causesExit(token)) {
  119. this._leaveCurrentNamespace();
  120. }
  121. else {
  122. const currentNs = this.namespaceStack[0];
  123. if (currentNs === html.NS.MATHML) {
  124. foreignContent.adjustTokenMathMLAttrs(token);
  125. }
  126. else if (currentNs === html.NS.SVG) {
  127. foreignContent.adjustTokenSVGTagName(token);
  128. foreignContent.adjustTokenSVGAttrs(token);
  129. }
  130. foreignContent.adjustTokenXMLAttrs(token);
  131. tn = token.tagID;
  132. if (!token.selfClosing && foreignContent.isIntegrationPoint(tn, currentNs, token.attrs)) {
  133. this._enterNamespace(html.NS.HTML);
  134. }
  135. }
  136. }
  137. else {
  138. switch (tn) {
  139. case $.PRE:
  140. case $.TEXTAREA:
  141. case $.LISTING: {
  142. this.skipNextNewLine = true;
  143. break;
  144. }
  145. case $.IMAGE: {
  146. token.tagName = html.TAG_NAMES.IMG;
  147. token.tagID = $.IMG;
  148. break;
  149. }
  150. default:
  151. // Do nothing
  152. }
  153. this._ensureTokenizerMode(tn);
  154. }
  155. this.handler.onStartTag(token);
  156. }
  157. /** @internal */
  158. onEndTag(token) {
  159. let tn = token.tagID;
  160. if (!this.inForeignContent) {
  161. const previousNs = this.namespaceStack[1];
  162. if (previousNs === html.NS.SVG) {
  163. const adjustedTagName = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP.get(token.tagName);
  164. if (adjustedTagName) {
  165. tn = html.getTagID(adjustedTagName);
  166. }
  167. }
  168. //NOTE: check for exit from integration point
  169. if (foreignContent.isIntegrationPoint(tn, previousNs, token.attrs)) {
  170. this._leaveCurrentNamespace();
  171. }
  172. }
  173. else if ((tn === $.SVG && this.namespaceStack[0] === html.NS.SVG) ||
  174. (tn === $.MATH && this.namespaceStack[0] === html.NS.MATHML)) {
  175. this._leaveCurrentNamespace();
  176. }
  177. // NOTE: adjust end tag name as well for consistency
  178. if (this.namespaceStack[0] === html.NS.SVG) {
  179. foreignContent.adjustTokenSVGTagName(token);
  180. }
  181. this.handler.onEndTag(token);
  182. }
  183. }
//# sourceMappingURL=parser-feedback-simulator.js.map