// index.d.ts (5.4 KB)

  /// <reference types="node" />
  import { Transform } from 'node:stream';
  import type { Tokenizer, TokenHandler, Token } from 'parse5';
  import { ParserFeedbackSimulator } from './parser-feedback-simulator.js';
  /**
   * Options accepted by the `SAXParser` constructor.
   */
  export interface SAXParserOptions {
    /**
     * Enables source code location information for tokens.
     *
     * When enabled, each token will have a `sourceCodeLocation` property.
     */
    sourceCodeLocationInfo?: boolean;
  }
  /**
   * Streaming [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style HTML parser.
   * A [transform stream](https://nodejs.org/api/stream.html#stream_class_stream_transform) (which means you can pipe _through_ it, see example).
   *
   * @example
   *
   * ```js
   * const { SAXParser } = require('parse5-sax-parser');
   * const http = require('http');
   * const fs = require('fs');
   *
   * const file = fs.createWriteStream('/home/google.com.html');
   * const parser = new SAXParser();
   *
   * parser.on('text', text => {
   *     // Handle page text content
   *     ...
   * });
   *
   * http.get('http://google.com', res => {
   *     // `SAXParser` is a `Transform` stream, which means you can pipe
   *     // through it. So, you can analyze the page content and, e.g., save it
   *     // to the file at the same time:
   *     res.pipe(parser).pipe(file);
   * });
   * ```
   */
  export declare class SAXParser extends Transform implements TokenHandler {
    protected options: SAXParserOptions;
    protected parserFeedbackSimulator: ParserFeedbackSimulator;
    private pendingText;
    private lastChunkWritten;
    private stopped;
    protected tokenizer: Tokenizer;
    /**
     * @param options Parsing options.
     */
    constructor(options?: SAXParserOptions);
    /** `Transform` stream hook; receives each written chunk as a string. */
    _transform(
      chunk: string,
      _encoding: string,
      callback: (error?: Error | null, data?: string) => void,
    ): void;
    /** Stream hook invoked once the writable side has ended. */
    _final(callback: (error?: Error | null, data?: string) => void): void;
    /**
     * Stops parsing. Useful if you want the parser to stop consuming CPU time
     * once you've obtained the desired info from the input stream. Doesn't
     * prevent piping, so that data will flow through the parser as usual.
     *
     * @example
     *
     * ```js
     * const { SAXParser } = require('parse5-sax-parser');
     * const http = require('http');
     * const fs = require('fs');
     *
     * const file = fs.createWriteStream('google.com.html');
     * const parser = new SAXParser();
     *
     * parser.on('doctype', ({ name, publicId, systemId }) => {
     *     // Process doctype info and stop parsing
     *     ...
     *     parser.stop();
     * });
     *
     * http.get('http://google.com', res => {
     *     // Despite the fact that parser.stop() was called, the whole
     *     // content of the page will be written to the file
     *     res.pipe(parser).pipe(file);
     * });
     * ```
     */
    stop(): void;
    protected _transformChunk(chunk: string): string;
    /** @internal */
    onCharacter({ chars, location }: Token.CharacterToken): void;
    /** @internal */
    onWhitespaceCharacter(token: Token.CharacterToken): void;
    /** @internal */
    onNullCharacter(token: Token.CharacterToken): void;
    /** @internal */
    onEof(): void;
    /** @internal */
    onStartTag(token: Token.TagToken): void;
    /** @internal */
    onEndTag(token: Token.TagToken): void;
    /** @internal */
    onDoctype(token: Token.DoctypeToken): void;
    /** @internal */
    onComment(token: Token.CommentToken): void;
    protected emitIfListenerExists(eventName: string, token: SaxToken): boolean;
    protected _emitToken(eventName: string, token: SaxToken): void;
    private _emitPendingText;
  }
  /**
   * Base shape extended by every token interface declared in this file
   * (`StartTag`, `EndTag`, `Text`, `Comment`, `Doctype`).
   */
  export interface SaxToken {
    /** Source code location info. Available if location info is enabled via {@link SAXParserOptions}. */
    sourceCodeLocation?: Token.Location | null;
  }
  /** Payload passed to `startTag` listeners. */
  export interface StartTag extends SaxToken {
    /** Tag name */
    tagName: string;
    /** List of attributes */
    attrs: Token.Attribute[];
    /** Indicates if the tag is self-closing */
    selfClosing: boolean;
  }
  /** Payload passed to `endTag` listeners. */
  export interface EndTag extends SaxToken {
    /** Tag name */
    tagName: string;
  }
  /** Payload passed to `text` listeners. */
  export interface Text extends SaxToken {
    /** Text content. */
    text: string;
  }
  /** Payload passed to `comment` listeners. */
  export interface Comment extends SaxToken {
    /** Comment text. */
    text: string;
  }
  /** Payload passed to `doctype` listeners. */
  export interface Doctype extends SaxToken {
    /** Document type name. */
    name: string | null;
    /** Document type public identifier. */
    publicId: string | null;
    /** Document type system identifier. */
    systemId: string | null;
  }
  /**
   * Typed `on` overloads for the parser's events. This interface shares its
   * name with the `SAXParser` class, so TypeScript merges the two declarations.
   */
  export interface SAXParser {
    /** Raised when the parser encounters a start tag. */
    on(event: 'startTag', listener: (startTag: StartTag) => void): this;
    /** Raised when the parser encounters an end tag. */
    on(event: 'endTag', listener: (endTag: EndTag) => void): this;
    /** Raised when the parser encounters a comment. */
    on(event: 'comment', listener: (comment: Comment) => void): this;
    /** Raised when the parser encounters text content. */
    on(event: 'text', listener: (text: Text) => void): this;
    /** Raised when the parser encounters a [document type declaration](https://en.wikipedia.org/wiki/Document_type_declaration) */
    on(event: 'doctype', listener: (doctype: Doctype) => void): this;
    /**
     * Base event handler.
     *
     * @param event Name of the event
     * @param handler Event handler
     */
    on(event: string, handler: (...args: any[]) => void): this;
  }
//# sourceMappingURL=index.d.ts.map