141 lines
4.6 KiB
JavaScript
141 lines
4.6 KiB
JavaScript
'use strict';
|
|
|
|
var WritableStream = require('stream').Writable,
    StringDecoder = require('string_decoder').StringDecoder,
    inherits = require('util').inherits,
    Parser = require('./index');
|
|
|
|
/**
|
|
* Streaming HTML parser with scripting support.
|
|
* A [writable stream]{@link https://nodejs.org/api/stream.html#stream_class_stream_writable}.
|
|
* @class ParserStream
|
|
* @memberof parse5
|
|
* @instance
|
|
* @extends stream.Writable
|
|
* @param {ParserOptions} options - Parsing options.
|
|
* @example
|
|
* var parse5 = require('parse5');
|
|
* var http = require('http');
|
|
*
|
|
* // Fetch the google.com content and obtain its <body> node
|
|
* http.get('http://google.com', function(res) {
|
|
* var parser = new parse5.ParserStream();
|
|
*
|
|
* parser.on('finish', function() {
|
|
* var body = parser.document.childNodes[0].childNodes[1];
|
|
* });
|
|
*
|
|
* res.pipe(parser);
|
|
* });
|
|
*/
|
|
var ParserStream = module.exports = function (options) {
    var stream = this;

    WritableStream.call(stream);

    //NOTE: the parser has to exist first, since the document below is created through its tree adapter
    stream.parser = new Parser(options);

    //Streaming state
    stream.lastChunkWritten = false;
    stream.writeCallback = null;
    stream.pausedByScript = false;

    /**
     * The resulting document node.
     * @member {ASTNode<document>} document
     * @memberof parse5#ParserStream
     * @instance
     */
    stream.document = stream.parser.treeAdapter.createDocument();

    //HTML chunks queued by _documentWrite() until _resume() is called
    stream.pendingHtmlInsertions = [];

    //NOTE: these methods are passed around as bare functions (to the parsing loop and
    //to 'script' event listeners), so they are bound to the instance once, up front.
    ['_resume', '_documentWrite', '_scriptHandler'].forEach(function (methodName) {
        stream[methodName] = stream[methodName].bind(stream);
    });

    stream.parser._bootstrap(stream.document, null);
};

inherits(ParserStream, WritableStream);
|
|
|
|
//WritableStream implementation
|
|
/**
 * Writable stream hook: decodes the incoming chunk as UTF-8, feeds the text to the
 * tokenizer and runs the parsing loop.
 *
 * Decoding goes through a per-stream `StringDecoder`, so a multi-byte character whose
 * bytes are split across two chunks is emitted once it is complete instead of being
 * decoded separately (and incorrectly) on each side of the chunk boundary.
 *
 * @param {Buffer|String} chunk - Data to parse.
 * @param {String} encoding - Not used; the input is always decoded as UTF-8.
 * @param {Function} callback - Stored in `writeCallback` and handed to the parsing loop.
 */
ParserStream.prototype._write = function (chunk, encoding, callback) {
    this.writeCallback = callback;

    //NOTE: created on first use so the decoder keeps its partial-character state between chunks
    if (!this.chunkDecoder)
        this.chunkDecoder = new StringDecoder('utf8');

    var html = this.chunkDecoder.write(chunk);

    //NOTE: once input is marked as finished, flush the bytes of a truncated trailing
    //character rather than silently dropping them.
    if (this.lastChunkWritten)
        html += this.chunkDecoder.end();

    this.parser.tokenizer.write(html, this.lastChunkWritten);
    this._runParsingLoop();
};
|
|
|
|
/**
 * Signals that no more input will be written and lets the stream finish.
 *
 * Accepts the same call forms as `stream.Writable#end`: `end()`, `end(callback)`,
 * `end(chunk)`, `end(chunk, callback)` and `end(chunk, encoding, callback)`.
 *
 * The last-chunk flag reaches the tokenizer only through `_write()`, and the base
 * `end()` performs no write when it is given no chunk. To make sure the flag is
 * delivered, an empty chunk is written when the caller supplies none.
 *
 * @param {Buffer|String} [chunk] - Final piece of input.
 * @param {String} [encoding] - Encoding of `chunk` if it is a string.
 * @param {Function} [callback] - Passed through to `stream.Writable#end`.
 */
ParserStream.prototype.end = function (chunk, encoding, callback) {
    var finalChunk = chunk,
        finalEncoding = encoding,
        done = callback;

    //Normalize the end(callback) and end(chunk, callback) call forms
    if (typeof finalChunk === 'function') {
        done = finalChunk;
        finalChunk = null;
        finalEncoding = null;
    }
    else if (typeof finalEncoding === 'function') {
        done = finalEncoding;
        finalEncoding = null;
    }

    var isFirstEnd = !this.lastChunkWritten;

    this.lastChunkWritten = true;

    //NOTE: only the first end() forces the empty write; repeating it on a later call
    //would be a write to an already ended stream.
    if (isFirstEnd && (finalChunk === null || finalChunk === undefined))
        finalChunk = '';

    WritableStream.prototype.end.call(this, finalChunk, finalEncoding, done);
};
|
|
|
|
//Scriptable parser implementation
|
|
/**
 * Runs the underlying parser's parsing loop, handing it the callback of the
 * current write and the bound script handler.
 */
ParserStream.prototype._runParsingLoop = function () {
    var parser = this.parser,
        onWriteDone = this.writeCallback,
        onScript = this._scriptHandler;

    parser._runParsingLoop(onWriteDone, onScript);
};
|
|
|
|
/**
 * Resumes parsing after a `script` event suspended it.
 *
 * Queued `documentWrite` chunks are handed to the tokenizer first, then the paused
 * flag is cleared and the parsing loop is restarted if the tokenizer is still active.
 *
 * @throws {Error} If the parser is not currently paused by a script.
 */
ParserStream.prototype._resume = function () {
    if (!this.pausedByScript)
        throw new Error('Parser was already resumed');

    var tokenizer = this.parser.tokenizer,
        queuedHtml = this.pendingHtmlInsertions;

    //NOTE: the queue is drained from its end, i.e. the most recently queued chunk is
    //inserted first. Each insertion happens at the current position.
    while (queuedHtml.length > 0)
        tokenizer.insertHtmlAtCurrentPos(queuedHtml.pop());

    this.pausedByScript = false;

    //NOTE: keep parsing if we don't wait for the next input chunk
    if (tokenizer.active)
        this._runParsingLoop();
};
|
|
|
|
/**
 * Queues `html` for insertion on the next `_resume()` call.
 * Does nothing once the parser is flagged as stopped.
 *
 * @param {String} html - Markup to insert.
 */
ParserStream.prototype._documentWrite = function (html) {
    if (this.parser.stopped)
        return;

    this.pendingHtmlInsertions.push(html);
};
|
|
|
|
/**
 * Script handler passed to the parsing loop.
 *
 * With no `script` listeners attached it simply re-enters the parsing loop. Otherwise
 * it marks the stream as paused and emits `script`; parsing then continues only when
 * a listener calls the supplied `resume` function.
 *
 * @param {ASTNode} scriptElement - The script element that was encountered.
 */
ParserStream.prototype._scriptHandler = function (scriptElement) {
    var hasScriptListeners = this.listeners('script').length > 0;

    if (!hasScriptListeners) {
        this._runParsingLoop();
        return;
    }

    this.pausedByScript = true;

    /**
     * Raised when the parser encounters a `<script>` element.
     * If this event has listeners, parsing will be suspended once it is emitted.
     * So, if `<script>` has the `src` attribute, you can fetch it, execute and then resume parsing just like browsers do.
     * @event script
     * @memberof parse5#ParserStream
     * @instance
     * @type {Function}
     * @param {ASTNode} scriptElement - The script element that caused the event.
     * @param {Function} documentWrite(html) - Write additional `html` at the current parsing position.
     *  Suitable for implementing the DOM `document.write` and `document.writeln` methods.
     * @param {Function} resume - Resumes parsing.
     * @example
     * var parse5 = require('parse5');
     * var http = require('http');
     *
     * var parser = new parse5.ParserStream();
     *
     * parser.on('script', function(scriptElement, documentWrite, resume) {
     *   var src = parse5.treeAdapters.default.getAttrList(scriptElement)[0].value;
     *
     *   http.get(src, function(res) {
     *      // Fetch the script content, execute it with DOM built around `parser.document` and
     *      // `document.write` implemented using `documentWrite`.
     *      ...
     *      // Then resume parsing.
     *      resume();
     *   });
     * });
     *
     * parser.end('<script src="example.com/script.js"></script>');
     */
    this.emit('script', scriptElement, this._documentWrite, this._resume);
};
|
|
|