123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595 |
- // this[BUFFER] is the remainder of a chunk if we're waiting for
- // the full 512 bytes of a header to come in. We will Buffer.concat()
- // it to the next write(), which is a mem copy, but a small one.
- //
- // this[QUEUE] is a Yallist of entries that haven't been emitted
- // yet. This can only get filled up if the user keeps write()ing after
- // a write() returns false, or does a write() with more than one entry.
- //
- // We don't buffer chunks, we always parse them and either create an
- // entry, or push it into the active entry. The ReadEntry class knows
- // to throw data away if .ignore=true
- //
- // Shift entry off the buffer when it emits 'end', and emit 'entry' for
- // the next one in the list.
- //
- // At any time, we're pushing body chunks into the entry at WRITEENTRY,
- // and waiting for 'end' on the entry at READENTRY
- //
- // ignored entries get .resume() called on them straight away
- import { EventEmitter as EE } from 'events';
- import { BrotliDecompress, Unzip } from 'minizlib';
- import { Yallist } from 'yallist';
- import { Header } from './header.js';
- import { Pax } from './pax.js';
- import { ReadEntry } from './read-entry.js';
- import { warnMethod, } from './warn-method.js';
// Fallback for the `maxMetaEntrySize` option. A meta entry whose size is
// larger than the limit is ignored instead of being parsed.
const maxMetaEntrySize = 1024 ** 2;

// Leading bytes compared against the first write() to detect gzip input.
const gzipHeader = Buffer.from([0x1f, 0x8b]);

// Default entry filter: accepts every entry.
const noop = () => true;

// ---- keys for the parser's internal data fields ----
const STATE = Symbol('state');
const QUEUE = Symbol('queue');
const BUFFER = Symbol('buffer');
const READENTRY = Symbol('readEntry');
const WRITEENTRY = Symbol('writeEntry');
const META = Symbol('meta');
const EX = Symbol('extendedHeader');
const GEX = Symbol('globalExtendedHeader');
const UNZIP = Symbol('unzip');

// ---- keys for the parser's internal boolean flags ----
const ENDED = Symbol('ended');
const EMITTEDEND = Symbol('emittedEnd');
const ABORTED = Symbol('aborted');
const WRITING = Symbol('writing');
const CONSUMING = Symbol('consuming');
const SAW_VALID_ENTRY = Symbol('sawValidEntry');
const SAW_NULL_BLOCK = Symbol('sawNullBlock');
const SAW_EOF = Symbol('sawEOF');

// ---- keys for the parser's internal methods ----
const NEXTENTRY = Symbol('nextEntry');
const PROCESSENTRY = Symbol('processEntry');
const EMIT = Symbol('emit');
const EMITMETA = Symbol('emitMeta');
const CONSUMECHUNK = Symbol('consumeChunk');
const CONSUMECHUNKSUB = Symbol('consumeChunkSub');
const CONSUMEHEADER = Symbol('consumeHeader');
const CONSUMEBODY = Symbol('consumeBody');
const CONSUMEMETA = Symbol('consumeMeta');
const BUFFERCONCAT = Symbol('bufferConcat');
const MAYBEEND = Symbol('maybeEnd');
const CLOSESTREAM = Symbol('closeStream');

// ---- internal event name: emitted once parsing has completed ----
const DONE = Symbol('onDone');
/**
 * Streaming tar parser exposed as a writable, EventEmitter-based sink.
 *
 * Data is handed in through write()/end(). Input whose first two bytes
 * match `gzipHeader` is routed through an `Unzip` stream, and input
 * flagged (or detected) as brotli through a `BrotliDecompress` stream,
 * before being parsed. The parser walks the data in 512-byte steps and
 * moves between the states 'begin', 'header', 'body', 'meta' and 'ignore'.
 *
 * Events emitted by this class: 'entry', 'ignoredEntry', 'meta',
 * 'nullBlock', 'eof', 'drain', 'abort', 'close' and, unless `opt.ondone`
 * is supplied, 'prefinish', 'finish' and 'end'.
 *
 * Constructor options read from `opt`:
 *  - file: archive file name; a '.tar.br' or '.tbr' suffix makes brotli
 *    a candidate that is confirmed by sniffing the first block
 *  - strict: coerced to boolean and stored on `this.strict`
 *  - maxMetaEntrySize: size limit for meta entries (falls back to the
 *    module default when falsy)
 *  - filter: `(path, entry) => boolean`; a falsy result ignores the entry
 *  - gzip / brotli: explicit compression hints
 *  - ondone: listener that replaces the default finish/end emission
 *  - onwarn: listener attached to 'warn'
 *  - onReadEntry: listener attached to 'entry'
 */
export class Parser extends EE {
    file;
    strict;
    maxMetaEntrySize;
    filter;
    // true / false once known; undefined while brotli is only a possibility
    brotli;
    writable = true;
    readable = false;
    // entries and deferred [event, ...args] tuples not yet emitted
    [QUEUE] = new Yallist();
    // bytes received but not yet parsed
    [BUFFER];
    // entry most recently emitted via 'entry' whose 'end' we wait for
    [READENTRY];
    // entry currently receiving body bytes
    [WRITEENTRY];
    [STATE] = 'begin';
    // decoded text of the most recently completed meta entry
    [META] = '';
    [EX];
    [GEX];
    [ENDED] = false;
    // undefined until sniffed, false for plain input, else a decompressor
    [UNZIP];
    [ABORTED] = false;
    // undefined until a header has been examined, then false/true
    [SAW_VALID_ENTRY];
    [SAW_NULL_BLOCK] = false;
    [SAW_EOF] = false;
    [WRITING] = false;
    [CONSUMING] = false;
    [EMITTEDEND] = false;
    // Raw copies of the body chunks of the meta entry being collected.
    // They are decoded in a single pass once the entry is complete, so a
    // multi-byte character split across chunks is decoded correctly.
    #metaChunks = [];

    /**
     * @param {object} [opt] parser options; see the class description.
     */
    constructor(opt = {}) {
        super();
        this.file = opt.file || '';
        // these BADARCHIVE errors can't be detected early. listen on DONE.
        // This listener is registered first so the warning is raised
        // before any completion listener registered below runs.
        this.on(DONE, () => {
            if (this[STATE] === 'begin' ||
                this[SAW_VALID_ENTRY] === false) {
                // either less than 1 block of data, or all entries were invalid.
                // Either way, probably not even a tarball.
                this.warn('TAR_BAD_ARCHIVE', 'Unrecognized archive format');
            }
        });
        if (opt.ondone) {
            this.on(DONE, opt.ondone);
        }
        else {
            this.on(DONE, () => {
                this.emit('prefinish');
                this.emit('finish');
                this.emit('end');
            });
        }
        this.strict = !!opt.strict;
        this.maxMetaEntrySize = opt.maxMetaEntrySize || maxMetaEntrySize;
        this.filter = typeof opt.filter === 'function' ? opt.filter : noop;
        // Unlike gzip, brotli doesn't have any magic bytes to identify it
        // Users need to explicitly tell us they're extracting a brotli file
        // Or we infer from the file extension
        const isTBR = opt.file &&
            (opt.file.endsWith('.tar.br') || opt.file.endsWith('.tbr'));
        // if it's a tbr file it MIGHT be brotli, but we don't know until
        // we look at it and verify it's not a valid tar file.
        this.brotli =
            !opt.gzip && opt.brotli !== undefined ? opt.brotli
                : isTBR ? undefined
                    : false;
        // have to set this so that streams are ok piping into it
        this.on('end', () => this[CLOSESTREAM]());
        if (typeof opt.onwarn === 'function') {
            this.on('warn', opt.onwarn);
        }
        if (typeof opt.onReadEntry === 'function') {
            this.on('entry', opt.onReadEntry);
        }
    }

    /**
     * Report a problem by delegating to `warnMethod`.
     *
     * @param {string} code warning code, e.g. 'TAR_ENTRY_INVALID'
     * @param {string|Error} message description or underlying error
     * @param {object} [data] extra fields passed along with the warning
     */
    warn(code, message, data = {}) {
        warnMethod(this, code, message, data);
    }

    /**
     * Parse the header block found at `position` in `chunk`, validate it,
     * and set up the resulting entry and the next parser state.
     *
     * @param {Buffer} chunk data containing the header block
     * @param {number} position offset of the header block within `chunk`
     */
    [CONSUMEHEADER](chunk, position) {
        if (this[SAW_VALID_ENTRY] === undefined) {
            this[SAW_VALID_ENTRY] = false;
        }
        let header;
        try {
            header = new Header(chunk, position, this[EX], this[GEX]);
        }
        catch (er) {
            return this.warn('TAR_ENTRY_INVALID', er);
        }
        if (header.nullBlock) {
            if (this[SAW_NULL_BLOCK]) {
                // second consecutive null block
                this[SAW_EOF] = true;
                // ending an archive with no entries. pointless, but legal.
                if (this[STATE] === 'begin') {
                    this[STATE] = 'header';
                }
                this[EMIT]('eof');
            }
            else {
                this[SAW_NULL_BLOCK] = true;
                this[EMIT]('nullBlock');
            }
            return;
        }
        this[SAW_NULL_BLOCK] = false;
        if (!header.cksumValid) {
            this.warn('TAR_ENTRY_INVALID', 'checksum failure', { header });
            return;
        }
        if (!header.path) {
            this.warn('TAR_ENTRY_INVALID', 'path is required', { header });
            return;
        }
        const type = header.type;
        const isLink = /^(Symbolic)?Link$/.test(type);
        if (isLink && !header.linkpath) {
            this.warn('TAR_ENTRY_INVALID', 'linkpath required', {
                header,
            });
            return;
        }
        if (!isLink &&
            !/^(Global)?ExtendedHeader$/.test(type) &&
            header.linkpath) {
            this.warn('TAR_ENTRY_INVALID', 'linkpath forbidden', {
                header,
            });
            return;
        }
        const entry = (this[WRITEENTRY] = new ReadEntry(header, this[EX], this[GEX]));
        // we do this for meta & ignored entries as well, because they
        // are still valid tar, or else we wouldn't know to ignore them
        if (!this[SAW_VALID_ENTRY]) {
            if (entry.remain) {
                // this might be the one! Only known once the body has ended.
                entry.on('end', () => {
                    if (!entry.invalid) {
                        this[SAW_VALID_ENTRY] = true;
                    }
                });
            }
            else {
                this[SAW_VALID_ENTRY] = true;
            }
        }
        if (entry.meta) {
            if (entry.size > this.maxMetaEntrySize) {
                entry.ignore = true;
                this[EMIT]('ignoredEntry', entry);
                this[STATE] = 'ignore';
                entry.resume();
            }
            else if (entry.size > 0) {
                this[META] = '';
                const chunks = (this.#metaChunks = []);
                // Keep a copy of the raw bytes rather than decoding chunk by
                // chunk: decoding each chunk on its own corrupts a multi-byte
                // character that straddles a chunk boundary. A copy is taken
                // because the caller may reuse its buffer after write().
                entry.on('data', c => chunks.push(Buffer.from(c)));
                this[STATE] = 'meta';
            }
            return;
        }
        // a non-meta entry consumes the pending extended header
        this[EX] = undefined;
        entry.ignore = entry.ignore || !this.filter(entry.path, entry);
        if (entry.ignore) {
            // probably valid, just not something we care about
            this[EMIT]('ignoredEntry', entry);
            this[STATE] = entry.remain ? 'ignore' : 'header';
            entry.resume();
            return;
        }
        if (entry.remain) {
            this[STATE] = 'body';
        }
        else {
            this[STATE] = 'header';
            entry.end();
        }
        this[QUEUE].push(entry);
        // nothing is being read right now, so start emitting immediately
        if (!this[READENTRY]) {
            this[NEXTENTRY]();
        }
    }

    /** Emit 'close' on a later microtask. */
    [CLOSESTREAM]() {
        queueMicrotask(() => this.emit('close'));
    }

    /**
     * Handle one item shifted off the queue.
     *
     * @param {ReadEntry|Array|undefined} entry an entry to emit, a
     *   deferred `[event, ...args]` tuple, or undefined when the queue is
     *   empty
     * @returns {boolean} true if the caller should keep processing the
     *   queue, false if it should stop (queue empty, or waiting for the
     *   emitted entry to end)
     */
    [PROCESSENTRY](entry) {
        if (!entry) {
            this[READENTRY] = undefined;
            return false;
        }
        if (Array.isArray(entry)) {
            const [ev, ...args] = entry;
            this.emit(ev, ...args);
            return true;
        }
        this[READENTRY] = entry;
        this.emit('entry', entry);
        if (!entry.emittedEnd) {
            entry.on('end', () => this[NEXTENTRY]());
            return false;
        }
        return true;
    }

    /**
     * Drain the queue until it is empty or an emitted entry has not ended
     * yet, then decide whether 'drain' can be emitted.
     */
    [NEXTENTRY]() {
        do { } while (this[PROCESSENTRY](this[QUEUE].shift()));
        if (!this[QUEUE].length) {
            // At this point, there's nothing in the queue, but we may have an
            // entry which is being consumed (readEntry).
            // If we don't, then we definitely can handle more data.
            // If we do, and either it's flowing, or it has never had any data
            // written to it, then it needs more.
            // The only other possibility is that it has returned false from a
            // write() call, so we wait for the next drain to continue.
            const re = this[READENTRY];
            const drainNow = !re || re.flowing || re.size === re.remain;
            if (drainNow) {
                if (!this[WRITING]) {
                    this.emit('drain');
                }
            }
            else {
                re.once('drain', () => this.emit('drain'));
            }
        }
    }

    /**
     * Write body bytes from `chunk` into the current write entry.
     *
     * @param {Buffer} chunk data being parsed
     * @param {number} position offset in `chunk` where the body bytes start
     * @returns {number} number of bytes taken from `chunk`
     * @throws {Error} if there is no current write entry
     */
    [CONSUMEBODY](chunk, position) {
        // write up to but no more than writeEntry.blockRemain
        const entry = this[WRITEENTRY];
        /* c8 ignore start */
        if (!entry) {
            throw new Error('attempt to consume body without entry??');
        }
        const br = entry.blockRemain ?? 0;
        /* c8 ignore stop */
        // pass the chunk through untouched when all of it belongs to the body
        const c = br >= chunk.length && position === 0 ?
            chunk
            : chunk.subarray(position, position + br);
        entry.write(c);
        if (!entry.blockRemain) {
            this[STATE] = 'header';
            this[WRITEENTRY] = undefined;
            entry.end();
        }
        return c.length;
    }

    /**
     * Like CONSUMEBODY, but when the meta entry has been fully written its
     * collected content is processed via EMITMETA.
     *
     * @param {Buffer} chunk data being parsed
     * @param {number} position offset in `chunk` where the body bytes start
     * @returns {number} number of bytes taken from `chunk`
     */
    [CONSUMEMETA](chunk, position) {
        const entry = this[WRITEENTRY];
        const ret = this[CONSUMEBODY](chunk, position);
        // if we finished, then the entry is reset
        if (!this[WRITEENTRY] && entry) {
            this[EMITMETA](entry);
        }
        return ret;
    }

    /**
     * Emit an event immediately when nothing is queued or being read;
     * otherwise queue it so it is emitted in order after earlier entries.
     *
     * @param {string|symbol} ev event name
     * @param {*} [data] first event argument
     * @param {*} [extra] second event argument
     */
    [EMIT](ev, data, extra) {
        if (!this[QUEUE].length && !this[READENTRY]) {
            this.emit(ev, data, extra);
        }
        else {
            this[QUEUE].push([ev, data, extra]);
        }
    }

    /**
     * Decode the collected body of a finished meta entry, emit it as
     * 'meta', and fold it into the extended-header state used for the
     * following entries.
     *
     * @param {ReadEntry} entry the meta entry that just completed
     * @throws {Error} if the entry type is not a known meta type
     */
    [EMITMETA](entry) {
        // decode all bytes at once so split multi-byte characters survive
        const meta = Buffer.concat(this.#metaChunks).toString();
        this.#metaChunks = [];
        this[META] = meta;
        this[EMIT]('meta', meta);
        switch (entry.type) {
            case 'ExtendedHeader':
            case 'OldExtendedHeader':
                this[EX] = Pax.parse(meta, this[EX], false);
                break;
            case 'GlobalExtendedHeader':
                this[GEX] = Pax.parse(meta, this[GEX], true);
                break;
            case 'NextFileHasLongPath':
            case 'OldGnuLongPath': {
                const ex = this[EX] ?? Object.create(null);
                this[EX] = ex;
                // keep only the text before the first NUL
                ex.path = meta.replace(/\0.*/, '');
                break;
            }
            case 'NextFileHasLongLinkpath': {
                const ex = this[EX] ?? Object.create(null);
                this[EX] = ex;
                ex.linkpath = meta.replace(/\0.*/, '');
                break;
            }
            /* c8 ignore start */
            default:
                throw new Error('unknown meta: ' + entry.type);
            /* c8 ignore stop */
        }
    }

    /**
     * Stop parsing: mark the parser aborted, emit 'abort', and raise a
     * non-recoverable 'TAR_ABORT' warning.
     *
     * @param {Error} error reason for the abort
     */
    abort(error) {
        this[ABORTED] = true;
        this.emit('abort', error);
        // always throws, even in non-strict mode
        this.warn('TAR_ABORT', error, { recoverable: false });
    }

    /**
     * Feed archive data to the parser.
     *
     * The first bytes are sniffed to decide whether the data must pass
     * through a gzip or brotli decompressor; until enough bytes are
     * available for that decision the data is only buffered.
     *
     * @param {Buffer|string} chunk data to parse
     * @param {string|Function} [encoding] string encoding, or the callback
     * @param {Function} [cb] called once the chunk has been handled
     * @returns {boolean} false when the caller should wait for 'drain'
     *   (or the parser was aborted), true otherwise
     */
    write(chunk, encoding, cb) {
        if (typeof encoding === 'function') {
            cb = encoding;
            encoding = undefined;
        }
        if (typeof chunk === 'string') {
            chunk = Buffer.from(chunk, 
            /* c8 ignore next */
            typeof encoding === 'string' ? encoding : 'utf8');
        }
        if (this[ABORTED]) {
            /* c8 ignore next */
            cb?.();
            return false;
        }
        // first write, might be gzipped
        const needSniff = this[UNZIP] === undefined ||
            (this.brotli === undefined && this[UNZIP] === false);
        if (needSniff && chunk) {
            if (this[BUFFER]) {
                chunk = Buffer.concat([this[BUFFER], chunk]);
                this[BUFFER] = undefined;
            }
            if (chunk.length < gzipHeader.length) {
                // not enough bytes to compare against the gzip header yet
                this[BUFFER] = chunk;
                /* c8 ignore next */
                cb?.();
                return true;
            }
            // look for gzip header
            for (let i = 0; this[UNZIP] === undefined && i < gzipHeader.length; i++) {
                if (chunk[i] !== gzipHeader[i]) {
                    this[UNZIP] = false;
                }
            }
            const maybeBrotli = this.brotli === undefined;
            if (this[UNZIP] === false && maybeBrotli) {
                // read the first header to see if it's a valid tar file. If so,
                // we can safely assume that it's not actually brotli, despite the
                // .tbr or .tar.br file extension.
                // if we ended before getting a full chunk, yes, def brotli
                if (chunk.length < 512) {
                    if (this[ENDED]) {
                        this.brotli = true;
                    }
                    else {
                        this[BUFFER] = chunk;
                        /* c8 ignore next */
                        cb?.();
                        return true;
                    }
                }
                else {
                    // if it's tar, it's pretty reliably not brotli, chances of
                    // that happening are astronomical.
                    try {
                        new Header(chunk.subarray(0, 512));
                        this.brotli = false;
                    }
                    catch (_) {
                        this.brotli = true;
                    }
                }
            }
            if (this[UNZIP] === undefined ||
                (this[UNZIP] === false && this.brotli)) {
                // ENDED is handed over to the decompressor: it is set again
                // by the decompressor's 'end' handler below.
                const ended = this[ENDED];
                this[ENDED] = false;
                this[UNZIP] =
                    this[UNZIP] === undefined ?
                        new Unzip({})
                        : new BrotliDecompress({});
                this[UNZIP].on('data', chunk => this[CONSUMECHUNK](chunk));
                this[UNZIP].on('error', er => this.abort(er));
                this[UNZIP].on('end', () => {
                    this[ENDED] = true;
                    this[CONSUMECHUNK]();
                });
                this[WRITING] = true;
                const ret = !!this[UNZIP][ended ? 'end' : 'write'](chunk);
                this[WRITING] = false;
                cb?.();
                return ret;
            }
        }
        this[WRITING] = true;
        if (this[UNZIP]) {
            this[UNZIP].write(chunk);
        }
        else {
            this[CONSUMECHUNK](chunk);
        }
        this[WRITING] = false;
        // return false if there's a queue, or if the current entry isn't flowing
        const ret = this[QUEUE].length ? false
            : this[READENTRY] ? this[READENTRY].flowing
                : true;
        // if we have no queue, then that means a clogged READENTRY
        if (!ret && !this[QUEUE].length) {
            this[READENTRY]?.once('drain', () => this.emit('drain'));
        }
        /* c8 ignore next */
        cb?.();
        return ret;
    }

    /**
     * Append `c` to the pending buffer, unless the parser was aborted.
     *
     * @param {Buffer} [c] bytes to append
     */
    [BUFFERCONCAT](c) {
        if (c && !this[ABORTED]) {
            this[BUFFER] =
                this[BUFFER] ? Buffer.concat([this[BUFFER], c]) : c;
        }
    }

    /**
     * Finish parsing if the input has ended, nothing is being consumed,
     * the parser was not aborted, and completion was not already signalled.
     * An entry still expecting body bytes is reported as truncated, given
     * whatever bytes remain buffered, and ended before DONE is emitted.
     */
    [MAYBEEND]() {
        if (this[ENDED] &&
            !this[EMITTEDEND] &&
            !this[ABORTED] &&
            !this[CONSUMING]) {
            this[EMITTEDEND] = true;
            const entry = this[WRITEENTRY];
            if (entry && entry.blockRemain) {
                // truncated, likely a damaged file
                const have = this[BUFFER] ? this[BUFFER].length : 0;
                this.warn('TAR_BAD_ARCHIVE', `Truncated input (needed ${entry.blockRemain} more bytes, only ${have} available)`, { entry });
                if (this[BUFFER]) {
                    entry.write(this[BUFFER]);
                }
                entry.end();
            }
            this[EMIT](DONE);
        }
    }

    /**
     * Entry point for (decompressed) data. Re-entrant calls made while a
     * chunk is being consumed only append to the pending buffer; the outer
     * call keeps consuming until less than one full block is buffered.
     *
     * @param {Buffer} [chunk] data to parse; omitted to signal that the
     *   input may have ended
     */
    [CONSUMECHUNK](chunk) {
        if (this[CONSUMING] && chunk) {
            this[BUFFERCONCAT](chunk);
        }
        else if (!chunk && !this[BUFFER]) {
            this[MAYBEEND]();
        }
        else if (chunk) {
            this[CONSUMING] = true;
            if (this[BUFFER]) {
                this[BUFFERCONCAT](chunk);
                const c = this[BUFFER];
                this[BUFFER] = undefined;
                this[CONSUMECHUNKSUB](c);
            }
            else {
                this[CONSUMECHUNKSUB](chunk);
            }
            // data written re-entrantly during the call above lands in BUFFER
            while (this[BUFFER] &&
                this[BUFFER].length >= 512 &&
                !this[ABORTED] &&
                !this[SAW_EOF]) {
                const c = this[BUFFER];
                this[BUFFER] = undefined;
                this[CONSUMECHUNKSUB](c);
            }
            this[CONSUMING] = false;
        }
        if (!this[BUFFER] || this[ENDED]) {
            this[MAYBEEND]();
        }
    }

    /**
     * Consume `chunk` according to the current state for as long as at
     * least 512 bytes remain, then stash any unconsumed remainder in front
     * of whatever was buffered in the meantime.
     *
     * @param {Buffer} chunk data to parse
     * @throws {Error} if the parser is in an unknown state
     */
    [CONSUMECHUNKSUB](chunk) {
        // we know that we are in CONSUMING mode, so anything written goes into
        // the buffer. Advance the position and put any remainder in the buffer.
        let position = 0;
        const length = chunk.length;
        while (position + 512 <= length &&
            !this[ABORTED] &&
            !this[SAW_EOF]) {
            switch (this[STATE]) {
                case 'begin':
                case 'header':
                    this[CONSUMEHEADER](chunk, position);
                    position += 512;
                    break;
                case 'ignore':
                case 'body':
                    position += this[CONSUMEBODY](chunk, position);
                    break;
                case 'meta':
                    position += this[CONSUMEMETA](chunk, position);
                    break;
                /* c8 ignore start */
                default:
                    throw new Error('invalid state: ' + this[STATE]);
                /* c8 ignore stop */
            }
        }
        if (position < length) {
            if (this[BUFFER]) {
                // the remainder precedes data buffered while we were consuming
                this[BUFFER] = Buffer.concat([
                    chunk.subarray(position),
                    this[BUFFER],
                ]);
            }
            else {
                this[BUFFER] = chunk.subarray(position);
            }
        }
    }

    /**
     * Signal the end of input, optionally writing a final chunk first.
     *
     * @param {Buffer|string|Function} [chunk] final data, or the callback
     * @param {string|Function} [encoding] string encoding, or the callback
     * @param {Function} [cb] registered as a one-time 'finish' listener
     * @returns {this}
     */
    end(chunk, encoding, cb) {
        if (typeof chunk === 'function') {
            cb = chunk;
            encoding = undefined;
            chunk = undefined;
        }
        if (typeof encoding === 'function') {
            cb = encoding;
            encoding = undefined;
        }
        if (typeof chunk === 'string') {
            chunk = Buffer.from(chunk, encoding);
        }
        if (cb) {
            this.once('finish', cb);
        }
        if (!this[ABORTED]) {
            if (this[UNZIP]) {
                /* c8 ignore start */
                if (chunk) {
                    this[UNZIP].write(chunk);
                }
                /* c8 ignore stop */
                this[UNZIP].end();
            }
            else {
                this[ENDED] = true;
                // brotli still undecided: push an empty chunk through write()
                // so the sniffing logic can settle it now that input ended
                if (this.brotli === undefined) {
                    chunk = chunk || Buffer.alloc(0);
                }
                if (chunk) {
                    this.write(chunk);
                }
                this[MAYBEEND]();
            }
        }
        return this;
    }
}
- //# sourceMappingURL=parse.js.map
|