5d1d4162fdba654cac6f649201bfe89b0208a355bfeb84321f0069b84c2f9736493d39c6b38c4af13bf9680429a3afb72dac022b9ba1c58dac4472abe4409f 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595
  1. // this[BUFFER] is the remainder of a chunk if we're waiting for
  2. // the full 512 bytes of a header to come in. We will Buffer.concat()
  3. // it to the next write(), which is a mem copy, but a small one.
  4. //
  5. // this[QUEUE] is a Yallist of entries that haven't been emitted
  6. // yet. This can only get filled up if the user keeps write()ing after
  7. // a write() returns false, or does a write() with more than one entry.
  8. //
  9. // We don't buffer chunks, we always parse them and either create an
  10. // entry, or push it into the active entry. The ReadEntry class knows
  11. // to throw data away if .ignore=true
  12. //
  13. // Shift entry off the buffer when it emits 'end', and emit 'entry' for
  14. // the next one in the list.
  15. //
  16. // At any time, we're pushing body chunks into the entry at WRITEENTRY,
  17. // and waiting for 'end' on the entry at READENTRY
  18. //
  19. // ignored entries get .resume() called on them straight away
  20. import { EventEmitter as EE } from 'events';
  21. import { BrotliDecompress, Unzip } from 'minizlib';
  22. import { Yallist } from 'yallist';
  23. import { Header } from './header.js';
  24. import { Pax } from './pax.js';
  25. import { ReadEntry } from './read-entry.js';
  26. import { warnMethod, } from './warn-method.js';
  27. const maxMetaEntrySize = 1024 * 1024;
  28. const gzipHeader = Buffer.from([0x1f, 0x8b]);
  29. const STATE = Symbol('state');
  30. const WRITEENTRY = Symbol('writeEntry');
  31. const READENTRY = Symbol('readEntry');
  32. const NEXTENTRY = Symbol('nextEntry');
  33. const PROCESSENTRY = Symbol('processEntry');
  34. const EX = Symbol('extendedHeader');
  35. const GEX = Symbol('globalExtendedHeader');
  36. const META = Symbol('meta');
  37. const EMITMETA = Symbol('emitMeta');
  38. const BUFFER = Symbol('buffer');
  39. const QUEUE = Symbol('queue');
  40. const ENDED = Symbol('ended');
  41. const EMITTEDEND = Symbol('emittedEnd');
  42. const EMIT = Symbol('emit');
  43. const UNZIP = Symbol('unzip');
  44. const CONSUMECHUNK = Symbol('consumeChunk');
  45. const CONSUMECHUNKSUB = Symbol('consumeChunkSub');
  46. const CONSUMEBODY = Symbol('consumeBody');
  47. const CONSUMEMETA = Symbol('consumeMeta');
  48. const CONSUMEHEADER = Symbol('consumeHeader');
  49. const CONSUMING = Symbol('consuming');
  50. const BUFFERCONCAT = Symbol('bufferConcat');
  51. const MAYBEEND = Symbol('maybeEnd');
  52. const WRITING = Symbol('writing');
  53. const ABORTED = Symbol('aborted');
  54. const DONE = Symbol('onDone');
  55. const SAW_VALID_ENTRY = Symbol('sawValidEntry');
  56. const SAW_NULL_BLOCK = Symbol('sawNullBlock');
  57. const SAW_EOF = Symbol('sawEOF');
  58. const CLOSESTREAM = Symbol('closeStream');
  59. const noop = () => true;
  60. export class Parser extends EE {
  61. file;
  62. strict;
  63. maxMetaEntrySize;
  64. filter;
  65. brotli;
  66. writable = true;
  67. readable = false;
  68. [QUEUE] = new Yallist();
  69. [BUFFER];
  70. [READENTRY];
  71. [WRITEENTRY];
  72. [STATE] = 'begin';
  73. [META] = '';
  74. [EX];
  75. [GEX];
  76. [ENDED] = false;
  77. [UNZIP];
  78. [ABORTED] = false;
  79. [SAW_VALID_ENTRY];
  80. [SAW_NULL_BLOCK] = false;
  81. [SAW_EOF] = false;
  82. [WRITING] = false;
  83. [CONSUMING] = false;
  84. [EMITTEDEND] = false;
  85. constructor(opt = {}) {
  86. super();
  87. this.file = opt.file || '';
  88. // these BADARCHIVE errors can't be detected early. listen on DONE.
  89. this.on(DONE, () => {
  90. if (this[STATE] === 'begin' ||
  91. this[SAW_VALID_ENTRY] === false) {
  92. // either less than 1 block of data, or all entries were invalid.
  93. // Either way, probably not even a tarball.
  94. this.warn('TAR_BAD_ARCHIVE', 'Unrecognized archive format');
  95. }
  96. });
  97. if (opt.ondone) {
  98. this.on(DONE, opt.ondone);
  99. }
  100. else {
  101. this.on(DONE, () => {
  102. this.emit('prefinish');
  103. this.emit('finish');
  104. this.emit('end');
  105. });
  106. }
  107. this.strict = !!opt.strict;
  108. this.maxMetaEntrySize = opt.maxMetaEntrySize || maxMetaEntrySize;
  109. this.filter = typeof opt.filter === 'function' ? opt.filter : noop;
  110. // Unlike gzip, brotli doesn't have any magic bytes to identify it
  111. // Users need to explicitly tell us they're extracting a brotli file
  112. // Or we infer from the file extension
  113. const isTBR = opt.file &&
  114. (opt.file.endsWith('.tar.br') || opt.file.endsWith('.tbr'));
  115. // if it's a tbr file it MIGHT be brotli, but we don't know until
  116. // we look at it and verify it's not a valid tar file.
  117. this.brotli =
  118. !opt.gzip && opt.brotli !== undefined ? opt.brotli
  119. : isTBR ? undefined
  120. : false;
  121. // have to set this so that streams are ok piping into it
  122. this.on('end', () => this[CLOSESTREAM]());
  123. if (typeof opt.onwarn === 'function') {
  124. this.on('warn', opt.onwarn);
  125. }
  126. if (typeof opt.onReadEntry === 'function') {
  127. this.on('entry', opt.onReadEntry);
  128. }
  129. }
  130. warn(code, message, data = {}) {
  131. warnMethod(this, code, message, data);
  132. }
  133. [CONSUMEHEADER](chunk, position) {
  134. if (this[SAW_VALID_ENTRY] === undefined) {
  135. this[SAW_VALID_ENTRY] = false;
  136. }
  137. let header;
  138. try {
  139. header = new Header(chunk, position, this[EX], this[GEX]);
  140. }
  141. catch (er) {
  142. return this.warn('TAR_ENTRY_INVALID', er);
  143. }
  144. if (header.nullBlock) {
  145. if (this[SAW_NULL_BLOCK]) {
  146. this[SAW_EOF] = true;
  147. // ending an archive with no entries. pointless, but legal.
  148. if (this[STATE] === 'begin') {
  149. this[STATE] = 'header';
  150. }
  151. this[EMIT]('eof');
  152. }
  153. else {
  154. this[SAW_NULL_BLOCK] = true;
  155. this[EMIT]('nullBlock');
  156. }
  157. }
  158. else {
  159. this[SAW_NULL_BLOCK] = false;
  160. if (!header.cksumValid) {
  161. this.warn('TAR_ENTRY_INVALID', 'checksum failure', { header });
  162. }
  163. else if (!header.path) {
  164. this.warn('TAR_ENTRY_INVALID', 'path is required', { header });
  165. }
  166. else {
  167. const type = header.type;
  168. if (/^(Symbolic)?Link$/.test(type) && !header.linkpath) {
  169. this.warn('TAR_ENTRY_INVALID', 'linkpath required', {
  170. header,
  171. });
  172. }
  173. else if (!/^(Symbolic)?Link$/.test(type) &&
  174. !/^(Global)?ExtendedHeader$/.test(type) &&
  175. header.linkpath) {
  176. this.warn('TAR_ENTRY_INVALID', 'linkpath forbidden', {
  177. header,
  178. });
  179. }
  180. else {
  181. const entry = (this[WRITEENTRY] = new ReadEntry(header, this[EX], this[GEX]));
  182. // we do this for meta & ignored entries as well, because they
  183. // are still valid tar, or else we wouldn't know to ignore them
  184. if (!this[SAW_VALID_ENTRY]) {
  185. if (entry.remain) {
  186. // this might be the one!
  187. const onend = () => {
  188. if (!entry.invalid) {
  189. this[SAW_VALID_ENTRY] = true;
  190. }
  191. };
  192. entry.on('end', onend);
  193. }
  194. else {
  195. this[SAW_VALID_ENTRY] = true;
  196. }
  197. }
  198. if (entry.meta) {
  199. if (entry.size > this.maxMetaEntrySize) {
  200. entry.ignore = true;
  201. this[EMIT]('ignoredEntry', entry);
  202. this[STATE] = 'ignore';
  203. entry.resume();
  204. }
  205. else if (entry.size > 0) {
  206. this[META] = '';
  207. entry.on('data', c => (this[META] += c));
  208. this[STATE] = 'meta';
  209. }
  210. }
  211. else {
  212. this[EX] = undefined;
  213. entry.ignore =
  214. entry.ignore || !this.filter(entry.path, entry);
  215. if (entry.ignore) {
  216. // probably valid, just not something we care about
  217. this[EMIT]('ignoredEntry', entry);
  218. this[STATE] = entry.remain ? 'ignore' : 'header';
  219. entry.resume();
  220. }
  221. else {
  222. if (entry.remain) {
  223. this[STATE] = 'body';
  224. }
  225. else {
  226. this[STATE] = 'header';
  227. entry.end();
  228. }
  229. if (!this[READENTRY]) {
  230. this[QUEUE].push(entry);
  231. this[NEXTENTRY]();
  232. }
  233. else {
  234. this[QUEUE].push(entry);
  235. }
  236. }
  237. }
  238. }
  239. }
  240. }
  241. }
  242. [CLOSESTREAM]() {
  243. queueMicrotask(() => this.emit('close'));
  244. }
  245. [PROCESSENTRY](entry) {
  246. let go = true;
  247. if (!entry) {
  248. this[READENTRY] = undefined;
  249. go = false;
  250. }
  251. else if (Array.isArray(entry)) {
  252. const [ev, ...args] = entry;
  253. this.emit(ev, ...args);
  254. }
  255. else {
  256. this[READENTRY] = entry;
  257. this.emit('entry', entry);
  258. if (!entry.emittedEnd) {
  259. entry.on('end', () => this[NEXTENTRY]());
  260. go = false;
  261. }
  262. }
  263. return go;
  264. }
  265. [NEXTENTRY]() {
  266. do { } while (this[PROCESSENTRY](this[QUEUE].shift()));
  267. if (!this[QUEUE].length) {
  268. // At this point, there's nothing in the queue, but we may have an
  269. // entry which is being consumed (readEntry).
  270. // If we don't, then we definitely can handle more data.
  271. // If we do, and either it's flowing, or it has never had any data
  272. // written to it, then it needs more.
  273. // The only other possibility is that it has returned false from a
  274. // write() call, so we wait for the next drain to continue.
  275. const re = this[READENTRY];
  276. const drainNow = !re || re.flowing || re.size === re.remain;
  277. if (drainNow) {
  278. if (!this[WRITING]) {
  279. this.emit('drain');
  280. }
  281. }
  282. else {
  283. re.once('drain', () => this.emit('drain'));
  284. }
  285. }
  286. }
  287. [CONSUMEBODY](chunk, position) {
  288. // write up to but no more than writeEntry.blockRemain
  289. const entry = this[WRITEENTRY];
  290. /* c8 ignore start */
  291. if (!entry) {
  292. throw new Error('attempt to consume body without entry??');
  293. }
  294. const br = entry.blockRemain ?? 0;
  295. /* c8 ignore stop */
  296. const c = br >= chunk.length && position === 0 ?
  297. chunk
  298. : chunk.subarray(position, position + br);
  299. entry.write(c);
  300. if (!entry.blockRemain) {
  301. this[STATE] = 'header';
  302. this[WRITEENTRY] = undefined;
  303. entry.end();
  304. }
  305. return c.length;
  306. }
  307. [CONSUMEMETA](chunk, position) {
  308. const entry = this[WRITEENTRY];
  309. const ret = this[CONSUMEBODY](chunk, position);
  310. // if we finished, then the entry is reset
  311. if (!this[WRITEENTRY] && entry) {
  312. this[EMITMETA](entry);
  313. }
  314. return ret;
  315. }
  316. [EMIT](ev, data, extra) {
  317. if (!this[QUEUE].length && !this[READENTRY]) {
  318. this.emit(ev, data, extra);
  319. }
  320. else {
  321. this[QUEUE].push([ev, data, extra]);
  322. }
  323. }
  324. [EMITMETA](entry) {
  325. this[EMIT]('meta', this[META]);
  326. switch (entry.type) {
  327. case 'ExtendedHeader':
  328. case 'OldExtendedHeader':
  329. this[EX] = Pax.parse(this[META], this[EX], false);
  330. break;
  331. case 'GlobalExtendedHeader':
  332. this[GEX] = Pax.parse(this[META], this[GEX], true);
  333. break;
  334. case 'NextFileHasLongPath':
  335. case 'OldGnuLongPath': {
  336. const ex = this[EX] ?? Object.create(null);
  337. this[EX] = ex;
  338. ex.path = this[META].replace(/\0.*/, '');
  339. break;
  340. }
  341. case 'NextFileHasLongLinkpath': {
  342. const ex = this[EX] || Object.create(null);
  343. this[EX] = ex;
  344. ex.linkpath = this[META].replace(/\0.*/, '');
  345. break;
  346. }
  347. /* c8 ignore start */
  348. default:
  349. throw new Error('unknown meta: ' + entry.type);
  350. /* c8 ignore stop */
  351. }
  352. }
  353. abort(error) {
  354. this[ABORTED] = true;
  355. this.emit('abort', error);
  356. // always throws, even in non-strict mode
  357. this.warn('TAR_ABORT', error, { recoverable: false });
  358. }
  359. write(chunk, encoding, cb) {
  360. if (typeof encoding === 'function') {
  361. cb = encoding;
  362. encoding = undefined;
  363. }
  364. if (typeof chunk === 'string') {
  365. chunk = Buffer.from(chunk,
  366. /* c8 ignore next */
  367. typeof encoding === 'string' ? encoding : 'utf8');
  368. }
  369. if (this[ABORTED]) {
  370. /* c8 ignore next */
  371. cb?.();
  372. return false;
  373. }
  374. // first write, might be gzipped
  375. const needSniff = this[UNZIP] === undefined ||
  376. (this.brotli === undefined && this[UNZIP] === false);
  377. if (needSniff && chunk) {
  378. if (this[BUFFER]) {
  379. chunk = Buffer.concat([this[BUFFER], chunk]);
  380. this[BUFFER] = undefined;
  381. }
  382. if (chunk.length < gzipHeader.length) {
  383. this[BUFFER] = chunk;
  384. /* c8 ignore next */
  385. cb?.();
  386. return true;
  387. }
  388. // look for gzip header
  389. for (let i = 0; this[UNZIP] === undefined && i < gzipHeader.length; i++) {
  390. if (chunk[i] !== gzipHeader[i]) {
  391. this[UNZIP] = false;
  392. }
  393. }
  394. const maybeBrotli = this.brotli === undefined;
  395. if (this[UNZIP] === false && maybeBrotli) {
  396. // read the first header to see if it's a valid tar file. If so,
  397. // we can safely assume that it's not actually brotli, despite the
  398. // .tbr or .tar.br file extension.
  399. // if we ended before getting a full chunk, yes, def brotli
  400. if (chunk.length < 512) {
  401. if (this[ENDED]) {
  402. this.brotli = true;
  403. }
  404. else {
  405. this[BUFFER] = chunk;
  406. /* c8 ignore next */
  407. cb?.();
  408. return true;
  409. }
  410. }
  411. else {
  412. // if it's tar, it's pretty reliably not brotli, chances of
  413. // that happening are astronomical.
  414. try {
  415. new Header(chunk.subarray(0, 512));
  416. this.brotli = false;
  417. }
  418. catch (_) {
  419. this.brotli = true;
  420. }
  421. }
  422. }
  423. if (this[UNZIP] === undefined ||
  424. (this[UNZIP] === false && this.brotli)) {
  425. const ended = this[ENDED];
  426. this[ENDED] = false;
  427. this[UNZIP] =
  428. this[UNZIP] === undefined ?
  429. new Unzip({})
  430. : new BrotliDecompress({});
  431. this[UNZIP].on('data', chunk => this[CONSUMECHUNK](chunk));
  432. this[UNZIP].on('error', er => this.abort(er));
  433. this[UNZIP].on('end', () => {
  434. this[ENDED] = true;
  435. this[CONSUMECHUNK]();
  436. });
  437. this[WRITING] = true;
  438. const ret = !!this[UNZIP][ended ? 'end' : 'write'](chunk);
  439. this[WRITING] = false;
  440. cb?.();
  441. return ret;
  442. }
  443. }
  444. this[WRITING] = true;
  445. if (this[UNZIP]) {
  446. this[UNZIP].write(chunk);
  447. }
  448. else {
  449. this[CONSUMECHUNK](chunk);
  450. }
  451. this[WRITING] = false;
  452. // return false if there's a queue, or if the current entry isn't flowing
  453. const ret = this[QUEUE].length ? false
  454. : this[READENTRY] ? this[READENTRY].flowing
  455. : true;
  456. // if we have no queue, then that means a clogged READENTRY
  457. if (!ret && !this[QUEUE].length) {
  458. this[READENTRY]?.once('drain', () => this.emit('drain'));
  459. }
  460. /* c8 ignore next */
  461. cb?.();
  462. return ret;
  463. }
  464. [BUFFERCONCAT](c) {
  465. if (c && !this[ABORTED]) {
  466. this[BUFFER] =
  467. this[BUFFER] ? Buffer.concat([this[BUFFER], c]) : c;
  468. }
  469. }
  470. [MAYBEEND]() {
  471. if (this[ENDED] &&
  472. !this[EMITTEDEND] &&
  473. !this[ABORTED] &&
  474. !this[CONSUMING]) {
  475. this[EMITTEDEND] = true;
  476. const entry = this[WRITEENTRY];
  477. if (entry && entry.blockRemain) {
  478. // truncated, likely a damaged file
  479. const have = this[BUFFER] ? this[BUFFER].length : 0;
  480. this.warn('TAR_BAD_ARCHIVE', `Truncated input (needed ${entry.blockRemain} more bytes, only ${have} available)`, { entry });
  481. if (this[BUFFER]) {
  482. entry.write(this[BUFFER]);
  483. }
  484. entry.end();
  485. }
  486. this[EMIT](DONE);
  487. }
  488. }
  489. [CONSUMECHUNK](chunk) {
  490. if (this[CONSUMING] && chunk) {
  491. this[BUFFERCONCAT](chunk);
  492. }
  493. else if (!chunk && !this[BUFFER]) {
  494. this[MAYBEEND]();
  495. }
  496. else if (chunk) {
  497. this[CONSUMING] = true;
  498. if (this[BUFFER]) {
  499. this[BUFFERCONCAT](chunk);
  500. const c = this[BUFFER];
  501. this[BUFFER] = undefined;
  502. this[CONSUMECHUNKSUB](c);
  503. }
  504. else {
  505. this[CONSUMECHUNKSUB](chunk);
  506. }
  507. while (this[BUFFER] &&
  508. this[BUFFER]?.length >= 512 &&
  509. !this[ABORTED] &&
  510. !this[SAW_EOF]) {
  511. const c = this[BUFFER];
  512. this[BUFFER] = undefined;
  513. this[CONSUMECHUNKSUB](c);
  514. }
  515. this[CONSUMING] = false;
  516. }
  517. if (!this[BUFFER] || this[ENDED]) {
  518. this[MAYBEEND]();
  519. }
  520. }
  521. [CONSUMECHUNKSUB](chunk) {
  522. // we know that we are in CONSUMING mode, so anything written goes into
  523. // the buffer. Advance the position and put any remainder in the buffer.
  524. let position = 0;
  525. const length = chunk.length;
  526. while (position + 512 <= length &&
  527. !this[ABORTED] &&
  528. !this[SAW_EOF]) {
  529. switch (this[STATE]) {
  530. case 'begin':
  531. case 'header':
  532. this[CONSUMEHEADER](chunk, position);
  533. position += 512;
  534. break;
  535. case 'ignore':
  536. case 'body':
  537. position += this[CONSUMEBODY](chunk, position);
  538. break;
  539. case 'meta':
  540. position += this[CONSUMEMETA](chunk, position);
  541. break;
  542. /* c8 ignore start */
  543. default:
  544. throw new Error('invalid state: ' + this[STATE]);
  545. /* c8 ignore stop */
  546. }
  547. }
  548. if (position < length) {
  549. if (this[BUFFER]) {
  550. this[BUFFER] = Buffer.concat([
  551. chunk.subarray(position),
  552. this[BUFFER],
  553. ]);
  554. }
  555. else {
  556. this[BUFFER] = chunk.subarray(position);
  557. }
  558. }
  559. }
  560. end(chunk, encoding, cb) {
  561. if (typeof chunk === 'function') {
  562. cb = chunk;
  563. encoding = undefined;
  564. chunk = undefined;
  565. }
  566. if (typeof encoding === 'function') {
  567. cb = encoding;
  568. encoding = undefined;
  569. }
  570. if (typeof chunk === 'string') {
  571. chunk = Buffer.from(chunk, encoding);
  572. }
  573. if (cb)
  574. this.once('finish', cb);
  575. if (!this[ABORTED]) {
  576. if (this[UNZIP]) {
  577. /* c8 ignore start */
  578. if (chunk)
  579. this[UNZIP].write(chunk);
  580. /* c8 ignore stop */
  581. this[UNZIP].end();
  582. }
  583. else {
  584. this[ENDED] = true;
  585. if (this.brotli === undefined)
  586. chunk = chunk || Buffer.alloc(0);
  587. if (chunk)
  588. this.write(chunk);
  589. this[MAYBEEND]();
  590. }
  591. }
  592. return this;
  593. }
  594. }
  595. //# sourceMappingURL=parse.js.map