  1. "use strict";
  2. // this[BUFFER] is the remainder of a chunk if we're waiting for
  3. // the full 512 bytes of a header to come in. We will Buffer.concat()
  4. // it to the next write(), which is a mem copy, but a small one.
  5. //
  6. // this[QUEUE] is a Yallist of entries that haven't been emitted
  7. // yet this can only get filled up if the user keeps write()ing after
  8. // a write() returns false, or does a write() with more than one entry
  9. //
  10. // We don't buffer chunks, we always parse them and either create an
  11. // entry, or push it into the active entry. The ReadEntry class knows
  12. // to throw data away if .ignore=true
  13. //
  14. // Shift entry off the buffer when it emits 'end', and emit 'entry' for
  15. // the next one in the list.
  16. //
  17. // At any time, we're pushing body chunks into the entry at WRITEENTRY,
  18. // and waiting for 'end' on the entry at READENTRY
  19. //
  20. // ignored entries get .resume() called on them straight away
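//
// A minimal usage sketch (illustrative only; the 'archive.tar' path and
// the filter callback below are made-up examples, not part of this
// module): a Parser is written to like a stream, and each tar entry
// arrives as a ReadEntry via the 'entry' event.
//
//   const fs = require('fs')
//   const { Parser } = require('./parse.js')
//   const p = new Parser({
//     // returning false from filter makes the entry an 'ignoredEntry'
//     filter: (path, entry) => entry.type === 'File',
//   })
//   p.on('entry', entry => {
//     console.log(entry.path, entry.size)
//     entry.resume() // drain the body so the parser can advance
//   })
//   fs.createReadStream('archive.tar').pipe(p)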
Object.defineProperty(exports, "__esModule", { value: true });
exports.Parser = void 0;
const events_1 = require("events");
const minizlib_1 = require("minizlib");
const yallist_1 = require("yallist");
const header_js_1 = require("./header.js");
const pax_js_1 = require("./pax.js");
const read_entry_js_1 = require("./read-entry.js");
const warn_method_js_1 = require("./warn-method.js");
const maxMetaEntrySize = 1024 * 1024;
const gzipHeader = Buffer.from([0x1f, 0x8b]);
const STATE = Symbol('state');
const WRITEENTRY = Symbol('writeEntry');
const READENTRY = Symbol('readEntry');
const NEXTENTRY = Symbol('nextEntry');
const PROCESSENTRY = Symbol('processEntry');
const EX = Symbol('extendedHeader');
const GEX = Symbol('globalExtendedHeader');
const META = Symbol('meta');
const EMITMETA = Symbol('emitMeta');
const BUFFER = Symbol('buffer');
const QUEUE = Symbol('queue');
const ENDED = Symbol('ended');
const EMITTEDEND = Symbol('emittedEnd');
const EMIT = Symbol('emit');
const UNZIP = Symbol('unzip');
const CONSUMECHUNK = Symbol('consumeChunk');
const CONSUMECHUNKSUB = Symbol('consumeChunkSub');
const CONSUMEBODY = Symbol('consumeBody');
const CONSUMEMETA = Symbol('consumeMeta');
const CONSUMEHEADER = Symbol('consumeHeader');
const CONSUMING = Symbol('consuming');
const BUFFERCONCAT = Symbol('bufferConcat');
const MAYBEEND = Symbol('maybeEnd');
const WRITING = Symbol('writing');
const ABORTED = Symbol('aborted');
const DONE = Symbol('onDone');
const SAW_VALID_ENTRY = Symbol('sawValidEntry');
const SAW_NULL_BLOCK = Symbol('sawNullBlock');
const SAW_EOF = Symbol('sawEOF');
const CLOSESTREAM = Symbol('closeStream');
const noop = () => true;
class Parser extends events_1.EventEmitter {
    file;
    strict;
    maxMetaEntrySize;
    filter;
    brotli;
    writable = true;
    readable = false;
    [QUEUE] = new yallist_1.Yallist();
    [BUFFER];
    [READENTRY];
    [WRITEENTRY];
    [STATE] = 'begin';
    [META] = '';
    [EX];
    [GEX];
    [ENDED] = false;
    [UNZIP];
    [ABORTED] = false;
    [SAW_VALID_ENTRY];
    [SAW_NULL_BLOCK] = false;
    [SAW_EOF] = false;
    [WRITING] = false;
    [CONSUMING] = false;
    [EMITTEDEND] = false;
    constructor(opt = {}) {
        super();
        this.file = opt.file || '';
        // these BADARCHIVE errors can't be detected early. listen on DONE.
        this.on(DONE, () => {
            if (this[STATE] === 'begin' ||
                this[SAW_VALID_ENTRY] === false) {
                // either less than 1 block of data, or all entries were invalid.
                // Either way, probably not even a tarball.
                this.warn('TAR_BAD_ARCHIVE', 'Unrecognized archive format');
            }
        });
        if (opt.ondone) {
            this.on(DONE, opt.ondone);
        }
        else {
            this.on(DONE, () => {
                this.emit('prefinish');
                this.emit('finish');
                this.emit('end');
            });
        }
        this.strict = !!opt.strict;
        this.maxMetaEntrySize = opt.maxMetaEntrySize || maxMetaEntrySize;
        this.filter = typeof opt.filter === 'function' ? opt.filter : noop;
        // Unlike gzip, brotli doesn't have any magic bytes to identify it
        // Users need to explicitly tell us they're extracting a brotli file
        // Or we infer from the file extension
        const isTBR = opt.file &&
            (opt.file.endsWith('.tar.br') || opt.file.endsWith('.tbr'));
        // if it's a tbr file it MIGHT be brotli, but we don't know until
        // we look at it and verify it's not a valid tar file.
        this.brotli =
            !opt.gzip && opt.brotli !== undefined ? opt.brotli
                : isTBR ? undefined
                    : false;
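        // To summarize the cases above: an explicit opt.brotli (without
        // opt.gzip) is honored as given; a .tar.br/.tbr filename leaves
        // this.brotli undefined so the first write() can sniff a 512-byte
        // header before deciding; anything else defaults to false.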
        // have to set this so that streams are ok piping into it
        this.on('end', () => this[CLOSESTREAM]());
        if (typeof opt.onwarn === 'function') {
            this.on('warn', opt.onwarn);
        }
        if (typeof opt.onReadEntry === 'function') {
            this.on('entry', opt.onReadEntry);
        }
    }
    warn(code, message, data = {}) {
        (0, warn_method_js_1.warnMethod)(this, code, message, data);
    }
    [CONSUMEHEADER](chunk, position) {
        if (this[SAW_VALID_ENTRY] === undefined) {
            this[SAW_VALID_ENTRY] = false;
        }
        let header;
        try {
            header = new header_js_1.Header(chunk, position, this[EX], this[GEX]);
        }
        catch (er) {
            return this.warn('TAR_ENTRY_INVALID', er);
        }
        if (header.nullBlock) {
            if (this[SAW_NULL_BLOCK]) {
                this[SAW_EOF] = true;
                // ending an archive with no entries. pointless, but legal.
                if (this[STATE] === 'begin') {
                    this[STATE] = 'header';
                }
                this[EMIT]('eof');
            }
            else {
                this[SAW_NULL_BLOCK] = true;
                this[EMIT]('nullBlock');
            }
        }
        else {
            this[SAW_NULL_BLOCK] = false;
            if (!header.cksumValid) {
                this.warn('TAR_ENTRY_INVALID', 'checksum failure', { header });
            }
            else if (!header.path) {
                this.warn('TAR_ENTRY_INVALID', 'path is required', { header });
            }
            else {
                const type = header.type;
                if (/^(Symbolic)?Link$/.test(type) && !header.linkpath) {
                    this.warn('TAR_ENTRY_INVALID', 'linkpath required', {
                        header,
                    });
                }
                else if (!/^(Symbolic)?Link$/.test(type) &&
                    !/^(Global)?ExtendedHeader$/.test(type) &&
                    header.linkpath) {
                    this.warn('TAR_ENTRY_INVALID', 'linkpath forbidden', {
                        header,
                    });
                }
                else {
                    const entry = (this[WRITEENTRY] = new read_entry_js_1.ReadEntry(header, this[EX], this[GEX]));
                    // we do this for meta & ignored entries as well, because they
                    // are still valid tar, or else we wouldn't know to ignore them
                    if (!this[SAW_VALID_ENTRY]) {
                        if (entry.remain) {
                            // this might be the one!
                            const onend = () => {
                                if (!entry.invalid) {
                                    this[SAW_VALID_ENTRY] = true;
                                }
                            };
                            entry.on('end', onend);
                        }
                        else {
                            this[SAW_VALID_ENTRY] = true;
                        }
                    }
                    if (entry.meta) {
                        if (entry.size > this.maxMetaEntrySize) {
                            entry.ignore = true;
                            this[EMIT]('ignoredEntry', entry);
                            this[STATE] = 'ignore';
                            entry.resume();
                        }
                        else if (entry.size > 0) {
                            this[META] = '';
                            entry.on('data', c => (this[META] += c));
                            this[STATE] = 'meta';
                        }
                    }
                    else {
                        this[EX] = undefined;
                        entry.ignore =
                            entry.ignore || !this.filter(entry.path, entry);
                        if (entry.ignore) {
                            // probably valid, just not something we care about
                            this[EMIT]('ignoredEntry', entry);
                            this[STATE] = entry.remain ? 'ignore' : 'header';
                            entry.resume();
                        }
                        else {
                            if (entry.remain) {
                                this[STATE] = 'body';
                            }
                            else {
                                this[STATE] = 'header';
                                entry.end();
                            }
                            if (!this[READENTRY]) {
                                this[QUEUE].push(entry);
                                this[NEXTENTRY]();
                            }
                            else {
                                this[QUEUE].push(entry);
                            }
                        }
                    }
                }
            }
        }
    }
    [CLOSESTREAM]() {
        queueMicrotask(() => this.emit('close'));
    }
    [PROCESSENTRY](entry) {
        let go = true;
        if (!entry) {
            this[READENTRY] = undefined;
            go = false;
        }
        else if (Array.isArray(entry)) {
            const [ev, ...args] = entry;
            this.emit(ev, ...args);
        }
        else {
            this[READENTRY] = entry;
            this.emit('entry', entry);
            if (!entry.emittedEnd) {
                entry.on('end', () => this[NEXTENTRY]());
                go = false;
            }
        }
        return go;
    }
    [NEXTENTRY]() {
        do { } while (this[PROCESSENTRY](this[QUEUE].shift()));
        if (!this[QUEUE].length) {
            // At this point, there's nothing in the queue, but we may have an
            // entry which is being consumed (readEntry).
            // If we don't, then we definitely can handle more data.
            // If we do, and either it's flowing, or it has never had any data
            // written to it, then it needs more.
            // The only other possibility is that it has returned false from a
            // write() call, so we wait for the next drain to continue.
            const re = this[READENTRY];
            const drainNow = !re || re.flowing || re.size === re.remain;
            if (drainNow) {
                if (!this[WRITING]) {
                    this.emit('drain');
                }
            }
            else {
                re.once('drain', () => this.emit('drain'));
            }
        }
    }
    [CONSUMEBODY](chunk, position) {
        // write up to but no more than writeEntry.blockRemain
        const entry = this[WRITEENTRY];
        /* c8 ignore start */
        if (!entry) {
            throw new Error('attempt to consume body without entry??');
        }
        const br = entry.blockRemain ?? 0;
        /* c8 ignore stop */
        const c = br >= chunk.length && position === 0 ?
            chunk
            : chunk.subarray(position, position + br);
        entry.write(c);
        if (!entry.blockRemain) {
            this[STATE] = 'header';
            this[WRITEENTRY] = undefined;
            entry.end();
        }
        return c.length;
    }
    [CONSUMEMETA](chunk, position) {
        const entry = this[WRITEENTRY];
        const ret = this[CONSUMEBODY](chunk, position);
        // if we finished, then the entry is reset
        if (!this[WRITEENTRY] && entry) {
            this[EMITMETA](entry);
        }
        return ret;
    }
    [EMIT](ev, data, extra) {
        if (!this[QUEUE].length && !this[READENTRY]) {
            this.emit(ev, data, extra);
        }
        else {
            this[QUEUE].push([ev, data, extra]);
        }
    }
    [EMITMETA](entry) {
        this[EMIT]('meta', this[META]);
        switch (entry.type) {
            case 'ExtendedHeader':
            case 'OldExtendedHeader':
                this[EX] = pax_js_1.Pax.parse(this[META], this[EX], false);
                break;
            case 'GlobalExtendedHeader':
                this[GEX] = pax_js_1.Pax.parse(this[META], this[GEX], true);
                break;
            case 'NextFileHasLongPath':
            case 'OldGnuLongPath': {
                const ex = this[EX] ?? Object.create(null);
                this[EX] = ex;
                ex.path = this[META].replace(/\0.*/, '');
                break;
            }
            case 'NextFileHasLongLinkpath': {
                const ex = this[EX] || Object.create(null);
                this[EX] = ex;
                ex.linkpath = this[META].replace(/\0.*/, '');
                break;
            }
            /* c8 ignore start */
            default:
                throw new Error('unknown meta: ' + entry.type);
            /* c8 ignore stop */
        }
    }
    abort(error) {
        this[ABORTED] = true;
        this.emit('abort', error);
        // always throws, even in non-strict mode
        this.warn('TAR_ABORT', error, { recoverable: false });
    }
    write(chunk, encoding, cb) {
        if (typeof encoding === 'function') {
            cb = encoding;
            encoding = undefined;
        }
        if (typeof chunk === 'string') {
            chunk = Buffer.from(chunk,
            /* c8 ignore next */
            typeof encoding === 'string' ? encoding : 'utf8');
        }
        if (this[ABORTED]) {
            /* c8 ignore next */
            cb?.();
            return false;
        }
        // first write, might be gzipped
        const needSniff = this[UNZIP] === undefined ||
            (this.brotli === undefined && this[UNZIP] === false);
        if (needSniff && chunk) {
            if (this[BUFFER]) {
                chunk = Buffer.concat([this[BUFFER], chunk]);
                this[BUFFER] = undefined;
            }
            if (chunk.length < gzipHeader.length) {
                this[BUFFER] = chunk;
                /* c8 ignore next */
                cb?.();
                return true;
            }
            // look for gzip header
            for (let i = 0; this[UNZIP] === undefined && i < gzipHeader.length; i++) {
                if (chunk[i] !== gzipHeader[i]) {
                    this[UNZIP] = false;
                }
            }
            const maybeBrotli = this.brotli === undefined;
            if (this[UNZIP] === false && maybeBrotli) {
                // read the first header to see if it's a valid tar file. If so,
                // we can safely assume that it's not actually brotli, despite the
                // .tbr or .tar.br file extension.
                // if we ended before getting a full chunk, yes, def brotli
                if (chunk.length < 512) {
                    if (this[ENDED]) {
                        this.brotli = true;
                    }
                    else {
                        this[BUFFER] = chunk;
                        /* c8 ignore next */
                        cb?.();
                        return true;
                    }
                }
                else {
                    // if it's tar, it's pretty reliably not brotli, chances of
                    // that happening are astronomical.
                    try {
                        new header_js_1.Header(chunk.subarray(0, 512));
                        this.brotli = false;
                    }
                    catch (_) {
                        this.brotli = true;
                    }
                }
            }
            if (this[UNZIP] === undefined ||
                (this[UNZIP] === false && this.brotli)) {
                const ended = this[ENDED];
                this[ENDED] = false;
                this[UNZIP] =
                    this[UNZIP] === undefined ?
                        new minizlib_1.Unzip({})
                        : new minizlib_1.BrotliDecompress({});
                this[UNZIP].on('data', chunk => this[CONSUMECHUNK](chunk));
                this[UNZIP].on('error', er => this.abort(er));
                this[UNZIP].on('end', () => {
                    this[ENDED] = true;
                    this[CONSUMECHUNK]();
                });
                this[WRITING] = true;
                const ret = !!this[UNZIP][ended ? 'end' : 'write'](chunk);
                this[WRITING] = false;
                cb?.();
                return ret;
            }
        }
        this[WRITING] = true;
        if (this[UNZIP]) {
            this[UNZIP].write(chunk);
        }
        else {
            this[CONSUMECHUNK](chunk);
        }
        this[WRITING] = false;
        // return false if there's a queue, or if the current entry isn't flowing
        const ret = this[QUEUE].length ? false
            : this[READENTRY] ? this[READENTRY].flowing
                : true;
        // if we have no queue, then that means a clogged READENTRY
        if (!ret && !this[QUEUE].length) {
            this[READENTRY]?.once('drain', () => this.emit('drain'));
        }
        /* c8 ignore next */
        cb?.();
        return ret;
    }
    [BUFFERCONCAT](c) {
        if (c && !this[ABORTED]) {
            this[BUFFER] =
                this[BUFFER] ? Buffer.concat([this[BUFFER], c]) : c;
        }
    }
    [MAYBEEND]() {
        if (this[ENDED] &&
            !this[EMITTEDEND] &&
            !this[ABORTED] &&
            !this[CONSUMING]) {
            this[EMITTEDEND] = true;
            const entry = this[WRITEENTRY];
            if (entry && entry.blockRemain) {
                // truncated, likely a damaged file
                const have = this[BUFFER] ? this[BUFFER].length : 0;
                this.warn('TAR_BAD_ARCHIVE', `Truncated input (needed ${entry.blockRemain} more bytes, only ${have} available)`, { entry });
                if (this[BUFFER]) {
                    entry.write(this[BUFFER]);
                }
                entry.end();
            }
            this[EMIT](DONE);
        }
    }
    [CONSUMECHUNK](chunk) {
        if (this[CONSUMING] && chunk) {
            this[BUFFERCONCAT](chunk);
        }
        else if (!chunk && !this[BUFFER]) {
            this[MAYBEEND]();
        }
        else if (chunk) {
            this[CONSUMING] = true;
            if (this[BUFFER]) {
                this[BUFFERCONCAT](chunk);
                const c = this[BUFFER];
                this[BUFFER] = undefined;
                this[CONSUMECHUNKSUB](c);
            }
            else {
                this[CONSUMECHUNKSUB](chunk);
            }
            while (this[BUFFER] &&
                this[BUFFER]?.length >= 512 &&
                !this[ABORTED] &&
                !this[SAW_EOF]) {
                const c = this[BUFFER];
                this[BUFFER] = undefined;
                this[CONSUMECHUNKSUB](c);
            }
            this[CONSUMING] = false;
        }
        if (!this[BUFFER] || this[ENDED]) {
            this[MAYBEEND]();
        }
    }
    [CONSUMECHUNKSUB](chunk) {
        // we know that we are in CONSUMING mode, so anything written goes into
        // the buffer. Advance the position and put any remainder in the buffer.
        let position = 0;
        const length = chunk.length;
        while (position + 512 <= length &&
            !this[ABORTED] &&
            !this[SAW_EOF]) {
            switch (this[STATE]) {
                case 'begin':
                case 'header':
                    this[CONSUMEHEADER](chunk, position);
                    position += 512;
                    break;
                case 'ignore':
                case 'body':
                    position += this[CONSUMEBODY](chunk, position);
                    break;
                case 'meta':
                    position += this[CONSUMEMETA](chunk, position);
                    break;
                /* c8 ignore start */
                default:
                    throw new Error('invalid state: ' + this[STATE]);
                /* c8 ignore stop */
            }
        }
        if (position < length) {
            if (this[BUFFER]) {
                this[BUFFER] = Buffer.concat([
                    chunk.subarray(position),
                    this[BUFFER],
                ]);
            }
            else {
                this[BUFFER] = chunk.subarray(position);
            }
        }
    }
    end(chunk, encoding, cb) {
        if (typeof chunk === 'function') {
            cb = chunk;
            encoding = undefined;
            chunk = undefined;
        }
        if (typeof encoding === 'function') {
            cb = encoding;
            encoding = undefined;
        }
        if (typeof chunk === 'string') {
            chunk = Buffer.from(chunk, encoding);
        }
        if (cb)
            this.once('finish', cb);
        if (!this[ABORTED]) {
            if (this[UNZIP]) {
                /* c8 ignore start */
                if (chunk)
                    this[UNZIP].write(chunk);
                /* c8 ignore stop */
                this[UNZIP].end();
            }
            else {
                this[ENDED] = true;
                if (this.brotli === undefined)
                    chunk = chunk || Buffer.alloc(0);
                if (chunk)
                    this.write(chunk);
                this[MAYBEEND]();
            }
        }
        return this;
    }
}
exports.Parser = Parser;
//# sourceMappingURL=parse.js.map