9bb28c0fa900129430318fd62ccdb5fef6ac66ae0f36ba15da427ee06cc504830fe21bd9450de8a7c939713436fc2dd41ae9e68eb6cc68d52d07614c2a3904 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. 'use strict'
  2. const util = require('util')
  3. const pMap = require('p-map')
  4. const contentPath = require('./content/path')
  5. const figgyPudding = require('figgy-pudding')
  6. const fixOwner = require('./util/fix-owner')
  7. const fs = require('graceful-fs')
  8. const fsm = require('fs-minipass')
  9. const glob = util.promisify(require('glob'))
  10. const index = require('./entry-index')
  11. const path = require('path')
  12. const rimraf = util.promisify(require('rimraf'))
  13. const ssri = require('ssri')
  // Own-property test that does not rely on `obj` having (or not shadowing)
  // its own `hasOwnProperty` method.
  const hasOwnProperty = (obj, key) => {
    const ownCheck = Object.prototype.hasOwnProperty
    return ownCheck.call(obj, key)
  }
  // Promise-returning versions of the callback-style `fs` functions that the
  // rest of this module calls.
  const [stat, truncate, writeFile, readFile] = [
    fs.stat,
    fs.truncate,
    fs.writeFile,
    fs.readFile
  ].map((fn) => util.promisify(fn))
  // Options understood by verify():
  //   concurrency - handed to p-map as its concurrency limit (default 20)
  //   filter      - optional predicate called with each index entry; entries
  //                 for which it returns a falsy value are left out
  //   log         - logger whose `silly` method receives progress messages;
  //                 defaults to a logger that discards them
  const VerifyOpts = figgyPudding({
    concurrency: { default: 20 },
    filter: {},
    log: { default: { silly () {} } }
  })
  29. module.exports = verify
  /**
   * Verifies and repairs the cache at `cache` by running the maintenance
   * steps listed below strictly one after another. Whatever object a step
   * resolves with is merged into a single stats object.
   *
   * @param {string} cache - Path of the cache directory.
   * @param {object} [opts] - Options, normalised through VerifyOpts
   *   (`concurrency`, `filter`, `log`).
   * @returns {Promise<object>} The merged step results plus `runTime`, a map
   *   from step name to the milliseconds that step took, where
   *   `runTime.total` is `endTime - startTime`.
   */
  function verify (cache, opts) {
    opts = VerifyOpts(opts)
    opts.log.silly('verify', 'verifying cache at', cache)
    const steps = [
      markStartTime,
      fixPerms,
      garbageCollect,
      rebuildIndex,
      cleanTmp,
      writeVerifile,
      markEndTime
    ]
    return steps
      .reduce((promise, step, i) => {
        const label = step.name || `step #${i}`
        return promise.then((stats) => {
          // Start the clock only now, when the previous step has finished.
          // The reduce callback itself runs for every step up front, so a
          // timestamp taken there would charge each step for all the steps
          // that ran before it.
          const start = new Date()
          return step(cache, opts).then((s) => {
            if (s) {
              Object.assign(stats, s)
            }
            if (!stats.runTime) {
              stats.runTime = {}
            }
            stats.runTime[label] = new Date() - start
            return stats
          })
        })
      }, Promise.resolve({}))
      .then((stats) => {
        stats.runTime.total = stats.endTime - stats.startTime
        opts.log.silly(
          'verify',
          'verification finished for',
          cache,
          'in',
          `${stats.runTime.total}ms`
        )
        return stats
      })
  }
  // Pipeline step: resolves with the moment it ran as `startTime`.
  // Both arguments are accepted only to match the step signature.
  function markStartTime (cache, opts) {
    const startTime = new Date()
    return Promise.resolve({ startTime })
  }
  // Pipeline step: resolves with the moment it ran as `endTime`.
  // Both arguments are accepted only to match the step signature.
  function markEndTime (cache, opts) {
    const endTime = new Date()
    return Promise.resolve({ endTime })
  }
  // Pipeline step: runs fixOwner.mkdirfix and then fixOwner.chownr on the
  // cache directory itself. Contributes no stats, so it resolves with null.
  function fixPerms (cache, opts) {
    opts.log.silly('verify', 'fixing cache permissions')
    // TODO - fix file permissions too
    const chownCache = () => fixOwner.chownr(cache, cache)
    const contributeNothing = () => null
    return fixOwner.mkdirfix(cache, cache).then(chownCache).then(contributeNothing)
  }
  // Naive mark-and-sweep garbage collection of the content store.
  //
  // Mark:  stream every index entry of `cache`; each entry accepted by
  //        `opts.filter` (or every entry when no filter is set) has its
  //        integrity string recorded as live.
  // Sweep: glob every file below the content directory and rebuild its
  //        integrity from the path: the directory four segments from the end
  //        names the algorithm, the last three segments joined are the hex
  //        digest.
  //          - live content is handed to verifyContent, which checks its
  //            checksum and removes it when the check fails
  //          - content that no live entry refers to is rimraf'd
  //
  // Resolves with { verifiedContent, reclaimedCount, reclaimedSize,
  // badContentCount, keptSize }.
  function garbageCollect (cache, opts) {
    opts.log.silly('verify', 'garbage collecting content')
    const indexStream = index.lsStream(cache)
    const liveContent = new Set()
    indexStream.on('data', (entry) => {
      const wanted = !opts.filter || opts.filter(entry)
      if (wanted) {
        liveContent.add(entry.integrity.toString())
      }
    })
    const indexDrained = new Promise((resolve, reject) => {
      indexStream.on('end', resolve).on('error', reject)
    })

    const stats = {
      verifiedContent: 0,
      reclaimedCount: 0,
      reclaimedSize: 0,
      badContentCount: 0,
      keptSize: 0
    }

    // Handles one content file: verify it when live, delete it otherwise.
    const sweep = (f) => {
      const segments = f.split(/[/\\]/)
      const algo = segments[segments.length - 4]
      const digest = segments.slice(segments.length - 3).join('')
      const integrity = ssri.fromHex(digest, algo)

      if (!liveContent.has(integrity.toString())) {
        // No entries refer to this content. We can delete.
        stats.reclaimedCount++
        return stat(f).then((s) =>
          rimraf(f).then(() => {
            stats.reclaimedSize += s.size
            return stats
          })
        )
      }

      return verifyContent(f, integrity).then((info) => {
        if (info.valid) {
          stats.verifiedContent++
          stats.keptSize += info.size
        } else {
          stats.reclaimedCount++
          stats.badContentCount++
          stats.reclaimedSize += info.size
        }
        return stats
      })
    }

    return indexDrained
      .then(() => {
        const contentDir = contentPath.contentDir(cache)
        return glob(path.join(contentDir, '**'), {
          follow: false,
          nodir: true,
          nosort: true
        })
      })
      .then((files) => pMap(files, sweep, { concurrency: opts.concurrency }))
      .then(() => stats)
  }
  // Checks the file at `filepath` against the integrity `sri`.
  //
  // Resolves with { size, valid }:
  //   - checksum matches          -> { size: <stat size>, valid: true }
  //   - check fails (EINTEGRITY)  -> file is rimraf'd, { size: <stat size>, valid: false }
  //   - any ENOENT along the way  -> { size: 0, valid: false }
  // Every other error is rethrown.
  function verifyContent (filepath, sri) {
    const treatMissingAsInvalid = (err) => {
      if (err.code !== 'ENOENT') {
        throw err
      }
      return { size: 0, valid: false }
    }

    const checkAgainstSri = (fileStat) => {
      const report = { size: fileStat.size, valid: true }
      const discardIfCorrupt = (err) => {
        if (err.code === 'EINTEGRITY') {
          return rimraf(filepath).then(() => {
            report.valid = false
          })
        }
        throw err
      }
      return ssri
        .checkStream(new fsm.ReadStream(filepath), sri)
        .catch(discardIfCorrupt)
        .then(() => report)
    }

    return stat(filepath).then(checkAgainstSri).catch(treatMissingAsInvalid)
  }
  // Pipeline step: groups every index entry by the hash of its key and
  // rewrites each group through rebuildBucket.
  //
  // A key whose entry is rejected by `opts.filter` still gets a bucket (so
  // the bucket is rewritten) but the entry is left out of it and counted in
  // `rejectedEntries`.
  //
  // Resolves with { missingContent, rejectedEntries, totalEntries }.
  function rebuildIndex (cache, opts) {
    opts.log.silly('verify', 'rebuilding index')
    return index.ls(cache).then((entries) => {
      const stats = {
        missingContent: 0,
        rejectedEntries: 0,
        totalEntries: 0
      }
      const buckets = {}
      for (const k in entries) {
        if (!hasOwnProperty(entries, k)) {
          continue
        }
        const hashed = index.hashKey(k)
        const entry = entries[k]
        const excluded = opts.filter && !opts.filter(entry)
        if (excluded) {
          stats.rejectedEntries++
        }
        if (!buckets[hashed]) {
          // The first key seen for a bucket decides the bucket's path.
          buckets[hashed] = []
          buckets[hashed]._path = index.bucketPath(cache, k)
        }
        if (!excluded) {
          buckets[hashed].push(entry)
        }
      }
      const rebuildOne = (key) => rebuildBucket(cache, buckets[key], stats, opts)
      return pMap(Object.keys(buckets), rebuildOne, {
        concurrency: opts.concurrency
      }).then(() => stats)
    })
  }
  // Rewrites one index bucket: truncates the file at `bucket._path`, then
  // re-inserts the bucket's entries one at a time.
  //
  // An entry is re-inserted only when its content file can be stat'ed. An
  // ENOENT while handling an entry counts it in `stats.rejectedEntries` and
  // `stats.missingContent`; any other error rejects the returned promise.
  // Each successful insert bumps `stats.totalEntries`.
  function rebuildBucket (cache, bucket, stats, opts) {
    const reinsert = (entry) => {
      const content = contentPath(cache, entry.integrity)
      return stat(content)
        .then(() =>
          index.insert(cache, entry.key, entry.integrity, {
            metadata: entry.metadata,
            size: entry.size
          })
        )
        .then(() => {
          stats.totalEntries++
        })
        .catch((err) => {
          if (err.code !== 'ENOENT') {
            throw err
          }
          stats.rejectedEntries++
          stats.missingContent++
        })
    }

    return truncate(bucket._path).then(() => {
      // This needs to be serialized because cacache explicitly
      // lets very racy bucket conflicts clobber each other.
      let chain = Promise.resolve()
      for (const entry of bucket) {
        chain = chain.then(() => reinsert(entry))
      }
      return chain
    })
  }
  // Pipeline step: removes the `tmp` directory inside the cache.
  function cleanTmp (cache, opts) {
    opts.log.silly('verify', 'cleaning tmp directory')
    const tmpDir = path.join(cache, 'tmp')
    return rimraf(tmpDir)
  }
  /**
   * Pipeline step: writes the current time (milliseconds since the epoch, as
   * a decimal string) to `<cache>/_lastverified`, then runs
   * fixOwner.chownr.sync on that file.
   *
   * @param {string} cache - Path of the cache directory.
   * @param {object} opts - Options; only `opts.log.silly` is used here.
   * @returns {Promise<undefined>} Settles once the write has finished and the
   *   ownership fix has run; rejects if either of them fails.
   */
  async function writeVerifile (cache, opts) {
    const verifile = path.join(cache, '_lastverified')
    opts.log.silly('verify', 'writing verifile to ' + verifile)
    try {
      // The `await` matters: returning the bare promise would leave the `try`
      // immediately, so `finally` would fix ownership before the file had
      // been written.
      return await writeFile(verifile, `${Date.now()}`)
    } finally {
      fixOwner.chownr.sync(cache, verifile)
    }
  }
  266. module.exports.lastRun = lastRun
  // Resolves with the time stored in `<cache>/_lastverified` as a Date.
  // The file content is read as UTF-8 text and converted to a number of
  // milliseconds; a failed read rejects the returned promise.
  function lastRun (cache) {
    const verifile = path.join(cache, '_lastverified')
    const toDate = (data) => new Date(Number(data))
    return readFile(verifile, 'utf8').then(toDate)
  }
  271. }