diff --git a/src/fs.ts b/src/fs.ts new file mode 100644 index 00000000..adf8886d --- /dev/null +++ b/src/fs.ts @@ -0,0 +1,116 @@ +import type { Stats } from "fs"; +import { resolve } from "path"; +import fs from "graceful-fs"; +import { Sema } from "async-sema"; + +const fsReadFile = fs.promises.readFile; +const fsReadlink = fs.promises.readlink; +const fsStat = fs.promises.stat; + +export class CachedFileSystem { + private fileCache: Map>; + private statCache: Map>; + private symlinkCache: Map>; + private fileIOQueue: Sema; + + constructor({ + cache, + fileIOConcurrency, + }: { + cache?: { fileCache?: Map>, statCache?: Map>, symlinkCache?: Map> }; + fileIOConcurrency: number; + }) { + this.fileIOQueue = new Sema(fileIOConcurrency); + this.fileCache = cache?.fileCache ?? new Map(); + this.statCache = cache?.statCache ?? new Map(); + this.symlinkCache = cache?.symlinkCache ?? new Map(); + + if (cache) { + cache.fileCache = this.fileCache; + cache.statCache = this.statCache; + cache.symlinkCache = this.symlinkCache; + } + } + + async readlink(path: string): Promise { + const cached = this.symlinkCache.get(path); + if (cached !== undefined) return cached; + // This is not awaiting the response, so that the cache is instantly populated and + // future calls serve the Promise from the cache + const readlinkPromise = this.executeFileIO(path, this._internalReadlink); + this.symlinkCache.set(path, readlinkPromise); + + return readlinkPromise; + } + + async readFile(path: string): Promise { + const cached = this.fileCache.get(path); + if (cached !== undefined) return cached; + // This is not awaiting the response, so that the cache is instantly populated and + // future calls serve the Promise from the cache + const readFilePromise = this.executeFileIO(path, this._internalReadFile); + this.fileCache.set(path, readFilePromise); + + return readFilePromise; + } + + async stat(path: string): Promise { + const cached = this.statCache.get(path); + if (cached !== undefined) return cached; + // This is not awaiting the response, so that the cache is instantly populated and + // future calls serve the Promise from the cache + const statPromise = this.executeFileIO(path, this._internalStat); + this.statCache.set(path, statPromise); + + return statPromise; + } + + private async _internalReadlink(path: string) { + try { + const link = await fsReadlink(path); + // also copy stat cache to symlink + const stats = this.statCache.get(path); + if (stats) this.statCache.set(resolve(path, link), stats); + return link; + } catch (e: any) { + if (e.code !== "EINVAL" && e.code !== "ENOENT" && e.code !== "UNKNOWN") + throw e; + return null; + } + } + + private async _internalReadFile(path: string): Promise { + try { + return (await fsReadFile(path)).toString(); + } catch (e: any) { + if (e.code === "ENOENT" || e.code === "EISDIR") { + return null; + } + throw e; + } + } + + private async _internalStat(path: string) { + try { + return await fsStat(path); + } catch (e: any) { + if (e.code === "ENOENT") { + return null; + } + throw e; + } + } + + private async executeFileIO( + path: string, + fileIO: (path: string) => Promise + ): Promise { + await this.fileIOQueue.acquire(); + + try { + return fileIO.call(this, path); + } finally { + this.fileIOQueue.release(); + } + } +} diff --git a/src/node-file-trace.ts b/src/node-file-trace.ts index df6dbb24..9eb8e825 100644 --- a/src/node-file-trace.ts +++ b/src/node-file-trace.ts @@ -1,16 +1,11 @@ -import { NodeFileTraceOptions, NodeFileTraceResult, NodeFileTraceReasons, Stats, NodeFileTraceReasonType } from './types'; +import { NodeFileTraceOptions, NodeFileTraceResult, NodeFileTraceReasons, NodeFileTraceReasonType } from './types'; import { basename, dirname, extname, relative, resolve, sep } from 'path'; -import fs from 'graceful-fs'; import analyze, { AnalyzeResult } from './analyze'; import resolveDependency, { NotFoundError } from './resolve-dependency'; import { isMatch } from 'micromatch'; import { sharedLibEmit } from './utils/sharedlib-emit'; import { join } from 'path'; -import { Sema } from 'async-sema'; - -const fsReadFile = fs.promises.readFile; -const fsReadlink = fs.promises.readlink; -const fsStat = fs.promises.stat; +import { CachedFileSystem } from './fs'; function inPath (path: string, parent: string) { const pathWithSep = join(parent, sep); @@ -57,16 +52,13 @@ export class Job { public log: boolean; public mixedModules: boolean; public analysis: { emitGlobs?: boolean, computeFileReferences?: boolean, evaluatePureExpressions?: boolean }; - private fileCache: Map; - private statCache: Map; - private symlinkCache: Map; private analysisCache: Map; public fileList: Set; public esmFileList: Set; public processed: Set; public warnings: Set; - public reasons: NodeFileTraceReasons = new Map() - private fileIOQueue: Sema; + public reasons: NodeFileTraceReasons = new Map(); + private cachedFileSystem: CachedFileSystem; constructor ({ base = process.cwd(), @@ -121,8 +113,7 @@ export class Job { this.paths = resolvedPaths; this.log = log; this.mixedModules = mixedModules; - this.fileIOQueue = new Sema(fileIOConcurrency); - + this.cachedFileSystem = new CachedFileSystem({ cache, fileIOConcurrency }); this.analysis = {}; if (analysis !== false) { Object.assign(this.analysis, { @@ -137,15 +128,9 @@ export class Job { }, analysis === true ? {} : analysis); } - this.fileCache = cache && cache.fileCache || new Map(); - this.statCache = cache && cache.statCache || new Map(); - this.symlinkCache = cache && cache.symlinkCache || new Map(); this.analysisCache = cache && cache.analysisCache || new Map(); if (cache) { - cache.fileCache = this.fileCache; - cache.statCache = this.statCache; - cache.symlinkCache = this.symlinkCache; cache.analysisCache = this.analysisCache; } @@ -156,27 +141,7 @@ export class Job { } async readlink (path: string) { - const cached = this.symlinkCache.get(path); - if (cached !== undefined) return cached; - await this.fileIOQueue.acquire(); - try { - const link = await fsReadlink(path); - // also copy stat cache to symlink - const stats = this.statCache.get(path); - if (stats) - this.statCache.set(resolve(path, link), stats); - this.symlinkCache.set(path, link); - return link; - } - catch (e: any) { - if (e.code !== 'EINVAL' && e.code !== 'ENOENT' && e.code !== 'UNKNOWN') - throw e; - this.symlinkCache.set(path, null); - return null; - } - finally { - this.fileIOQueue.release(); - } + return this.cachedFileSystem.readlink(path); } async isFile (path: string) { @@ -194,24 +159,7 @@ export class Job { } async stat (path: string) { - const cached = this.statCache.get(path); - if (cached) return cached; - await this.fileIOQueue.acquire(); - try { - const stats = await fsStat(path); - this.statCache.set(path, stats); - return stats; - } - catch (e: any) { - if (e.code === 'ENOENT') { - this.statCache.set(path, null); - return null; - } - throw e; - } - finally { - this.fileIOQueue.release(); - } + return this.cachedFileSystem.stat(path); } private maybeEmitDep = async (dep: string, path: string, cjsResolve: boolean) => { @@ -257,25 +205,8 @@ export class Job { return resolveDependency(id, parent, job, cjsResolve); } - async readFile (path: string): Promise { - const cached = this.fileCache.get(path); - if (cached !== undefined) return cached; - await this.fileIOQueue.acquire(); - try { - const source = (await fsReadFile(path)).toString(); - this.fileCache.set(path, source); - return source; - } - catch (e: any) { - if (e.code === 'ENOENT' || e.code === 'EISDIR') { - this.fileCache.set(path, null); - return null; - } - throw e; - } - finally { - this.fileIOQueue.release(); - } + async readFile (path: string): Promise { + return this.cachedFileSystem.readFile(path); } async realpath (path: string, parent?: string, seen = new Set()): Promise { @@ -302,12 +233,12 @@ export class Job { path = await this.realpath(path, parent); } path = relative(this.base, path); - + if (parent) { parent = relative(this.base, parent); } let reasonEntry = this.reasons.get(path) - + if (!reasonEntry) { reasonEntry = { type: [reasonType], @@ -375,7 +306,7 @@ export class Job { else { const source = await this.readFile(path); if (source === null) throw new Error('File ' + path + ' does not exist.'); - // analyze should not have any side-effects e.g. calling `job.emitFile` + // analyze should not have any side-effects e.g. calling `job.emitFile` // directly as this will not be included in the cachedAnalysis and won't // be emit for successive runs that leverage the cache analyzeResult = await analyze(path, source.toString(), this); @@ -387,7 +318,7 @@ export class Job { if (isESM) { this.esmFileList.add(relative(this.base, path)); } - + await Promise.all([ ...[...assets].map(async asset => { const ext = extname(asset); diff --git a/test/unit.test.js b/test/unit.test.js index ce269b38..d797f609 100644 --- a/test/unit.test.js +++ b/test/unit.test.js @@ -1,6 +1,12 @@ const fs = require('fs'); const { join, relative } = require('path'); const { nodeFileTrace } = require('../out/node-file-trace'); +const gracefulFS = require('graceful-fs'); +const analyze = require('../out/analyze.js').default; + +const stat = gracefulFS.promises.stat +const readlink = gracefulFS.promises.readlink +const readFile = gracefulFS.promises.readFile global._unit = true; @@ -11,6 +17,38 @@ const unitTests = [ ...unitTestDirs.map(testName => ({testName, isRoot: true})), ]; +jest.mock('../out/analyze.js', () => { + const originalModule = jest.requireActual('../out/analyze.js').default; + + return { + __esModule: true, + default: jest.fn(originalModule) + }; +}); + +jest.mock('graceful-fs', () => { + const originalModule = jest.requireActual('graceful-fs'); + + return { + ...originalModule, + promises: { + ...originalModule.promises, + stat: jest.fn(originalModule.promises.stat), + readFile: jest.fn(originalModule.promises.readFile), + readlink: jest.fn( originalModule.promises.readlink), + } + }; +}); + +function resetFileIOMocks() { + analyze.mockClear(); + stat.mockClear() + readFile.mockClear() + readlink.mockClear() +} + +afterEach(resetFileIOMocks) + for (const { testName, isRoot } of unitTests) { const testSuffix = `${testName} from ${isRoot ? 'root' : 'cwd'}`; if (process.platform === 'win32' && (isRoot || skipOnWindows.includes(testName))) { @@ -18,7 +56,7 @@ for (const { testName, isRoot } of unitTests) { continue; }; const unitPath = join(__dirname, 'unit', testName); - + it(`should correctly trace ${testSuffix}`, async () => { // We mock readFile because when node-file-trace is integrated into @now/node @@ -27,12 +65,12 @@ for (const { testName, isRoot } of unitTests) { // used in the tsx-input test: const readFileMock = jest.fn(function() { const [id] = arguments; - + if (id.startsWith('custom-resolution-')) { return '' } - - // ensure sync readFile works as expected since default is + + // ensure sync readFile works as expected since default is // async now if (testName === 'wildcard') { try { @@ -45,11 +83,11 @@ for (const { testName, isRoot } of unitTests) { }); const nftCache = {} - + const doTrace = async (cached = false) => { let inputFileNames = ["input.js"]; let outputFileName = "output.js"; - + if (testName === "jsx-input") { inputFileNames = ["input.jsx"]; } @@ -63,13 +101,13 @@ for (const { testName, isRoot } of unitTests) { inputFileNames = ["input-cached.js"] outputFileName = "output-cached.js" } - + if (testName === 'multi-input') { inputFileNames.push('input-2.js', 'input-3.js', 'input-4.js'); } - + const { fileList, reasons } = await nodeFileTrace( - inputFileNames.map(file => join(unitPath, file)), + inputFileNames.map(file => join(unitPath, file)), { base: isRoot ? '/' : `${__dirname}/../`, processCwd: unitPath, @@ -93,20 +131,20 @@ for (const { testName, isRoot } of unitTests) { } ); - const normalizeFilesRoot = f => + const normalizeFilesRoot = f => (isRoot ? relative(join('./', __dirname, '..'), f) : f).replace(/\\/g, '/'); - + const normalizeInputRoot = f => isRoot ? join('./', unitPath, f) : join('test/unit', testName, f); - + const getReasonType = f => reasons.get(normalizeInputRoot(f)).type; - + if (testName === 'multi-input') { const collectFiles = (parent, files = new Set()) => { fileList.forEach(file => { if (files.has(file)) return const reason = reasons.get(file) - + if (reason.parents && reason.parents.has(parent)) { files.add(file) collectFiles(file, files) @@ -114,7 +152,7 @@ for (const { testName, isRoot } of unitTests) { }) return files } - + expect([...collectFiles(normalizeInputRoot('input.js'))].map(normalizeFilesRoot).sort()).toEqual([ "package.json", "test/unit/multi-input/asset-2.txt", @@ -156,7 +194,7 @@ for (const { testName, isRoot } of unitTests) { expect(getReasonType('style.module.css')).toEqual(['dependency', 'asset']) } let sortedFileList = [...fileList].sort() - + if (testName === 'microtime-node-gyp') { let foundMatchingBinary = false sortedFileList = sortedFileList.filter(file => { @@ -169,9 +207,9 @@ for (const { testName, isRoot } of unitTests) { } return true }) - expect(foundMatchingBinary).toBe(true) + expect(foundMatchingBinary).toBe(true) } - + let expected; try { expected = JSON.parse(fs.readFileSync(join(unitPath, outputFileName)).toString()); @@ -197,6 +235,26 @@ for (const { testName, isRoot } of unitTests) { fs.writeFileSync(join(unitPath, 'actual.js'), JSON.stringify(sortedFileList, null, 2)); throw e; } + + if (cached) { + // Everything should be cached in the second run, except for `processed-dependency` which adds 1 new input file + expect(stat).toHaveBeenCalledTimes(0); + expect(readlink).toHaveBeenCalledTimes(testName === 'processed-dependency' ? 1 : 0); + expect(readFile).toHaveBeenCalledTimes(testName === 'processed-dependency' ? 1 : 0); + expect(analyze).toHaveBeenCalledTimes(testName === 'processed-dependency' ? 1 : 0); + } else { + // Ensure all cached calls are only called once per file. The expected count is the count of calls unique per path + const uniqueStatCalls = new Set(stat.mock.calls.map((call) => call[0])); + const uniqueReadlinkCalls = new Set(readlink.mock.calls.map((call) => call[0])); + const uniqueReadFileCalls = new Set(readFile.mock.calls.map((call) => call[0])); + const uniqueAnalyzeFileCalls = new Set(analyze.mock.calls.map((call) => call[0])); + expect(stat).toHaveBeenCalledTimes(uniqueStatCalls.size); + expect(readlink).toHaveBeenCalledTimes(uniqueReadlinkCalls.size); + expect(readFile).toHaveBeenCalledTimes(uniqueReadFileCalls.size); + expect(analyze).toHaveBeenCalledTimes(uniqueAnalyzeFileCalls.size); + } + + resetFileIOMocks(); } await doTrace() // test tracing again with a populated nftTrace @@ -204,7 +262,7 @@ for (const { testName, isRoot } of unitTests) { expect(nftCache.statCache).toBeDefined() expect(nftCache.symlinkCache).toBeDefined() expect(nftCache.analysisCache).toBeDefined() - + try { await doTrace(true) } catch (err) {