Skip to content

Commit

Permalink
fix: Make caching work correctly in async context
Browse files Browse the repository at this point in the history
BREAKING CHANGE: The file-IO caches now contains promises instead of the actual result.

Previously the cache was only populated after the function call succeed which in a async
context was leading to multiple file IO or analyze tasks started for the same file
at once. Caching the Promise instead of the result makes this problem go away.
  • Loading branch information
danez committed Dec 6, 2022
1 parent 40a8710 commit 9b7fc6a
Show file tree
Hide file tree
Showing 3 changed files with 206 additions and 101 deletions.
116 changes: 116 additions & 0 deletions src/fs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import type { Stats } from "fs";
import { resolve } from "path";
import fs from "graceful-fs";
import { Sema } from "async-sema";

const fsReadFile = fs.promises.readFile;
const fsReadlink = fs.promises.readlink;
const fsStat = fs.promises.stat;

export class CachedFileSystem {
private fileCache: Map<string, Promise<string | null>>;
private statCache: Map<string, Promise<Stats | null>>;
private symlinkCache: Map<string, Promise<string | null>>;
private fileIOQueue: Sema;

constructor({
cache,
fileIOConcurrency,
}: {
cache?: any;
fileIOConcurrency: number;
}) {
this.fileIOQueue = new Sema(fileIOConcurrency);
this.fileCache = (cache && cache.fileCache) || new Map();
this.statCache = (cache && cache.statCache) || new Map();
this.symlinkCache = (cache && cache.symlinkCache) || new Map();

if (cache) {
cache.fileCache = this.fileCache;
cache.statCache = this.statCache;
cache.symlinkCache = this.symlinkCache;
}
}

async readlink(path: string): Promise<string | null> {
const cached = this.symlinkCache.get(path);
if (cached !== undefined) return cached;
// This is not awaiting the response, so that the cache is instantly populated and
// future calls serve the Promise from the cache
const readlinkPromise = this.executeFileIO(path, this._internalReadlink);
this.symlinkCache.set(path, readlinkPromise);

return readlinkPromise;
}

async readFile(path: string): Promise<string | null> {
const cached = this.fileCache.get(path);
if (cached !== undefined) return cached;
// This is not awaiting the response, so that the cache is instantly populated and
// future calls serve the Promise from the cache
const readFilePromise = this.executeFileIO(path, this._internalReadFile);
this.fileCache.set(path, readFilePromise);

return readFilePromise;
}

async stat(path: string): Promise<Stats | null> {
const cached = this.statCache.get(path);
if (cached !== undefined) return cached;
// This is not awaiting the response, so that the cache is instantly populated and
// future calls serve the Promise from the cache
const statPromise = this.executeFileIO(path, this._internalStat);
this.statCache.set(path, statPromise);

return statPromise;
}

private async _internalReadlink(path: string) {
try {
const link = await fsReadlink(path);
// also copy stat cache to symlink
const stats = this.statCache.get(path);
if (stats) this.statCache.set(resolve(path, link), stats);
return link;
} catch (e: any) {
if (e.code !== "EINVAL" && e.code !== "ENOENT" && e.code !== "UNKNOWN")
throw e;
return null;
}
}

private async _internalReadFile(path: string): Promise<string | null> {
try {
return (await fsReadFile(path)).toString();
} catch (e: any) {
if (e.code === "ENOENT" || e.code === "EISDIR") {
return null;
}
throw e;
}
}

private async _internalStat(path: string) {
try {
return await fsStat(path);
} catch (e: any) {
if (e.code === "ENOENT") {
return null;
}
throw e;
}
}

private async executeFileIO<Return>(
path: string,
fileIO: (path: string) => Promise<Return>
): Promise<Return> {
await this.fileIOQueue.acquire();

try {
return fileIO.call(this, path);
} finally {
this.fileIOQueue.release();
}
}
}
95 changes: 13 additions & 82 deletions src/node-file-trace.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
import { NodeFileTraceOptions, NodeFileTraceResult, NodeFileTraceReasons, Stats, NodeFileTraceReasonType } from './types';
import { NodeFileTraceOptions, NodeFileTraceResult, NodeFileTraceReasons, NodeFileTraceReasonType } from './types';
import { basename, dirname, extname, relative, resolve, sep } from 'path';
import fs from 'graceful-fs';
import analyze, { AnalyzeResult } from './analyze';
import resolveDependency, { NotFoundError } from './resolve-dependency';
import { isMatch } from 'micromatch';
import { sharedLibEmit } from './utils/sharedlib-emit';
import { join } from 'path';
import { Sema } from 'async-sema';

const fsReadFile = fs.promises.readFile;
const fsReadlink = fs.promises.readlink;
const fsStat = fs.promises.stat;
import { CachedFileSystem } from './fs';

function inPath (path: string, parent: string) {
const pathWithSep = join(parent, sep);
Expand Down Expand Up @@ -57,16 +52,13 @@ export class Job {
public log: boolean;
public mixedModules: boolean;
public analysis: { emitGlobs?: boolean, computeFileReferences?: boolean, evaluatePureExpressions?: boolean };
private fileCache: Map<string, string | null>;
private statCache: Map<string, Stats | null>;
private symlinkCache: Map<string, string | null>;
private analysisCache: Map<string, AnalyzeResult>;
public fileList: Set<string>;
public esmFileList: Set<string>;
public processed: Set<string>;
public warnings: Set<Error>;
public reasons: NodeFileTraceReasons = new Map()
private fileIOQueue: Sema;
public reasons: NodeFileTraceReasons = new Map();
private cachedFileSystem: CachedFileSystem;

constructor ({
base = process.cwd(),
Expand Down Expand Up @@ -121,8 +113,7 @@ export class Job {
this.paths = resolvedPaths;
this.log = log;
this.mixedModules = mixedModules;
this.fileIOQueue = new Sema(fileIOConcurrency);

this.cachedFileSystem = new CachedFileSystem({ cache, fileIOConcurrency });
this.analysis = {};
if (analysis !== false) {
Object.assign(this.analysis, {
Expand All @@ -137,15 +128,9 @@ export class Job {
}, analysis === true ? {} : analysis);
}

this.fileCache = cache && cache.fileCache || new Map();
this.statCache = cache && cache.statCache || new Map();
this.symlinkCache = cache && cache.symlinkCache || new Map();
this.analysisCache = cache && cache.analysisCache || new Map();

if (cache) {
cache.fileCache = this.fileCache;
cache.statCache = this.statCache;
cache.symlinkCache = this.symlinkCache;
cache.analysisCache = this.analysisCache;
}

Expand All @@ -156,27 +141,7 @@ export class Job {
}

async readlink (path: string) {
const cached = this.symlinkCache.get(path);
if (cached !== undefined) return cached;
await this.fileIOQueue.acquire();
try {
const link = await fsReadlink(path);
// also copy stat cache to symlink
const stats = this.statCache.get(path);
if (stats)
this.statCache.set(resolve(path, link), stats);
this.symlinkCache.set(path, link);
return link;
}
catch (e: any) {
if (e.code !== 'EINVAL' && e.code !== 'ENOENT' && e.code !== 'UNKNOWN')
throw e;
this.symlinkCache.set(path, null);
return null;
}
finally {
this.fileIOQueue.release();
}
return this.cachedFileSystem.readlink(path);
}

async isFile (path: string) {
Expand All @@ -194,24 +159,7 @@ export class Job {
}

async stat (path: string) {
const cached = this.statCache.get(path);
if (cached) return cached;
await this.fileIOQueue.acquire();
try {
const stats = await fsStat(path);
this.statCache.set(path, stats);
return stats;
}
catch (e: any) {
if (e.code === 'ENOENT') {
this.statCache.set(path, null);
return null;
}
throw e;
}
finally {
this.fileIOQueue.release();
}
return this.cachedFileSystem.stat(path);
}

private maybeEmitDep = async (dep: string, path: string, cjsResolve: boolean) => {
Expand Down Expand Up @@ -257,25 +205,8 @@ export class Job {
return resolveDependency(id, parent, job, cjsResolve);
}

async readFile (path: string): Promise<string | Buffer | null> {
const cached = this.fileCache.get(path);
if (cached !== undefined) return cached;
await this.fileIOQueue.acquire();
try {
const source = (await fsReadFile(path)).toString();
this.fileCache.set(path, source);
return source;
}
catch (e: any) {
if (e.code === 'ENOENT' || e.code === 'EISDIR') {
this.fileCache.set(path, null);
return null;
}
throw e;
}
finally {
this.fileIOQueue.release();
}
async readFile (path: string): Promise<Buffer | string | null> {
return this.cachedFileSystem.readFile(path);
}

async realpath (path: string, parent?: string, seen = new Set()): Promise<string> {
Expand All @@ -302,12 +233,12 @@ export class Job {
path = await this.realpath(path, parent);
}
path = relative(this.base, path);

if (parent) {
parent = relative(this.base, parent);
}
let reasonEntry = this.reasons.get(path)

if (!reasonEntry) {
reasonEntry = {
type: [reasonType],
Expand Down Expand Up @@ -375,7 +306,7 @@ export class Job {
else {
const source = await this.readFile(path);
if (source === null) throw new Error('File ' + path + ' does not exist.');
// analyze should not have any side-effects e.g. calling `job.emitFile`
// analyze should not have any side-effects e.g. calling `job.emitFile`
// directly as this will not be included in the cachedAnalysis and won't
// be emit for successive runs that leverage the cache
analyzeResult = await analyze(path, source.toString(), this);
Expand All @@ -387,7 +318,7 @@ export class Job {
if (isESM) {
this.esmFileList.add(relative(this.base, path));
}

await Promise.all([
...[...assets].map(async asset => {
const ext = extname(asset);
Expand Down
Loading

0 comments on commit 9b7fc6a

Please sign in to comment.