Skip to content

Commit

Permalink
fix: Make caching work correctly in async context (#320)
Browse files Browse the repository at this point in the history
Right now the cache was only populated after the call to the fs API
succeed, which in an async context was leading to multiple file IO or
analyze tasks started for the same file at once. This is because both
file IO are async and so until the fs call finished other calls for the
same file can still come in.

To fix this I extracted all file IO calls into a separate class that
wraps everything correctly and instead of caching the actual result, now
a Promise is cached. So the cache now always needs to be awaited.

As the values in the cache change this is definitely a breaking change.
Something like this:

```diff
const cache = {};
await nodeFileTrace([file], {
    cache,
);

-const cacheValue = cache.fileCache.get(file);
+const cacheValue = await cache.fileCache.get(file);
```

I know this is quite a big change, just let me know what you think and
if I should change anything. :)

In my personal example the number of calls to analyze went from ~2400 to
~1600 and the runtime from ~3s to ~2.7s (which includes other work than
nft)

Co-authored-by: Steven <[email protected]>
  • Loading branch information
danez and styfle authored Dec 14, 2022
1 parent 047f030 commit 1a7f083
Show file tree
Hide file tree
Showing 3 changed files with 206 additions and 101 deletions.
116 changes: 116 additions & 0 deletions src/fs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import type { Stats } from "fs";
import { resolve } from "path";
import fs from "graceful-fs";
import { Sema } from "async-sema";

const fsReadFile = fs.promises.readFile;
const fsReadlink = fs.promises.readlink;
const fsStat = fs.promises.stat;

export class CachedFileSystem {
private fileCache: Map<string, Promise<string | null>>;
private statCache: Map<string, Promise<Stats | null>>;
private symlinkCache: Map<string, Promise<string | null>>;
private fileIOQueue: Sema;

constructor({
cache,
fileIOConcurrency,
}: {
cache?: { fileCache?: Map<string, Promise<string | null>>, statCache?: Map<string, Promise<Stats | null>>, symlinkCache?: Map<string, Promise<string | null>> };
fileIOConcurrency: number;
}) {
this.fileIOQueue = new Sema(fileIOConcurrency);
this.fileCache = cache?.fileCache ?? new Map();
this.statCache = cache?.statCache ?? new Map();
this.symlinkCache = cache?.symlinkCache ?? new Map();

if (cache) {
cache.fileCache = this.fileCache;
cache.statCache = this.statCache;
cache.symlinkCache = this.symlinkCache;
}
}

async readlink(path: string): Promise<string | null> {
const cached = this.symlinkCache.get(path);
if (cached !== undefined) return cached;
// This is not awaiting the response, so that the cache is instantly populated and
// future calls serve the Promise from the cache
const readlinkPromise = this.executeFileIO(path, this._internalReadlink);
this.symlinkCache.set(path, readlinkPromise);

return readlinkPromise;
}

async readFile(path: string): Promise<string | null> {
const cached = this.fileCache.get(path);
if (cached !== undefined) return cached;
// This is not awaiting the response, so that the cache is instantly populated and
// future calls serve the Promise from the cache
const readFilePromise = this.executeFileIO(path, this._internalReadFile);
this.fileCache.set(path, readFilePromise);

return readFilePromise;
}

async stat(path: string): Promise<Stats | null> {
const cached = this.statCache.get(path);
if (cached !== undefined) return cached;
// This is not awaiting the response, so that the cache is instantly populated and
// future calls serve the Promise from the cache
const statPromise = this.executeFileIO(path, this._internalStat);
this.statCache.set(path, statPromise);

return statPromise;
}

private async _internalReadlink(path: string) {
try {
const link = await fsReadlink(path);
// also copy stat cache to symlink
const stats = this.statCache.get(path);
if (stats) this.statCache.set(resolve(path, link), stats);
return link;
} catch (e: any) {
if (e.code !== "EINVAL" && e.code !== "ENOENT" && e.code !== "UNKNOWN")
throw e;
return null;
}
}

private async _internalReadFile(path: string): Promise<string | null> {
try {
return (await fsReadFile(path)).toString();
} catch (e: any) {
if (e.code === "ENOENT" || e.code === "EISDIR") {
return null;
}
throw e;
}
}

private async _internalStat(path: string) {
try {
return await fsStat(path);
} catch (e: any) {
if (e.code === "ENOENT") {
return null;
}
throw e;
}
}

private async executeFileIO<Return>(
path: string,
fileIO: (path: string) => Promise<Return>
): Promise<Return> {
await this.fileIOQueue.acquire();

try {
return fileIO.call(this, path);
} finally {
this.fileIOQueue.release();
}
}
}
95 changes: 13 additions & 82 deletions src/node-file-trace.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
import { NodeFileTraceOptions, NodeFileTraceResult, NodeFileTraceReasons, Stats, NodeFileTraceReasonType } from './types';
import { NodeFileTraceOptions, NodeFileTraceResult, NodeFileTraceReasons, NodeFileTraceReasonType } from './types';
import { basename, dirname, extname, relative, resolve, sep } from 'path';
import fs from 'graceful-fs';
import analyze, { AnalyzeResult } from './analyze';
import resolveDependency, { NotFoundError } from './resolve-dependency';
import { isMatch } from 'micromatch';
import { sharedLibEmit } from './utils/sharedlib-emit';
import { join } from 'path';
import { Sema } from 'async-sema';

const fsReadFile = fs.promises.readFile;
const fsReadlink = fs.promises.readlink;
const fsStat = fs.promises.stat;
import { CachedFileSystem } from './fs';

function inPath (path: string, parent: string) {
const pathWithSep = join(parent, sep);
Expand Down Expand Up @@ -57,16 +52,13 @@ export class Job {
public log: boolean;
public mixedModules: boolean;
public analysis: { emitGlobs?: boolean, computeFileReferences?: boolean, evaluatePureExpressions?: boolean };
private fileCache: Map<string, string | null>;
private statCache: Map<string, Stats | null>;
private symlinkCache: Map<string, string | null>;
private analysisCache: Map<string, AnalyzeResult>;
public fileList: Set<string>;
public esmFileList: Set<string>;
public processed: Set<string>;
public warnings: Set<Error>;
public reasons: NodeFileTraceReasons = new Map()
private fileIOQueue: Sema;
public reasons: NodeFileTraceReasons = new Map();
private cachedFileSystem: CachedFileSystem;

constructor ({
base = process.cwd(),
Expand Down Expand Up @@ -121,8 +113,7 @@ export class Job {
this.paths = resolvedPaths;
this.log = log;
this.mixedModules = mixedModules;
this.fileIOQueue = new Sema(fileIOConcurrency);

this.cachedFileSystem = new CachedFileSystem({ cache, fileIOConcurrency });
this.analysis = {};
if (analysis !== false) {
Object.assign(this.analysis, {
Expand All @@ -137,15 +128,9 @@ export class Job {
}, analysis === true ? {} : analysis);
}

this.fileCache = cache && cache.fileCache || new Map();
this.statCache = cache && cache.statCache || new Map();
this.symlinkCache = cache && cache.symlinkCache || new Map();
this.analysisCache = cache && cache.analysisCache || new Map();

if (cache) {
cache.fileCache = this.fileCache;
cache.statCache = this.statCache;
cache.symlinkCache = this.symlinkCache;
cache.analysisCache = this.analysisCache;
}

Expand All @@ -156,27 +141,7 @@ export class Job {
}

async readlink (path: string) {
const cached = this.symlinkCache.get(path);
if (cached !== undefined) return cached;
await this.fileIOQueue.acquire();
try {
const link = await fsReadlink(path);
// also copy stat cache to symlink
const stats = this.statCache.get(path);
if (stats)
this.statCache.set(resolve(path, link), stats);
this.symlinkCache.set(path, link);
return link;
}
catch (e: any) {
if (e.code !== 'EINVAL' && e.code !== 'ENOENT' && e.code !== 'UNKNOWN')
throw e;
this.symlinkCache.set(path, null);
return null;
}
finally {
this.fileIOQueue.release();
}
return this.cachedFileSystem.readlink(path);
}

async isFile (path: string) {
Expand All @@ -194,24 +159,7 @@ export class Job {
}

async stat (path: string) {
const cached = this.statCache.get(path);
if (cached) return cached;
await this.fileIOQueue.acquire();
try {
const stats = await fsStat(path);
this.statCache.set(path, stats);
return stats;
}
catch (e: any) {
if (e.code === 'ENOENT') {
this.statCache.set(path, null);
return null;
}
throw e;
}
finally {
this.fileIOQueue.release();
}
return this.cachedFileSystem.stat(path);
}

private maybeEmitDep = async (dep: string, path: string, cjsResolve: boolean) => {
Expand Down Expand Up @@ -257,25 +205,8 @@ export class Job {
return resolveDependency(id, parent, job, cjsResolve);
}

async readFile (path: string): Promise<string | Buffer | null> {
const cached = this.fileCache.get(path);
if (cached !== undefined) return cached;
await this.fileIOQueue.acquire();
try {
const source = (await fsReadFile(path)).toString();
this.fileCache.set(path, source);
return source;
}
catch (e: any) {
if (e.code === 'ENOENT' || e.code === 'EISDIR') {
this.fileCache.set(path, null);
return null;
}
throw e;
}
finally {
this.fileIOQueue.release();
}
async readFile (path: string): Promise<Buffer | string | null> {
return this.cachedFileSystem.readFile(path);
}

async realpath (path: string, parent?: string, seen = new Set()): Promise<string> {
Expand All @@ -302,12 +233,12 @@ export class Job {
path = await this.realpath(path, parent);
}
path = relative(this.base, path);

if (parent) {
parent = relative(this.base, parent);
}
let reasonEntry = this.reasons.get(path)

if (!reasonEntry) {
reasonEntry = {
type: [reasonType],
Expand Down Expand Up @@ -375,7 +306,7 @@ export class Job {
else {
const source = await this.readFile(path);
if (source === null) throw new Error('File ' + path + ' does not exist.');
// analyze should not have any side-effects e.g. calling `job.emitFile`
// analyze should not have any side-effects e.g. calling `job.emitFile`
// directly as this will not be included in the cachedAnalysis and won't
// be emit for successive runs that leverage the cache
analyzeResult = await analyze(path, source.toString(), this);
Expand All @@ -387,7 +318,7 @@ export class Job {
if (isESM) {
this.esmFileList.add(relative(this.base, path));
}

await Promise.all([
...[...assets].map(async asset => {
const ext = extname(asset);
Expand Down
Loading

0 comments on commit 1a7f083

Please sign in to comment.