-
Notifications
You must be signed in to change notification settings - Fork 146
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: Make caching work correctly in async context (#320)
Until now, the cache was only populated after the call to the fs API succeeded, which in an async context led to multiple file IO or analyze tasks being started for the same file at once. This is because both of these are async, so until the fs call has finished, other calls for the same file can still come in. To fix this I extracted all file IO calls into a separate class that wraps everything correctly, and instead of caching the actual result, a Promise is now cached. So the cache now always needs to be awaited. As the values in the cache change, this is definitely a breaking change. Something like this: ```diff const cache = {}; await nodeFileTrace([file], { cache }); -const cacheValue = cache.fileCache.get(file); +const cacheValue = await cache.fileCache.get(file); ``` I know this is quite a big change, just let me know what you think and if I should change anything. :) In my personal example the number of calls to analyze went from ~2400 to ~1600 and the runtime from ~3s to ~2.7s (which includes work other than nft). Co-authored-by: Steven <[email protected]>
- Loading branch information
Showing
3 changed files
with
206 additions
and
101 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
import type { Stats } from "fs"; | ||
import { resolve } from "path"; | ||
import fs from "graceful-fs"; | ||
import { Sema } from "async-sema"; | ||
|
||
// Promise-returning fs primitives, taken off `fs.promises` once at module load
// and used by CachedFileSystem for every uncached lookup.
const {
  readFile: fsReadFile,
  readlink: fsReadlink,
  stat: fsStat,
} = fs.promises;
|
||
/**
 * File-system facade that memoizes `readFile`, `stat` and `readlink` per path.
 *
 * The caches store the *pending Promise* of each lookup rather than its
 * resolved value, so concurrent requests for the same path share a single
 * underlying fs call. Consumers reading the caches directly must therefore
 * `await` the stored values.
 *
 * Every uncached lookup goes through a semaphore created with
 * `fileIOConcurrency` permits; a permit is held until the fs call settles.
 */
export class CachedFileSystem {
  private readonly fileCache: Map<string, Promise<string | null>>;
  private readonly statCache: Map<string, Promise<Stats | null>>;
  private readonly symlinkCache: Map<string, Promise<string | null>>;
  private readonly fileIOQueue: Sema;

  /**
   * @param cache Optional container of pre-existing cache maps. Maps that are
   *   present are reused; missing ones are created. In both cases the maps
   *   actually in use are written back onto `cache`, so the caller can share
   *   them across instances.
   * @param fileIOConcurrency Number of permits for the file I/O semaphore.
   */
  constructor({
    cache,
    fileIOConcurrency,
  }: {
    cache?: { fileCache?: Map<string, Promise<string | null>>, statCache?: Map<string, Promise<Stats | null>>, symlinkCache?: Map<string, Promise<string | null>> };
    fileIOConcurrency: number;
  }) {
    this.fileIOQueue = new Sema(fileIOConcurrency);
    this.fileCache = cache?.fileCache ?? new Map();
    this.statCache = cache?.statCache ?? new Map();
    this.symlinkCache = cache?.symlinkCache ?? new Map();

    if (cache) {
      // Expose the maps in use (including freshly created ones) to the caller.
      cache.fileCache = this.fileCache;
      cache.statCache = this.statCache;
      cache.symlinkCache = this.symlinkCache;
    }
  }

  /**
   * Reads the target of the symlink at `path`.
   *
   * @returns The link target, or `null` when the fs call fails with `EINVAL`,
   *   `ENOENT` or `UNKNOWN`.
   * @throws Any other error raised by the underlying fs call.
   */
  async readlink(path: string): Promise<string | null> {
    return this.cachedIO(this.symlinkCache, path, this._internalReadlink);
  }

  /**
   * Reads the file at `path` and converts its contents to a string.
   *
   * @returns The file contents, or `null` when the fs call fails with `ENOENT`
   *   or `EISDIR`.
   * @throws Any other error raised by the underlying fs call.
   */
  async readFile(path: string): Promise<string | null> {
    return this.cachedIO(this.fileCache, path, this._internalReadFile);
  }

  /**
   * Stats `path`.
   *
   * @returns The `Stats` object, or `null` when the fs call fails with `ENOENT`.
   * @throws Any other error raised by the underlying fs call.
   */
  async stat(path: string): Promise<Stats | null> {
    return this.cachedIO(this.statCache, path, this._internalStat);
  }

  /**
   * Returns the cached Promise for `path`, or starts `fileIO` and caches its
   * Promise. The Promise is stored *before* it settles so that calls arriving
   * while the I/O is still in flight are served from the cache instead of
   * triggering a second fs call.
   */
  private cachedIO<Return>(
    cache: Map<string, Promise<Return>>,
    path: string,
    fileIO: (path: string) => Promise<Return>
  ): Promise<Return> {
    const cached = cache.get(path);
    if (cached !== undefined) return cached;

    const pending = this.executeFileIO(path, fileIO);
    cache.set(path, pending);
    return pending;
  }

  /** Extracts the `code` property of a thrown value, if it has one. */
  private static errorCode(e: unknown): unknown {
    return typeof e === "object" && e !== null && "code" in e
      ? (e as { code?: unknown }).code
      : undefined;
  }

  /** Uncached `readlink`; see {@link CachedFileSystem.readlink} for the contract. */
  private async _internalReadlink(path: string): Promise<string | null> {
    try {
      const link = await fsReadlink(path);
      // also copy stat cache to symlink
      // NOTE(review): `resolve(path, link)` resolves a relative target against
      // the link path itself rather than its parent directory - confirm intended.
      const stats = this.statCache.get(path);
      if (stats) this.statCache.set(resolve(path, link), stats);
      return link;
    } catch (e: unknown) {
      const code = CachedFileSystem.errorCode(e);
      if (code !== "EINVAL" && code !== "ENOENT" && code !== "UNKNOWN")
        throw e;
      return null;
    }
  }

  /** Uncached `readFile`; see {@link CachedFileSystem.readFile} for the contract. */
  private async _internalReadFile(path: string): Promise<string | null> {
    try {
      return (await fsReadFile(path)).toString();
    } catch (e: unknown) {
      const code = CachedFileSystem.errorCode(e);
      if (code === "ENOENT" || code === "EISDIR") {
        return null;
      }
      throw e;
    }
  }

  /** Uncached `stat`; see {@link CachedFileSystem.stat} for the contract. */
  private async _internalStat(path: string): Promise<Stats | null> {
    try {
      return await fsStat(path);
    } catch (e: unknown) {
      if (CachedFileSystem.errorCode(e) === "ENOENT") {
        return null;
      }
      throw e;
    }
  }

  /**
   * Runs `fileIO` (bound to this instance) while holding one semaphore permit.
   *
   * @param path Path forwarded to `fileIO`.
   * @param fileIO The fs operation to run.
   * @returns Whatever `fileIO` resolves to; rejections propagate unchanged.
   */
  private async executeFileIO<Return>(
    path: string,
    fileIO: (path: string) => Promise<Return>
  ): Promise<Return> {
    await this.fileIOQueue.acquire();

    try {
      // `await` is required here: returning the bare Promise would run the
      // `finally` block (and release the permit) as soon as the I/O has been
      // *started*, so the concurrency limit would never take effect.
      return await fileIO.call(this, path);
    } finally {
      this.fileIOQueue.release();
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.