From 3e68a27822eb3e2a27282e32289a13f5b988c08d Mon Sep 17 00:00:00 2001 From: Winter Date: Mon, 5 Dec 2022 20:15:49 -0500 Subject: [PATCH 1/5] prefetch-npm-deps: refactor This splits prefetch-npm-deps into multiple files, as well as making a few small changes along the way, such as going from a `HashMap` to a `Vec` as the container for packages, to deduplicate them more efficently. --- .../node/fetch-npm-deps/src/main.rs | 425 +++--------------- .../node/fetch-npm-deps/src/parse/lock.rs | 191 ++++++++ .../node/fetch-npm-deps/src/parse/mod.rs | 311 +++++++++++++ .../node/fetch-npm-deps/src/tests.rs | 141 ------ 4 files changed, 575 insertions(+), 493 deletions(-) create mode 100644 pkgs/build-support/node/fetch-npm-deps/src/parse/lock.rs create mode 100644 pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs delete mode 100644 pkgs/build-support/node/fetch-npm-deps/src/tests.rs diff --git a/pkgs/build-support/node/fetch-npm-deps/src/main.rs b/pkgs/build-support/node/fetch-npm-deps/src/main.rs index 3d2204071a66a..068a6d2bcd60b 100644 --- a/pkgs/build-support/node/fetch-npm-deps/src/main.rs +++ b/pkgs/build-support/node/fetch-npm-deps/src/main.rs @@ -1,250 +1,18 @@ #![warn(clippy::pedantic)] use crate::cacache::Cache; -use anyhow::{anyhow, Context}; +use anyhow::anyhow; use rayon::prelude::*; -use serde::Deserialize; use serde_json::{Map, Value}; use std::{ - collections::{HashMap, HashSet}, - env, fmt, fs, io, + env, fs, path::Path, - process::{self, Command, Stdio}, + process::{self, Command}, }; use tempfile::tempdir; -use url::Url; mod cacache; -#[cfg(test)] -mod tests; - -#[derive(Deserialize)] -struct PackageLock { - #[serde(rename = "lockfileVersion")] - version: u8, - dependencies: Option>, - packages: Option>, -} - -#[derive(Deserialize)] -struct OldPackage { - version: UrlOrString, - #[serde(default)] - bundled: bool, - resolved: Option, - integrity: Option, - dependencies: Option>, -} - -#[derive(Debug, Deserialize, PartialEq, Eq)] -struct Package { - resolved: Option, - integrity: Option, -} - -#[derive(Debug, Deserialize, PartialEq, Eq)] -#[serde(untagged)] -enum UrlOrString { - Url(Url), - String(String), -} - -impl fmt::Display for UrlOrString { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - UrlOrString::Url(url) => url.fmt(f), - UrlOrString::String(string) => string.fmt(f), - } - } -} - -#[allow(clippy::case_sensitive_file_extension_comparisons)] -fn to_new_packages( - old_packages: HashMap, - initial_url: &Url, -) -> anyhow::Result> { - let mut new = HashMap::new(); - - for (name, mut package) in old_packages { - // In some cases, a bundled dependency happens to have the same version as a non-bundled one, causing - // the bundled one without a URL to override the entry for the non-bundled instance, which prevents the - // dependency from being downloaded. - if package.bundled { - continue; - } - - if let UrlOrString::Url(v) = &package.version { - for (scheme, host) in [ - ("github", "github.com"), - ("bitbucket", "bitbucket.org"), - ("gitlab", "gitlab.com"), - ] { - if v.scheme() == scheme { - package.version = { - let mut new_url = initial_url.clone(); - - new_url.set_host(Some(host))?; - - if v.path().ends_with(".git") { - new_url.set_path(v.path()); - } else { - new_url.set_path(&format!("{}.git", v.path())); - } - - new_url.set_fragment(v.fragment()); - - UrlOrString::Url(new_url) - }; - - break; - } - } - } - - new.insert( - format!("{name}-{}", package.version), - Package { - resolved: if matches!(package.version, UrlOrString::Url(_)) { - Some(package.version) - } else { - package.resolved - }, - integrity: package.integrity, - }, - ); - - if let Some(dependencies) = package.dependencies { - new.extend(to_new_packages(dependencies, initial_url)?); - } - } - - Ok(new) -} - -#[allow(clippy::case_sensitive_file_extension_comparisons)] -fn get_hosted_git_url(url: &Url) -> Option { - if ["git", "http", "git+ssh", "git+https", "ssh", "https"].contains(&url.scheme()) { - let mut s = url.path_segments()?; - - match url.host_str()? { - "github.com" => { - let user = s.next()?; - let mut project = s.next()?; - let typ = s.next(); - let mut commit = s.next(); - - if typ.is_none() { - commit = url.fragment(); - } else if typ.is_some() && typ != Some("tree") { - return None; - } - - if project.ends_with(".git") { - project = project.strip_suffix(".git")?; - } - - let commit = commit.unwrap(); - - Some( - Url::parse(&format!( - "https://codeload.github.com/{user}/{project}/tar.gz/{commit}" - )) - .ok()?, - ) - } - "bitbucket.org" => { - let user = s.next()?; - let mut project = s.next()?; - let aux = s.next(); - - if aux == Some("get") { - return None; - } - - if project.ends_with(".git") { - project = project.strip_suffix(".git")?; - } - - let commit = url.fragment()?; - - Some( - Url::parse(&format!( - "https://bitbucket.org/{user}/{project}/get/{commit}.tar.gz" - )) - .ok()?, - ) - } - "gitlab.com" => { - let path = &url.path()[1..]; - - if path.contains("/~/") || path.contains("/archive.tar.gz") { - return None; - } - - let user = s.next()?; - let mut project = s.next()?; - - if project.ends_with(".git") { - project = project.strip_suffix(".git")?; - } - - let commit = url.fragment()?; - - Some( - Url::parse(&format!( - "https://gitlab.com/{user}/{project}/repository/archive.tar.gz?ref={commit}" - )) - .ok()?, - ) - } - "git.sr.ht" => { - let user = s.next()?; - let mut project = s.next()?; - let aux = s.next(); - - if aux == Some("archive") { - return None; - } - - if project.ends_with(".git") { - project = project.strip_suffix(".git")?; - } - - let commit = url.fragment()?; - - Some( - Url::parse(&format!( - "https://git.sr.ht/{user}/{project}/archive/{commit}.tar.gz" - )) - .ok()?, - ) - } - _ => None, - } - } else { - None - } -} - -fn get_ideal_hash(integrity: &str) -> anyhow::Result<&str> { - let split: Vec<_> = integrity.split_ascii_whitespace().collect(); - - if split.len() == 1 { - Ok(split[0]) - } else { - for hash in ["sha512-", "sha1-"] { - if let Some(h) = split.iter().find(|s| s.starts_with(hash)) { - return Ok(h); - } - } - - Err(anyhow!("not sure which hash to select out of {split:?}")) - } -} - -fn get_initial_url() -> anyhow::Result { - Url::parse("git+ssh://git@a.b").context("initial url should be valid") -} +mod parse; /// `fixup_lockfile` removes the `integrity` field from Git dependencies. /// @@ -294,7 +62,6 @@ fn fixup_lockfile(mut lock: Map) -> anyhow::Result anyhow::Result<()> { let args = env::args().collect::>(); @@ -319,7 +86,6 @@ fn main() -> anyhow::Result<()> { } let lock_content = fs::read_to_string(&args[1])?; - let lock: PackageLock = serde_json::from_str(&lock_content)?; let out_tempdir; @@ -331,137 +97,92 @@ fn main() -> anyhow::Result<()> { (out_tempdir.path(), true) }; - let agent = ureq::agent(); - - eprintln!("lockfile version: {}", lock.version); - - let packages = match lock.version { - 1 => { - let initial_url = get_initial_url()?; - - lock.dependencies - .map(|p| to_new_packages(p, &initial_url)) - .transpose()? - } - 2 | 3 => lock.packages, - _ => panic!( - "We don't support lockfile version {}, please file an issue.", - lock.version - ), - }; - - if packages.is_none() { - return Ok(()); - } - - let packages = { - let mut seen = HashSet::new(); - let mut new_packages = HashMap::new(); - - for (dep, package) in packages.unwrap().drain() { - if let (false, Some(UrlOrString::Url(resolved))) = (dep.is_empty(), &package.resolved) { - if !seen.contains(resolved) { - seen.insert(resolved.clone()); - new_packages.insert(dep, package); - } - } - } - - new_packages - }; + let packages = parse::lockfile(&lock_content)?; let cache = Cache::new(out.join("_cacache")); - packages - .into_par_iter() - .try_for_each(|(dep, mut package)| { - eprintln!("{dep}"); + packages.into_par_iter().try_for_each(|package| { + eprintln!("{}", package.name); - let mut resolved = match package.resolved { - Some(UrlOrString::Url(url)) => url, - _ => unreachable!(), - }; + let tarball = package.tarball()?; + let integrity = package.integrity(); - let mut hosted = false; + cache + .put( + format!("make-fetch-happen:request-cache:{}", package.url), + package.url, + &tarball, + integrity, + ) + .map_err(|e| anyhow!("couldn't insert cache entry for {}: {e:?}", package.name))?; - if let Some(hosted_git_url) = get_hosted_git_url(&resolved) { - resolved = hosted_git_url; - package.integrity = None; - hosted = true; - } + Ok::<_, anyhow::Error>(()) + })?; - let mut data = Vec::new(); - - let mut body = agent.get(resolved.as_str()).call()?.into_reader(); - - if hosted { - let workdir = tempdir()?; + fs::write(out.join("package-lock.json"), lock_content)?; - let tar_path = workdir.path().join("package"); + if print_hash { + Command::new("nix") + .args(["--experimental-features", "nix-command", "hash", "path"]) + .arg(out.as_os_str()) + .status()?; + } - fs::create_dir(&tar_path)?; + Ok(()) +} - let mut cmd = Command::new("tar") - .args(["--extract", "--gzip", "--strip-components=1", "-C"]) - .arg(&tar_path) - .stdin(Stdio::piped()) - .spawn()?; +#[cfg(test)] +mod tests { + use super::fixup_lockfile; + use serde_json::json; + + #[test] + fn lockfile_fixup() -> anyhow::Result<()> { + let input = json!({ + "lockfileVersion": 2, + "name": "foo", + "packages": { + "": { - io::copy(&mut body, &mut cmd.stdin.take().unwrap())?; + }, + "foo": { + "resolved": "https://github.com/NixOS/nixpkgs", + "integrity": "aaa" + }, + "bar": { + "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git", + "integrity": "bbb" + } + } + }); - let exit = cmd.wait()?; + let expected = json!({ + "lockfileVersion": 2, + "name": "foo", + "packages": { + "": { - if !exit.success() { - return Err(anyhow!( - "failed to extract tarball for {dep}: tar exited with status code {}", - exit.code().unwrap() - )); + }, + "foo": { + "resolved": "https://github.com/NixOS/nixpkgs", + "integrity": "aaa" + }, + "bar": { + "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git", } - - data = Command::new("tar") - .args([ - "--sort=name", - "--mtime=@0", - "--owner=0", - "--group=0", - "--numeric-owner", - "--format=gnu", - "-I", - "gzip -n -9", - "--create", - "-C", - ]) - .arg(workdir.path()) - .arg("package") - .output()? - .stdout; - } else { - body.read_to_end(&mut data)?; } + }); - cache - .put( - format!("make-fetch-happen:request-cache:{resolved}"), - resolved, - &data, - package - .integrity - .map(|i| Ok::(get_ideal_hash(&i)?.to_string())) - .transpose()?, - ) - .map_err(|e| anyhow!("couldn't insert cache entry for {dep}: {e:?}"))?; - - Ok::<_, anyhow::Error>(()) - })?; + assert_eq!( + fixup_lockfile(input.as_object().unwrap().clone())?, + Some(expected.as_object().unwrap().clone()) + ); - fs::write(out.join("package-lock.json"), lock_content)?; + assert_eq!( + fixup_lockfile(json!({"lockfileVersion": 1}).as_object().unwrap().clone())?, + None + ); - if print_hash { - Command::new("nix") - .args(["--experimental-features", "nix-command", "hash", "path"]) - .arg(out.as_os_str()) - .status()?; + Ok(()) } - - Ok(()) } diff --git a/pkgs/build-support/node/fetch-npm-deps/src/parse/lock.rs b/pkgs/build-support/node/fetch-npm-deps/src/parse/lock.rs new file mode 100644 index 0000000000000..99bd3020b5237 --- /dev/null +++ b/pkgs/build-support/node/fetch-npm-deps/src/parse/lock.rs @@ -0,0 +1,191 @@ +use anyhow::{bail, Context}; +use rayon::slice::ParallelSliceMut; +use serde::Deserialize; +use std::{collections::HashMap, fmt}; +use url::Url; + +pub(super) fn packages(content: &str) -> anyhow::Result> { + let lockfile: Lockfile = serde_json::from_str(content)?; + + let mut packages = match lockfile.version { + 1 => { + let initial_url = get_initial_url()?; + + lockfile + .dependencies + .map(|p| to_new_packages(p, &initial_url)) + .transpose()? + } + 2 | 3 => lockfile.packages.map(|pkgs| { + pkgs.into_iter() + .filter(|(n, p)| !n.is_empty() && matches!(p.resolved, Some(UrlOrString::Url(_)))) + .map(|(n, p)| Package { name: Some(n), ..p }) + .collect() + }), + _ => bail!( + "We don't support lockfile version {}, please file an issue.", + lockfile.version + ), + } + .expect("lockfile should have packages"); + + packages.par_sort_by(|x, y| { + x.resolved + .partial_cmp(&y.resolved) + .expect("resolved should be comparable") + }); + + packages.dedup_by(|x, y| x.resolved == y.resolved); + + Ok(packages) +} + +#[derive(Deserialize)] +struct Lockfile { + #[serde(rename = "lockfileVersion")] + version: u8, + dependencies: Option>, + packages: Option>, +} + +#[derive(Deserialize)] +struct OldPackage { + version: UrlOrString, + #[serde(default)] + bundled: bool, + resolved: Option, + integrity: Option, + dependencies: Option>, +} + +#[derive(Debug, Deserialize, PartialEq, Eq)] +pub(super) struct Package { + #[serde(default)] + pub(super) name: Option, + pub(super) resolved: Option, + pub(super) integrity: Option, +} + +#[derive(Debug, Deserialize, PartialEq, Eq, PartialOrd, Ord)] +#[serde(untagged)] +pub(super) enum UrlOrString { + Url(Url), + String(String), +} + +impl fmt::Display for UrlOrString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + UrlOrString::Url(url) => url.fmt(f), + UrlOrString::String(string) => string.fmt(f), + } + } +} + +#[allow(clippy::case_sensitive_file_extension_comparisons)] +fn to_new_packages( + old_packages: HashMap, + initial_url: &Url, +) -> anyhow::Result> { + let mut new = Vec::new(); + + for (name, mut package) in old_packages { + // In some cases, a bundled dependency happens to have the same version as a non-bundled one, causing + // the bundled one without a URL to override the entry for the non-bundled instance, which prevents the + // dependency from being downloaded. + if package.bundled { + continue; + } + + if let UrlOrString::Url(v) = &package.version { + for (scheme, host) in [ + ("github", "github.com"), + ("bitbucket", "bitbucket.org"), + ("gitlab", "gitlab.com"), + ] { + if v.scheme() == scheme { + package.version = { + let mut new_url = initial_url.clone(); + + new_url.set_host(Some(host))?; + + if v.path().ends_with(".git") { + new_url.set_path(v.path()); + } else { + new_url.set_path(&format!("{}.git", v.path())); + } + + new_url.set_fragment(v.fragment()); + + UrlOrString::Url(new_url) + }; + + break; + } + } + } + + new.push(Package { + name: Some(name), + resolved: if matches!(package.version, UrlOrString::Url(_)) { + Some(package.version) + } else { + package.resolved + }, + integrity: package.integrity, + }); + + if let Some(dependencies) = package.dependencies { + new.append(&mut to_new_packages(dependencies, initial_url)?); + } + } + + Ok(new) +} + +fn get_initial_url() -> anyhow::Result { + Url::parse("git+ssh://git@a.b").context("initial url should be valid") +} + +#[cfg(test)] +mod tests { + use super::{get_initial_url, to_new_packages, OldPackage, Package, UrlOrString}; + use std::collections::HashMap; + use url::Url; + + #[test] + fn git_shorthand_v1() -> anyhow::Result<()> { + let old = { + let mut o = HashMap::new(); + o.insert( + String::from("sqlite3"), + OldPackage { + version: UrlOrString::Url( + Url::parse( + "github:mapbox/node-sqlite3#593c9d498be2510d286349134537e3bf89401c4a", + ) + .unwrap(), + ), + bundled: false, + resolved: None, + integrity: None, + dependencies: None, + }, + ); + o + }; + + let initial_url = get_initial_url()?; + + let new = to_new_packages(old, &initial_url)?; + + assert_eq!(new.len(), 1, "new packages map should contain 1 value"); + assert_eq!(new[0], Package { + name: Some(String::from("sqlite3")), + resolved: Some(UrlOrString::Url(Url::parse("git+ssh://git@github.com/mapbox/node-sqlite3.git#593c9d498be2510d286349134537e3bf89401c4a").unwrap())), + integrity: None + }); + + Ok(()) + } +} diff --git a/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs b/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs new file mode 100644 index 0000000000000..95f876017bdd5 --- /dev/null +++ b/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs @@ -0,0 +1,311 @@ +use anyhow::{anyhow, bail, Context}; +use lock::UrlOrString; +use rayon::prelude::*; +use std::{ + fs, io, + process::{Command, Stdio}, +}; +use tempfile::{tempdir, TempDir}; +use url::Url; + +mod lock; + +pub fn lockfile(lockfile: &str) -> anyhow::Result> { + let packages = lock::packages(lockfile).context("failed to extract packages from lockfile")?; + + packages + .into_par_iter() + .map(|p| { + let n = p.name.clone().unwrap(); + + Package::from_lock(p).with_context(|| format!("failed to parse data for {n}")) + }) + .collect() +} + +#[derive(Debug)] +pub struct Package { + pub name: String, + pub url: Url, + specifics: Specifics, +} + +#[derive(Debug)] +enum Specifics { + Registry { integrity: String }, + Git { workdir: TempDir }, +} + +impl Package { + fn from_lock(pkg: lock::Package) -> anyhow::Result { + let mut resolved = match pkg + .resolved + .expect("at this point, packages should have URLs") + { + UrlOrString::Url(u) => u, + UrlOrString::String(_) => panic!("at this point, all packages should have URLs"), + }; + + let specifics = match get_hosted_git_url(&resolved) { + Some(hosted) => { + let mut body = ureq::get(hosted.as_str()).call()?.into_reader(); + + let workdir = tempdir()?; + + let tar_path = workdir.path().join("package"); + + fs::create_dir(&tar_path)?; + + let mut cmd = Command::new("tar") + .args(["--extract", "--gzip", "--strip-components=1", "-C"]) + .arg(&tar_path) + .stdin(Stdio::piped()) + .spawn()?; + + io::copy(&mut body, &mut cmd.stdin.take().unwrap())?; + + let exit = cmd.wait()?; + + if !exit.success() { + bail!( + "failed to extract tarball for {}: tar exited with status code {}", + pkg.name.unwrap(), + exit.code().unwrap() + ); + } + + resolved = hosted; + + Specifics::Git { workdir } + } + None => Specifics::Registry { + integrity: get_ideal_hash( + &pkg.integrity + .expect("non-git dependencies should have assosciated integrity"), + )? + .to_string(), + }, + }; + + Ok(Package { + name: pkg.name.unwrap(), + url: resolved, + specifics, + }) + } + + pub fn tarball(&self) -> anyhow::Result> { + match &self.specifics { + Specifics::Registry { .. } => { + let mut body = Vec::new(); + + ureq::get(self.url.as_str()) + .call()? + .into_reader() + .read_to_end(&mut body)?; + + Ok(body) + } + Specifics::Git { workdir } => Ok(Command::new("tar") + .args([ + "--sort=name", + "--mtime=@0", + "--owner=0", + "--group=0", + "--numeric-owner", + "--format=gnu", + "-I", + "gzip -n -9", + "--create", + "-C", + ]) + .arg(workdir.path()) + .arg("package") + .output()? + .stdout), + } + } + + pub fn integrity(&self) -> Option { + match &self.specifics { + Specifics::Registry { integrity } => Some(integrity.clone()), + Specifics::Git { .. } => None, + } + } +} + +#[allow(clippy::case_sensitive_file_extension_comparisons)] +fn get_hosted_git_url(url: &Url) -> Option { + if ["git", "http", "git+ssh", "git+https", "ssh", "https"].contains(&url.scheme()) { + let mut s = url.path_segments()?; + + match url.host_str()? { + "github.com" => { + let user = s.next()?; + let mut project = s.next()?; + let typ = s.next(); + let mut commit = s.next(); + + if typ.is_none() { + commit = url.fragment(); + } else if typ.is_some() && typ != Some("tree") { + return None; + } + + if project.ends_with(".git") { + project = project.strip_suffix(".git")?; + } + + let commit = commit.unwrap(); + + Some( + Url::parse(&format!( + "https://codeload.github.com/{user}/{project}/tar.gz/{commit}" + )) + .ok()?, + ) + } + "bitbucket.org" => { + let user = s.next()?; + let mut project = s.next()?; + let aux = s.next(); + + if aux == Some("get") { + return None; + } + + if project.ends_with(".git") { + project = project.strip_suffix(".git")?; + } + + let commit = url.fragment()?; + + Some( + Url::parse(&format!( + "https://bitbucket.org/{user}/{project}/get/{commit}.tar.gz" + )) + .ok()?, + ) + } + "gitlab.com" => { + let path = &url.path()[1..]; + + if path.contains("/~/") || path.contains("/archive.tar.gz") { + return None; + } + + let user = s.next()?; + let mut project = s.next()?; + + if project.ends_with(".git") { + project = project.strip_suffix(".git")?; + } + + let commit = url.fragment()?; + + Some( + Url::parse(&format!( + "https://gitlab.com/{user}/{project}/repository/archive.tar.gz?ref={commit}" + )) + .ok()?, + ) + } + "git.sr.ht" => { + let user = s.next()?; + let mut project = s.next()?; + let aux = s.next(); + + if aux == Some("archive") { + return None; + } + + if project.ends_with(".git") { + project = project.strip_suffix(".git")?; + } + + let commit = url.fragment()?; + + Some( + Url::parse(&format!( + "https://git.sr.ht/{user}/{project}/archive/{commit}.tar.gz" + )) + .ok()?, + ) + } + _ => None, + } + } else { + None + } +} + +fn get_ideal_hash(integrity: &str) -> anyhow::Result<&str> { + let split: Vec<_> = integrity.split_ascii_whitespace().collect(); + + if split.len() == 1 { + Ok(split[0]) + } else { + for hash in ["sha512-", "sha1-"] { + if let Some(h) = split.iter().find(|s| s.starts_with(hash)) { + return Ok(h); + } + } + + Err(anyhow!("not sure which hash to select out of {split:?}")) + } +} + +#[cfg(test)] +mod tests { + use super::{get_hosted_git_url, get_ideal_hash}; + use url::Url; + + #[test] + fn hosted_git_urls() { + for (input, expected) in [ + ( + "git+ssh://git@github.com/castlabs/electron-releases.git#fc5f78d046e8d7cdeb66345a2633c383ab41f525", + Some("https://codeload.github.com/castlabs/electron-releases/tar.gz/fc5f78d046e8d7cdeb66345a2633c383ab41f525"), + ), + ( + "https://user@github.com/foo/bar#fix/bug", + Some("https://codeload.github.com/foo/bar/tar.gz/fix/bug") + ), + ( + "https://github.com/eligrey/classList.js/archive/1.2.20180112.tar.gz", + None + ), + ( + "git+ssh://bitbucket.org/foo/bar#branch", + Some("https://bitbucket.org/foo/bar/get/branch.tar.gz") + ), + ( + "ssh://git@gitlab.com/foo/bar.git#fix/bug", + Some("https://gitlab.com/foo/bar/repository/archive.tar.gz?ref=fix/bug") + ), + ( + "git+ssh://git.sr.ht/~foo/bar#branch", + Some("https://git.sr.ht/~foo/bar/archive/branch.tar.gz") + ), + ] { + assert_eq!( + get_hosted_git_url(&Url::parse(input).unwrap()), + expected.map(|u| Url::parse(u).unwrap()) + ); + } + } + + #[test] + fn ideal_hashes() { + for (input, expected) in [ + ("sha512-foo sha1-bar", Some("sha512-foo")), + ("sha1-bar md5-foo", Some("sha1-bar")), + ("sha1-bar", Some("sha1-bar")), + ("sha512-foo", Some("sha512-foo")), + ("foo-bar sha1-bar", Some("sha1-bar")), + ("foo-bar baz-foo", None), + ] { + assert_eq!(get_ideal_hash(input).ok(), expected); + } + } +} diff --git a/pkgs/build-support/node/fetch-npm-deps/src/tests.rs b/pkgs/build-support/node/fetch-npm-deps/src/tests.rs deleted file mode 100644 index a3317207c42e4..0000000000000 --- a/pkgs/build-support/node/fetch-npm-deps/src/tests.rs +++ /dev/null @@ -1,141 +0,0 @@ -use super::{ - fixup_lockfile, get_hosted_git_url, get_ideal_hash, get_initial_url, to_new_packages, - OldPackage, Package, UrlOrString, -}; -use serde_json::json; -use std::collections::HashMap; -use url::Url; - -#[test] -fn hosted_git_urls() { - for (input, expected) in [ - ( - "git+ssh://git@github.com/castlabs/electron-releases.git#fc5f78d046e8d7cdeb66345a2633c383ab41f525", - Some("https://codeload.github.com/castlabs/electron-releases/tar.gz/fc5f78d046e8d7cdeb66345a2633c383ab41f525"), - ), - ( - "https://user@github.com/foo/bar#fix/bug", - Some("https://codeload.github.com/foo/bar/tar.gz/fix/bug") - ), - ( - "https://github.com/eligrey/classList.js/archive/1.2.20180112.tar.gz", - None - ), - ( - "git+ssh://bitbucket.org/foo/bar#branch", - Some("https://bitbucket.org/foo/bar/get/branch.tar.gz") - ), - ( - "ssh://git@gitlab.com/foo/bar.git#fix/bug", - Some("https://gitlab.com/foo/bar/repository/archive.tar.gz?ref=fix/bug") - ), - ( - "git+ssh://git.sr.ht/~foo/bar#branch", - Some("https://git.sr.ht/~foo/bar/archive/branch.tar.gz") - ), - ] { - assert_eq!( - get_hosted_git_url(&Url::parse(input).unwrap()), - expected.map(|u| Url::parse(u).unwrap()) - ); - } -} - -#[test] -fn ideal_hashes() { - for (input, expected) in [ - ("sha512-foo sha1-bar", Some("sha512-foo")), - ("sha1-bar md5-foo", Some("sha1-bar")), - ("sha1-bar", Some("sha1-bar")), - ("sha512-foo", Some("sha512-foo")), - ("foo-bar sha1-bar", Some("sha1-bar")), - ("foo-bar baz-foo", None), - ] { - assert_eq!(get_ideal_hash(input).ok(), expected); - } -} - -#[test] -fn git_shorthand_v1() -> anyhow::Result<()> { - let old = { - let mut o = HashMap::new(); - o.insert( - String::from("sqlite3"), - OldPackage { - version: UrlOrString::Url( - Url::parse( - "github:mapbox/node-sqlite3#593c9d498be2510d286349134537e3bf89401c4a", - ) - .unwrap(), - ), - bundled: false, - resolved: None, - integrity: None, - dependencies: None, - }, - ); - o - }; - - let initial_url = get_initial_url()?; - - let new = to_new_packages(old, &initial_url)?; - - assert_eq!(new.len(), 1, "new packages map should contain 1 value"); - assert_eq!(new.into_values().next().unwrap(), Package { - resolved: Some(UrlOrString::Url(Url::parse("git+ssh://git@github.com/mapbox/node-sqlite3.git#593c9d498be2510d286349134537e3bf89401c4a").unwrap())), - integrity: None - }); - - Ok(()) -} - -#[test] -fn lockfile_fixup() -> anyhow::Result<()> { - let input = json!({ - "lockfileVersion": 2, - "name": "foo", - "packages": { - "": { - - }, - "foo": { - "resolved": "https://github.com/NixOS/nixpkgs", - "integrity": "aaa" - }, - "bar": { - "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git", - "integrity": "bbb" - } - } - }); - - let expected = json!({ - "lockfileVersion": 2, - "name": "foo", - "packages": { - "": { - - }, - "foo": { - "resolved": "https://github.com/NixOS/nixpkgs", - "integrity": "aaa" - }, - "bar": { - "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git", - } - } - }); - - assert_eq!( - fixup_lockfile(input.as_object().unwrap().clone())?, - Some(expected.as_object().unwrap().clone()) - ); - - assert_eq!( - fixup_lockfile(json!({"lockfileVersion": 1}).as_object().unwrap().clone())?, - None - ); - - Ok(()) -} From 7e247e6fecdb539911ddc7fafdbcf6db18506b84 Mon Sep 17 00:00:00 2001 From: Winter Date: Mon, 12 Dec 2022 22:33:43 -0500 Subject: [PATCH 2/5] prefetch-npm-deps: download dev deps for git deps with install scripts Git dependencies with install scripts are built isolated from the main package, so their development dependencies are required. To take advantage of this, #206477 is needed. --- .../node/build-npm-package/default.nix | 5 +- .../node/fetch-npm-deps/default.nix | 11 +++- .../node/fetch-npm-deps/src/main.rs | 2 +- .../node/fetch-npm-deps/src/parse/mod.rs | 65 +++++++++++++++++-- 4 files changed, 73 insertions(+), 10 deletions(-) diff --git a/pkgs/build-support/node/build-npm-package/default.nix b/pkgs/build-support/node/build-npm-package/default.nix index 26cc678c571e7..1c3fb6a74efca 100644 --- a/pkgs/build-support/node/build-npm-package/default.nix +++ b/pkgs/build-support/node/build-npm-package/default.nix @@ -12,6 +12,9 @@ # The output hash of the dependencies for this project. # Can be calculated in advance with prefetch-npm-deps. , npmDepsHash ? "" + # Whether to force the usage of Git dependencies that have install scripts, but not a lockfile. + # Use with care. +, forceGitDeps ? false # Whether to make the cache writable prior to installing dependencies. # Don't set this unless npm tries to write to the cache directory, as it can slow down the build. , makeCacheWritable ? false @@ -32,7 +35,7 @@ let npmDeps = fetchNpmDeps { - inherit src srcs sourceRoot prePatch patches postPatch; + inherit forceGitDeps src srcs sourceRoot prePatch patches postPatch; name = "${name}-npm-deps"; hash = npmDepsHash; }; diff --git a/pkgs/build-support/node/fetch-npm-deps/default.nix b/pkgs/build-support/node/fetch-npm-deps/default.nix index d87071d8559f0..41cad9d12ee66 100644 --- a/pkgs/build-support/node/fetch-npm-deps/default.nix +++ b/pkgs/build-support/node/fetch-npm-deps/default.nix @@ -36,8 +36,8 @@ ''; }; - makeTest = { name, src, hash }: testers.invalidateFetcherByDrvHash fetchNpmDeps { - inherit name hash; + makeTest = { name, src, hash, forceGitDeps ? false }: testers.invalidateFetcherByDrvHash fetchNpmDeps { + inherit name hash forceGitDeps; src = makeTestSrc { inherit name src; }; }; @@ -108,6 +108,8 @@ }; hash = "sha256-+KA8/orSBJ4EhuSyQO8IKSxsN/FAsYU3lOzq+awuxNQ="; + + forceGitDeps = true; }; }; @@ -121,6 +123,7 @@ fetchNpmDeps = { name ? "npm-deps" , hash ? "" + , forceGitDeps ? false , ... } @ args: let @@ -131,6 +134,8 @@ outputHash = ""; outputHashAlgo = "sha256"; }; + + forceGitDeps_ = lib.optionalAttrs forceGitDeps { FORCE_GIT_DEPS = true; }; in stdenvNoCC.mkDerivation (args // { inherit name; @@ -161,5 +166,5 @@ dontInstall = true; outputHashMode = "recursive"; - } // hash_); + } // hash_ // forceGitDeps_); } diff --git a/pkgs/build-support/node/fetch-npm-deps/src/main.rs b/pkgs/build-support/node/fetch-npm-deps/src/main.rs index 068a6d2bcd60b..57725a922dfd8 100644 --- a/pkgs/build-support/node/fetch-npm-deps/src/main.rs +++ b/pkgs/build-support/node/fetch-npm-deps/src/main.rs @@ -97,7 +97,7 @@ fn main() -> anyhow::Result<()> { (out_tempdir.path(), true) }; - let packages = parse::lockfile(&lock_content)?; + let packages = parse::lockfile(&lock_content, env::var("FORCE_GIT_DEPS").is_ok())?; let cache = Cache::new(out.join("_cacache")); diff --git a/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs b/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs index 95f876017bdd5..9ed4bbecc2158 100644 --- a/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs +++ b/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs @@ -1,6 +1,7 @@ use anyhow::{anyhow, bail, Context}; use lock::UrlOrString; use rayon::prelude::*; +use serde_json::{Map, Value}; use std::{ fs, io, process::{Command, Stdio}, @@ -10,17 +11,71 @@ use url::Url; mod lock; -pub fn lockfile(lockfile: &str) -> anyhow::Result> { - let packages = lock::packages(lockfile).context("failed to extract packages from lockfile")?; - - packages +pub fn lockfile(content: &str, force_git_deps: bool) -> anyhow::Result> { + let mut packages = lock::packages(content) + .context("failed to extract packages from lockfile")? .into_par_iter() .map(|p| { let n = p.name.clone().unwrap(); Package::from_lock(p).with_context(|| format!("failed to parse data for {n}")) }) - .collect() + .collect::>>()?; + + let mut new = Vec::new(); + + for pkg in packages + .iter() + .filter(|p| matches!(p.specifics, Specifics::Git { .. })) + { + let dir = match &pkg.specifics { + Specifics::Git { workdir } => workdir, + Specifics::Registry { .. } => unimplemented!(), + }; + + let path = dir.path().join("package"); + + let lockfile_contents = fs::read_to_string(path.join("package-lock.json")); + + let package_json_path = path.join("package.json"); + let mut package_json: Map = + serde_json::from_str(&fs::read_to_string(package_json_path)?)?; + + if let Some(scripts) = package_json + .get_mut("scripts") + .and_then(Value::as_object_mut) + { + // https://github.com/npm/pacote/blob/272edc1bac06991fc5f95d06342334bbacfbaa4b/lib/git.js#L166-L172 + for typ in [ + "postinstall", + "build", + "preinstall", + "install", + "prepack", + "prepare", + ] { + if scripts.contains_key(typ) && lockfile_contents.is_err() && !force_git_deps { + bail!("Git dependency {} contains install scripts, but has no lockfile, which is something that will probably break. Open an issue if you can't feasibly patch this dependency out, and we'll come up with a workaround.\nIf you'd like to attempt to try to use this dependency anyways, set `forceGitDeps = true`.", pkg.name); + } + } + } + + if let Ok(lockfile_contents) = lockfile_contents { + new.append(&mut lockfile(&lockfile_contents, force_git_deps)?); + } + } + + packages.append(&mut new); + + packages.par_sort_by(|x, y| { + x.url + .partial_cmp(&y.url) + .expect("resolved should be comparable") + }); + + packages.dedup_by(|x, y| x.url == y.url); + + Ok(packages) } #[derive(Debug)] From e1d64c19411e38790765338ac0e68f290f43bbe2 Mon Sep 17 00:00:00 2001 From: Winter Date: Sat, 28 Jan 2023 14:17:27 -0500 Subject: [PATCH 3/5] prefetch-npm-deps: throw better error when unsupported git service is used See https://github.com/NixOS/nixpkgs/pull/206476#discussion_r1058689383. --- .../node/fetch-npm-deps/src/parse/mod.rs | 46 ++++++++++--------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs b/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs index 9ed4bbecc2158..387b3add7ec93 100644 --- a/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs +++ b/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs @@ -101,7 +101,7 @@ impl Package { UrlOrString::String(_) => panic!("at this point, all packages should have URLs"), }; - let specifics = match get_hosted_git_url(&resolved) { + let specifics = match get_hosted_git_url(&resolved)? { Some(hosted) => { let mut body = ureq::get(hosted.as_str()).call()?.into_reader(); @@ -190,11 +190,13 @@ impl Package { } #[allow(clippy::case_sensitive_file_extension_comparisons)] -fn get_hosted_git_url(url: &Url) -> Option { - if ["git", "http", "git+ssh", "git+https", "ssh", "https"].contains(&url.scheme()) { - let mut s = url.path_segments()?; +fn get_hosted_git_url(url: &Url) -> anyhow::Result> { + if ["git", "git+ssh", "git+https", "ssh"].contains(&url.scheme()) { + let mut s = url + .path_segments() + .ok_or_else(|| anyhow!("bad URL: {url}"))?; - match url.host_str()? { + let mut get_url = || match url.host_str()? { "github.com" => { let user = s.next()?; let mut project = s.next()?; @@ -243,7 +245,7 @@ fn get_hosted_git_url(url: &Url) -> Option { ) } "gitlab.com" => { - let path = &url.path()[1..]; + /* let path = &url.path()[1..]; if path.contains("/~/") || path.contains("/archive.tar.gz") { return None; @@ -263,7 +265,10 @@ fn get_hosted_git_url(url: &Url) -> Option { "https://gitlab.com/{user}/{project}/repository/archive.tar.gz?ref={commit}" )) .ok()?, - ) + ) */ + + // lmao: https://github.com/npm/hosted-git-info/pull/109 + None } "git.sr.ht" => { let user = s.next()?; @@ -288,9 +293,14 @@ fn get_hosted_git_url(url: &Url) -> Option { ) } _ => None, + }; + + match get_url() { + Some(u) => Ok(Some(u)), + None => Err(anyhow!("This lockfile either contains a Git dependency with an unsupported host, or a malformed URL in the lockfile: {url}")) } } else { - None + Ok(None) } } @@ -322,32 +332,26 @@ mod tests { "git+ssh://git@github.com/castlabs/electron-releases.git#fc5f78d046e8d7cdeb66345a2633c383ab41f525", Some("https://codeload.github.com/castlabs/electron-releases/tar.gz/fc5f78d046e8d7cdeb66345a2633c383ab41f525"), ), - ( - "https://user@github.com/foo/bar#fix/bug", - Some("https://codeload.github.com/foo/bar/tar.gz/fix/bug") - ), - ( - "https://github.com/eligrey/classList.js/archive/1.2.20180112.tar.gz", - None - ), ( "git+ssh://bitbucket.org/foo/bar#branch", Some("https://bitbucket.org/foo/bar/get/branch.tar.gz") ), - ( - "ssh://git@gitlab.com/foo/bar.git#fix/bug", - Some("https://gitlab.com/foo/bar/repository/archive.tar.gz?ref=fix/bug") - ), ( "git+ssh://git.sr.ht/~foo/bar#branch", Some("https://git.sr.ht/~foo/bar/archive/branch.tar.gz") ), ] { assert_eq!( - get_hosted_git_url(&Url::parse(input).unwrap()), + get_hosted_git_url(&Url::parse(input).unwrap()).unwrap(), expected.map(|u| Url::parse(u).unwrap()) ); } + + assert!( + get_hosted_git_url(&Url::parse("ssh://git@gitlab.com/foo/bar.git#fix/bug").unwrap()) + .is_err(), + "GitLab URLs should be marked as invalid (lol)" + ); } #[test] From faa3de1bf57a78e8df814e06f05c0b7e038656b7 Mon Sep 17 00:00:00 2001 From: Winter Date: Mon, 30 Jan 2023 22:08:08 -0500 Subject: [PATCH 4/5] prefetch-npm-deps: fix clippy lint --- pkgs/build-support/node/fetch-npm-deps/src/cacache.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/build-support/node/fetch-npm-deps/src/cacache.rs b/pkgs/build-support/node/fetch-npm-deps/src/cacache.rs index 715e115e7235b..5326c3e858bba 100644 --- a/pkgs/build-support/node/fetch-npm-deps/src/cacache.rs +++ b/pkgs/build-support/node/fetch-npm-deps/src/cacache.rs @@ -72,7 +72,7 @@ impl Cache { &mut p, &hash .into_iter() - .map(|x| format!("{:02x}", x)) + .map(|n| format!("{n:02x}")) .collect::(), ); From a7391ddbe6c7403f6537020b0f143a267b6e41a0 Mon Sep 17 00:00:00 2001 From: Lily Foster Date: Thu, 6 Apr 2023 11:35:49 -0400 Subject: [PATCH 5/5] mongosh: fix hash now that all git deps are fetched This FOD hash changed becuase it the fetcher was not downloading dev dependencies from https://github.com/mongodb-js/saslprep/tree/v1.0.4 (a transitive dependency) and so since including git dep lockfiles is the purpose of these changes this change is intended and the hash should be updated. Generally a package was unable to build without these so I did not expect this to be a breaking change (since there would be no existing FODs), but mongosh seems to have been an exception. I think just changing the hash here is more appropriate for this exception rather than working in machinery for supporting old and new FODs (since it's rather unlikely these prefetch-npm-deps changes will break downstream code as it should not have worked in the first place if it needed this). --- pkgs/development/tools/mongosh/source.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkgs/development/tools/mongosh/source.json b/pkgs/development/tools/mongosh/source.json index fda52f4e77cd5..9f590bcb9b2fd 100644 --- a/pkgs/development/tools/mongosh/source.json +++ b/pkgs/development/tools/mongosh/source.json @@ -2,5 +2,5 @@ "version": "1.8.0", "integrity": "sha512-9pHLqfYMWwP1L2t83TK5k6ho1faz+jFD9zXxnTtgnyu0c/uC39nx+tJT9AsxNZpY+GlhshDu1YcJm45f8l3gIw==", "filename": "mongosh-1.8.0.tgz", - "deps": "sha256-8v4E9wNv3+JCGm7mUEA+z+g/4X37ACwVsn+9Cv7N+4o=" + "deps": "sha256-ewl5q6ZxlQN030AmObP42E5KpUisrdYHOsg8whUIUZA=" }