diff --git a/Cargo.toml b/Cargo.toml index 5cc98714bc..e7c0200559 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,11 +37,10 @@ failure = "0.1.6" failure_derive = "0.1.6" finch = { version = "0.3.0", optional = true } fixedbitset = "0.2.0" -lazy_static = "1.4.0" -lazy-init = "0.3.0" log = "0.4.8" md5 = "0.7.0" murmurhash3 = "0.0.5" +once_cell = "1.2.0" serde = "1.0.103" serde_derive = "1.0.103" serde_json = "1.0.44" diff --git a/src/index/bigsi.rs b/src/index/bigsi.rs index 141211f584..e1bfdc6748 100644 --- a/src/index/bigsi.rs +++ b/src/index/bigsi.rs @@ -198,7 +198,7 @@ mod test { let results_sbt = sbt.search(&leaf, 0.5, false).unwrap(); assert_eq!(results_sbt.len(), 1); - let data = (*leaf.data).get().unwrap(); + let data = leaf.data.get().unwrap(); let results_bigsi = bigsi.search(&data, 0.5, false).unwrap(); assert_eq!(results_bigsi.len(), 1); @@ -207,7 +207,7 @@ mod test { let results_sbt = sbt.search(&leaf, 0.1, false).unwrap(); assert_eq!(results_sbt.len(), 2); - let data = (*leaf.data).get().unwrap(); + let data = leaf.data.get().unwrap(); let results_bigsi = bigsi.search(&data, 0.1, false).unwrap(); assert_eq!(results_bigsi.len(), 2); diff --git a/src/index/linear.rs b/src/index/linear.rs index 76ce5b6918..1cbe445fd8 100644 --- a/src/index/linear.rs +++ b/src/index/linear.rs @@ -6,7 +6,6 @@ use std::path::PathBuf; use std::rc::Rc; use failure::Error; -use lazy_init::Lazy; use serde_derive::{Deserialize, Serialize}; use typed_builder::TypedBuilder; @@ -14,10 +13,7 @@ use crate::index::storage::{FSStorage, ReadData, Storage, StorageInfo, ToWriter} use crate::index::{Comparable, DatasetInfo, Index, SigStore}; #[derive(TypedBuilder)] -pub struct LinearIndex -where - L: Sync, -{ +pub struct LinearIndex { #[builder(default)] storage: Option>, @@ -34,7 +30,7 @@ struct LinearInfo { impl<'a, L> Index<'a> for LinearIndex where - L: Sync + Clone + Comparable + 'a, + L: Clone + Comparable + 'a, SigStore: From, { type Item = L; @@ -83,7 +79,7 @@ where impl LinearIndex where - L: std::marker::Sync + ToWriter, + L: ToWriter, SigStore: ReadData, { pub fn save_file>( @@ -175,12 +171,10 @@ where datasets: linear .leaves .into_iter() - .map(|l| SigStore { - filename: l.filename, - name: l.name, - metadata: l.metadata, - storage: Some(Rc::clone(&storage)), - data: Rc::new(Lazy::new()), + .map(|l| { + let mut v: SigStore = l.into(); + v.storage = Some(Rc::clone(&storage)); + v }) .collect(), }) diff --git a/src/index/mod.rs b/src/index/mod.rs index 9f5ecdbe85..e70b96ed37 100644 --- a/src/index/mod.rs +++ b/src/index/mod.rs @@ -16,7 +16,7 @@ use std::path::Path; use std::rc::Rc; use failure::Error; -use lazy_init::Lazy; +use once_cell::sync::OnceCell; use serde_derive::{Deserialize, Serialize}; use typed_builder::TypedBuilder; @@ -135,32 +135,24 @@ pub struct DatasetInfo { } #[derive(TypedBuilder, Default, Clone)] -pub struct SigStore -where - T: std::marker::Sync, -{ +pub struct SigStore { pub(crate) filename: String, pub(crate) name: String, pub(crate) metadata: String, pub(crate) storage: Option>, - pub(crate) data: Rc>, + #[builder(default)] + pub(crate) data: OnceCell, } -impl SigStore -where - T: std::marker::Sync + Default, -{ +impl SigStore { pub fn name(&self) -> String { self.name.clone() } } -impl std::fmt::Debug for SigStore -where - T: std::marker::Sync, -{ +impl std::fmt::Debug for SigStore { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, @@ -175,7 +167,7 @@ impl ReadData for SigStore { if let Some(sig) = self.data.get() { Ok(sig) } else if let Some(storage) = &self.storage { - let sig = self.data.get_or_create(|| { + let sig = self.data.get_or_init(|| { let raw = storage.load(&self.filename).unwrap(); let sigs: Result, _> = serde_json::from_reader(&mut &raw[..]); if let Ok(sigs) = sigs { @@ -241,13 +233,10 @@ impl From for SigStore { let name = other.name(); let filename = other.filename(); - let data = Lazy::new(); - data.get_or_create(|| other); - SigStore::builder() .name(name) .filename(filename) - .data(data) + .data(other) .metadata("") .storage(None) .build() @@ -329,3 +318,15 @@ impl Comparable for Signature { unimplemented!() } } + +impl From for SigStore { + fn from(other: DatasetInfo) -> SigStore { + SigStore { + filename: other.filename, + name: other.name, + metadata: other.metadata, + storage: None, + data: OnceCell::new(), + } + } +} diff --git a/src/index/sbt/mhbt.rs b/src/index/sbt/mhbt.rs index 565468a278..4beaf91558 100644 --- a/src/index/sbt/mhbt.rs +++ b/src/index/sbt/mhbt.rs @@ -1,9 +1,7 @@ use std::collections::HashMap; use std::io::Write; -use std::rc::Rc; use failure::Error; -use lazy_init::Lazy; use crate::index::sbt::{Factory, FromFactory, Node, Update, SBT}; use crate::index::storage::{ReadData, ReadDataError, ToWriter}; @@ -21,21 +19,18 @@ impl ToWriter for Nodegraph { } } -impl FromFactory> for SBT, L> { +impl FromFactory> for SBT, L> { fn factory(&self, name: &str) -> Result, Error> { match self.factory { Factory::GraphFactory { args: (k, t, n) } => { let n = Nodegraph::with_tables(t as usize, n as usize, k as usize); - let data = Lazy::new(); - data.get_or_create(|| n); - Ok(Node::builder() .filename(name) .name(name) .metadata(HashMap::default()) .storage(self.storage()) - .data(Rc::new(data)) + .data(n) .build()) } } @@ -72,9 +67,7 @@ impl Update> for Signature { unimplemented!() } - let data = Lazy::new(); - data.get_or_create(|| parent_data); - parent.data = Rc::new(data); + parent.data = parent_data.into(); Ok(()) } @@ -139,7 +132,7 @@ impl Comparable for Node { impl ReadData for Node { fn data(&self) -> Result<&Nodegraph, Error> { if let Some(storage) = &self.storage { - Ok(self.data.get_or_create(|| { + Ok(self.data.get_or_init(|| { let raw = storage.load(&self.filename).unwrap(); Nodegraph::from_reader(&mut &raw[..]).unwrap() })) @@ -157,10 +150,8 @@ mod test { use std::fs::File; use std::io::{BufReader, Seek, SeekFrom}; use std::path::PathBuf; - use std::rc::Rc; use assert_matches::assert_matches; - use lazy_init::Lazy; use tempfile; use super::Factory; @@ -217,16 +208,7 @@ mod test { .unwrap(); let sig_data = sigs[0].clone(); - let data = Lazy::new(); - data.get_or_create(|| sig_data); - - let leaf = SigStore::builder() - .data(Rc::new(data)) - .filename("") - .name("") - .metadata("") - .storage(None) - .build(); + let leaf = sig_data.into(); let results = sbt.find(search_minhashes, &leaf, 0.5).unwrap(); assert_eq!(results.len(), 1); diff --git a/src/index/sbt/mod.rs b/src/index/sbt/mod.rs index 8262262a98..1ab2645f00 100644 --- a/src/index/sbt/mod.rs +++ b/src/index/sbt/mod.rs @@ -21,8 +21,8 @@ use std::path::{Path, PathBuf}; use std::rc::Rc; use failure::Error; -use lazy_init::Lazy; use log::info; +use once_cell::sync::OnceCell; use serde_derive::{Deserialize, Serialize}; use typed_builder::TypedBuilder; @@ -39,10 +39,7 @@ pub trait FromFactory { } #[derive(TypedBuilder)] -pub struct SBT -where - L: Sync, -{ +pub struct SBT { #[builder(default = 2)] d: u32, @@ -69,7 +66,7 @@ const fn child(parent: u64, pos: u64, d: u64) -> u64 { impl SBT where - L: std::clone::Clone + Default + Sync, + L: std::clone::Clone + Default, N: Default, { #[inline(always)] @@ -117,8 +114,8 @@ where impl SBT, T> where - T: std::marker::Sync + ToWriter + Clone, - U: std::marker::Sync + ToWriter, + T: ToWriter + Clone, + U: ToWriter, Node: ReadData, SigStore: ReadData, { @@ -177,28 +174,30 @@ where .nodes .into_iter() .map(|(n, l)| { - let new_node = Node { - filename: l.filename, - name: l.name, - metadata: l.metadata, - storage: Some(Rc::clone(&storage)), - data: Rc::new(Lazy::new()), - }; - (n, new_node) + ( + n, + Node::builder() + .filename(l.filename) + .name(l.name) + .metadata(l.metadata) + .storage(Some(Rc::clone(&storage))) + .build(), + ) }) .collect(); let leaves = sbt .leaves .into_iter() .map(|(n, l)| { - let new_node = SigStore { - filename: l.filename, - name: l.name, - metadata: l.metadata, - storage: Some(Rc::clone(&storage)), - data: Rc::new(Lazy::new()), - }; - (n, new_node) + ( + n, + SigStore::builder() + .filename(l.filename) + .name(l.name) + .metadata(l.metadata) + .storage(Some(Rc::clone(&storage))) + .build(), + ) }) .collect(); (nodes, leaves) @@ -208,16 +207,15 @@ where .nodes .iter() .filter_map(|(n, x)| match x { - NodeInfoV4::Node(l) => { - let new_node = Node { - filename: l.filename.clone(), - name: l.name.clone(), - metadata: l.metadata.clone(), - storage: Some(Rc::clone(&storage)), - data: Rc::new(Lazy::new()), - }; - Some((*n, new_node)) - } + NodeInfoV4::Node(l) => Some(( + *n, + Node::builder() + .filename(l.filename.clone()) + .name(l.name.clone()) + .metadata(l.metadata.clone()) + .storage(Some(Rc::clone(&storage))) + .build(), + )), NodeInfoV4::Leaf(_) => None, }) .collect(); @@ -227,16 +225,15 @@ where .into_iter() .filter_map(|(n, x)| match x { NodeInfoV4::Node(_) => None, - NodeInfoV4::Leaf(l) => { - let new_node = SigStore { - filename: l.filename, - name: l.name, - metadata: l.metadata, - storage: Some(Rc::clone(&storage)), - data: Rc::new(Lazy::new()), - }; - Some((n, new_node)) - } + NodeInfoV4::Leaf(l) => Some(( + n, + SigStore::builder() + .filename(l.filename) + .name(l.name) + .metadata(l.metadata) + .storage(Some(Rc::clone(&storage))) + .build(), + )), }) .collect(); @@ -354,7 +351,7 @@ where impl<'a, N, L> Index<'a> for SBT where N: Comparable + Comparable + Update + Debug + Default, - L: Comparable + Update + Clone + Debug + Default + Sync, + L: Comparable + Update + Clone + Debug + Default, SBT: FromFactory, SigStore: From + ReadData, { @@ -524,21 +521,21 @@ pub enum Factory { } #[derive(TypedBuilder, Default, Clone)] -pub struct Node -where - T: std::marker::Sync, -{ +pub struct Node { filename: String, name: String, metadata: HashMap, + + #[builder(default)] storage: Option>, + #[builder(default)] - pub(crate) data: Rc>, + pub(crate) data: OnceCell, } impl Node where - T: Sync + ToWriter, + T: ToWriter, { pub fn save(&self, path: &str) -> Result { if let Some(storage) = &self.storage { @@ -559,7 +556,7 @@ where impl PartialEq for Node where - T: Sync + PartialEq, + T: PartialEq, Node: ReadData, { fn eq(&self, other: &Node) -> bool { @@ -569,7 +566,7 @@ where impl SigStore where - T: Sync + ToWriter, + T: ToWriter, { pub fn save(&self, path: &str) -> Result { if let Some(storage) = &self.storage { @@ -589,7 +586,7 @@ where impl std::fmt::Debug for Node where - T: std::marker::Sync + std::fmt::Debug, + T: Debug, { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( @@ -687,7 +684,7 @@ pub fn scaffold( storage: Option>, ) -> SBT, Signature> where - N: std::marker::Sync + std::clone::Clone + std::default::Default, + N: Clone + Default, { let mut leaves: HashMap> = HashMap::with_capacity(datasets.len()); @@ -857,7 +854,7 @@ impl BinaryTree { /* impl From> for SBT, Signature> where - U: Sync + Default + Clone, + U: Default + Clone, { fn from(other: LinearIndex) -> Self { let storage = other.storage(); diff --git a/src/sketch/minhash.rs b/src/sketch/minhash.rs index 55b7961b29..ce5ec103b3 100644 --- a/src/sketch/minhash.rs +++ b/src/sketch/minhash.rs @@ -6,7 +6,7 @@ use std::iter::{Iterator, Peekable}; use std::str; use failure::Error; -use lazy_static::lazy_static; +use once_cell::sync::Lazy; use serde::de::{Deserialize, Deserializer}; use serde::ser::{Serialize, SerializeStruct, Serializer}; use serde_derive::Deserialize; @@ -706,77 +706,110 @@ fn revcomp(seq: &[u8]) -> Vec { .collect() } -lazy_static! { - static ref CODONTABLE: HashMap<&'static str, u8> = { - [ +static CODONTABLE: Lazy> = Lazy::new(|| { + [ // F - ("TTT", b'F'), ("TTC", b'F'), + ("TTT", b'F'), + ("TTC", b'F'), // L - ("TTA", b'L'), ("TTG", b'L'), - + ("TTA", b'L'), + ("TTG", b'L'), // S - ("TCT", b'S'), ("TCC", b'S'), ("TCA", b'S'), ("TCG", b'S'), ("TCN", b'S'), - + ("TCT", b'S'), + ("TCC", b'S'), + ("TCA", b'S'), + ("TCG", b'S'), + ("TCN", b'S'), // Y - ("TAT", b'Y'), ("TAC", b'Y'), + ("TAT", b'Y'), + ("TAC", b'Y'), // * - ("TAA", b'*'), ("TAG", b'*'), - + ("TAA", b'*'), + ("TAG", b'*'), // * ("TGA", b'*'), // C - ("TGT", b'C'), ("TGC", b'C'), + ("TGT", b'C'), + ("TGC", b'C'), // W ("TGG", b'W'), - // L - ("CTT", b'L'), ("CTC", b'L'), ("CTA", b'L'), ("CTG", b'L'), ("CTN", b'L'), - + ("CTT", b'L'), + ("CTC", b'L'), + ("CTA", b'L'), + ("CTG", b'L'), + ("CTN", b'L'), // P - ("CCT", b'P'), ("CCC", b'P'), ("CCA", b'P'), ("CCG", b'P'), ("CCN", b'P'), - + ("CCT", b'P'), + ("CCC", b'P'), + ("CCA", b'P'), + ("CCG", b'P'), + ("CCN", b'P'), // H - ("CAT", b'H'), ("CAC", b'H'), + ("CAT", b'H'), + ("CAC", b'H'), // Q - ("CAA", b'Q'), ("CAG", b'Q'), - + ("CAA", b'Q'), + ("CAG", b'Q'), // R - ("CGT", b'R'), ("CGC", b'R'), ("CGA", b'R'), ("CGG", b'R'), ("CGN", b'R'), - + ("CGT", b'R'), + ("CGC", b'R'), + ("CGA", b'R'), + ("CGG", b'R'), + ("CGN", b'R'), // I - ("ATT", b'I'), ("ATC", b'I'), ("ATA", b'I'), + ("ATT", b'I'), + ("ATC", b'I'), + ("ATA", b'I'), // M ("ATG", b'M'), - // T - ("ACT", b'T'), ("ACC", b'T'), ("ACA", b'T'), ("ACG", b'T'), ("ACN", b'T'), - + ("ACT", b'T'), + ("ACC", b'T'), + ("ACA", b'T'), + ("ACG", b'T'), + ("ACN", b'T'), // N - ("AAT", b'N'), ("AAC", b'N'), + ("AAT", b'N'), + ("AAC", b'N'), // K - ("AAA", b'K'), ("AAG", b'K'), - + ("AAA", b'K'), + ("AAG", b'K'), // S - ("AGT", b'S'), ("AGC", b'S'), + ("AGT", b'S'), + ("AGC", b'S'), // R - ("AGA", b'R'), ("AGG", b'R'), - + ("AGA", b'R'), + ("AGG", b'R'), // V - ("GTT", b'V'), ("GTC", b'V'), ("GTA", b'V'), ("GTG", b'V'), ("GTN", b'V'), - + ("GTT", b'V'), + ("GTC", b'V'), + ("GTA", b'V'), + ("GTG", b'V'), + ("GTN", b'V'), // A - ("GCT", b'A'), ("GCC", b'A'), ("GCA", b'A'), ("GCG", b'A'), ("GCN", b'A'), - + ("GCT", b'A'), + ("GCC", b'A'), + ("GCA", b'A'), + ("GCG", b'A'), + ("GCN", b'A'), // D - ("GAT", b'D'), ("GAC", b'D'), + ("GAT", b'D'), + ("GAC", b'D'), // E - ("GAA", b'E'), ("GAG", b'E'), - + ("GAA", b'E'), + ("GAG", b'E'), // G - ("GGT", b'G'), ("GGC", b'G'), ("GGA", b'G'), ("GGG", b'G'), ("GGN", b'G'), - ].iter().cloned().collect() - }; -} + ("GGT", b'G'), + ("GGC", b'G'), + ("GGA", b'G'), + ("GGG", b'G'), + ("GGN", b'G'), + ] + .iter() + .cloned() + .collect() +}); // Dayhoff table from // Peris, P., López, D., & Campos, M. (2008). @@ -799,29 +832,39 @@ lazy_static! { // | H, K, R | Basic | d | // | I, L, M, V | Hydrophobic | e | // | F, W, Y | Aromatic | f | -lazy_static! { - static ref DAYHOFFTABLE: HashMap = { - [ +static DAYHOFFTABLE: Lazy> = Lazy::new(|| { + [ // a (b'C', b'a'), - // b - (b'A', b'b'), (b'G', b'b'), (b'P', b'b'), (b'S', b'b'), (b'T', b'b'), - + (b'A', b'b'), + (b'G', b'b'), + (b'P', b'b'), + (b'S', b'b'), + (b'T', b'b'), // c - (b'D', b'c'), (b'E', b'c'), (b'N', b'c'), (b'Q', b'c'), - + (b'D', b'c'), + (b'E', b'c'), + (b'N', b'c'), + (b'Q', b'c'), // d - (b'H', b'd'), (b'K', b'd'), (b'R', b'd'), - + (b'H', b'd'), + (b'K', b'd'), + (b'R', b'd'), // e - (b'I', b'e'), (b'L', b'e'), (b'M', b'e'), (b'V', b'e'), - + (b'I', b'e'), + (b'L', b'e'), + (b'M', b'e'), + (b'V', b'e'), // e - (b'F', b'f'), (b'W', b'f'), (b'Y', b'f'), - ].iter().cloned().collect() - }; -} + (b'F', b'f'), + (b'W', b'f'), + (b'Y', b'f'), + ] + .iter() + .cloned() + .collect() +}); // HP Hydrophobic/hydrophilic mapping // From: Phillips, R., Kondev, J., Theriot, J. (2008). @@ -832,22 +875,35 @@ lazy_static! { // |---------------------------------------|---------| // | A, F, G, I, L, M, P, V, W, Y | h | // | N, C, S, T, D, E, R, H, K, Q | p | -lazy_static! { - static ref HPTABLE: HashMap = { - [ - // h - (b'A', b'h'), (b'F', b'h'), (b'G', b'h'), (b'I', b'h'), (b'L', b'h'), - (b'M', b'h'), (b'P', b'h'), (b'V', b'h'), (b'W', b'h'), (b'Y', b'h'), - - // p - (b'N', b'p'), (b'C', b'p'), (b'S', b'p'), (b'T', b'p'), (b'D', b'p'), - (b'E', b'p'), (b'R', b'p'), (b'H', b'p'), (b'K', b'p'), (b'Q', b'p'), - ] - .iter() - .cloned() - .collect() - }; -} +static HPTABLE: Lazy> = Lazy::new(|| { + [ + // h + (b'A', b'h'), + (b'F', b'h'), + (b'G', b'h'), + (b'I', b'h'), + (b'L', b'h'), + (b'M', b'h'), + (b'P', b'h'), + (b'V', b'h'), + (b'W', b'h'), + (b'Y', b'h'), + // p + (b'N', b'p'), + (b'C', b'p'), + (b'S', b'p'), + (b'T', b'p'), + (b'D', b'p'), + (b'E', b'p'), + (b'R', b'p'), + (b'H', b'p'), + (b'K', b'p'), + (b'Q', b'p'), + ] + .iter() + .cloned() + .collect() +}); #[inline] pub(crate) fn translate_codon(codon: &[u8]) -> Result {