-
Notifications
You must be signed in to change notification settings - Fork 467
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
persist: introduce a very small in-mem blob cache #19614
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
// Copyright Materialize, Inc. and contributors. All rights reserved. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the LICENSE file. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0. | ||
|
||
//! In-process caches of [Blob]. | ||
|
||
use std::sync::Arc; | ||
|
||
use async_trait::async_trait; | ||
use bytes::Bytes; | ||
use moka::notification::RemovalCause; | ||
use moka::sync::Cache; | ||
use mz_ore::bytes::SegmentedBytes; | ||
use mz_ore::cast::CastFrom; | ||
use mz_persist::location::{Atomicity, Blob, BlobMetadata, ExternalError}; | ||
use tracing::error; | ||
|
||
use crate::cfg::PersistConfig; | ||
use crate::internal::metrics::Metrics; | ||
|
||
/// An in-memory cache in front of a [Blob] implementation.
///
/// Lookups are served from the cache when possible; misses and writes go
/// through to (and populate from) the wrapped `blob`.
#[derive(Debug)]
pub struct BlobMemCache {
    // Metrics for cache hits, sizes, and evictions.
    metrics: Arc<Metrics>,
    // Size-weighted cache of blob contents, keyed by blob key. The weigher
    // uses each value's byte length, so capacity is a byte limit.
    cache: Cache<String, SegmentedBytes>,
    // The backing durable blob store; the source of truth on cache misses.
    blob: Arc<dyn Blob + Send + Sync>,
}
|
||
impl BlobMemCache { | ||
pub fn new( | ||
cfg: &PersistConfig, | ||
metrics: Arc<Metrics>, | ||
blob: Arc<dyn Blob + Send + Sync>, | ||
) -> Arc<dyn Blob + Send + Sync> { | ||
let eviction_metrics = Arc::clone(&metrics); | ||
// TODO: Make this react dynamically to changes in configuration. | ||
let cache = Cache::<String, SegmentedBytes>::builder() | ||
.max_capacity(u64::cast_from(cfg.dynamic.blob_cache_mem_limit_bytes())) | ||
.weigher(|k, v| { | ||
u32::try_from(v.len()).unwrap_or_else(|_| { | ||
// We chunk off blobs at 128MiB, so the length should easily | ||
// fit in a u32. | ||
error!( | ||
"unexpectedly large blob in persist cache {} bytes: {}", | ||
v.len(), | ||
k | ||
); | ||
u32::MAX | ||
}) | ||
}) | ||
.eviction_listener(move |_k, _v, cause| match cause { | ||
RemovalCause::Size => eviction_metrics.blob_cache_mem.evictions.inc(), | ||
RemovalCause::Expired | RemovalCause::Explicit | RemovalCause::Replaced => {} | ||
}) | ||
.build(); | ||
let blob = BlobMemCache { | ||
metrics, | ||
cache, | ||
blob, | ||
}; | ||
Arc::new(blob) | ||
} | ||
|
||
fn update_size_metrics(&self) { | ||
self.metrics | ||
.blob_cache_mem | ||
.size_blobs | ||
.set(self.cache.entry_count()); | ||
self.metrics | ||
.blob_cache_mem | ||
.size_bytes | ||
.set(self.cache.weighted_size()); | ||
} | ||
} | ||
|
||
#[async_trait] | ||
impl Blob for BlobMemCache { | ||
async fn get(&self, key: &str) -> Result<Option<SegmentedBytes>, ExternalError> { | ||
// First check if the blob is in the cache. If it is, return it. If not, | ||
// fetch it and put it in the cache. | ||
// | ||
// Blobs are write-once modify-never, so we don't have to worry about | ||
// any races or cache invalidations here. If the value is in the cache, | ||
// it's also what's in s3 (if not, then there's a horrible bug somewhere | ||
// else). | ||
if let Some(cached_value) = self.cache.get(key) { | ||
self.metrics.blob_cache_mem.hits_blobs.inc(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is the intent to calculate hit rate based on the delta between this vs existing blob metrics? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, tho we'll have to do |
||
self.metrics | ||
.blob_cache_mem | ||
.hits_bytes | ||
.inc_by(u64::cast_from(cached_value.len())); | ||
return Ok(Some(cached_value)); | ||
} | ||
|
||
// This could maybe use moka's async cache to unify any concurrent | ||
// fetches for the same key? That's not particularly expected in | ||
// persist's workload, so punt for now. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am a little curious about this... I could imagine if multiple There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i can't think of an easy way to measure this without just literally solving the problem. would prefer to punt this to followup work as well |
||
let res = self.blob.get(key).await?; | ||
if let Some(blob) = res.as_ref() { | ||
self.cache.insert(key.to_owned(), blob.clone()); | ||
self.update_size_metrics(); | ||
} | ||
Ok(res) | ||
} | ||
|
||
async fn list_keys_and_metadata( | ||
&self, | ||
key_prefix: &str, | ||
f: &mut (dyn FnMut(BlobMetadata) + Send + Sync), | ||
) -> Result<(), ExternalError> { | ||
self.blob.list_keys_and_metadata(key_prefix, f).await | ||
} | ||
|
||
async fn set(&self, key: &str, value: Bytes, atomic: Atomicity) -> Result<(), ExternalError> { | ||
let () = self.blob.set(key, value.clone(), atomic).await?; | ||
self.cache | ||
.insert(key.to_owned(), SegmentedBytes::from(value)); | ||
self.update_size_metrics(); | ||
Ok(()) | ||
} | ||
|
||
async fn delete(&self, key: &str) -> Result<Option<usize>, ExternalError> { | ||
let res = self.blob.delete(key).await; | ||
self.cache.invalidate(key); | ||
self.update_size_metrics(); | ||
res | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -79,6 +79,8 @@ pub struct Metrics { | |
pub pubsub_client: PubSubClientMetrics, | ||
/// Metrics for mfp/filter pushdown. | ||
pub pushdown: PushdownMetrics, | ||
/// Metrics for blob caching. | ||
pub blob_cache_mem: BlobMemCache, | ||
|
||
/// Metrics for the persist sink. | ||
pub sink: SinkMetrics, | ||
|
@@ -123,6 +125,7 @@ impl Metrics { | |
watch: WatchMetrics::new(registry), | ||
pubsub_client: PubSubClientMetrics::new(registry), | ||
pushdown: PushdownMetrics::new(registry), | ||
blob_cache_mem: BlobMemCache::new(registry), | ||
sink: SinkMetrics::new(registry), | ||
s3_blob: S3BlobMetrics::new(registry), | ||
postgres_consensus: PostgresConsensusMetrics::new(registry), | ||
|
@@ -1903,6 +1906,47 @@ impl PushdownMetrics { | |
} | ||
} | ||
|
||
/// Metrics for the in-memory blob cache: sizes, hits, and evictions.
#[derive(Debug)]
pub struct BlobMemCache {
    // Current number of blobs resident in the cache.
    pub(crate) size_blobs: UIntGauge,
    // Current total (weighted) byte size of blobs in the cache.
    pub(crate) size_bytes: UIntGauge,
    // Cumulative count of gets served from the cache instead of the backing store.
    pub(crate) hits_blobs: IntCounter,
    // Cumulative bytes of gets served from the cache instead of the backing store.
    pub(crate) hits_bytes: IntCounter,
    // Cumulative count of capacity-based evictions.
    pub(crate) evictions: IntCounter,
}
|
||
impl BlobMemCache {
    /// Registers the blob-cache metrics with `registry`.
    ///
    /// Every metric carries the const label `"cache" => "mem"`, which
    /// distinguishes this in-memory cache from any other blob cache (e.g. a
    /// hypothetical on-disk one) that might reuse these metric names.
    fn new(registry: &MetricsRegistry) -> Self {
        BlobMemCache {
            size_blobs: registry.register(metric!(
                name: "mz_persist_blob_cache_size_blobs",
                help: "count of blobs in the cache",
                const_labels: {"cache" => "mem"},
            )),
            size_bytes: registry.register(metric!(
                name: "mz_persist_blob_cache_size_bytes",
                help: "total size of blobs in the cache",
                const_labels: {"cache" => "mem"},
            )),
            hits_blobs: registry.register(metric!(
                name: "mz_persist_blob_cache_hits_blobs",
                help: "count of blobs served via cache instead of s3",
                const_labels: {"cache" => "mem"},
            )),
            hits_bytes: registry.register(metric!(
                name: "mz_persist_blob_cache_hits_bytes",
                help: "total size of blobs served via cache instead of s3",
                const_labels: {"cache" => "mem"},
            )),
            evictions: registry.register(metric!(
                name: "mz_persist_blob_cache_evictions",
                help: "count of capacity-based cache evictions",
                const_labels: {"cache" => "mem"},
            )),
        }
    }
}
|
||
#[derive(Debug)] | ||
pub struct ExternalOpMetrics { | ||
started: IntCounter, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
could be interesting to add a listener so we can track how many removals come from explicit
delete
calls vs size-based evictionsThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
great idea! will do