-
Notifications
You must be signed in to change notification settings - Fork 2.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[processor/tailsampling] Decision cache for non-sampled trace IDs #33722
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
change_type: enhancement | ||
component: tailsamplingprocessor | ||
note: Adds decision cache for non-sampled trace IDs | ||
issues: [31583] | ||
subtext: | ||
change_logs: [] |
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -48,16 +48,17 @@ type tailSamplingSpanProcessor struct { | |||
telemetry *metadata.TelemetryBuilder | ||||
logger *zap.Logger | ||||
|
||||
nextConsumer consumer.Traces | ||||
maxNumTraces uint64 | ||||
policies []*policy | ||||
idToTrace sync.Map | ||||
policyTicker timeutils.TTicker | ||||
tickerFrequency time.Duration | ||||
decisionBatcher idbatcher.Batcher | ||||
sampledIDCache cache.Cache[bool] | ||||
deleteChan chan pcommon.TraceID | ||||
numTracesOnMap *atomic.Uint64 | ||||
nextConsumer consumer.Traces | ||||
maxNumTraces uint64 | ||||
policies []*policy | ||||
idToTrace sync.Map | ||||
policyTicker timeutils.TTicker | ||||
tickerFrequency time.Duration | ||||
decisionBatcher idbatcher.Batcher | ||||
sampledIDCache cache.Cache[bool] | ||||
nonSampledIDCache cache.Cache[bool] | ||||
deleteChan chan pcommon.TraceID | ||||
numTracesOnMap *atomic.Uint64 | ||||
} | ||||
|
||||
// spanAndScope a structure for holding information about span and its instrumentation scope. | ||||
|
@@ -88,23 +89,32 @@ func newTracesProcessor(ctx context.Context, settings component.TelemetrySetting | |||
if err != nil { | ||||
return nil, err | ||||
} | ||||
sampledDecisions := cache.NewNopDecisionCache[bool]() | ||||
nopCache := cache.NewNopDecisionCache[bool]() | ||||
sampledDecisions := nopCache | ||||
nonSampledDecisions := nopCache | ||||
if cfg.DecisionCache.SampledCacheSize > 0 { | ||||
sampledDecisions, err = cache.NewLRUDecisionCache[bool](cfg.DecisionCache.SampledCacheSize) | ||||
if err != nil { | ||||
return nil, err | ||||
} | ||||
} | ||||
if cfg.DecisionCache.NonSampledCacheSize > 0 { | ||||
nonSampledDecisions, err = cache.NewLRUDecisionCache[bool](cfg.DecisionCache.NonSampledCacheSize) | ||||
if err != nil { | ||||
return nil, err | ||||
} | ||||
} | ||||
|
||||
tsp := &tailSamplingSpanProcessor{ | ||||
ctx: ctx, | ||||
telemetry: telemetry, | ||||
nextConsumer: nextConsumer, | ||||
maxNumTraces: cfg.NumTraces, | ||||
sampledIDCache: sampledDecisions, | ||||
logger: settings.Logger, | ||||
numTracesOnMap: &atomic.Uint64{}, | ||||
deleteChan: make(chan pcommon.TraceID, cfg.NumTraces), | ||||
ctx: ctx, | ||||
telemetry: telemetry, | ||||
nextConsumer: nextConsumer, | ||||
maxNumTraces: cfg.NumTraces, | ||||
sampledIDCache: sampledDecisions, | ||||
nonSampledIDCache: nonSampledDecisions, | ||||
logger: settings.Logger, | ||||
numTracesOnMap: &atomic.Uint64{}, | ||||
deleteChan: make(chan pcommon.TraceID, cfg.NumTraces), | ||||
} | ||||
tsp.policyTicker = &timeutils.PolicyTicker{OnTickFunc: tsp.samplingPolicyOnTick} | ||||
|
||||
|
@@ -182,6 +192,13 @@ func withSampledDecisionCache(c cache.Cache[bool]) Option { | |||
} | ||||
} | ||||
|
||||
// withNonSampledDecisionCache sets the cache which the processor uses to store recently non-sampled trace IDs. | ||||
func withNonSampledDecisionCache(c cache.Cache[bool]) Option { | ||||
return func(tsp *tailSamplingSpanProcessor) { | ||||
tsp.nonSampledIDCache = c | ||||
} | ||||
} | ||||
|
||||
func getPolicyEvaluator(settings component.TelemetrySettings, cfg *PolicyCfg) (sampling.PolicyEvaluator, error) { | ||||
switch cfg.Type { | ||||
case Composite: | ||||
|
@@ -365,7 +382,14 @@ func (tsp *tailSamplingSpanProcessor) processTraces(resourceSpans ptrace.Resourc | |||
traceTd := ptrace.NewTraces() | ||||
appendToTraces(traceTd, resourceSpans, spans) | ||||
tsp.releaseSampledTrace(tsp.ctx, id, traceTd) | ||||
tsp.telemetry.ProcessorTailSamplingEarlyReleasesFromCacheDecision.Add(tsp.ctx, int64(len(spans))) | ||||
tsp.telemetry.ProcessorTailSamplingEarlyReleasesFromCacheDecision. | ||||
Add(tsp.ctx, int64(len(spans)), metric.WithAttributes(attribute.String("decision", "sample"))) | ||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Interestingly, metric.WithAttributes performs very poorly, so it should be avoided on the hot path. I typically have a static var for that, and refer to it later. Using metric.WithAttributeSet should not cause any allocations on the hot path. opentelemetry-collector-contrib/exporter/loadbalancingexporter/wrapped_exporter.go Line 36 in b95672e
|
||||
continue | ||||
} | ||||
// If the trace ID is in the non-sampled cache, short circuit the decision | ||||
if _, ok := tsp.nonSampledIDCache.Get(id); ok { | ||||
tsp.telemetry.ProcessorTailSamplingEarlyReleasesFromCacheDecision. | ||||
Add(tsp.ctx, int64(len(spans)), metric.WithAttributes(attribute.String("decision", "drop"))) | ||||
continue | ||||
} | ||||
|
||||
|
@@ -458,6 +482,9 @@ func (tsp *tailSamplingSpanProcessor) dropTrace(traceID pcommon.TraceID, deletio | |||
tsp.idToTrace.Delete(traceID) | ||||
// Subtract one from numTracesOnMap per https://godoc.org/sync/atomic#AddUint64 | ||||
tsp.numTracesOnMap.Add(^uint64(0)) | ||||
if trace.FinalDecision != sampling.Sampled { | ||||
tsp.nonSampledIDCache.Put(traceID, true) | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should be placed close to line 450, where the NotSampled decision is made. I think this code here will not apply to all NotSampled decisions, especially if the idToTrace cache is full. |
||||
} | ||||
} | ||||
if trace == nil { | ||||
tsp.logger.Debug("Attempt to delete traceID not on table") | ||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This suggestion is because it wasn't very clear on a first read that those were different numbers.