diff --git a/pkg/ottl/context_inferrer.go b/pkg/ottl/context_inferrer.go index da4ade783278..714100ac4d57 100644 --- a/pkg/ottl/context_inferrer.go +++ b/pkg/ottl/context_inferrer.go @@ -3,12 +3,16 @@ package ottl // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" -import "math" +import ( + "cmp" + "math" + "slices" +) var defaultContextInferPriority = []string{ "log", - "metric", "datapoint", + "metric", "spanevent", "span", "resource", @@ -16,62 +20,217 @@ var defaultContextInferPriority = []string{ "instrumentation_scope", } -// contextInferrer is an interface used to infer the OTTL context from statements paths. +// contextInferrer is an interface used to infer the OTTL context from statements. type contextInferrer interface { - // infer returns the OTTL context inferred from the given statements paths. + // infer returns the OTTL context inferred from the given statements. infer(statements []string) (string, error) } type priorityContextInferrer struct { - contextPriority map[string]int + contextPriority map[string]int + contextCandidate map[string]*priorityContextInferrerCandidate +} + +type priorityContextInferrerCandidate struct { + hasEnumSymbol func(enum *EnumSymbol) bool + hasFunctionName func(name string) bool + getLowerContexts func(context string) []string +} + +type priorityContextInferrerOption func(*priorityContextInferrer) + +// newPriorityContextInferrer creates a new priority-based context inferrer. To infer the context, +// it uses a slice of priorities (withContextInferrerPriorities) and a set of hints extracted from +// the parsed statements. +// +// To be eligible, a context must support all functions and enums symbols present on the statements. +// If the path context with the highest priority does not meet this requirement, it falls back to its +// lower contexts, testing them with the same logic and choosing the first one that meets all requirements. +// +// If non-prioritized contexts are found on the statements, they get assigned the lowest possible priority, +// and are only selected if no other prioritized context is found. +func newPriorityContextInferrer(contextsCandidate map[string]*priorityContextInferrerCandidate, options ...priorityContextInferrerOption) contextInferrer { + c := &priorityContextInferrer{ + contextCandidate: contextsCandidate, + } + for _, opt := range options { + opt(c) + } + if len(c.contextPriority) == 0 { + withContextInferrerPriorities(defaultContextInferPriority)(c) + } + return c +} + +// withContextInferrerPriorities sets the contexts candidates priorities. The lower the +// context position is in the array, the more priority it will have over other items. +func withContextInferrerPriorities(priorities []string) priorityContextInferrerOption { + return func(c *priorityContextInferrer) { + contextPriority := map[string]int{} + for pri, context := range priorities { + contextPriority[context] = pri + } + c.contextPriority = contextPriority + } } func (s *priorityContextInferrer) infer(statements []string) (string, error) { + requiredFunctions := map[string]struct{}{} + requiredEnums := map[enumSymbol]struct{}{} + var inferredContext string var inferredContextPriority int - for _, statement := range statements { parsed, err := parseStatement(statement) if err != nil { - return inferredContext, err + return "", err } - for _, p := range getParsedStatementPaths(parsed) { - pathContextPriority, ok := s.contextPriority[p.Context] + statementPaths, statementFunctions, statementEnums := s.getParsedStatementHints(parsed) + for _, p := range statementPaths { + candidate := p.Context + candidatePriority, ok := s.contextPriority[candidate] if !ok { - // Lowest priority - pathContextPriority = math.MaxInt + candidatePriority = math.MaxInt } - - if inferredContext == "" || pathContextPriority < inferredContextPriority { - inferredContext = p.Context - inferredContextPriority = pathContextPriority + if inferredContext == "" || candidatePriority < inferredContextPriority { + inferredContext = candidate + inferredContextPriority = candidatePriority } } + for function := range statementFunctions { + requiredFunctions[function] = struct{}{} + } + for enum := range statementEnums { + requiredEnums[enum] = struct{}{} + } + } + // No inferred context or nothing left to verify. + if inferredContext == "" || (len(requiredFunctions) == 0 && len(requiredEnums) == 0) { + return inferredContext, nil + } + ok := s.validateContextCandidate(inferredContext, requiredFunctions, requiredEnums) + if ok { + return inferredContext, nil + } + return s.inferFromLowerContexts(inferredContext, requiredFunctions, requiredEnums), nil +} + +// validateContextCandidate checks if the given context candidate has all required functions names +// and enums symbols. The functions arity are not verified. +func (s *priorityContextInferrer) validateContextCandidate( + context string, + requiredFunctions map[string]struct{}, + requiredEnums map[enumSymbol]struct{}, +) bool { + candidate, ok := s.contextCandidate[context] + if !ok { + return false + } + if len(requiredFunctions) == 0 && len(requiredEnums) == 0 { + return true + } + for function := range requiredFunctions { + if !candidate.hasFunctionName(function) { + return false + } + } + for enum := range requiredEnums { + if !candidate.hasEnumSymbol((*EnumSymbol)(&enum)) { + return false + } } + return true +} + +// inferFromLowerContexts returns the first lower context that supports all required functions +// and enum symbols used on the statements. +// If no lower context meets the requirements, or if the context candidate is unknown, it +// returns an empty string. +func (s *priorityContextInferrer) inferFromLowerContexts( + context string, + requiredFunctions map[string]struct{}, + requiredEnums map[enumSymbol]struct{}, +) string { + inferredContextCandidate, ok := s.contextCandidate[context] + if !ok { + return "" + } + + lowerContextCandidates := inferredContextCandidate.getLowerContexts(context) + if len(lowerContextCandidates) == 0 { + return "" + } + + s.sortContextCandidates(lowerContextCandidates) + for _, lowerCandidate := range lowerContextCandidates { + ok = s.validateContextCandidate(lowerCandidate, requiredFunctions, requiredEnums) + if ok { + return lowerCandidate + } + } + return "" +} + +// sortContextCandidates sorts the slice candidates using the priorityContextInferrer.contextsPriority order. +func (s *priorityContextInferrer) sortContextCandidates(candidates []string) { + slices.SortFunc(candidates, func(l, r string) int { + lp, ok := s.contextPriority[l] + if !ok { + lp = math.MaxInt + } + rp, ok := s.contextPriority[r] + if !ok { + rp = math.MaxInt + } + return cmp.Compare(lp, rp) + }) +} - return inferredContext, nil +// getParsedStatementHints extracts all path, function names (editor and converter), and enumSymbol +// from the given parsed statements. These values are used by the context inferrer as hints to +// select a context in which the function/enum are supported. +func (s *priorityContextInferrer) getParsedStatementHints(parsed *parsedStatement) ([]path, map[string]struct{}, map[enumSymbol]struct{}) { + visitor := newGrammarContextInferrerVisitor() + parsed.Editor.accept(&visitor) + if parsed.WhereClause != nil { + parsed.WhereClause.accept(&visitor) + } + return visitor.paths, visitor.functions, visitor.enumsSymbols } -// defaultPriorityContextInferrer is like newPriorityContextInferrer, but using the default -// context priorities and ignoring unknown/non-prioritized contexts. -func defaultPriorityContextInferrer() contextInferrer { - return newPriorityContextInferrer(defaultContextInferPriority) +// priorityContextInferrerHintsVisitor is a grammarVisitor implementation that collects +// all path, function names (converter.Function and editor.Function), and enumSymbol. +type priorityContextInferrerHintsVisitor struct { + paths []path + functions map[string]struct{} + enumsSymbols map[enumSymbol]struct{} } -// newPriorityContextInferrer creates a new priority-based context inferrer. -// To infer the context, it compares all [ottl.Path.Context] values, prioritizing them based -// on the provide contextsPriority argument, the lower the context position is in the array, -// the more priority it will have over other items. -// If unknown/non-prioritized contexts are found on the statements, they can be either ignored -// or considered when no other prioritized context is found. To skip unknown contexts, the -// ignoreUnknownContext argument must be set to false. -func newPriorityContextInferrer(contextsPriority []string) contextInferrer { - contextPriority := make(map[string]int, len(contextsPriority)) - for i, ctx := range contextsPriority { - contextPriority[ctx] = i +func newGrammarContextInferrerVisitor() priorityContextInferrerHintsVisitor { + return priorityContextInferrerHintsVisitor{ + paths: []path{}, + functions: make(map[string]struct{}), + enumsSymbols: make(map[enumSymbol]struct{}), } - return &priorityContextInferrer{ - contextPriority: contextPriority, +} + +func (v *priorityContextInferrerHintsVisitor) visitMathExprLiteral(_ *mathExprLiteral) {} + +func (v *priorityContextInferrerHintsVisitor) visitEditor(e *editor) { + v.functions[e.Function] = struct{}{} +} + +func (v *priorityContextInferrerHintsVisitor) visitConverter(c *converter) { + v.functions[c.Function] = struct{}{} +} + +func (v *priorityContextInferrerHintsVisitor) visitValue(va *value) { + if va.Enum != nil { + v.enumsSymbols[*va.Enum] = struct{}{} } } + +func (v *priorityContextInferrerHintsVisitor) visitPath(value *path) { + v.paths = append(v.paths, *value) +} diff --git a/pkg/ottl/context_inferrer_test.go b/pkg/ottl/context_inferrer_test.go index 4d4455dd0dcf..9cec76503451 100644 --- a/pkg/ottl/context_inferrer_test.go +++ b/pkg/ottl/context_inferrer_test.go @@ -10,22 +10,55 @@ import ( "github.com/stretchr/testify/require" ) +var defaultDummyPriorityContextInferrerCandidate = &priorityContextInferrerCandidate{ + hasFunctionName: func(name string) bool { + return true + }, + hasEnumSymbol: func(enum *EnumSymbol) bool { + return true + }, + getLowerContexts: func(context string) []string { + return nil + }, +} + +func newDummyPriorityContextInferrerCandidate(hasFunctionName, hasEnumSymbol bool, lowerContexts []string) *priorityContextInferrerCandidate { + return &priorityContextInferrerCandidate{ + hasFunctionName: func(_ string) bool { + return hasFunctionName + }, + hasEnumSymbol: func(_ *EnumSymbol) bool { + return hasEnumSymbol + }, + getLowerContexts: func(_ string) []string { + return lowerContexts + }, + } +} + func Test_NewPriorityContextInferrer_Infer(t *testing.T) { tests := []struct { name string priority []string + candidates map[string]*priorityContextInferrerCandidate statements []string expected string }{ { - name: "with priority and contexts", - priority: []string{"spanevent", "span", "resource"}, + name: "with priority and statement context", + priority: []string{"spanevent", "span", "resource"}, + candidates: map[string]*priorityContextInferrerCandidate{ + "spanevent": defaultDummyPriorityContextInferrerCandidate, + }, statements: []string{"set(span.foo, resource.value) where spanevent.bar == true"}, expected: "spanevent", }, { - name: "with multiple statements", + name: "with multiple statements and contexts", priority: []string{"spanevent", "span", "resource"}, + candidates: map[string]*priorityContextInferrerCandidate{ + "spanevent": defaultDummyPriorityContextInferrerCandidate, + }, statements: []string{ "set(resource.foo, resource.value) where span.bar == true", "set(resource.foo, resource.value) where spanevent.bar == true", @@ -33,27 +66,84 @@ func Test_NewPriorityContextInferrer_Infer(t *testing.T) { expected: "spanevent", }, { - name: "with no context", + name: "with no statements context", priority: []string{"log", "resource"}, + candidates: map[string]*priorityContextInferrerCandidate{}, statements: []string{"set(foo, true) where bar == true"}, expected: "", }, { - name: "with empty priority", + name: "with empty priority list", + candidates: map[string]*priorityContextInferrerCandidate{ + "foo": defaultDummyPriorityContextInferrerCandidate, + }, statements: []string{"set(foo.name, true) where bar.name == true"}, expected: "foo", }, { - name: "with unknown context", - priority: []string{"foo", "bar"}, + name: "with non-prioritized statement context", + priority: []string{"foo", "bar"}, + candidates: map[string]*priorityContextInferrerCandidate{ + "span": defaultDummyPriorityContextInferrerCandidate, + }, statements: []string{"set(span.foo, true) where span.bar == true"}, expected: "span", }, + { + name: "inferred path context with missing function", + priority: []string{"foo", "datapoint", "metric"}, + statements: []string{`set(metric.is_foo, true) where metric.name == "foo"`}, + candidates: map[string]*priorityContextInferrerCandidate{ + "metric": newDummyPriorityContextInferrerCandidate(false, true, []string{"foo", "datapoint"}), + "foo": newDummyPriorityContextInferrerCandidate(false, true, []string{}), + "datapoint": newDummyPriorityContextInferrerCandidate(true, true, []string{}), + }, + expected: "datapoint", + }, + { + name: "inferred path context with missing function and no qualified lower context", + priority: []string{"datapoint", "metric"}, + statements: []string{`set(metric.is_foo, true) where metric.name == "foo"`}, + candidates: map[string]*priorityContextInferrerCandidate{ + "metric": newDummyPriorityContextInferrerCandidate(false, false, []string{"datapoint"}), + "datapoint": newDummyPriorityContextInferrerCandidate(false, false, []string{}), + }, + expected: "", + }, + { + name: "inferred path context with missing function and no lower context", + priority: []string{"datapoint", "metric"}, + statements: []string{`set(metric.is_foo, true) where metric.name == "foo"`}, + candidates: map[string]*priorityContextInferrerCandidate{ + "metric": newDummyPriorityContextInferrerCandidate(false, true, []string{}), + }, + expected: "", + }, + { + name: "inferred path context with missing enum", + priority: []string{"foo", "bar"}, + statements: []string{`set(foo.name, FOO) where IsFoo() == true`}, + candidates: map[string]*priorityContextInferrerCandidate{ + "foo": newDummyPriorityContextInferrerCandidate(true, false, []string{"foo", "bar"}), + "bar": newDummyPriorityContextInferrerCandidate(true, true, []string{}), + }, + expected: "bar", + }, + { + name: "unknown context candidate inferred from paths", + priority: []string{"unknown"}, + statements: []string{`set(unknown.count, 0)`}, + candidates: map[string]*priorityContextInferrerCandidate{}, + expected: "", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - inferrer := newPriorityContextInferrer(tt.priority) + inferrer := newPriorityContextInferrer( + tt.candidates, + withContextInferrerPriorities(tt.priority), + ) inferredContext, err := inferrer.infer(tt.statements) require.NoError(t, err) assert.Equal(t, tt.expected, inferredContext) @@ -62,7 +152,7 @@ func Test_NewPriorityContextInferrer_Infer(t *testing.T) { } func Test_NewPriorityContextInferrer_InvalidStatement(t *testing.T) { - inferrer := newPriorityContextInferrer([]string{"foo"}) + inferrer := newPriorityContextInferrer(map[string]*priorityContextInferrerCandidate{}) statements := []string{"set(foo.field,"} _, err := inferrer.infer(statements) require.ErrorContains(t, err, "unexpected token") @@ -71,8 +161,8 @@ func Test_NewPriorityContextInferrer_InvalidStatement(t *testing.T) { func Test_DefaultPriorityContextInferrer(t *testing.T) { expectedPriority := []string{ "log", - "metric", "datapoint", + "metric", "spanevent", "span", "resource", @@ -80,7 +170,7 @@ func Test_DefaultPriorityContextInferrer(t *testing.T) { "instrumentation_scope", } - inferrer := defaultPriorityContextInferrer().(*priorityContextInferrer) + inferrer := newPriorityContextInferrer(map[string]*priorityContextInferrerCandidate{}).(*priorityContextInferrer) require.NotNil(t, inferrer) for pri, ctx := range expectedPriority { diff --git a/pkg/ottl/grammar.go b/pkg/ottl/grammar.go index a1e5eb53a81d..b7d6743f4afa 100644 --- a/pkg/ottl/grammar.go +++ b/pkg/ottl/grammar.go @@ -213,6 +213,7 @@ type converter struct { } func (c *converter) accept(v grammarVisitor) { + v.visitConverter(c) if c.Arguments != nil { for _, a := range c.Arguments { a.accept(v) @@ -525,6 +526,7 @@ func (e *grammarCustomError) Unwrap() []error { type grammarVisitor interface { visitPath(v *path) visitEditor(v *editor) + visitConverter(v *converter) visitValue(v *value) visitMathExprLiteral(v *mathExprLiteral) } @@ -549,6 +551,8 @@ func (g *grammarCustomErrorsVisitor) visitPath(_ *path) {} func (g *grammarCustomErrorsVisitor) visitValue(_ *value) {} +func (g *grammarCustomErrorsVisitor) visitConverter(_ *converter) {} + func (g *grammarCustomErrorsVisitor) visitEditor(v *editor) { if v.Keys != nil { g.add(fmt.Errorf("only paths and converters may be indexed, not editors, but got %s%s", v.Function, buildOriginalKeysText(v.Keys))) diff --git a/pkg/ottl/parser_collection.go b/pkg/ottl/parser_collection.go index 72d0d6abb3f0..c257e8eb198c 100644 --- a/pkg/ottl/parser_collection.go +++ b/pkg/ottl/parser_collection.go @@ -131,11 +131,13 @@ type parserCollectionParser struct { // // Experimental: *NOTE* this API is subject to change or removal in the future. type ParserCollection[R any] struct { - contextParsers map[string]*parserCollectionParser - contextInferrer contextInferrer - modifiedStatementLogging bool - Settings component.TelemetrySettings - ErrorMode ErrorMode + contextParsers map[string]*parserCollectionParser + contextInferrer contextInferrer + contextInferrerCandidates map[string]*priorityContextInferrerCandidate + candidatesLowerContexts map[string][]string + modifiedStatementLogging bool + Settings component.TelemetrySettings + ErrorMode ErrorMode } // ParserCollectionOption is a configurable ParserCollection option. @@ -150,10 +152,13 @@ func NewParserCollection[R any]( settings component.TelemetrySettings, options ...ParserCollectionOption[R], ) (*ParserCollection[R], error) { + contextInferrerCandidates := map[string]*priorityContextInferrerCandidate{} pc := &ParserCollection[R]{ - Settings: settings, - contextParsers: map[string]*parserCollectionParser{}, - contextInferrer: defaultPriorityContextInferrer(), + Settings: settings, + contextParsers: map[string]*parserCollectionParser{}, + contextInferrer: newPriorityContextInferrer(contextInferrerCandidates), + contextInferrerCandidates: contextInferrerCandidates, + candidatesLowerContexts: map[string][]string{}, } for _, op := range options { @@ -211,10 +216,32 @@ func WithParserCollectionContext[K any, R any]( ottlParser: newParserWrapper[K](parser), statementsConverter: newStatementsConverterWrapper(converter), } + + for lowerContext := range parser.pathContextNames { + if lowerContext != context { + mp.candidatesLowerContexts[lowerContext] = append(mp.candidatesLowerContexts[lowerContext], context) + } + } + + mp.contextInferrerCandidates[context] = &priorityContextInferrerCandidate{ + hasEnumSymbol: func(enum *EnumSymbol) bool { + _, err := parser.enumParser(enum) + return err == nil + }, + hasFunctionName: func(name string) bool { + _, ok := parser.functions[name] + return ok + }, + getLowerContexts: mp.getLowerContexts, + } return nil } } +func (pc *ParserCollection[R]) getLowerContexts(context string) []string { + return pc.candidatesLowerContexts[context] +} + // WithParserCollectionErrorMode has no effect on the ParserCollection, but might be used // by the ParsedStatementConverter functions to handle/create StatementSequence. // @@ -255,7 +282,12 @@ func (pc *ParserCollection[R]) ParseStatements(statements StatementsGetter) (R, } if inferredContext == "" { - return *new(R), fmt.Errorf("unable to infer context from statements [%v], path's first segment must be a valid context name", statementsValues) + return *new(R), fmt.Errorf("unable to infer context from statements %+q, path's first segment must be a valid context name: %+q", statementsValues, pc.supportedContextNames()) + } + + _, ok := pc.contextParsers[inferredContext] + if !ok { + return *new(R), fmt.Errorf(`context "%s" inferred from the statements %+q is not a supported context: %+q`, inferredContext, statementsValues, pc.supportedContextNames()) } return pc.ParseStatementsWithContext(inferredContext, statements, false) @@ -332,3 +364,11 @@ func (pc *ParserCollection[R]) logModifiedStatements(originalStatements, modifie pc.Settings.Logger.Info("one or more statements were modified to include their paths context, please rewrite them accordingly", zap.Dict("statements", fields...)) } } + +func (pc *ParserCollection[R]) supportedContextNames() []string { + contextsNames := make([]string, 0, len(pc.contextParsers)) + for k := range pc.contextParsers { + contextsNames = append(contextsNames, k) + } + return contextsNames +} diff --git a/pkg/ottl/parser_collection_test.go b/pkg/ottl/parser_collection_test.go index 841f3a5fab60..df33f4ba05c3 100644 --- a/pkg/ottl/parser_collection_test.go +++ b/pkg/ottl/parser_collection_test.go @@ -93,6 +93,35 @@ func Test_WithParserCollectionContext_UnsupportedContext(t *testing.T) { require.ErrorContains(t, err, `context "bar" must be a valid "*ottl.Parser[interface {}]" path context name`) } +func Test_WithParserCollectionContext_contextInferrerCandidates(t *testing.T) { + pc, err := NewParserCollection[any](component.TelemetrySettings{}, + WithParserCollectionContext("foo", mockParser(t, WithPathContextNames[any]([]string{"foo", "bar"})), newNopParsedStatementConverter[any]()), + WithParserCollectionContext("bar", mockParser(t, WithPathContextNames[any]([]string{"bar"})), newNopParsedStatementConverter[any]()), + ) + require.NoError(t, err) + require.NotNil(t, pc.contextInferrer) + require.Contains(t, pc.contextInferrerCandidates, "foo") + + validEnumSymbol := EnumSymbol("TEST_ENUM") + invalidEnumSymbol := EnumSymbol("DUMMY") + + fooCandidate := pc.contextInferrerCandidates["foo"] + assert.NotNil(t, fooCandidate) + assert.True(t, fooCandidate.hasFunctionName("set")) + assert.False(t, fooCandidate.hasFunctionName("dummy")) + assert.True(t, fooCandidate.hasEnumSymbol(&validEnumSymbol)) + assert.False(t, fooCandidate.hasEnumSymbol(&invalidEnumSymbol)) + assert.Nil(t, fooCandidate.getLowerContexts("foo")) + + barCandidate := pc.contextInferrerCandidates["bar"] + assert.NotNil(t, barCandidate) + assert.True(t, barCandidate.hasFunctionName("set")) + assert.False(t, barCandidate.hasFunctionName("dummy")) + assert.True(t, barCandidate.hasEnumSymbol(&validEnumSymbol)) + assert.False(t, barCandidate.hasEnumSymbol(&invalidEnumSymbol)) + assert.Equal(t, []string{"foo"}, barCandidate.getLowerContexts("bar")) +} + func Test_WithParserCollectionErrorMode(t *testing.T) { pc, err := NewParserCollection[any]( componenttest.NewNopTelemetrySettings(), @@ -253,14 +282,18 @@ func Test_ParseStatements_ContextInferenceError(t *testing.T) { } func Test_ParseStatements_UnknownContextError(t *testing.T) { - pc, err := NewParserCollection[any](component.TelemetrySettings{}) + pc, err := NewParserCollection[any](component.TelemetrySettings{}, + WithParserCollectionContext("bar", mockParser(t, WithPathContextNames[any]([]string{"bar"})), newNopParsedStatementConverter[any]()), + WithParserCollectionContext("te", mockParser(t, WithPathContextNames[any]([]string{"te"})), newNopParsedStatementConverter[any]()), + ) require.NoError(t, err) pc.contextInferrer = &mockStaticContextInferrer{"foo"} statements := mockStatementsGetter{values: []string{`set(foo.attributes["bar"], "foo")`}} _, err = pc.ParseStatements(statements) - assert.ErrorContains(t, err, `unknown context "foo"`) + assert.ErrorContains(t, err, `context "foo" inferred from the statements`) + assert.ErrorContains(t, err, "is not a supported context") } func Test_ParseStatements_ParseStatementsError(t *testing.T) { diff --git a/pkg/ottl/paths.go b/pkg/ottl/paths.go index dbb66ee7c994..8f6c2c15a72d 100644 --- a/pkg/ottl/paths.go +++ b/pkg/ottl/paths.go @@ -9,6 +9,7 @@ type grammarPathVisitor struct { } func (v *grammarPathVisitor) visitEditor(_ *editor) {} +func (v *grammarPathVisitor) visitConverter(_ *converter) {} func (v *grammarPathVisitor) visitValue(_ *value) {} func (v *grammarPathVisitor) visitMathExprLiteral(_ *mathExprLiteral) {}