From 909b1d33b307462a9687b2b47ce19e4baed90caf Mon Sep 17 00:00:00 2001
From: Edmo Vamerlatti Costa <11836452+edmocosta@users.noreply.github.com>
Date: Thu, 31 Oct 2024 15:55:15 +0100
Subject: [PATCH] [pkg/ottl] Add parser utility to rewrite statements appending
 missing paths context (#35716)

<!--Ex. Fixing a bug - Describe the bug and how this fixes the issue.
Ex. Adding a feature - Explain what this achieves.-->
#### Description

This PR is part of
https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/29017,
and adds the `ottl.Parser[K].prependContextToStatementPaths` function,
allowing components to rewrite statements by prepending missing `ottl.path`
context names.

For example, the following context-less statement:

```
set(value, 1) where name == attributes["foo.name"]
```

Would be rewritten using the `span` context as:

```

set(span.value, 1) where span.name == span.attributes["foo.name"]
```

**Why do we need to rewrite statements?**

This utility will be used during the transition from structured OTTL
statements to flat statements.
Components such as the `transformprocessor` will leverage it to support
both configuration styles, without forcing
users to adapt/rewrite their existing config files.

Once a component turns on the `ottl.Parser[K]` path context validation,
statements written in the new configuration style will be validated,
requiring all paths to have a context prefix, while statements written in
the old configuration style will be rewritten automatically using this function.

For more details, please have a look at the complete
[draft](https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/35050)
implementation.

<!-- Issue number (e.g. #1234) or full URL to issue, if applicable. -->
#### Link to tracking issue

https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/29017

<!--Describe what testing was performed and which tests were added.-->
#### Testing
Unit tests

<!--Describe the documentation added.-->
#### Documentation
No changes

<!--Please delete paragraphs that you did not use before submitting.-->
---
 pkg/ottl/parser.go      |  53 ++++++++++++++++
 pkg/ottl/parser_test.go | 137 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 190 insertions(+)

diff --git a/pkg/ottl/parser.go b/pkg/ottl/parser.go
index ed8457603f7c..f16f0e3b0fbb 100644
--- a/pkg/ottl/parser.go
+++ b/pkg/ottl/parser.go
@@ -7,6 +7,8 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"sort"
+	"strings"
 
 	"github.com/alecthomas/participle/v2"
 	"go.opentelemetry.io/collector/component"
@@ -195,6 +197,33 @@ func (p *Parser[K]) ParseCondition(condition string) (*Condition[K], error) {
 	}, nil
 }
 
+// prependContextToStatementPaths rewrites the given OTTL statement, inserting the context name prefix
+// ("<context>.") before every path whose [Path.Context] value is not a WithPathContextNames value.
+// Paths whose [Path.Context] already matches one of those values, and statements without paths, are kept as is.
+// The context argument must be a valid WithPathContextNames value, otherwise an error is returned.
+func (p *Parser[K]) prependContextToStatementPaths(context string, statement string) (string, error) {
+	if _, ok := p.pathContextNames[context]; !ok {
+		return statement, fmt.Errorf(`unknown context "%s" for parser %T, valid options are: %s`, context, p, p.buildPathContextNamesText(""))
+	}
+	parsed, err := parseStatement(statement)
+	if err != nil {
+		return "", err // NOTE(review): unlike the unknown-context branch, the input statement is not echoed back here
+	}
+	paths := getParsedStatementPaths(parsed)
+	if len(paths) == 0 {
+		return statement, nil
+	}
+
+	var missingContextOffsets []int
+	for _, it := range paths {
+		if _, ok := p.pathContextNames[it.Context]; !ok { // empty or unregistered context: this path gets the prefix
+			missingContextOffsets = append(missingContextOffsets, it.Pos.Offset)
+		}
+	}
+
+	return insertContextIntoStatementOffsets(context, statement, missingContextOffsets)
+}
+
 var parser = newParser[parsedStatement]()
 var conditionParser = newParser[booleanExpression]()
 
@@ -226,6 +255,30 @@ func parseCondition(raw string) (*booleanExpression, error) {
 	return parsed, nil
 }
 
+func insertContextIntoStatementOffsets(context string, statement string, offsets []int) (string, error) { // writes context+"." at each byte offset of statement
+	if len(offsets) == 0 {
+		return statement, nil
+	}
+
+	contextPrefix := context + "."
+	var sb strings.Builder
+	sb.Grow(len(statement) + (len(contextPrefix) * len(offsets))) // final size: the statement plus one prefix per offset
+
+	sort.Ints(offsets) // ascending order lets the statement be copied left to right; note this sorts the caller's slice in place
+	left := 0
+	for _, offset := range offsets {
+		if offset < 0 || offset > len(statement) { // offset == len(statement) is accepted: the prefix then lands at the very end
+			return statement, fmt.Errorf(`failed to insert context "%s" into statement "%s": offset %d is out of range`, context, statement, offset)
+		}
+		sb.WriteString(statement[left:offset])
+		sb.WriteString(contextPrefix)
+		left = offset // the next copy resumes at the insertion point
+	}
+	sb.WriteString(statement[left:]) // remainder after the last insertion
+
+	return sb.String(), nil
+}
+
 // newParser returns a parser that can be used to read a string into a parsedStatement. An error will be returned if the string
 // is not formatted for the DSL.
 func newParser[G any]() *participle.Parser[G] {
diff --git a/pkg/ottl/parser_test.go b/pkg/ottl/parser_test.go
index e8bb93af6f9b..9e2e09a10e5f 100644
--- a/pkg/ottl/parser_test.go
+++ b/pkg/ottl/parser_test.go
@@ -2714,3 +2714,140 @@ func Test_ConditionSequence_Eval_Error(t *testing.T) {
 		})
 	}
 }
+
+func Test_prependContextToStatementPaths_InvalidStatement(t *testing.T) {
+	ps, err := NewParser(
+		CreateFactoryMap[any](),
+		testParsePath[any],
+		componenttest.NewNopTelemetrySettings(),
+		WithEnumParser[any](testParseEnum),
+		WithPathContextNames[any]([]string{"foo", "bar"}),
+	)
+	require.NoError(t, err)
+	_, err = ps.prependContextToStatementPaths("foo", "this is invalid") // registered context, statement that does not parse
+	require.ErrorContains(t, err, `statement has invalid syntax`)
+}
+
+func Test_prependContextToStatementPaths_InvalidContext(t *testing.T) {
+	ps, err := NewParser(
+		CreateFactoryMap[any](),
+		testParsePath[any],
+		componenttest.NewNopTelemetrySettings(),
+		WithEnumParser[any](testParseEnum),
+		WithPathContextNames[any]([]string{"foo", "bar"}),
+	)
+	require.NoError(t, err)
+	_, err = ps.prependContextToStatementPaths("foobar", "set(foo, 1)") // "foobar" is not among the registered names "foo" and "bar"
+	require.ErrorContains(t, err, `unknown context "foobar" for parser`)
+}
+
+func Test_prependContextToStatementPaths_Success(t *testing.T) {
+	type mockSetArguments[K any] struct {
+		Target Setter[K]
+		Value  Getter[K]
+	}
+
+	mockSetFactory := NewFactory("set", &mockSetArguments[any]{}, func(_ FunctionContext, _ Arguments) (ExprFunc[any], error) {
+		return func(_ context.Context, _ any) (any, error) {
+			return nil, nil
+		}, nil
+	})
+
+	tests := []struct {
+		name             string
+		statement        string
+		context          string
+		pathContextNames []string // names handed to WithPathContextNames for this case's parser
+		expected         string
+	}{
+		{
+			name:             "no paths",
+			statement:        `set("foo", 1)`,
+			context:          "bar",
+			pathContextNames: []string{"bar"},
+			expected:         `set("foo", 1)`,
+		},
+		{
+			name:             "single path with context",
+			statement:        `set(span.value, 1)`,
+			context:          "span",
+			pathContextNames: []string{"span"},
+			expected:         `set(span.value, 1)`,
+		},
+		{
+			name:             "single path without context",
+			statement:        "set(value, 1)",
+			context:          "span",
+			pathContextNames: []string{"span"},
+			expected:         "set(span.value, 1)",
+		},
+		{
+			name:             "single path with context - multiple context names",
+			statement:        "set(span.value, 1)",
+			context:          "spanevent",
+			pathContextNames: []string{"spanevent", "span"},
+			expected:         "set(span.value, 1)",
+		},
+		{
+			name:             "multiple paths with the same context",
+			statement:        `set(span.value, 1) where span.attributes["foo"] == "foo" and span.id == 1`,
+			context:          "another",
+			pathContextNames: []string{"another", "span"},
+			expected:         `set(span.value, 1) where span.attributes["foo"] == "foo" and span.id == 1`,
+		},
+		{
+			name:             "multiple paths with different contexts",
+			statement:        `set(another.value, 1) where span.attributes["foo"] == "foo" and another.id == 1`,
+			context:          "another",
+			pathContextNames: []string{"another", "span"},
+			expected:         `set(another.value, 1) where span.attributes["foo"] == "foo" and another.id == 1`,
+		},
+		{
+			name:             "multiple paths with and without contexts",
+			statement:        `set(value, 1) where span.attributes["foo"] == "foo" and id == 1`,
+			context:          "spanevent",
+			pathContextNames: []string{"spanevent", "span"},
+			expected:         `set(spanevent.value, 1) where span.attributes["foo"] == "foo" and spanevent.id == 1`,
+		},
+		{
+			name:             "multiple paths without context",
+			statement:        `set(value, 1) where name == attributes["foo.name"]`,
+			context:          "span",
+			pathContextNames: []string{"span"},
+			expected:         `set(span.value, 1) where span.name == span.attributes["foo.name"]`,
+		},
+		{
+			name:             "function path parameter without context",
+			statement:        `set(attributes["test"], "pass") where IsMatch(name, "operation[AC]")`,
+			context:          "log",
+			pathContextNames: []string{"log"},
+			expected:         `set(log.attributes["test"], "pass") where IsMatch(log.name, "operation[AC]")`,
+		},
+		{
+			name:             "function path parameter with context",
+			statement:        `set(attributes["test"], "pass") where IsMatch(resource.name, "operation[AC]")`,
+			context:          "log",
+			pathContextNames: []string{"log", "resource"},
+			expected:         `set(log.attributes["test"], "pass") where IsMatch(resource.name, "operation[AC]")`,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			ps, err := NewParser( // a fresh parser per case, configured with that case's pathContextNames
+				CreateFactoryMap[any](mockSetFactory),
+				testParsePath[any],
+				componenttest.NewNopTelemetrySettings(),
+				WithEnumParser[any](testParseEnum),
+				WithPathContextNames[any](tt.pathContextNames),
+			)
+
+			require.NoError(t, err)
+			require.NotNil(t, ps)
+
+			result, err := ps.prependContextToStatementPaths(tt.context, tt.statement)
+			require.NoError(t, err)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}