Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for utilising text indexes if exits for like operator in pinot based handlers #186

Merged
merged 6 commits into from
Jun 13, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ public class PinotColumnSpec {

private final List<String> columnNames;
private ValueType type;
private boolean textIndex;

public PinotColumnSpec() {
columnNames = new ArrayList<>();
textIndex = false;
}

public List<String> getColumnNames() {
Expand All @@ -28,4 +30,12 @@ public ValueType getType() {
public void setType(ValueType type) {
this.type = type;
}

public boolean hasTextIndex() {
return textIndex;
}

public void setTextIndex() {
this.textIndex = true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import static org.hypertrace.core.query.service.QueryRequestUtil.getLogicalColumnName;
import static org.hypertrace.core.query.service.QueryRequestUtil.isAttributeExpressionWithSubpath;
import static org.hypertrace.core.query.service.QueryRequestUtil.isSimpleAttributeExpression;
import static org.hypertrace.core.query.service.api.Expression.ValueCase.COLUMNIDENTIFIER;
import static org.hypertrace.core.query.service.api.Expression.ValueCase.LITERAL;

import com.google.common.base.Joiner;
Expand All @@ -13,6 +12,7 @@
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Optional;
import org.hypertrace.core.query.service.ExecutionContext;
import org.hypertrace.core.query.service.api.Expression;
import org.hypertrace.core.query.service.api.Filter;
Expand All @@ -36,6 +36,7 @@ class QueryRequestToPinotSQLConverter {

private static final String QUESTION_MARK = "?";
private static final String REGEX_OPERATOR = "REGEXP_LIKE";
private static final String TEXT_MATCH_OPERATOR = "TEXT_MATCH";
private static final String MAP_VALUE = "mapValue";
private static final int MAP_KEY_INDEX = 0;
private static final int MAP_VALUE_INDEX = 1;
Expand Down Expand Up @@ -144,9 +145,15 @@ private String convertFilterToString(
} else {
switch (filter.getOperator()) {
case LIKE:
// The like operation in PQL looks like `regexp_like(lhs, rhs)`
/**
* If the text index is not enabled on lhs expression, - the pql looks like
* `regexp_like(lhs, rhs)` else - the pql looks like `text_match(lhs, rhs)`
*/
operator = handleLikeOperatorConversion(filter.getLhs());
Expression rhs =
handleValueConversionForLiteralExpression(filter.getLhs(), filter.getRhs());
rhs = postProcessValueConversionForLikeOperator(operator, rhs);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: rather than taking multiple steps to calculate the RHS in the top level, could you do something like

          operator = handleLikeOperatorConversion(filter.getLhs());
          Expression rhs =
              handleValueConversionForLikeArgument(filter.getLhs(), filter.getRhs());

and then have handleValueConversionForLikeArgument do the delegation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


builder.append(operator);
builder.append("(");
builder.append(
Expand Down Expand Up @@ -276,6 +283,29 @@ private String convertOperatorToString(Operator operator) {
}
}

private String handleLikeOperatorConversion(Expression expression) {
Optional<String> logicalColumnName = getLogicalColumnName(expression);
if (logicalColumnName.isPresent()
&& isSimpleAttributeExpression(expression)
&& viewDefinition.hasTextIndex(logicalColumnName.get())) {
return TEXT_MATCH_OPERATOR;
}
return REGEX_OPERATOR;
}

private Expression postProcessValueConversionForLikeOperator(
String likeOperatorStr, Expression rhsExpression) {
if (likeOperatorStr.equals(TEXT_MATCH_OPERATOR)) {
String strValue = "/" + rhsExpression.getLiteral().getValue().getString() + "/";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: this could be simplified a bit by just starting from the input.

Builder builder = rhsExpression.toBuilder()
 if (condition) {
   builder.getLiteralBuilder().getValueBuilder().setString(...);
 }
return builder.build();

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

Value value = Value.newBuilder().setValueType(ValueType.STRING).setString(strValue).build();

return Expression.newBuilder()
.setLiteral(LiteralConstant.newBuilder().setValue(value))
.build();
}
return rhsExpression;
}

private String convertExpressionToString(
Expression expression, Builder paramsBuilder, ExecutionContext executionContext) {
switch (expression.getValueCase()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ public class ViewDefinition {
private static final String MAP_FIELDS_CONFIG_KEY = "mapFields";
private static final String FILTERS_CONFIG_KEY = "filters";
private static final String COLUMN_CONFIG_KEY = "column";
private static final String TEXT_INDEXES_FIELDS_CONFIG_KEY = "textIndexes";

private static final long DEFAULT_RETENTION_TIME = TimeUnit.DAYS.toMillis(8);
private static final long DEFAULT_TIME_GRANULARITY = TimeUnit.MINUTES.toMillis(1);
Expand Down Expand Up @@ -102,6 +103,13 @@ public static ViewDefinition parse(Config config, String tenantColumnName) {
? config.getStringList(BYTES_FIELDS_CONFIG_KEY)
: List.of());

// get all the String fields that have enabled text Indexes
final Set<String> textIndexFields =
new HashSet<>(
config.hasPath(TEXT_INDEXES_FIELDS_CONFIG_KEY)
? config.getStringList(TEXT_INDEXES_FIELDS_CONFIG_KEY)
: List.of());

Map<String, PinotColumnSpec> columnSpecMap = new HashMap<>();
for (Map.Entry<String, String> entry : fieldMap.entrySet()) {
String logicalName = entry.getKey();
Expand All @@ -120,6 +128,11 @@ public static ViewDefinition parse(Config config, String tenantColumnName) {
spec.addColumnName(physName);
spec.setType(ValueType.STRING);
}

if (textIndexFields.contains(physName)) {
spec.setTextIndex();
aaron-steinfeld marked this conversation as resolved.
Show resolved Hide resolved
}

columnSpecMap.put(logicalName, spec);
}

Expand Down Expand Up @@ -175,6 +188,10 @@ public ValueType getColumnType(String logicalName) {
return columnSpecMap.get(logicalName).getType();
}

public boolean hasTextIndex(String logicalName) {
return columnSpecMap.get(logicalName).hasTextIndex();
}

public String getKeyColumnNameForMap(String logicalName) {
List<String> keys = findPhysicalNameWithSuffix(logicalName, MAP_KEYS_SUFFIX);
Preconditions.checkArgument(keys.size() <= 1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import static java.util.Objects.requireNonNull;
import static org.hypertrace.core.query.service.QueryRequestBuilderUtils.createAliasedFunctionExpression;
import static org.hypertrace.core.query.service.QueryRequestBuilderUtils.createColumnExpression;
import static org.hypertrace.core.query.service.QueryRequestBuilderUtils.createCompositeFilter;
import static org.hypertrace.core.query.service.QueryRequestBuilderUtils.createCountByColumnSelection;
import static org.hypertrace.core.query.service.QueryRequestBuilderUtils.createEqualsFilter;
import static org.hypertrace.core.query.service.QueryRequestBuilderUtils.createFunctionExpression;
Expand Down Expand Up @@ -951,6 +952,90 @@ public void testQueryWithAverageRateInOrderBy() {
executionContext);
}

@Test
public void testQueryWithLikeOperatorForServiceNameNotHavingTextIndex() {
Builder builder = QueryRequest.newBuilder();

Filter startTimeFilter =
createTimeFilter("Span.start_time_millis", Operator.GT, 1570658506605L);
Filter endTimeFilter = createTimeFilter("Span.end_time_millis", Operator.LT, 1570744906673L);

Filter likeFilter =
Filter.newBuilder()
.setOperator(Operator.LIKE)
.setLhs(createColumnExpression("Span.serviceName"))
.setRhs(createStringLiteralValueExpression("abc"))
.build();
builder.setFilter(likeFilter);

builder
.addSelection(createColumnExpression("Span.id"))
.addSelection(createColumnExpression("Span.serviceName"))
.setFilter(createCompositeFilter(Operator.AND, startTimeFilter, endTimeFilter, likeFilter))
.setLimit(15);

ViewDefinition viewDefinition = getDefaultViewDefinition();
defaultMockingForExecutionContext();

assertPQLQuery(
builder.build(),
"select span_id, service_name from spanEventView "
+ "where "
+ viewDefinition.getTenantIdColumn()
+ " = '"
+ TENANT_ID
+ "'"
+ " and "
+ "( start_time_millis > 1570658506605 and end_time_millis < 1570744906673"
+ " and "
+ "regexp_like(service_name,'abc') ) "
+ "limit 15",
viewDefinition,
executionContext);
}

@Test
public void testQueryWithLikeOperatorForResponseBodyHavingTextIndex() {
Builder builder = QueryRequest.newBuilder();

Filter startTimeFilter =
createTimeFilter("Span.start_time_millis", Operator.GT, 1570658506605L);
Filter endTimeFilter = createTimeFilter("Span.end_time_millis", Operator.LT, 1570744906673L);

Filter likeFilter =
Filter.newBuilder()
.setOperator(Operator.LIKE)
.setLhs(createColumnExpression("Span.attributes.response_body"))
.setRhs(createStringLiteralValueExpression("abc"))
.build();
builder.setFilter(likeFilter);

builder
.addSelection(createColumnExpression("Span.id"))
.addSelection(createColumnExpression("Span.attributes.response_body"))
.setFilter(createCompositeFilter(Operator.AND, startTimeFilter, endTimeFilter, likeFilter))
.setLimit(15);

ViewDefinition viewDefinition = getDefaultViewDefinition();
defaultMockingForExecutionContext();

assertPQLQuery(
builder.build(),
"select span_id, response_body from spanEventView "
+ "where "
+ viewDefinition.getTenantIdColumn()
+ " = '"
+ TENANT_ID
+ "'"
+ " and "
+ "( start_time_millis > 1570658506605 and end_time_millis < 1570744906673"
+ " and "
+ "text_match(response_body,'/abc/') ) "
+ "limit 15",
viewDefinition,
executionContext);
}

private QueryRequest buildSimpleQueryWithFilter(Filter filter) {
Builder builder = QueryRequest.newBuilder();
builder.addSelection(createColumnExpression("Span.id").build());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
viewName = SpanEventView
mapFields = ["tags", "request_headers"]
bytesFields = ["parent_span_id", "span_id"]
textIndexes = ["response_body"]
fieldMap = {
"Span.tags": "tags",
"Span.id": "span_id",
Expand Down