Skip to content

Commit

Permalink
Added support for accurate distinct count on attributes through config (
Browse files Browse the repository at this point in the history
#178)

Existing config supports configuring the distinct count aggregation function for a given scope for all attributes.
But we might need accurate distinct count on some attributes if the default is set to the approximate function.
Defined a way to configure overrides for such attributes.
  • Loading branch information
singhalprerana authored Jan 11, 2023
1 parent 38dcb3f commit 4525f0c
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public String convert(
// and reasonably accurate, so support selecting the implementation to use
return this.functionToString(this.toPinotPercentile(function), argumentConverter);
case QUERY_FUNCTION_DISTINCTCOUNT:
return this.functionToString(this.toPinotDistinctCount(function), argumentConverter);
return this.functionToStringForDistinctCount(function, argumentConverter);
case QUERY_FUNCTION_CONCAT:
return this.functionToString(this.toPinotConcat(function), argumentConverter);
case QUERY_FUNCTION_AVGRATE:
Expand Down Expand Up @@ -72,6 +72,12 @@ private String functionToString(
return function.getFunctionName() + "(" + argumentString + ")";
}

private String functionToStringForDistinctCount(
Function function, java.util.function.Function<Expression, String> argumentConverter) {
String columnName = argumentConverter.apply(function.getArgumentsList().get(0));
return this.config.getDistinctCountFunction(columnName) + "(" + columnName + ")";
}

private String functionToStringForAvgRate(
Function function,
java.util.function.Function<Expression, String> argumentConverter,
Expand Down Expand Up @@ -119,12 +125,6 @@ private Function toPinotConcat(Function function) {
return Function.newBuilder(function).setFunctionName(PINOT_CONCAT_FUNCTION).build();
}

private Function toPinotDistinctCount(Function function) {
return Function.newBuilder(function)
.setFunctionName(this.config.getDistinctCountAggregationFunction())
.build();
}

private boolean isHardcodedPercentile(Function function) {
String functionName = function.getFunctionName().toUpperCase();
return functionName.startsWith(QUERY_FUNCTION_PERCENTILE)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,29 @@

import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import java.util.Collections;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Value;

@Value
@AllArgsConstructor
@AllArgsConstructor(access = AccessLevel.PACKAGE)
public class PinotFunctionConverterConfig {

private static final String PERCENTILE_AGGREGATION_FUNCTION_CONFIG = "percentileAggFunction";
private static final String DISTINCT_COUNT_AGGREGATION_FUNCTION_CONFIG =
"distinctCountAggFunction";
private static final String DISTINCT_COUNT_AGGREGATION_OVERRIDES = "distinctCountAggOverrides";
private static final String DEFAULT_PERCENTILE_AGGREGATION_FUNCTION = "PERCENTILETDIGEST";
private static final String DEFAULT_DISTINCT_COUNT_AGGREGATION_FUNCTION = "DISTINCTCOUNT";

String percentileAggregationFunction;
String distinctCountAggregationFunction;
@Nonnull Map<String, String> distinctCountAggOverrides;

public PinotFunctionConverterConfig(Config config) {
if (config.hasPath(PERCENTILE_AGGREGATION_FUNCTION_CONFIG)) {
Expand All @@ -30,9 +38,24 @@ public PinotFunctionConverterConfig(Config config) {
} else {
this.distinctCountAggregationFunction = DEFAULT_DISTINCT_COUNT_AGGREGATION_FUNCTION;
}
if (config.hasPath(DISTINCT_COUNT_AGGREGATION_OVERRIDES)) {
Config overridesConfig = config.getConfig(DISTINCT_COUNT_AGGREGATION_OVERRIDES);
this.distinctCountAggOverrides =
overridesConfig.entrySet().stream()
.collect(
Collectors.toMap(
entry -> entry.getKey(), entry -> overridesConfig.getString(entry.getKey())));
} else {
this.distinctCountAggOverrides = Collections.emptyMap();
}
}

public PinotFunctionConverterConfig() {
this(ConfigFactory.empty());
}

public String getDistinctCountFunction(String arg) {
return Optional.ofNullable(distinctCountAggOverrides.get(arg))
.orElse(distinctCountAggregationFunction);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.when;

import com.typesafe.config.ConfigFactory;
import java.time.Duration;
import java.util.Arrays;
import java.util.Collections;
import java.util.Optional;
import java.util.stream.Collectors;
import org.hypertrace.core.query.service.ExecutionContext;
Expand Down Expand Up @@ -104,7 +106,8 @@ void acceptsCustomPercentileFunctions() {

assertEquals(
expected,
new PinotFunctionConverter(new PinotFunctionConverterConfig("CUSTOMPERCENTILE", null))
new PinotFunctionConverter(
new PinotFunctionConverterConfig("CUSTOMPERCENTILE", null, Collections.emptyMap()))
.convert(mockingExecutionContext, percentileFunction, this.mockArgumentConverter));
}

Expand Down Expand Up @@ -233,25 +236,43 @@ void convertAvgRateFunction() {

@Test
void convertsDistinctCountFunction() {
Expression column = createColumnExpression("foo").build();
Expression column1 = createColumnExpression("foo").build();
Expression column2 = createColumnExpression("bar").build();

when(this.mockArgumentConverter.apply(column)).thenReturn("foo");
when(this.mockArgumentConverter.apply(column1)).thenReturn("foo");
when(this.mockArgumentConverter.apply(column2)).thenReturn("bar");

assertEquals(
"DISTINCTCOUNT(foo)",
new PinotFunctionConverter()
.convert(
mockingExecutionContext,
buildFunction(QUERY_FUNCTION_DISTINCTCOUNT, column.toBuilder()),
buildFunction(QUERY_FUNCTION_DISTINCTCOUNT, column1.toBuilder()),
this.mockArgumentConverter));

assertEquals(
"CUSTOM_DC(foo)",
new PinotFunctionConverter(new PinotFunctionConverterConfig(null, "CUSTOM_DC"))
"DISTINCTCOUNT(bar)",
new PinotFunctionConverter()
.convert(
mockingExecutionContext,
buildFunction(QUERY_FUNCTION_DISTINCTCOUNT, column.toBuilder()),
buildFunction(QUERY_FUNCTION_DISTINCTCOUNT, column2.toBuilder()),
this.mockArgumentConverter));

PinotFunctionConverter converter =
new PinotFunctionConverter(
new PinotFunctionConverterConfig(
ConfigFactory.parseString("distinctCountAggOverrides = {foo=CUSTOM_DC, xyz=XYZ}")));
assertEquals(
"CUSTOM_DC(foo)",
converter.convert(
mockingExecutionContext,
buildFunction(QUERY_FUNCTION_DISTINCTCOUNT, column1.toBuilder()),
this.mockArgumentConverter));
assertEquals(
"DISTINCTCOUNT(bar)",
converter.convert(
mockingExecutionContext,
buildFunction(QUERY_FUNCTION_DISTINCTCOUNT, column2.toBuilder()),
this.mockArgumentConverter));
}

@Test
Expand Down

0 comments on commit 4525f0c

Please sign in to comment.