Skip to content

Commit

Permalink
Fact Table Query Optimization (#1923)
Browse files Browse the repository at this point in the history
Co-authored-by: Luke Sonnet <[email protected]>
  • Loading branch information
jdorn and lukesonnet authored Jan 13, 2024
1 parent 004c1de commit 46d2c52
Show file tree
Hide file tree
Showing 62 changed files with 2,549 additions and 1,156 deletions.
14 changes: 14 additions & 0 deletions docs/docs/experimentation-analysis/fact-tables.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,20 @@ Keep in mind, these two things are answering slightly different questions.
`How much longer do people stay after viewing the experiment?` vs `How much longer is an average session that includes the experiment?`.
The first question is more direct and often a more strict test of your hypothesis, but it may not be worth the extra running time.

## Fact Table Query Optimization

GrowthBook Enterprise customers can enable Fact Table Query Optimization for faster, more efficient queries.

If multiple metrics from the same Fact Table are added to an experiment, they will be combined into a single SQL query. For data sources with usage-based billing, this can result in dramatic cost savings.

There are some restrictions that limit when this optimization can be performed:

- Ratio metrics where the numerator and denominator are part of different Fact Tables are always excluded from this optimization
- If `Ignore In-Progress Conversions` is set for an experiment, optimization is disabled for all metrics
- If you are using MySQL and a metric has percentile capping, it will be excluded from optimization

In all other cases, this optimization is enabled by default for all Enterprise customers. It can be disabled under **Settings -> General -> Experiment Settings**. When disabled, a separate SQL query will always be run for every individual metric.

## Migrating Existing Metrics to Fact Tables

Fact Tables are brand new to GrowthBook, first launching in October 2023. Eventually, we see Fact Tables completely replacing the existing way of defining metrics. Right now though, Fact Tables are still in early preview mode and there are some rough edges.
Expand Down
4 changes: 0 additions & 4 deletions packages/back-end/src/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -419,10 +419,6 @@ app.get(
experimentsController.getSnapshotWithDimension
);
app.post("/experiment/:id/snapshot", experimentsController.postSnapshot);
app.post(
"/experiment/:id/snapshot/:phase/preview",
experimentsController.previewManualSnapshot
);
app.post("/experiment/:id", experimentsController.postExperiment);
app.delete("/experiment/:id", experimentsController.deleteExperiment);
app.get("/experiment/:id/watchers", experimentsController.getWatchingUsers);
Expand Down
5 changes: 3 additions & 2 deletions packages/back-end/src/controllers/datasources.ts
Original file line number Diff line number Diff line change
Expand Up @@ -801,7 +801,7 @@ export async function postDimensionSlices(
queryId,
});

const queryRunner = new DimensionSlicesQueryRunner(model, integration);
const queryRunner = new DimensionSlicesQueryRunner(model, integration, org);
const outputmodel = await queryRunner.startAnalysis({
exposureQueryId: queryId,
lookbackDays: Number(lookbackDays) ?? 30,
Expand Down Expand Up @@ -839,7 +839,8 @@ export async function cancelDimensionSlices(

const queryRunner = new DimensionSlicesQueryRunner(
dimensionSlices,
integration
integration,
org
);
await queryRunner.cancelQueries();

Expand Down
67 changes: 9 additions & 58 deletions packages/back-end/src/controllers/experiments.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import {
getDefaultExperimentAnalysisSettings,
getExperimentMetricById,
getLinkedFeatureInfo,
getManualSnapshotData,
} from "../services/experiments";
import { MetricInterface, MetricStats } from "../../types/metric";
import {
Expand Down Expand Up @@ -1597,60 +1596,6 @@ export async function deleteExperiment(
});
}

/**
 * Request handler that builds manual snapshot data for one phase of an
 * experiment and returns it in the response without any visible persistence.
 *
 * Route params: `id` (experiment id) and `phase` (phase index as a string).
 * Request body: `users` (array of numbers) and `metrics` (map of key to
 * `MetricStats[]`), both forwarded unchanged to `getManualSnapshotData`.
 *
 * Responses:
 * - 404 when the experiment or the requested phase does not exist
 * - 200 with `{ status, snapshot }` on success
 * - 400 with the error message when building the snapshot data throws
 */
export async function previewManualSnapshot(
req: AuthRequest<
{
users: number[];
metrics: { [key: string]: MetricStats[] };
},
{ id: string; phase: string }
>,
res: Response
) {
const { id, phase } = req.params;
const { org } = getOrgFromReq(req);

// The lookup takes the caller's organization id as well as the experiment id.
const experiment = await getExperimentById(org.id, id);

if (!experiment) {
res.status(404).json({
status: 404,
message: "Experiment not found",
});
return;
}

// `phase` arrives as a string route param; a non-numeric value parses to NaN,
// which fails the lookup below and yields the same 404 as a missing phase.
const phaseIndex = parseInt(phase);
if (!experiment.phases[phaseIndex]) {
res.status(404).json({
status: 404,
message: "Phase not found",
});
return;
}

try {
const metricMap = await getMetricMap(org.id);

const data = await getManualSnapshotData(
experiment,
phaseIndex,
req.body.users,
req.body.metrics,
metricMap
);
res.status(200).json({
status: 200,
snapshot: data,
});
} catch (e) {
// Any failure while loading metrics or building the data is reported as a 400.
res.status(400).json({
status: 400,
message: e.message,
});
}
}

export async function cancelSnapshot(
req: AuthRequest<null, { id: string }>,
res: Response
Expand Down Expand Up @@ -1680,7 +1625,11 @@ export async function cancelSnapshot(
snapshot.organization,
snapshot.settings.datasourceId
);
const queryRunner = new ExperimentResultsQueryRunner(snapshot, integration);
const queryRunner = new ExperimentResultsQueryRunner(
snapshot,
integration,
org
);
await queryRunner.cancelQueries();
await deleteSnapshotById(org.id, snapshot.id);

Expand Down Expand Up @@ -2117,7 +2066,8 @@ export async function cancelPastExperiments(
);
const queryRunner = new PastExperimentsQueryRunner(
pastExperiments,
integration
integration,
org
);
await queryRunner.cancelQueries();

Expand Down Expand Up @@ -2213,7 +2163,8 @@ export async function postPastExperiments(
if (needsRun) {
const queryRunner = new PastExperimentsQueryRunner(
pastExperiments,
integration
integration,
org
);
pastExperiments = await queryRunner.startAnalysis({
from: start,
Expand Down
4 changes: 2 additions & 2 deletions packages/back-end/src/controllers/metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ export async function cancelMetricAnalysis(
org.id,
metric.datasource
);
const queryRunner = new MetricAnalysisQueryRunner(metric, integration);
const queryRunner = new MetricAnalysisQueryRunner(metric, integration, org);
await queryRunner.cancelQueries();

res.status(200).json({
Expand Down Expand Up @@ -226,7 +226,7 @@ export async function postMetricAnalysis(
try {
await refreshMetric(
metric,
org.id,
org,
req.organization?.settings?.metricAnalysisDays
);

Expand Down
6 changes: 3 additions & 3 deletions packages/back-end/src/controllers/reports.ts
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ export async function refreshReport(
report.args.datasource,
true
);
const queryRunner = new ReportQueryRunner(report, integration, useCache);
const queryRunner = new ReportQueryRunner(report, integration, org, useCache);

const updatedReport = await queryRunner.startAnalysis({
metricMap,
Expand Down Expand Up @@ -300,7 +300,7 @@ export async function putReport(
updatedReport.args.datasource,
true
);
const queryRunner = new ReportQueryRunner(updatedReport, integration);
const queryRunner = new ReportQueryRunner(updatedReport, integration, org);

await queryRunner.startAnalysis({
metricMap,
Expand Down Expand Up @@ -333,7 +333,7 @@ export async function cancelReport(
org.id,
report.args.datasource
);
const queryRunner = new ReportQueryRunner(report, integration);
const queryRunner = new ReportQueryRunner(report, integration, org);
await queryRunner.cancelQueries();

res.status(200).json({ status: 200 });
Expand Down
19 changes: 15 additions & 4 deletions packages/back-end/src/integrations/Athena.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,24 @@ export default class Athena extends SqlIntegration {
return `CAST(${col} as double)`;
}
percentileCapSelectClause(
capPercentile: number,
metricTable: string
values: {
valueCol: string;
outputCol: string;
percentile: number;
}[],
metricTable: string,
where: string = ""
): string {
return `
SELECT APPROX_PERCENTILE(value, ${capPercentile}) AS cap_value
SELECT
${values
.map(
(v) =>
`APPROX_PERCENTILE(${v.valueCol}, ${v.percentile}) AS ${v.outputCol}`
)
.join(",\n")}
FROM ${metricTable}
WHERE value IS NOT NULL
${where}
`;
}
getDefaultDatabase() {
Expand Down
28 changes: 19 additions & 9 deletions packages/back-end/src/integrations/BigQuery.ts
Original file line number Diff line number Diff line change
Expand Up @@ -151,17 +151,27 @@ export default class BigQuery extends SqlIntegration {
return `CAST(${column} as DATETIME)`;
}
percentileCapSelectClause(
capPercentile: number,
metricTable: string
values: {
valueCol: string;
outputCol: string;
percentile: number;
}[],
metricTable: string,
where: string = ""
): string {
return `
SELECT
APPROX_QUANTILES(value, 100000)[OFFSET(${Math.trunc(
100000 * capPercentile
)})] AS cap_value
FROM ${metricTable}
WHERE value IS NOT NULL
`;
SELECT
${values
.map(
(v) =>
`APPROX_QUANTILES(${v.valueCol}, 100000)[OFFSET(${Math.trunc(
100000 * v.percentile
)})] AS ${v.outputCol}`
)
.join(",\n")}
FROM ${metricTable}
${where}
`;
}
getDefaultDatabase() {
return this.params.projectId || "";
Expand Down
18 changes: 14 additions & 4 deletions packages/back-end/src/integrations/ClickHouse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,23 @@ export default class ClickHouse extends SqlIntegration {
return `toFloat64(${col})`;
}
percentileCapSelectClause(
capPercentile: number,
metricTable: string
values: {
valueCol: string;
outputCol: string;
percentile: number;
}[],
metricTable: string,
where: string = ""
): string {
return `
SELECT quantile(${capPercentile})(value) AS cap_value
SELECT
${values
.map(
(v) => `quantile(${v.percentile})(${v.valueCol}) AS ${v.outputCol}`
)
.join(",\n")}
FROM ${metricTable}
WHERE value IS NOT NULL
${where}
`;
}
getInformationSchemaWhereClause(): string {
Expand Down
20 changes: 15 additions & 5 deletions packages/back-end/src/integrations/Mssql.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,24 @@ export default class Mssql extends SqlIntegration {
return `CONVERT(VARCHAR(25), ${col}, 121)`;
}
percentileCapSelectClause(
capPercentile: number,
metricTable: string
values: {
valueCol: string;
outputCol: string;
percentile: number;
}[],
metricTable: string,
where: string = ""
): string {
return `
SELECT
APPROX_PERCENTILE_CONT(${capPercentile}) WITHIN GROUP (ORDER BY value) AS cap_value
SELECT
${values
.map(
(v) =>
`APPROX_PERCENTILE_CONT(${v.percentile}) WITHIN GROUP (ORDER BY ${v.valueCol}) AS ${v.outputCol}`
)
.join(",\n")}
FROM ${metricTable}
WHERE value IS NOT NULL
${where}
`;
}
getDefaultDatabase() {
Expand Down
30 changes: 22 additions & 8 deletions packages/back-end/src/integrations/Mysql.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,21 +72,35 @@ export default class Mysql extends SqlIntegration {
return `CAST(${col} AS DOUBLE)`;
}
percentileCapSelectClause(
capPercentile: number,
metricTable: string
values: {
valueCol: string;
outputCol: string;
percentile: number;
}[],
metricTable: string,
where: string = ""
): string {
if (values.length > 1) {
throw new Error(
"MySQL only supports one percentile capped metric at a time"
);
}

return `
SELECT DISTINCT FIRST_VALUE(value) OVER (
ORDER BY CASE WHEN p <= ${capPercentile} THEN p END DESC
) AS cap_value
SELECT DISTINCT FIRST_VALUE(${values[0].valueCol}) OVER (
ORDER BY CASE WHEN p <= ${values[0].percentile} THEN p END DESC
) AS ${values[0].outputCol}
FROM (
SELECT
value,
PERCENT_RANK() OVER (ORDER BY value) p
${values[0].valueCol},
PERCENT_RANK() OVER (ORDER BY ${values[0].valueCol}) p
FROM ${metricTable}
WHERE value IS NOT NULL
${where}
) t`;
}
/**
 * Always returns false for this integration.
 * NOTE(review): presumably callers use this flag to keep percentile-capped
 * metrics out of combined queries, since percentileCapSelectClause here
 * rejects more than one value at a time — confirm against the call sites.
 */
hasEfficientPercentile(): boolean {
return false;
}
getInformationSchemaWhereClause(): string {
if (!this.params.database)
throw new Error(
Expand Down
21 changes: 16 additions & 5 deletions packages/back-end/src/integrations/Presto.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,15 +104,26 @@ export default class Presto extends SqlIntegration {
ensureFloat(col: string): string {
return `CAST(${col} AS DOUBLE)`;
}

percentileCapSelectClause(
capPercentile: number,
metricTable: string
values: {
valueCol: string;
outputCol: string;
percentile: number;
}[],
metricTable: string,
where: string = ""
): string {
return `
SELECT
APPROX_PERCENTILE(value, ${capPercentile}) AS cap_value
SELECT
${values
.map(
(v) =>
`APPROX_PERCENTILE(${v.valueCol}, ${v.percentile}) AS ${v.outputCol}`
)
.join(",\n")}
FROM ${metricTable}
WHERE value IS NOT NULL
${where}
`;
}
getDefaultDatabase() {
Expand Down
Loading

1 comment on commit 46d2c52

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Deploy preview for docs ready!

✅ Preview
https://docs-i1z7p02h4-growthbook.vercel.app

Built with commit 46d2c52.
This pull request is being automatically deployed with vercel-action

Please sign in to comment.