Fact Table Query Optimization (#1923)

Co-authored-by: Luke Sonnet <[email protected]>
growthbook · Jan 13, 2024 · 46d2c52 · 46d2c52 · github-actions · Jan 13, 2024
1 parent 004c1de
commit 46d2c52
Show file tree

Hide file tree

Showing 62 changed files with 2,549 additions and 1,156 deletions.
diff --git a/docs/docs/experimentation-analysis/fact-tables.mdx b/docs/docs/experimentation-analysis/fact-tables.mdx
@@ -161,6 +161,20 @@ Keep in mind, these two things are answering slightly different questions.
 `How much longer do people stay after viewing the experiment?` vs `How much longer is an average session that includes the experiment?`.
 The first question is more direct and often a more strict test of your hypothesis, but it may not be worth the extra running time.
 
+## Fact Table Query Optimization
+
+GrowthBook Enterprise customers can enable Fact Table Query Optimization for faster, more efficient queries.
+
+If multiple metrics from the same Fact Table are added to an experiment, they will be combined into a single SQL query. For data sources with usage-based billing, this can result in dramatic cost savings.
+
+There are some restrictions that limit when this optimization can be performed:
+
+- Ratio metrics where the numerator and denominator are part of different Fact Tables are always excluded from this optimization
+- If `Ignore In-Progress Conversions` is set for an experiment, optimization is disabled for all metrics
+- If you are using MySQL and a metric has percentile capping, it will be excluded from optimization
+
+In all other cases, this optimization is enabled by default for all Enterprise customers. It can be disabled under **Settings -> General -> Experiment Settings**. When disabled, a separate SQL query will always be run for every individual metric.
+
 ## Migrating Existing Metrics to Fact Tables
 
 Fact Tables are brand new to GrowthBook, first launching in October 2023. Eventually, we see Fact Tables completely replacing the existing way of defining metrics. Right now though, Fact Tables are still in early preview mode and there are some rough edges.

diff --git a/packages/back-end/src/app.ts b/packages/back-end/src/app.ts
@@ -419,10 +419,6 @@ app.get(
   experimentsController.getSnapshotWithDimension
 );
 app.post("/experiment/:id/snapshot", experimentsController.postSnapshot);
-app.post(
-  "/experiment/:id/snapshot/:phase/preview",
-  experimentsController.previewManualSnapshot
-);
 app.post("/experiment/:id", experimentsController.postExperiment);
 app.delete("/experiment/:id", experimentsController.deleteExperiment);
 app.get("/experiment/:id/watchers", experimentsController.getWatchingUsers);

diff --git a/packages/back-end/src/controllers/datasources.ts b/packages/back-end/src/controllers/datasources.ts
@@ -801,7 +801,7 @@ export async function postDimensionSlices(
     queryId,
   });
 
-  const queryRunner = new DimensionSlicesQueryRunner(model, integration);
+  const queryRunner = new DimensionSlicesQueryRunner(model, integration, org);
   const outputmodel = await queryRunner.startAnalysis({
     exposureQueryId: queryId,
     lookbackDays: Number(lookbackDays) ?? 30,
@@ -839,7 +839,8 @@ export async function cancelDimensionSlices(
 
   const queryRunner = new DimensionSlicesQueryRunner(
     dimensionSlices,
-    integration
+    integration,
+    org
   );
   await queryRunner.cancelQueries();
 

diff --git a/packages/back-end/src/controllers/experiments.ts b/packages/back-end/src/controllers/experiments.ts
@@ -18,7 +18,6 @@ import {
   getDefaultExperimentAnalysisSettings,
   getExperimentMetricById,
   getLinkedFeatureInfo,
-  getManualSnapshotData,
 } from "../services/experiments";
 import { MetricInterface, MetricStats } from "../../types/metric";
 import {
@@ -1597,60 +1596,6 @@ export async function deleteExperiment(
   });
 }
 
-export async function previewManualSnapshot(
-  req: AuthRequest<
-    {
-      users: number[];
-      metrics: { [key: string]: MetricStats[] };
-    },
-    { id: string; phase: string }
-  >,
-  res: Response
-) {
-  const { id, phase } = req.params;
-  const { org } = getOrgFromReq(req);
-
-  const experiment = await getExperimentById(org.id, id);
-
-  if (!experiment) {
-    res.status(404).json({
-      status: 404,
-      message: "Experiment not found",
-    });
-    return;
-  }
-
-  const phaseIndex = parseInt(phase);
-  if (!experiment.phases[phaseIndex]) {
-    res.status(404).json({
-      status: 404,
-      message: "Phase not found",
-    });
-    return;
-  }
-
-  try {
-    const metricMap = await getMetricMap(org.id);
-
-    const data = await getManualSnapshotData(
-      experiment,
-      phaseIndex,
-      req.body.users,
-      req.body.metrics,
-      metricMap
-    );
-    res.status(200).json({
-      status: 200,
-      snapshot: data,
-    });
-  } catch (e) {
-    res.status(400).json({
-      status: 400,
-      message: e.message,
-    });
-  }
-}
-
 export async function cancelSnapshot(
   req: AuthRequest<null, { id: string }>,
   res: Response
@@ -1680,7 +1625,11 @@ export async function cancelSnapshot(
     snapshot.organization,
     snapshot.settings.datasourceId
   );
-  const queryRunner = new ExperimentResultsQueryRunner(snapshot, integration);
+  const queryRunner = new ExperimentResultsQueryRunner(
+    snapshot,
+    integration,
+    org
+  );
   await queryRunner.cancelQueries();
   await deleteSnapshotById(org.id, snapshot.id);
 
@@ -2117,7 +2066,8 @@ export async function cancelPastExperiments(
   );
   const queryRunner = new PastExperimentsQueryRunner(
     pastExperiments,
-    integration
+    integration,
+    org
   );
   await queryRunner.cancelQueries();
 
@@ -2213,7 +2163,8 @@ export async function postPastExperiments(
   if (needsRun) {
     const queryRunner = new PastExperimentsQueryRunner(
       pastExperiments,
-      integration
+      integration,
+      org
     );
     pastExperiments = await queryRunner.startAnalysis({
       from: start,

diff --git a/packages/back-end/src/controllers/metrics.ts b/packages/back-end/src/controllers/metrics.ts
@@ -194,7 +194,7 @@ export async function cancelMetricAnalysis(
     org.id,
     metric.datasource
   );
-  const queryRunner = new MetricAnalysisQueryRunner(metric, integration);
+  const queryRunner = new MetricAnalysisQueryRunner(metric, integration, org);
   await queryRunner.cancelQueries();
 
   res.status(200).json({
@@ -226,7 +226,7 @@ export async function postMetricAnalysis(
   try {
     await refreshMetric(
       metric,
-      org.id,
+      org,
       req.organization?.settings?.metricAnalysisDays
     );
 

diff --git a/packages/back-end/src/controllers/reports.ts b/packages/back-end/src/controllers/reports.ts
@@ -224,7 +224,7 @@ export async function refreshReport(
     report.args.datasource,
     true
   );
-  const queryRunner = new ReportQueryRunner(report, integration, useCache);
+  const queryRunner = new ReportQueryRunner(report, integration, org, useCache);
 
   const updatedReport = await queryRunner.startAnalysis({
     metricMap,
@@ -300,7 +300,7 @@ export async function putReport(
       updatedReport.args.datasource,
       true
     );
-    const queryRunner = new ReportQueryRunner(updatedReport, integration);
+    const queryRunner = new ReportQueryRunner(updatedReport, integration, org);
 
     await queryRunner.startAnalysis({
       metricMap,
@@ -333,7 +333,7 @@ export async function cancelReport(
     org.id,
     report.args.datasource
   );
-  const queryRunner = new ReportQueryRunner(report, integration);
+  const queryRunner = new ReportQueryRunner(report, integration, org);
   await queryRunner.cancelQueries();
 
   res.status(200).json({ status: 200 });

diff --git a/packages/back-end/src/integrations/Athena.ts b/packages/back-end/src/integrations/Athena.ts
@@ -54,13 +54,24 @@ export default class Athena extends SqlIntegration {
     return `CAST(${col} as double)`;
   }
   percentileCapSelectClause(
-    capPercentile: number,
-    metricTable: string
+    values: {
+      valueCol: string;
+      outputCol: string;
+      percentile: number;
+    }[],
+    metricTable: string,
+    where: string = ""
   ): string {
     return `
-      SELECT APPROX_PERCENTILE(value, ${capPercentile}) AS cap_value
+    SELECT
+      ${values
+        .map(
+          (v) =>
+            `APPROX_PERCENTILE(${v.valueCol}, ${v.percentile}) AS ${v.outputCol}`
+        )
+        .join(",\n")}
       FROM ${metricTable}
-      WHERE value IS NOT NULL
+      ${where}
     `;
   }
   getDefaultDatabase() {

diff --git a/packages/back-end/src/integrations/BigQuery.ts b/packages/back-end/src/integrations/BigQuery.ts
@@ -151,17 +151,27 @@ export default class BigQuery extends SqlIntegration {
     return `CAST(${column} as DATETIME)`;
   }
   percentileCapSelectClause(
-    capPercentile: number,
-    metricTable: string
+    values: {
+      valueCol: string;
+      outputCol: string;
+      percentile: number;
+    }[],
+    metricTable: string,
+    where: string = ""
   ): string {
     return `
-    SELECT 
-      APPROX_QUANTILES(value, 100000)[OFFSET(${Math.trunc(
-        100000 * capPercentile
-      )})] AS cap_value
-    FROM ${metricTable}
-    WHERE value IS NOT NULL
-  `;
+    SELECT
+      ${values
+        .map(
+          (v) =>
+            `APPROX_QUANTILES(${v.valueCol}, 100000)[OFFSET(${Math.trunc(
+              100000 * v.percentile
+            )})] AS ${v.outputCol}`
+        )
+        .join(",\n")}
+      FROM ${metricTable}
+      ${where}
+    `;
   }
   getDefaultDatabase() {
     return this.params.projectId || "";

diff --git a/packages/back-end/src/integrations/ClickHouse.ts b/packages/back-end/src/integrations/ClickHouse.ts
@@ -87,13 +87,23 @@ export default class ClickHouse extends SqlIntegration {
     return `toFloat64(${col})`;
   }
   percentileCapSelectClause(
-    capPercentile: number,
-    metricTable: string
+    values: {
+      valueCol: string;
+      outputCol: string;
+      percentile: number;
+    }[],
+    metricTable: string,
+    where: string = ""
   ): string {
     return `
-      SELECT quantile(${capPercentile})(value) AS cap_value
+    SELECT
+      ${values
+        .map(
+          (v) => `quantile(${v.percentile})(${v.valueCol}) AS ${v.outputCol}`
+        )
+        .join(",\n")}
       FROM ${metricTable}
-      WHERE value IS NOT NULL
+      ${where}
     `;
   }
   getInformationSchemaWhereClause(): string {

diff --git a/packages/back-end/src/integrations/Mssql.ts b/packages/back-end/src/integrations/Mssql.ts
@@ -66,14 +66,24 @@ export default class Mssql extends SqlIntegration {
     return `CONVERT(VARCHAR(25), ${col}, 121)`;
   }
   percentileCapSelectClause(
-    capPercentile: number,
-    metricTable: string
+    values: {
+      valueCol: string;
+      outputCol: string;
+      percentile: number;
+    }[],
+    metricTable: string,
+    where: string = ""
   ): string {
     return `
-      SELECT 
-        APPROX_PERCENTILE_CONT(${capPercentile}) WITHIN GROUP (ORDER BY value) AS cap_value
+    SELECT
+      ${values
+        .map(
+          (v) =>
+            `APPROX_PERCENTILE_CONT(${v.percentile}) WITHIN GROUP (ORDER BY ${v.valueCol}) AS ${v.outputCol}`
+        )
+        .join(",\n")}
       FROM ${metricTable}
-      WHERE value IS NOT NULL
+      ${where}
     `;
   }
   getDefaultDatabase() {

diff --git a/packages/back-end/src/integrations/Mysql.ts b/packages/back-end/src/integrations/Mysql.ts
@@ -72,21 +72,35 @@ export default class Mysql extends SqlIntegration {
     return `CAST(${col} AS DOUBLE)`;
   }
   percentileCapSelectClause(
-    capPercentile: number,
-    metricTable: string
+    values: {
+      valueCol: string;
+      outputCol: string;
+      percentile: number;
+    }[],
+    metricTable: string,
+    where: string = ""
   ): string {
+    if (values.length > 1) {
+      throw new Error(
+        "MySQL only supports one percentile capped metric at a time"
+      );
+    }
+
     return `
-    SELECT DISTINCT FIRST_VALUE(value) OVER (
-      ORDER BY CASE WHEN p <= ${capPercentile} THEN p END DESC
-    ) AS cap_value
+    SELECT DISTINCT FIRST_VALUE(${values[0].valueCol}) OVER (
+      ORDER BY CASE WHEN p <= ${values[0].percentile} THEN p END DESC
+    ) AS ${values[0].outputCol}
     FROM (
       SELECT
-        value,
-        PERCENT_RANK() OVER (ORDER BY value) p
+        ${values[0].valueCol},
+        PERCENT_RANK() OVER (ORDER BY ${values[0].valueCol}) p
       FROM ${metricTable}
-      WHERE value IS NOT NULL
+      ${where}
     ) t`;
   }
+  hasEfficientPercentile(): boolean {
+    return false;
+  }
   getInformationSchemaWhereClause(): string {
     if (!this.params.database)
       throw new Error(

diff --git a/packages/back-end/src/integrations/Presto.ts b/packages/back-end/src/integrations/Presto.ts
@@ -104,15 +104,26 @@ export default class Presto extends SqlIntegration {
   ensureFloat(col: string): string {
     return `CAST(${col} AS DOUBLE)`;
   }
+
   percentileCapSelectClause(
-    capPercentile: number,
-    metricTable: string
+    values: {
+      valueCol: string;
+      outputCol: string;
+      percentile: number;
+    }[],
+    metricTable: string,
+    where: string = ""
   ): string {
     return `
-      SELECT 
-        APPROX_PERCENTILE(value, ${capPercentile}) AS cap_value
+    SELECT
+      ${values
+        .map(
+          (v) =>
+            `APPROX_PERCENTILE(${v.valueCol}, ${v.percentile}) AS ${v.outputCol}`
+        )
+        .join(",\n")}
       FROM ${metricTable}
-      WHERE value IS NOT NULL
+      ${where}
     `;
   }
   getDefaultDatabase() {