apache · tvalentyn · Dec 22, 2022 · Oct 26, 2022 · Oct 28, 2022 · Oct 31, 2022
diff --git a/sdks/python/apache_beam/testing/analyzers/README.md b/sdks/python/apache_beam/testing/analyzers/README.md
@@ -74,10 +74,61 @@ Sometimes, the change point found might be way back in time and could be irrelev
 reported only when it was observed in the last 7 runs from the current run,
 setting `num_runs_in_change_point_window=7` will achieve it.
 
-##  Register a test for performance alerts. 
+##  Register a test for performance alerts 
 
 If a new test needs to be registered for the performance alerting tool, please add the required test parameters to the
 config file.
 
-[//]: # (TODO : Add triaging section)
+## Triage performance alert issues
 
+All the performance/load tests metrics defined at [beam/.test-infra/jenkins](https://github.com/apache/beam/tree/master/.test-infra/jenkins) are imported to [Grafana dashboards](http://104.154.241.245/d/1/getting-started?orgId=1) for visualization. Please 
+find the alerted test dashboard to find a spike in the metric values.
+
+For example, for the below configuration,
+* test: `apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks`
+* metric_name: `mean_load_model_latency_milli_secs`
+
+Grafana dashboard can be found at http://104.154.241.245/d/ZpS8Uf44z/python-ml-runinference-benchmarks?orgId=1&viewPanel=7
+
+If the dashboard for a test is not found, you can use the 
+code below to generate a plot for the given test, metric_name. 
+
+If the performance/load test store the results in BigQuery using this [schema](https://github.com/apache/beam/blob/83679216cce2d52dbeb7e837f06ca1d57b31d509/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py#L66),
+then use the following code to fetch the metric_values for a `metric_name` for the last `30` runs and display a plot using matplotlib.
+
+**NOTE**: Install matplotlib and pandas using `pip install matplotlib pandas`.
+```
+import pandas as pd
+import matplotlib.pyplot as plt
+
+from apache_beam.testing.load_tests import load_test_metrics_utils
+from apache_beam.testing.load_tests.load_test_metrics_utils import BigQueryMetricsFetcher
+
+bq_project = 'apache-beam-testing'
+bq_dataset = '<bq-dataset-name>'
+bq_table = '<bq-table>'
+metric_name = '<perf-alerted-metric-name>'
+
+query = f"""
+      SELECT *
+      FROM {bq_project}.{bq_dataset}.{bq_table}
+      WHERE CONTAINS_SUBSTR(({load_test_metrics_utils.METRICS_TYPE_LABEL}), '{metric_name}')
+      ORDER BY {load_test_metrics_utils.SUBMIT_TIMESTAMP_LABEL} DESC
+      LIMIT 30
+    """
+
+big_query_metrics_fetcher = BigQueryMetricsFetcher()
+metric_data: pd.DataFrame = big_query_metrics_fetcher.fetch(query=query)
+# sort the data to view it in chronological order.
+metric_data.sort_values(
+      by=[load_test_metrics_utils.SUBMIT_TIMESTAMP_LABEL], inplace=True)
+
+metric_data.plot(x=load_test_metrics_utils.SUBMIT_TIMESTAMP_LABEL,
+                 y=load_test_metrics_utils.VALUE_LABEL)
+plt.show()
+```
+
+If you confirm there is a change in the pattern of the values for a test, find the timestamp of when that change happened
+and use that timestamp to find possible culprit commit. 
+
+If the performance alert is a `false positive`, close the issue as `Close as not planned`.
diff --git a/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py b/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py
@@ -51,7 +51,11 @@
 # TODO: Provide a better debugging tool for the user to visualize metrics.
 # For example, a link to dashboards or query to get recent data to analyze,etc.
 _ISSUE_DESCRIPTION_TEMPLATE = """
-  Performance change found in the test: `{}` for the metric: `{}`.
+  Performance change found in the
+  test: `{}` for the metric: `{}`.
+
+  For more information on how to triage the alerts, please look at
+   `Triage performance alert issues` section of the README.
 """
 _METRIC_INFO_TEMPLATE = "timestamp: {}, metric_value: `{}`"
 _AWAITING_TRIAGE_LABEL = 'awaiting triage'

diff --git a/sdks/python/apache_beam/testing/analyzers/tests_config.yaml b/sdks/python/apache_beam/testing/analyzers/tests_config.yaml
@@ -16,7 +16,7 @@
 #
 
 test_1:
-  test_name: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks_16
+  test_name: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks_22
   metrics_dataset: beam_run_inference
   metrics_table: torch_inference_imagenet_results_resnet152
   project: apache-beam-testing
@@ -25,7 +25,7 @@ test_1:
     - run-inference
   # TODO: Remove before merging.
   min_runs_between_change_points: 5
-  num_runs_in_change_point_window: 14
+  num_runs_in_change_point_window: 30
 
 test_2:
   test_name: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks