Skip to content

Commit

Permalink
Add in better test.
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-dkurokawa committed Dec 22, 2024
1 parent 4528258 commit a1c461c
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 15 deletions.
17 changes: 15 additions & 2 deletions tests/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import unittest
from unittest import TestCase

import pandas as pd
import pydantic
from pydantic import BaseModel
from trulens.core._utils.pycompat import ReferenceType
Expand Down Expand Up @@ -225,7 +226,9 @@ class WithJSONTestCase(TestCase):
"""TestCase mixin class that adds JSON comparisons and golden expectation
handling."""

def load_golden(self, golden_path: Union[str, Path]) -> serial_utils.JSON:
def load_golden(
self, golden_path: Union[str, Path]
) -> Union[serial_utils.JSON, pd.DataFrame]:
"""Load the golden file `path` and return its contents.
Args:
Expand All @@ -240,6 +243,10 @@ def load_golden(self, golden_path: Union[str, Path]) -> serial_utils.JSON:
# Pick a deserializer based on the golden file's suffix(es).
loader = functools.partial(json.load)
elif ".yaml" in golden_path.suffixes or ".yml" in golden_path.suffixes:
loader = functools.partial(yaml.load, Loader=yaml.FullLoader)
elif ".csv" in golden_path.suffixes:
# CSV goldens are written by DataFrame.to_csv, so the first column is
# the index.
loader = functools.partial(pd.read_csv, index_col=0)
elif ".parquet" in golden_path.suffixes:
# NOTE(review): pd.read_parquet does not accept an `index_col` keyword
# (the index is preserved inside the parquet file itself); loading a
# .parquet golden will raise a TypeError here. The kwarg should be
# dropped — confirm and fix.
loader = functools.partial(pd.read_parquet, index_col=0)
else:
raise ValueError(f"Unknown file extension {golden_path}.")

Expand All @@ -250,7 +257,9 @@ def load_golden(self, golden_path: Union[str, Path]) -> serial_utils.JSON:
return loader(f)

def write_golden(
self, golden_path: Union[str, Path], data: serial_utils.JSON
self,
golden_path: Union[str, Path],
data: Union[serial_utils.JSON, pd.DataFrame],
) -> None:
"""If writing golden file is enabled, write the golden file `path` with
`data` and raise exception indicating so.
Expand All @@ -272,6 +281,10 @@ def write_golden(
# Pick a serializer based on the golden file's suffix.
writer = functools.partial(json.dump, indent=2, sort_keys=True)
elif golden_path.suffix == ".yaml":
writer = functools.partial(yaml.dump, sort_keys=True)
elif golden_path.suffix == ".csv":
# DataFrame goldens: delegate to pandas. The `(data, f)` signature
# mirrors how the json/yaml writers are invoked below.
writer = lambda data, f: data.to_csv(f)
elif golden_path.suffix == ".parquet":
# NOTE(review): DataFrame.to_parquet requires a binary file handle;
# if the file is opened in text mode (as is typical for the json/yaml
# branches), this will fail — confirm the open() mode for .parquet.
writer = lambda data, f: data.to_parquet(f)
else:
raise ValueError(f"Unknown file extension {golden_path.suffix}.")

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
,record,event_id,record_attributes,record_type,resource_attributes,start_timestamp,timestamp,trace
0,"{'name': 'respond_to_query', 'kind': 'SPAN_KIND_TRULENS', 'parent_span_id': '', 'status': 'STATUS_CODE_UNSET'}",11560964145073308497,{},EventRecordType.SPAN,"{'telemetry.sdk.language': 'python', 'telemetry.sdk.name': 'opentelemetry', 'telemetry.sdk.version': '1.28.2', 'service.name': 'trulens'}",2024-12-21 22:46:48.817734,2024-12-21 22:46:48.822087,"{'trace_id': '105357914249406038608753130269542497721', 'parent_id': '', 'span_id': '11560964145073308497'}"
1,"{'name': 'nested', 'kind': 'SPAN_KIND_TRULENS', 'parent_span_id': '11560964145073308497', 'status': 'STATUS_CODE_UNSET'}",956390419060041970,{'trulens.nested_attr1': 'value1'},EventRecordType.SPAN,"{'telemetry.sdk.language': 'python', 'telemetry.sdk.name': 'opentelemetry', 'telemetry.sdk.version': '1.28.2', 'service.name': 'trulens'}",2024-12-21 22:46:48.817764,2024-12-21 22:46:48.821018,"{'trace_id': '105357914249406038608753130269542497721', 'parent_id': '11560964145073308497', 'span_id': '956390419060041970'}"
2,"{'name': 'nested2', 'kind': 'SPAN_KIND_TRULENS', 'parent_span_id': '956390419060041970', 'status': 'STATUS_CODE_UNSET'}",7191373700275539380,"{'trulens.nested2_ret': 'nested2: nested3', 'trulens.nested2_args[0]': 'test'}",EventRecordType.SPAN,"{'telemetry.sdk.language': 'python', 'telemetry.sdk.name': 'opentelemetry', 'telemetry.sdk.version': '1.28.2', 'service.name': 'trulens'}",2024-12-21 22:46:48.817784,2024-12-21 22:46:48.819862,"{'trace_id': '105357914249406038608753130269542497721', 'parent_id': '956390419060041970', 'span_id': '7191373700275539380'}"
3,"{'name': 'nested3', 'kind': 'SPAN_KIND_TRULENS', 'parent_span_id': '7191373700275539380', 'status': 'STATUS_CODE_UNSET'}",11561410003717533750,"{'trulens.nested3_ret': 'nested3', 'trulens.special.nested3_ret': 'nested3', 'trulens.selector_name': 'special', 'trulens.cows': 'moo', 'trulens.special.cows': 'moo'}",EventRecordType.SPAN,"{'telemetry.sdk.language': 'python', 'telemetry.sdk.name': 'opentelemetry', 'telemetry.sdk.version': '1.28.2', 'service.name': 'trulens'}",2024-12-21 22:46:48.817801,2024-12-21 22:46:48.817858,"{'trace_id': '105357914249406038608753130269542497721', 'parent_id': '7191373700275539380', 'span_id': '11561410003717533750'}"
4,"{'name': 'respond_to_query', 'kind': 'SPAN_KIND_TRULENS', 'parent_span_id': '', 'status': 'STATUS_CODE_UNSET'}",15712161957924150536,{},EventRecordType.SPAN,"{'telemetry.sdk.language': 'python', 'telemetry.sdk.name': 'opentelemetry', 'telemetry.sdk.version': '1.28.2', 'service.name': 'trulens'}",2024-12-21 22:46:48.823641,2024-12-21 22:46:48.827118,"{'trace_id': '311544861227161086009958871975634544174', 'parent_id': '', 'span_id': '15712161957924150536'}"
5,"{'name': 'nested', 'kind': 'SPAN_KIND_TRULENS', 'parent_span_id': '15712161957924150536', 'status': 'STATUS_CODE_UNSET'}",14875482935475817656,{'trulens.nested_attr1': 'value1'},EventRecordType.SPAN,"{'telemetry.sdk.language': 'python', 'telemetry.sdk.name': 'opentelemetry', 'telemetry.sdk.version': '1.28.2', 'service.name': 'trulens'}",2024-12-21 22:46:48.823666,2024-12-21 22:46:48.826205,"{'trace_id': '311544861227161086009958871975634544174', 'parent_id': '15712161957924150536', 'span_id': '14875482935475817656'}"
6,"{'name': 'nested2', 'kind': 'SPAN_KIND_TRULENS', 'parent_span_id': '14875482935475817656', 'status': 'STATUS_CODE_UNSET'}",7500502012444675242,"{'trulens.nested2_ret': 'nested2: ', 'trulens.nested2_args[0]': 'throw'}",EventRecordType.SPAN,"{'telemetry.sdk.language': 'python', 'telemetry.sdk.name': 'opentelemetry', 'telemetry.sdk.version': '1.28.2', 'service.name': 'trulens'}",2024-12-21 22:46:48.823683,2024-12-21 22:46:48.825310,"{'trace_id': '311544861227161086009958871975634544174', 'parent_id': '14875482935475817656', 'span_id': '7500502012444675242'}"
7,"{'name': 'nested3', 'kind': 'SPAN_KIND_TRULENS', 'parent_span_id': '7500502012444675242', 'status': 'STATUS_CODE_ERROR'}",9125451447180209633,"{'trulens.nested3_ex': ['nested3 exception'], 'trulens.special.nested3_ex': ['nested3 exception'], 'trulens.selector_name': 'special', 'trulens.cows': 'moo', 'trulens.special.cows': 'moo'}",EventRecordType.SPAN,"{'telemetry.sdk.language': 'python', 'telemetry.sdk.name': 'opentelemetry', 'telemetry.sdk.version': '1.28.2', 'service.name': 'trulens'}",2024-12-21 22:46:48.823710,2024-12-21 22:46:48.824262,"{'trace_id': '311544861227161086009958871975634544174', 'parent_id': '7500502012444675242', 'span_id': '9125451447180209633'}"
153 changes: 140 additions & 13 deletions tests/unit/test_otel_instrument.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@
Tests for OTEL instrument decorator.
"""

from unittest import TestCase
from typing import Any, Dict
from unittest import main

import pandas as pd
import sqlalchemy as sa
from trulens.apps.custom import TruCustomApp
from trulens.core.session import TruSession
from trulens.experimental.otel_tracing.core.init import init
from trulens.experimental.otel_tracing.core.instrument import instrument

from tests.test import TruTestCase


class _TestApp:
@instrument()
Expand Down Expand Up @@ -50,26 +54,149 @@ def nested3(self, query: str) -> str:
return "nested3"


class TestOtelInstrument(TestCase):
class TestOtelInstrument(TruTestCase):
def setUp(self):
# No per-test fixtures needed; each test configures its own TruSession.
# NOTE(review): if TruTestCase defines setUp behavior, this override
# silently skips it — consider calling super().setUp(); confirm.
pass

def test_deterministic_app_id(self):
session = TruSession()
session.experimental_enable_feature("otel_tracing")
session.reset_database()
init(session, debug=True)
@staticmethod
def _get_events() -> pd.DataFrame:
    """Read every recorded event from the session database.

    Returns:
        DataFrame of all `Event` rows, ordered by span start time.
    """
    session = TruSession()
    database = session.connector.db
    event_orm = database.orm.Event
    query = sa.select(event_orm).order_by(event_orm.start_timestamp)
    with database.session.begin() as sa_session:
        return pd.read_sql(query, sa_session.bind)

@staticmethod
def _convert_column_types(df: pd.DataFrame):
df["event_id"] = df["event_id"].apply(str)
df["record_type"] = df["record_type"].apply(lambda x: eval(x))
df["start_timestamp"] = df["start_timestamp"].apply(pd.Timestamp)
df["timestamp"] = df["timestamp"].apply(pd.Timestamp)
for json_column in [
"record",
"record_attributes",
"resource_attributes",
"trace",
]:
df[json_column] = df[json_column].apply(lambda x: eval(x))

def _compare_dfs_accounting_for_ids_and_timestamps(
    self, expected: pd.DataFrame, actual: pd.DataFrame
):
    """Assert that two event DataFrames are equal up to ids and timestamps.

    Ids may differ between the two frames as long as they correspond
    one-to-one (a bijection), and timestamps may differ as long as their
    relative ordering is the same in both frames.

    Args:
        expected: expected results
        actual: actual results
    """
    self.assertEqual(len(expected), len(actual))
    self.assertListEqual(list(expected.columns), list(actual.columns))
    id_mapping: Dict[str, str] = {}
    timestamp_mapping: Dict[pd.Timestamp, pd.Timestamp] = {}
    # Cell-by-cell comparison, accumulating id/timestamp correspondences.
    for row_idx in range(len(expected)):
        for column in expected.columns:
            self._compare_entity(
                expected.iloc[row_idx][column],
                actual.iloc[row_idx][column],
                id_mapping,
                timestamp_mapping,
                is_id=column.endswith("_id"),
                locator=f"df.iloc[{row_idx}][{column}]",
            )
    # Distinct expected ids must map to distinct actual ids.
    self.assertEqual(
        len(set(id_mapping.values())),
        len(id_mapping),
        "Ids are not a bijection!",
    )
    # Walking expected timestamps in increasing order, the corresponding
    # actual timestamps must strictly increase as well.
    previous = None
    for expected_ts in sorted(timestamp_mapping):
        mapped_ts = timestamp_mapping[expected_ts]
        if previous is not None:
            self.assertLess(
                previous,
                mapped_ts,
                "Timestamps are not in the same order!",
            )
        previous = mapped_ts

def _compare_entity(
    self,
    expected: Any,
    actual: Any,
    id_mapping: Dict[str, str],
    timestamp_mapping: Dict[pd.Timestamp, pd.Timestamp],
    is_id: bool,
    locator: str,
):
    """Recursively compare two values, allowing ids and timestamps to
    differ as long as they are consistent with previously seen pairs.

    Args:
        expected: expected value.
        actual: actual value.
        id_mapping: expected-id -> actual-id pairs seen so far (updated).
        timestamp_mapping: expected -> actual timestamps seen so far
            (updated).
        is_id: whether the values being compared are ids.
        locator: human-readable path to the values, used in failure
            messages.
    """
    self.assertEqual(
        type(expected), type(actual), f"Types of {locator} do not match!"
    )
    if is_id:
        self.assertEqual(
            type(expected), str, f"Type of id {locator} is not a string!"
        )
        # First sighting fixes the mapping; later sightings must agree.
        if expected not in id_mapping:
            id_mapping[expected] = actual
        self.assertEqual(
            id_mapping[expected],
            actual,
            f"Ids of {locator} are not consistent!",
        )
    elif isinstance(expected, dict):
        self.assertEqual(
            expected.keys(),
            actual.keys(),
            f"Keys of {locator} do not match!",
        )
        for k in expected.keys():
            self._compare_entity(
                expected[k],
                actual[k],
                id_mapping,
                timestamp_mapping,
                is_id=k.endswith("_id"),
                # BUG FIX: was f"{locator}[k]" — the literal letter "k"
                # instead of the key value — which made failure messages
                # for nested dicts useless.
                locator=f"{locator}[{k}]",
            )
    elif isinstance(expected, pd.Timestamp):
        # Same first-sighting rule as for ids.
        if expected not in timestamp_mapping:
            timestamp_mapping[expected] = actual
        self.assertEqual(
            timestamp_mapping[expected],
            actual,
            f"Timestamps of {locator} are not consistent!",
        )
    else:
        self.assertEqual(expected, actual, f"{locator} does not match!")

def test_deterministic_app_id(self):
# End-to-end check: run the instrumented app twice (one success, one
# error path), then compare all recorded OTEL events against the CSV
# golden file, tolerating differing-but-consistent ids and timestamps.
# Set up.
tru_session = TruSession()
tru_session.experimental_enable_feature("otel_tracing")
tru_session.reset_database()
init(tru_session, debug=True)
# Create and run app.
test_app = _TestApp()
custom_app = TruCustomApp(test_app)

# NOTE(review): the scraped diff interleaves removed ("as recording")
# and added context-manager lines here; only one of each pair belongs
# in the final file.
with custom_app as recording:
with custom_app:
    test_app.respond_to_query("test")

with custom_app as recording:
with custom_app:
    test_app.respond_to_query("throw")

print(recording)
# Compare results to expected.
GOLDEN_FILENAME = "tests/unit/static/golden/test_otel_instrument__test_deterministic_app_id.csv"
actual = self._get_events()
# Two app invocations produce 4 spans each — matches the 8 golden rows.
self.assertEqual(len(actual), 8)
# write_golden presumably only persists (and raises) when golden
# regeneration is enabled — confirm against the WithJSONTestCase mixin.
self.write_golden(GOLDEN_FILENAME, actual)
expected = self.load_golden(GOLDEN_FILENAME)
self._convert_column_types(expected)
self._compare_dfs_accounting_for_ids_and_timestamps(expected, actual)


if __name__ == "__main__":
Expand Down

0 comments on commit a1c461c

Please sign in to comment.