Skip to content

Commit

Permalink
ci: use duckdb instead of ibis to test interchange-only support
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli committed Nov 3, 2024
1 parent c28dbb9 commit 7e57f72
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 29 deletions.
6 changes: 4 additions & 2 deletions altair/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,9 @@ class SupportsGeoInterface(Protocol):


def is_data_type(obj: Any) -> TypeIs[DataType]:
return _is_pandas_dataframe(obj) or isinstance(
obj, (dict, DataFrameLike, SupportsGeoInterface, nw.DataFrame)
return isinstance(obj, (dict, SupportsGeoInterface)) or isinstance(
nw.from_native(obj, eager_or_interchange_only=True, strict=False),
nw.DataFrame,
)


Expand Down Expand Up @@ -413,6 +414,7 @@ def _data_to_csv_string(data: DataType) -> str:

def arrow_table_from_dfi_dataframe(dfi_df: DataFrameLike) -> pa.Table:
"""Convert a DataFrame Interchange Protocol compatible object to an Arrow Table."""
print(dfi_df)
import pyarrow as pa

# First check if the dataframe object has a method to convert to arrow.
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ all = [
dev = [
"hatch",
"ruff>=0.6.0",
"ibis-framework[polars]",
"duckdb>=1.0",
"ipython[kernel]",
"pandas>=1.1.3",
"pytest",
Expand Down
71 changes: 45 additions & 26 deletions tests/vegalite/v5/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from importlib.util import find_spec
from typing import TYPE_CHECKING

import ibis
import duckdb
import jsonschema
import narwhals.stable.v1 as nw
import pandas as pd
Expand Down Expand Up @@ -1607,51 +1607,70 @@ def test_polars_with_pandas_nor_pyarrow(monkeypatch: pytest.MonkeyPatch):
assert "numpy" not in sys.modules


@pytest.mark.skipif(
Version("1.5") > PANDAS_VERSION,
reason="A warning is thrown on old pandas versions",
)
@pytest.mark.xfail(
sys.platform == "win32", reason="Timezone database is not installed on Windows"
)
def test_ibis_with_date_32():
ibis.set_backend("polars")
df = pl.DataFrame(
def test_interchange_with_date_32():
# Test that objects which Narwhals only supports at the interchange
# level can be plotted when they contain date32 columns.
df = pl.DataFrame( # noqa: F841
{"a": [1, 2, 3], "b": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)]}
)
tbl = ibis.memtable(df)
result = alt.Chart(tbl).mark_line().encode(x="a", y="b").to_dict()
rel = duckdb.sql("select * from df")
result = alt.Chart(rel).mark_line().encode(x="a", y="b").to_dict()
assert next(iter(result["datasets"].values())) == [
{"a": 1, "b": "2020-01-01T00:00:00"},
{"a": 2, "b": "2020-01-02T00:00:00"},
{"a": 3, "b": "2020-01-03T00:00:00"},
]


@pytest.mark.skipif(
Version("1.5") > PANDAS_VERSION,
reason="A warning is thrown on old pandas versions",
)
@pytest.mark.xfail(
sys.platform == "win32", reason="Timezone database is not installed on Windows"
)
def test_ibis_with_vegafusion(monkeypatch: pytest.MonkeyPatch):
ibis.set_backend("polars")
df = pl.DataFrame(
def test_interchange_with_vegafusion(monkeypatch: pytest.MonkeyPatch):
# Test that objects which Narwhals only supports at the interchange
# level don't get converted to PyArrow unnecessarily when plotted
# with the vegafusion transformer.
# TODO: this test can be drastically simplified once there is some level of
# DuckDB support in VegaFusion, as it can then just be `alt.Chart(rel_df)`
# without DuckDBWithInterchangeSupport.
df = pl.DataFrame( # noqa: F841
{
"a": [1, 2, 3],
"b": [datetime(2020, 1, 1), datetime(2020, 1, 2), datetime(2020, 1, 3)],
}
)
tbl = ibis.memtable(df)
rel = duckdb.sql("select * from df")

class DuckDBWithInterchangeSupport:
"""
DuckDB doesn't (yet?) support the interchange protocol.
So, we create a DuckDB wrapper which defers to PyArrow's
implementation of the protocol.
"""

def __init__(self, rel: duckdb.DuckDBPyRelation) -> None:
self._rel = rel

def __dataframe__(self, allow_copy: bool = True) -> object:
return self._rel.to_arrow_table().__dataframe__()

rel_df = DuckDBWithInterchangeSupport(rel)
# "poison" `arrow_table_from_dfi_dataframe` to check that it does not get called
# if we use the vegafusion transformer
monkeypatch.setattr(
"altair.utils.data.arrow_table_from_dfi_dataframe", lambda x: 1 / 0
)
tbl = ibis.memtable(df)

# Narwhals doesn't fully support our custom DuckDBWithInterchangeSupport,
# so we need to overwrite `to_native`
def to_native(df, strict):
if isinstance(df, nw.DataFrame):
return rel_df
return df

monkeypatch.setattr("narwhals.stable.v1.to_native", to_native)

with alt.data_transformers.enable("vegafusion"):
result = alt.Chart(tbl).mark_line().encode(x="a", y="b").to_dict(format="vega")
result = (
alt.Chart(rel_df).mark_line().encode(x="a", y="b").to_dict(format="vega")
)
assert next(iter(result["data"]))["values"] == [
{"a": 1, "b": "2020-01-01T00:00:00.000"},
{"a": 2, "b": "2020-01-02T00:00:00.000"},
Expand Down

0 comments on commit 7e57f72

Please sign in to comment.