diff --git a/altair/utils/data.py b/altair/utils/data.py index 91ed54a51..89ec31f1f 100644 --- a/altair/utils/data.py +++ b/altair/utils/data.py @@ -65,8 +65,9 @@ class SupportsGeoInterface(Protocol): def is_data_type(obj: Any) -> TypeIs[DataType]: - return _is_pandas_dataframe(obj) or isinstance( - obj, (dict, DataFrameLike, SupportsGeoInterface, nw.DataFrame) + return isinstance(obj, (dict, SupportsGeoInterface)) or isinstance( + nw.from_native(obj, eager_or_interchange_only=True, strict=False), + nw.DataFrame, ) diff --git a/pyproject.toml b/pyproject.toml index 21c1a1342..5b5da88f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ all = [ dev = [ "hatch", "ruff>=0.6.0", - "ibis-framework[polars]", + "duckdb>=1.0", "ipython[kernel]", "pandas>=1.1.3", "pytest", diff --git a/tests/vegalite/v5/test_api.py b/tests/vegalite/v5/test_api.py index f6cd4ee6d..fc7266e39 100644 --- a/tests/vegalite/v5/test_api.py +++ b/tests/vegalite/v5/test_api.py @@ -15,7 +15,7 @@ from importlib.util import find_spec from typing import TYPE_CHECKING -import ibis +import duckdb import jsonschema import narwhals.stable.v1 as nw import pandas as pd @@ -26,7 +26,7 @@ import altair as alt from altair.utils.core import use_signature from altair.utils.schemapi import Optional, SchemaValidationError, Undefined -from tests import skip_requires_vl_convert, slow +from tests import skip_requires_pyarrow, skip_requires_vl_convert, slow if TYPE_CHECKING: from typing import Any @@ -1607,20 +1607,15 @@ def test_polars_with_pandas_nor_pyarrow(monkeypatch: pytest.MonkeyPatch): assert "numpy" not in sys.modules -@pytest.mark.skipif( - Version("1.5") > PANDAS_VERSION, - reason="A warning is thrown on old pandas versions", -) -@pytest.mark.xfail( - sys.platform == "win32", reason="Timezone database is not installed on Windows" -) -def test_ibis_with_date_32(): - ibis.set_backend("polars") - df = pl.DataFrame( +@skip_requires_pyarrow +def test_interchange_with_date_32(): + # Test that objects 
which Narwhals only supports at the interchange + # level can be plotted when they contain date32 columns. + df = pl.DataFrame( # noqa: F841 {"a": [1, 2, 3], "b": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)]} ) - tbl = ibis.memtable(df) - result = alt.Chart(tbl).mark_line().encode(x="a", y="b").to_dict() + rel = duckdb.sql("select * from df") + result = alt.Chart(rel).mark_line().encode(x="a", y="b").to_dict() assert next(iter(result["datasets"].values())) == [ {"a": 1, "b": "2020-01-01T00:00:00"}, {"a": 2, "b": "2020-01-02T00:00:00"}, @@ -1628,30 +1623,56 @@ def test_ibis_with_date_32(): ] -@pytest.mark.skipif( - Version("1.5") > PANDAS_VERSION, - reason="A warning is thrown on old pandas versions", -) -@pytest.mark.xfail( - sys.platform == "win32", reason="Timezone database is not installed on Windows" -) -def test_ibis_with_vegafusion(monkeypatch: pytest.MonkeyPatch): - ibis.set_backend("polars") - df = pl.DataFrame( +@skip_requires_pyarrow +def test_interchange_with_vegafusion(monkeypatch: pytest.MonkeyPatch): + # Test that objects which Narwhals only supports at the interchange + # level don't get converted to PyArrow unnecessarily when plotted + # with the vegafusion transformer. + # TODO: this test can be drastically simplified when some level of + # DuckDB support lands in VegaFusion, as it can then just be `alt.Chart(rel_df)` + # without DuckDBWithInterchangeSupport. + df = pl.DataFrame( # noqa: F841 { "a": [1, 2, 3], "b": [datetime(2020, 1, 1), datetime(2020, 1, 2), datetime(2020, 1, 3)], } ) - tbl = ibis.memtable(df) + rel = duckdb.sql("select * from df") + + class DuckDBWithInterchangeSupport: + """ + DuckDB doesn't (yet?) support the interchange protocol. + + So, we create a DuckDB wrapper which defers to PyArrow's + implementation of the protocol. 
+ """ + + def __init__(self, rel: duckdb.DuckDBPyRelation) -> None: + self._rel = rel + + def __dataframe__(self, allow_copy: bool = True) -> object: + return self._rel.to_arrow_table().__dataframe__() + + rel_df = DuckDBWithInterchangeSupport(rel) # "poison" `arrow_table_from_dfi_dataframe` to check that it does not get called # if we use the vegafusion transformer monkeypatch.setattr( "altair.utils.data.arrow_table_from_dfi_dataframe", lambda x: 1 / 0 ) - tbl = ibis.memtable(df) + + # Narwhals doesn't fully support our custom DuckDBWithInterchangeSupport, + # so we need to overwrite `to_native` + def to_native(df, strict): + if isinstance(df, nw.DataFrame): + return rel_df + return df + + monkeypatch.setattr("narwhals.stable.v1.to_native", to_native) + with alt.data_transformers.enable("vegafusion"): - result = alt.Chart(tbl).mark_line().encode(x="a", y="b").to_dict(format="vega") + result = ( + alt.Chart(rel_df).mark_line().encode(x="a", y="b").to_dict(format="vega") + ) assert next(iter(result["data"]))["values"] == [ {"a": 1, "b": "2020-01-01T00:00:00.000"}, {"a": 2, "b": "2020-01-02T00:00:00.000"},