diff --git a/src/safeds/data/tabular/containers/_experimental_lazy_column.py b/src/safeds/data/tabular/containers/_experimental_lazy_column.py
new file mode 100644
index 000000000..473ecfe64
--- /dev/null
+++ b/src/safeds/data/tabular/containers/_experimental_lazy_column.py
@@ -0,0 +1,10 @@
+# TODO: polars expressions get optimized first, before being applied. For further performance improvements (if needed),
+# we should mirror this when transitioning from a vectorized row to a cell.
+
+from abc import ABC
+
+from ._experimental_polars_cell import ExperimentalPolarsCell
+
+
+class _LazyColumn(ExperimentalPolarsCell, ABC):
+    pass
diff --git a/src/safeds/data/tabular/containers/_experimental_polars_cell.py b/src/safeds/data/tabular/containers/_experimental_polars_cell.py
index 4b67734a3..48a5368a7 100644
--- a/src/safeds/data/tabular/containers/_experimental_polars_cell.py
+++ b/src/safeds/data/tabular/containers/_experimental_polars_cell.py
@@ -1,10 +1,122 @@
 from __future__ import annotations
 
-from abc import ABC
-from typing import Generic, TypeVar
+from abc import ABC, abstractmethod
+from typing import Any, Generic, TypeVar
 
 T = TypeVar("T")
+P = TypeVar("P")
+R = TypeVar("R")
 
 
 class ExperimentalPolarsCell(ABC, Generic[T]):
-    pass
+    """A cell is a single value in a table."""
+
+    # ------------------------------------------------------------------------------------------------------------------
+    # Dunder methods
+    # ------------------------------------------------------------------------------------------------------------------
+
+    # "Boolean" operators (actually bitwise) -----------------------------------
+
+    @abstractmethod
+    def __invert__(self) -> ExperimentalPolarsCell[bool]: ...
+
+    @abstractmethod
+    def __and__(self, other: bool | ExperimentalPolarsCell[bool]) -> ExperimentalPolarsCell[bool]: ...
+
+    @abstractmethod
+    def __rand__(self, other: bool | ExperimentalPolarsCell[bool]) -> ExperimentalPolarsCell[bool]: ...
+
+    @abstractmethod
+    def __or__(self, other: bool | ExperimentalPolarsCell[bool]) -> ExperimentalPolarsCell[bool]: ...
+
+    @abstractmethod
+    def __ror__(self, other: bool | ExperimentalPolarsCell[bool]) -> ExperimentalPolarsCell[bool]: ...
+
+    @abstractmethod
+    def __xor__(self, other: bool | ExperimentalPolarsCell[bool]) -> ExperimentalPolarsCell[bool]: ...
+
+    @abstractmethod
+    def __rxor__(self, other: bool | ExperimentalPolarsCell[bool]) -> ExperimentalPolarsCell[bool]: ...
+
+    # Comparison ---------------------------------------------------------------
+
+    @abstractmethod
+    def __eq__(self, other: object) -> ExperimentalPolarsCell[bool]:  # type: ignore[override]
+        ...
+
+    @abstractmethod
+    def __ge__(self, other: Any) -> ExperimentalPolarsCell[bool]: ...
+
+    @abstractmethod
+    def __gt__(self, other: Any) -> ExperimentalPolarsCell[bool]: ...
+
+    @abstractmethod
+    def __le__(self, other: Any) -> ExperimentalPolarsCell[bool]: ...
+
+    @abstractmethod
+    def __lt__(self, other: Any) -> ExperimentalPolarsCell[bool]: ...
+
+    @abstractmethod
+    def __ne__(self, other: object) -> ExperimentalPolarsCell[bool]:  # type: ignore[override]
+        ...
+
+    # Numeric operators --------------------------------------------------------
+
+    @abstractmethod
+    def __abs__(self) -> ExperimentalPolarsCell[R]: ...
+
+    @abstractmethod
+    def __neg__(self) -> ExperimentalPolarsCell[R]: ...
+
+    @abstractmethod
+    def __pos__(self) -> ExperimentalPolarsCell[R]: ...
+
+    @abstractmethod
+    def __add__(self, other: Any) -> ExperimentalPolarsCell[R]: ...
+
+    @abstractmethod
+    def __radd__(self, other: Any) -> ExperimentalPolarsCell[R]: ...
+
+    @abstractmethod
+    def __floordiv__(self, other: Any) -> ExperimentalPolarsCell[R]: ...
+
+    @abstractmethod
+    def __rfloordiv__(self, other: Any) -> ExperimentalPolarsCell[R]: ...
+
+    @abstractmethod
+    def __mod__(self, other: Any) -> ExperimentalPolarsCell[R]: ...
+
+    @abstractmethod
+    def __rmod__(self, other: Any) -> ExperimentalPolarsCell[R]: ...
+
+    @abstractmethod
+    def __mul__(self, other: Any) -> ExperimentalPolarsCell[R]: ...
+
+    @abstractmethod
+    def __rmul__(self, other: Any) -> ExperimentalPolarsCell[R]: ...
+
+    @abstractmethod
+    def __pow__(self, other: float | ExperimentalPolarsCell[P]) -> ExperimentalPolarsCell[R]: ...
+
+    @abstractmethod
+    def __rpow__(self, other: float | ExperimentalPolarsCell[P]) -> ExperimentalPolarsCell[R]: ...
+
+    @abstractmethod
+    def __sub__(self, other: Any) -> ExperimentalPolarsCell[R]: ...
+
+    @abstractmethod
+    def __rsub__(self, other: Any) -> ExperimentalPolarsCell[R]: ...
+
+    @abstractmethod
+    def __truediv__(self, other: Any) -> ExperimentalPolarsCell[R]: ...
+
+    @abstractmethod
+    def __rtruediv__(self, other: Any) -> ExperimentalPolarsCell[R]: ...
+
+    # Other --------------------------------------------------------------------
+
+    @abstractmethod
+    def __hash__(self) -> int: ...
+
+    @abstractmethod
+    def __sizeof__(self) -> int: ...
diff --git a/src/safeds/data/tabular/containers/_experimental_polars_column.py b/src/safeds/data/tabular/containers/_experimental_polars_column.py
index db34d863b..163464349 100644
--- a/src/safeds/data/tabular/containers/_experimental_polars_column.py
+++ b/src/safeds/data/tabular/containers/_experimental_polars_column.py
@@ -1,12 +1,265 @@
 from __future__ import annotations
 
-from abc import ABC
 from collections.abc import Sequence
-from typing import TypeVar
+from typing import TYPE_CHECKING, Any, TypeVar, overload
+
+from ._experimental_polars_cell import ExperimentalPolarsCell
+
+if TYPE_CHECKING:
+    from polars import Series
 
 T = TypeVar("T")
+P = TypeVar("P")
+R = TypeVar("R")
+
+
+class ExperimentalPolarsColumn(ExperimentalPolarsCell[T], Sequence[T]):
+    """
+    A column is a named, one-dimensional collection of homogeneous values.
+
+    Parameters
+    ----------
+    name:
+        The name of the column.
+    data:
+        The data. If None, an empty column is created.
+
+    Examples
+    --------
+    >>> from safeds.data.tabular.containers import ExperimentalPolarsColumn
+    >>> column = ExperimentalPolarsColumn("test", [1, 2, 3])
+    """
+
+    # ------------------------------------------------------------------------------------------------------------------
+    # Import
+    # ------------------------------------------------------------------------------------------------------------------
+
+    @staticmethod
+    def _from_polars_series(data: Series) -> ExperimentalPolarsColumn:
+        """Create a column that holds the given polars series directly, bypassing `__init__`."""
+        result = object.__new__(ExperimentalPolarsColumn)
+        result._series = data
+        return result
+
+    # ------------------------------------------------------------------------------------------------------------------
+    # Dunder methods
+    # ------------------------------------------------------------------------------------------------------------------
+
+    def __init__(self, name: str, data: Sequence[T] | None = None) -> None:
+        import polars as pl
+
+        if data is None:
+            data = []
+
+        self._series: pl.Series = pl.Series(name, data)
+
+    # "Boolean" operators (actually bitwise) -----------------------------------
+
+    def __invert__(self) -> ExperimentalPolarsCell[bool]:
+        import polars as pl
+
+        if self._series.dtype != pl.Boolean:
+            # Python does not translate NotImplemented into a TypeError for unary operators, so raise explicitly.
+            raise TypeError(f"bad operand type for unary ~: column of type {self._series.dtype}")
+
+        return _wrap(self._series.__invert__())
+
+    def __and__(self, other: bool | ExperimentalPolarsCell[bool]) -> ExperimentalPolarsCell[bool]:
+        right_operand = _normalize_boolean_operation_operands(self, other)
+        if right_operand is None:
+            return NotImplemented
+
+        return _wrap(self._series.__and__(right_operand))
+
+    def __rand__(self, other: bool | ExperimentalPolarsCell[bool]) -> ExperimentalPolarsCell[bool]:
+        right_operand = _normalize_boolean_operation_operands(self, other)
+        if right_operand is None:
+            return NotImplemented
+
+        return _wrap(self._series.__rand__(right_operand))
+
+    def __or__(self, other: bool | ExperimentalPolarsCell[bool]) -> ExperimentalPolarsCell[bool]:
+        right_operand = _normalize_boolean_operation_operands(self, other)
+        if right_operand is None:
+            return NotImplemented
+
+        return _wrap(self._series.__or__(right_operand))
+
+    def __ror__(self, other: bool | ExperimentalPolarsCell[bool]) -> ExperimentalPolarsCell[bool]:
+        right_operand = _normalize_boolean_operation_operands(self, other)
+        if right_operand is None:
+            return NotImplemented
+
+        return _wrap(self._series.__ror__(right_operand))
+
+    def __xor__(self, other: bool | ExperimentalPolarsCell[bool]) -> ExperimentalPolarsCell[bool]:
+        right_operand = _normalize_boolean_operation_operands(self, other)
+        if right_operand is None:
+            return NotImplemented
+
+        return _wrap(self._series.__xor__(right_operand))
+
+    def __rxor__(self, other: bool | ExperimentalPolarsCell[bool]) -> ExperimentalPolarsCell[bool]:
+        right_operand = _normalize_boolean_operation_operands(self, other)
+        if right_operand is None:
+            return NotImplemented
+
+        return _wrap(self._series.__rxor__(right_operand))
+
+    # Collection ---------------------------------------------------------------
+
+    @overload
+    def __getitem__(self, index: int) -> T: ...
+
+    @overload
+    def __getitem__(self, index: slice) -> ExperimentalPolarsColumn[T]: ...
+
+    def __getitem__(self, index: int | slice) -> T | ExperimentalPolarsColumn[T]:
+        result = self._series.__getitem__(index)
+        # Slicing a polars series yields another series; re-wrap it so callers get a column, as annotated.
+        return _wrap(result) if isinstance(index, slice) else result
+
+    def __len__(self) -> int:
+        return self._series.__len__()
+
+    # Comparison ---------------------------------------------------------------
+
+    def __eq__(self, other: object) -> ExperimentalPolarsCell[bool]:  # type: ignore[override]
+        other = _unwrap(other)
+        return _wrap(self._series.__eq__(other))
+
+    def __ge__(self, other: Any) -> ExperimentalPolarsCell[bool]:
+        other = _unwrap(other)
+        return _wrap(self._series.__ge__(other))
+
+    def __gt__(self, other: Any) -> ExperimentalPolarsCell[bool]:
+        other = _unwrap(other)
+        return _wrap(self._series.__gt__(other))
+
+    def __le__(self, other: Any) -> ExperimentalPolarsCell[bool]:
+        other = _unwrap(other)
+        return _wrap(self._series.__le__(other))
+
+    def __lt__(self, other: Any) -> ExperimentalPolarsCell[bool]:
+        other = _unwrap(other)
+        return _wrap(self._series.__lt__(other))
+
+    def __ne__(self, other: object) -> ExperimentalPolarsCell[bool]:  # type: ignore[override]
+        other = _unwrap(other)
+        return _wrap(self._series.__ne__(other))
+
+    # Numeric operators --------------------------------------------------------
+
+    def __abs__(self) -> ExperimentalPolarsCell[R]:
+        return _wrap(self._series.__abs__())
+
+    def __neg__(self) -> ExperimentalPolarsCell[R]:
+        return _wrap(self._series.__neg__())
+
+    def __pos__(self) -> ExperimentalPolarsCell[R]:
+        return _wrap(self._series.__pos__())
+
+    def __add__(self, other: Any) -> ExperimentalPolarsCell[R]:
+        other = _unwrap(other)
+        return _wrap(self._series.__add__(other))
+
+    def __radd__(self, other: Any) -> ExperimentalPolarsCell[R]:
+        other = _unwrap(other)
+        return _wrap(self._series.__radd__(other))
+
+    def __floordiv__(self, other: Any) -> ExperimentalPolarsCell[R]:
+        other = _unwrap(other)
+        return _wrap(self._series.__floordiv__(other))
+
+    def __rfloordiv__(self, other: Any) -> ExperimentalPolarsCell[R]:
+        other = _unwrap(other)
+        return _wrap(self._series.__rfloordiv__(other))
+
+    def __mod__(self, other: Any) -> ExperimentalPolarsCell[R]:
+        other = _unwrap(other)
+        return _wrap(self._series.__mod__(other))
+
+    def __rmod__(self, other: Any) -> ExperimentalPolarsCell[R]:
+        other = _unwrap(other)
+        return _wrap(self._series.__rmod__(other))
+
+    def __mul__(self, other: Any) -> ExperimentalPolarsCell[R]:
+        other = _unwrap(other)
+        return _wrap(self._series.__mul__(other))
+
+    def __rmul__(self, other: Any) -> ExperimentalPolarsCell[R]:
+        other = _unwrap(other)
+        return _wrap(self._series.__rmul__(other))
+
+    def __pow__(self, other: float | ExperimentalPolarsCell[P]) -> ExperimentalPolarsCell[R]:
+        other = _unwrap(other)
+        return _wrap(self._series.__pow__(other))
+
+    def __rpow__(self, other: float | ExperimentalPolarsCell[P]) -> ExperimentalPolarsCell[R]:
+        other = _unwrap(other)
+        return _wrap(self._series.__rpow__(other))
+
+    def __sub__(self, other: Any) -> ExperimentalPolarsCell[R]:
+        other = _unwrap(other)
+        return _wrap(self._series.__sub__(other))
+
+    def __rsub__(self, other: Any) -> ExperimentalPolarsCell[R]:
+        other = _unwrap(other)
+        return _wrap(self._series.__rsub__(other))
+
+    def __truediv__(self, other: Any) -> ExperimentalPolarsCell[R]:
+        other = _unwrap(other)
+        return _wrap(self._series.__truediv__(other))
+
+    def __rtruediv__(self, other: Any) -> ExperimentalPolarsCell[R]:
+        other = _unwrap(other)
+        return _wrap(self._series.__rtruediv__(other))
+
+    # String representation ----------------------------------------------------
+
+    def __repr__(self) -> str:
+        return self._series.__repr__()
+
+    def __str__(self) -> str:
+        return self._series.__str__()
+
+    # Other --------------------------------------------------------------------
+
+    def __hash__(self) -> int:
+        raise NotImplementedError
+
+    def __sizeof__(self) -> int:
+        raise NotImplementedError
+
+
+def _normalize_boolean_operation_operands(
+    left_operand: ExperimentalPolarsColumn,
+    right_operand: bool | ExperimentalPolarsCell[bool],
+) -> Series | None:
+    """
+    Normalize the operands of a boolean operation (not, and, or, xor).
+
+    If one of the operands is invalid, None is returned. Otherwise, the normalized right operand is returned.
+    """
+    import polars as pl
+
+    if left_operand._series.dtype != pl.Boolean:
+        return None
+    elif isinstance(right_operand, bool):
+        return pl.Series("", [right_operand])
+    elif not isinstance(right_operand, ExperimentalPolarsColumn) or right_operand._series.dtype != pl.Boolean:
+        return None
+    else:
+        return right_operand._series
+
+
+def _wrap(other: Series) -> Any:
+    """Wrap a polars series in a column."""
+    return ExperimentalPolarsColumn._from_polars_series(other)
 
 
-# TODO: should not be abstract
-class ExperimentalPolarsColumn(ABC, Sequence[T]):
-    pass
+def _unwrap(other: Any) -> Any:
+    """Return the polars series held by a column; pass any other value through unchanged."""
+    if isinstance(other, ExperimentalPolarsColumn):
+        return other._series
+    return other
diff --git a/src/safeds/data/tabular/containers/_experimental_vectorized_cell.py b/src/safeds/data/tabular/containers/_experimental_vectorized_cell.py
deleted file mode 100644
index 0a9708d25..000000000
--- a/src/safeds/data/tabular/containers/_experimental_vectorized_cell.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from ._experimental_polars_cell import ExperimentalPolarsCell
-from ._experimental_polars_column import ExperimentalPolarsColumn
-
-
-class _VectorizedCell(ExperimentalPolarsCell):
-    # ------------------------------------------------------------------------------------------------------------------
-    # Dunder methods
-    # ------------------------------------------------------------------------------------------------------------------
-
-    def __init__(self, column: ExperimentalPolarsColumn):
-        self._column: ExperimentalPolarsColumn = column
diff --git a/src/safeds/data/tabular/containers/_experimental_vectorized_row.py b/src/safeds/data/tabular/containers/_experimental_vectorized_row.py
index 001f7fa71..71191b447 100644
--- a/src/safeds/data/tabular/containers/_experimental_vectorized_row.py
+++ b/src/safeds/data/tabular/containers/_experimental_vectorized_row.py
@@ -3,11 +3,11 @@
 from typing import TYPE_CHECKING
 
 from ._experimental_polars_row import ExperimentalPolarsRow
-from ._experimental_vectorized_cell import _VectorizedCell
 
 if TYPE_CHECKING:
     from safeds.data.tabular.typing import ColumnType, Schema
 
+    from ._experimental_polars_column import ExperimentalPolarsColumn
     from ._experimental_polars_table import ExperimentalPolarsTable
 
 
@@ -62,7 +62,7 @@ def schema(self) -> Schema:  # TODO: rethink return type
     # Column operations
     # ------------------------------------------------------------------------------------------------------------------
 
-    def get_value(self, name: str) -> _VectorizedCell:
+    def get_value(self, name: str) -> ExperimentalPolarsColumn:
         """
         Get the value of the specified column.
 
@@ -76,7 +76,7 @@ def get_value(self, name: str) -> _VectorizedCell:
         value:
             The value of the column.
         """
-        return _VectorizedCell(self._table.get_column(name))
+        return self._table.get_column(name)
 
     def get_column_type(self, name: str) -> ColumnType:  # TODO: rethink return type
         """