Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: polars implementation of a row #733

Merged
merged 4 commits into from
May 6, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
feat: polars implementation of a row
lars-reimann committed May 6, 2024

Verified

This commit was signed with the committer’s verified signature.
lars-reimann Lars Reimann
commit 439c899c39544b6f343c7061e095dbda49484f18
110 changes: 107 additions & 3 deletions src/safeds/data/tabular/containers/_experimental_polars_row.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,111 @@
from __future__ import annotations

from abc import ABC
from abc import ABC, abstractmethod
from typing import Any
from collections.abc import Mapping, Iterator

from safeds.data.tabular.containers import ExperimentalPolarsCell
from safeds.data.tabular.typing import Schema, ColumnType

class ExperimentalPolarsRow(ABC):  # noqa: B024
    """Empty abstract base class for rows; it declares no members of its own."""

class ExperimentalPolarsRow(ABC, Mapping[str, Any]):
    """
    A row is a one-dimensional collection of named, heterogeneous values.

    The class implements the read-only `Mapping` protocol: the keys are the column names and item access delegates to
    `get_value`. Concrete subclasses must implement every abstract member declared below.
    """

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------

    def __contains__(self, name: Any) -> bool:
        """Check whether the row has a column called `name` (delegates to `has_column`)."""
        # Overrides the `Mapping` mixin, which would otherwise call `__getitem__` and rely on a `KeyError`.
        return self.has_column(name)

    @abstractmethod
    def __eq__(self, other: object) -> bool:
        """Compare this row to another object; must be implemented by subclasses."""
        ...

    def __getitem__(self, name: str) -> ExperimentalPolarsCell:
        """Get the value of the column called `name` (delegates to `get_value`)."""
        return self.get_value(name)

    @abstractmethod
    def __hash__(self) -> int:
        """Compute a hash for this row; must be implemented by subclasses."""
        ...

    def __iter__(self) -> Iterator[str]:
        """Iterate over the column names, which are the keys of the mapping."""
        return iter(self.column_names)

    def __len__(self) -> int:
        """Get the number of columns in the row."""
        return self.number_of_columns

    @abstractmethod
    def __sizeof__(self) -> int:
        """Report the size of this row; must be implemented by subclasses."""
        ...

    # ------------------------------------------------------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------------------------------------------------------

    @property
    @abstractmethod
    def column_names(self) -> list[str]:
        """The names of the columns in the row."""

    @property
    @abstractmethod
    def number_of_columns(self) -> int:
        """The number of columns in the row."""

    @property
    @abstractmethod
    def schema(self) -> Schema:  # TODO: rethink return type
        """The schema of the row."""

    # ------------------------------------------------------------------------------------------------------------------
    # Column operations
    # ------------------------------------------------------------------------------------------------------------------

    @abstractmethod
    def get_value(self, name: str) -> ExperimentalPolarsCell:
        """
        Get the value of the specified column.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        value:
            The value of the column.
        """

    @abstractmethod
    def get_column_type(self, name: str) -> ColumnType:  # TODO: rethink return type
        """
        Get the type of the specified column.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        type:
            The type of the column.
        """

    @abstractmethod
    def has_column(self, name: str) -> bool:
        """
        Check if the row has a column with the specified name.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        has_column:
            Whether the row has a column with the specified name.
        """
Original file line number Diff line number Diff line change
@@ -28,12 +28,13 @@ class ExperimentalPolarsTable:

To create a `Table` call the constructor or use one of the following static methods:

| Method | Description |
| ------------------------------------------------------------------------------------------------------------------ | -------------------------------------- |
| [from_csv_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_csv_file] | Create a table from a CSV file. |
| [from_json_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_json_file] | Create a table from a JSON file. |
| [from_dict][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_dict] | Create a table from a dictionary. |
| [from_columns][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_columns] | Create a table from a list of columns. |
| Method | Description |
| ------------------------------------------------------------------------------------------------------------------------ | -------------------------------------- |
| [from_csv_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_csv_file] | Create a table from a CSV file. |
| [from_json_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_json_file] | Create a table from a JSON file. |
| [from_parquet_file][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_parquet_file] | Create a table from a Parquet file. |
| [from_columns][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_columns] | Create a table from a list of columns. |
| [from_dict][safeds.data.tabular.containers._experimental_polars_table.ExperimentalPolarsTable.from_dict] | Create a table from a dictionary. |

Parameters
----------
@@ -212,7 +213,7 @@ def __str__(self) -> str:
@property
def column_names(self) -> list[str]:
"""
Names of the columns in the table.
The names of the columns in the table.

Examples
--------
@@ -354,6 +355,11 @@ def remove_duplicate_rows(self) -> ExperimentalPolarsTable:
"""
Remove duplicate rows from the table.

Returns
-------
filtered_table:
The table without duplicate rows.

Examples
--------
>>> from safeds.data.tabular.containers import ExperimentalPolarsTable
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from safeds.data.tabular.containers import ExperimentalPolarsCell, ExperimentalPolarsColumn


class _VectorizedCell(ExperimentalPolarsCell):
    """A cell that wraps an entire column, which is kept in the `_column` attribute."""

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------

    def __init__(self, column: ExperimentalPolarsColumn):
        # Keep a reference to the wrapped column; nothing is copied or validated here.
        self._column: ExperimentalPolarsColumn = column
105 changes: 105 additions & 0 deletions src/safeds/data/tabular/containers/_experimental_vectorized_row.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
from __future__ import annotations

from safeds.data.tabular.containers import ExperimentalPolarsRow, ExperimentalPolarsTable
from safeds.data.tabular.containers._vectorized_cell import _VectorizedCell
from safeds.data.tabular.typing import Schema, ColumnType


class _VectorizedRow(ExperimentalPolarsRow):
    """
    A row is a one-dimensional collection of named, heterogeneous values.

    Instead of holding the values of a single row, this implementation wraps a whole table and exposes each of its
    columns as one "cell" of the row. This greatly speeds up operations on the row.
    """

    # ------------------------------------------------------------------------------------------------------------------
    # Dunder methods
    # ------------------------------------------------------------------------------------------------------------------

    def __init__(self, table: ExperimentalPolarsTable):
        # Every other member of this class delegates to the wrapped table.
        self._table: ExperimentalPolarsTable = table

    def __eq__(self, other: object) -> bool:
        if isinstance(other, _VectorizedRow):
            # Identical objects are equal without having to compare the wrapped tables.
            return self is other or self._table == other._table
        return NotImplemented

    def __hash__(self) -> int:
        wrapped = self._table
        return wrapped.__hash__()

    def __sizeof__(self) -> int:
        wrapped = self._table
        return wrapped.__sizeof__()

    # ------------------------------------------------------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------------------------------------------------------

    @property
    def column_names(self) -> list[str]:
        """The names of the columns in the row, as reported by the wrapped table."""
        names = self._table.column_names
        return names

    @property
    def number_of_columns(self) -> int:
        """The number of columns in the row, as reported by the wrapped table."""
        count = self._table.number_of_columns
        return count

    @property
    def schema(self) -> Schema:  # TODO: rethink return type
        """The schema of the row, as reported by the wrapped table."""
        table_schema = self._table.schema
        return table_schema

    # ------------------------------------------------------------------------------------------------------------------
    # Column operations
    # ------------------------------------------------------------------------------------------------------------------

    def get_value(self, name: str) -> _VectorizedCell:
        """
        Get the value of the specified column.

        The column is fetched from the wrapped table and wrapped in a `_VectorizedCell`.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        value:
            The value of the column.
        """
        column = self._table.get_column(name)
        return _VectorizedCell(column)

    def get_column_type(self, name: str) -> ColumnType:  # TODO: rethink return type
        """
        Get the type of the specified column.

        The lookup is delegated to the wrapped table.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        type:
            The type of the column.
        """
        column_type = self._table.get_column_type(name)
        return column_type

    def has_column(self, name: str) -> bool:
        """
        Check if the row has a column with the specified name.

        The check is delegated to the wrapped table.

        Parameters
        ----------
        name:
            The name of the column.

        Returns
        -------
        has_column:
            Whether the row has a column with the specified name.
        """
        is_present = self._table.has_column(name)
        return is_present
Loading