Skip to content

Commit

Permalink
fixed ML tests (#33234)
Browse files Browse the repository at this point in the history
* fixed ML tests

* try some new setups

* created py312-ml tox section
  • Loading branch information
liferoad authored Nov 27, 2024
1 parent 9560fe1 commit d723216
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 21 deletions.
21 changes: 10 additions & 11 deletions sdks/python/apache_beam/ml/transforms/base_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import shutil
import tempfile
import time
import typing
import unittest
from collections.abc import Sequence
from typing import Any
Expand Down Expand Up @@ -140,8 +139,8 @@ def test_ml_transform_on_list_dict(self):
'x': int, 'y': float
},
expected_dtype={
'x': typing.Sequence[np.float32],
'y': typing.Sequence[np.float32],
'x': Sequence[np.float32],
'y': Sequence[np.float32],
},
),
param(
Expand All @@ -153,8 +152,8 @@ def test_ml_transform_on_list_dict(self):
'x': np.int32, 'y': np.float32
},
expected_dtype={
'x': typing.Sequence[np.float32],
'y': typing.Sequence[np.float32],
'x': Sequence[np.float32],
'y': Sequence[np.float32],
},
),
param(
Expand All @@ -165,21 +164,21 @@ def test_ml_transform_on_list_dict(self):
'x': list[int], 'y': list[float]
},
expected_dtype={
'x': typing.Sequence[np.float32],
'y': typing.Sequence[np.float32],
'x': Sequence[np.float32],
'y': Sequence[np.float32],
},
),
param(
input_data=[{
'x': [1, 2, 3], 'y': [2.0, 3.0, 4.0]
}],
input_types={
'x': typing.Sequence[int],
'y': typing.Sequence[float],
'x': Sequence[int],
'y': Sequence[float],
},
expected_dtype={
'x': typing.Sequence[np.float32],
'y': typing.Sequence[np.float32],
'x': Sequence[np.float32],
'y': Sequence[np.float32],
},
),
])
Expand Down
8 changes: 4 additions & 4 deletions sdks/python/apache_beam/ml/transforms/handlers_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
import shutil
import sys
import tempfile
import typing
import unittest
import uuid
from collections.abc import Sequence
from typing import NamedTuple
from typing import Union

Expand Down Expand Up @@ -276,9 +276,9 @@ def test_tft_process_handler_transformed_data_schema(self):
schema_utils.schema_from_feature_spec(raw_data_feature_spec))

expected_transformed_data_schema = {
'x': typing.Sequence[np.float32],
'y': typing.Sequence[np.float32],
'z': typing.Sequence[bytes]
'x': Sequence[np.float32],
'y': Sequence[np.float32],
'z': Sequence[bytes]
}

actual_transformed_data_schema = (
Expand Down
20 changes: 15 additions & 5 deletions sdks/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,12 +490,9 @@ def get_portability_package_data():
'sentence-transformers',
'skl2onnx',
'pillow',
# Support TF 2.16.0: https://github.com/apache/beam/issues/31294
# Once TF version is unpinned, also don't restrict Python version.
'tensorflow<2.16.0;python_version<"3.12"',
'tensorflow',
'tensorflow-hub',
# https://github.com/tensorflow/transform/issues/313
'tensorflow-transform;python_version<"3.11"',
'tensorflow-transform',
'tf2onnx',
'torch',
'transformers',
Expand All @@ -504,6 +501,19 @@ def get_portability_package_data():
# https://github.com/apache/beam/issues/31285
# 'xgboost<2.0', # https://github.com/apache/beam/issues/31252
],
'p312_ml_test': [
'datatable',
'embeddings',
'onnxruntime',
'sentence-transformers',
'skl2onnx',
'pillow',
'tensorflow',
'tensorflow-hub',
'tf2onnx',
'torch',
'transformers',
],
'aws': ['boto3>=1.9,<2'],
'azure': [
'azure-storage-blob>=12.3.2,<13',
Expand Down
14 changes: 13 additions & 1 deletion sdks/python/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,23 @@ commands =
python apache_beam/examples/complete/autocomplete_test.py
bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"

[testenv:py{39,310,311,312}-ml]
[testenv:py{39,310,311}-ml]
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
setenv =
extras = test,gcp,dataframe,ml_test
commands =
# Log tensorflow version for debugging
/bin/sh -c "pip freeze | grep -E tensorflow"
bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"

[testenv:py312-ml]
# Many ML packages do not support Python 3.12 yet, so this env installs the reduced p312_ml_test extras instead of ml_test.
# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests.
setenv =
extras = test,gcp,dataframe,p312_ml_test
commands =
# Log tensorflow version for debugging
/bin/sh -c "pip freeze | grep -E tensorflow"
bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}"

[testenv:py{39,310,311,312}-dask]
Expand Down

0 comments on commit d723216

Please sign in to comment.