Skip to content

Commit

Permalink
[Al-1669] Reduce test duration (#1443)
Browse files (browse the repository at this point in the history)
* reduce test_api size

* reduce api+compression tests

* more tests updated

* reduce transform tests

* fix

* reduce tiling test sizes

* further reduce test sizes

* fix test

* reduce more tests

* correct array sizes

* increase codecov
  • Loading branch information
AbhinavTuli authored Jan 27, 2022
1 parent 07ded2c commit 25f1fce
Show file tree
Hide file tree
Showing 10 changed files with 141 additions and 239 deletions.
118 changes: 51 additions & 67 deletions hub/api/tests/test_api.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,7 +7,6 @@
from hub.tests.common import assert_array_lists_equal
from hub.util.exceptions import (
TensorDtypeMismatchError,
TensorDoesNotExistError,
TensorAlreadyExistsError,
TensorGroupAlreadyExistsError,
TensorInvalidSampleShapeError,
Expand All @@ -18,11 +17,6 @@
from hub.constants import MB

from click.testing import CliRunner
from hub.tests.dataset_fixtures import (
enabled_datasets,
enabled_persistent_dataset_generators,
enabled_non_gcs_datasets,
)


# need this for 32-bit and 64-bit systems to have correct tests
Expand Down Expand Up @@ -55,16 +49,15 @@ def test_persist(ds_generator):
assert ds_new.meta.version == hub.__version__


@enabled_persistent_dataset_generators
def test_persist_with(ds_generator):
with ds_generator() as ds:
def test_persist_with(local_ds_generator):
with local_ds_generator() as ds:
ds.create_tensor("image")
ds.image.extend(np.ones((4, 224, 224, 3)))

ds_new = ds_generator()
ds_new = local_ds_generator()
assert len(ds_new) == 0 # shouldn't be flushed yet

ds_new = ds_generator()
ds_new = local_ds_generator()
assert len(ds_new) == 4

engine = ds_new.image.chunk_engine
Expand All @@ -78,47 +71,45 @@ def test_persist_with(ds_generator):
assert ds_new.meta.version == hub.__version__


@enabled_persistent_dataset_generators
def test_persist_clear_cache(ds_generator):
ds = ds_generator()
def test_persist_clear_cache(local_ds_generator):
ds = local_ds_generator()
ds.create_tensor("image")
ds.image.extend(np.ones((4, 224, 224, 3)))
ds.clear_cache()
ds_new = ds_generator()
ds_new = local_ds_generator()
assert len(ds_new) == 4

assert ds_new.image.shape == (4, 224, 224, 3)

np.testing.assert_array_equal(ds_new.image.numpy(), np.ones((4, 224, 224, 3)))


@enabled_datasets
def test_populate_dataset(ds):
assert ds.meta.tensors == []
ds.create_tensor("image")
assert len(ds) == 0
assert len(ds.image) == 0
def test_populate_dataset(local_ds):
assert local_ds.meta.tensors == []
local_ds.create_tensor("image")
assert len(local_ds) == 0
assert len(local_ds.image) == 0

ds.image.extend(np.ones((4, 28, 28)))
assert len(ds) == 4
assert len(ds.image) == 4
local_ds.image.extend(np.ones((4, 28, 28)))
assert len(local_ds) == 4
assert len(local_ds.image) == 4

for _ in range(10):
ds.image.append(np.ones((28, 28)))
assert len(ds.image) == 14
local_ds.image.append(np.ones((28, 28)))
assert len(local_ds.image) == 14

ds.image.extend([np.ones((28, 28)), np.ones((28, 28))])
assert len(ds.image) == 16
local_ds.image.extend([np.ones((28, 28)), np.ones((28, 28))])
assert len(local_ds.image) == 16

assert ds.meta.tensors == [
assert local_ds.meta.tensors == [
"image",
]
assert ds.meta.version == hub.__version__
assert local_ds.meta.version == hub.__version__


def test_larger_data_memory(memory_ds):
memory_ds.create_tensor("image")
x = np.ones((4, 4096, 4096))
memory_ds.create_tensor("image", max_chunk_size=2 * MB)
x = np.ones((4, 1024, 1024))
memory_ds.image.extend(x)
assert len(memory_ds) == 4
assert memory_ds.image.shape == x.shape
Expand All @@ -133,7 +124,7 @@ def test_larger_data_memory(memory_ds):
slice(2, None),
(0, slice(5, None), slice(None, 714)),
(2, 100, 1007),
(slice(1, 3), [20, 1000, 2, 400], [-2, 3, 577, 4095]),
(slice(1, 3), [20, 1000, 2, 400], [-2, 3, 577, 1023]),
]
for idx in idxs:
np.testing.assert_array_equal(memory_ds.image[idx].numpy(), x[idx])
Expand Down Expand Up @@ -161,23 +152,21 @@ def test_stringify_with_path(local_ds):
assert str(ds) == f"Dataset(path='{local_ds.path}', tensors=[])"


@enabled_non_gcs_datasets
def test_compute_fixed_tensor(ds):
ds.create_tensor("image")
ds.image.extend(np.ones((32, 28, 28)))
assert len(ds) == 32
np.testing.assert_array_equal(ds.image.numpy(), np.ones((32, 28, 28)))
def test_fixed_tensor(local_ds):
local_ds.create_tensor("image")
local_ds.image.extend(np.ones((32, 28, 28)))
assert len(local_ds) == 32
np.testing.assert_array_equal(local_ds.image.numpy(), np.ones((32, 28, 28)))


@enabled_non_gcs_datasets
def test_compute_dynamic_tensor(ds):
ds.create_tensor("image")
def test_dynamic_tensor(local_ds):
local_ds.create_tensor("image")

a1 = np.ones((32, 28, 28))
a2 = np.ones((10, 36, 11))
a3 = np.ones((29, 10))

image = ds.image
image = local_ds.image

image.extend(a1)
image.extend(a2)
Expand All @@ -201,9 +190,8 @@ def test_compute_dynamic_tensor(ds):
assert image.is_dynamic


@enabled_datasets
def test_empty_samples(ds: Dataset):
tensor = ds.create_tensor("with_empty")
def test_empty_samples(local_ds: Dataset):
tensor = local_ds.create_tensor("with_empty")

a1 = np.arange(25 * 4 * 2).reshape(25, 4, 2)
a2 = np.arange(5 * 10 * 50 * 2).reshape(5, 10, 50, 2)
Expand All @@ -227,14 +215,13 @@ def test_empty_samples(ds: Dataset):
assert_array_lists_equal(actual_list, expected_list)

# Test indexing individual empty samples with numpy while looping. This may seem redundant, but it was failing before.
for actual_sample, expected in zip(ds, expected_list):
for actual_sample, expected in zip(local_ds, expected_list):
actual = actual_sample.with_empty.numpy()
np.testing.assert_array_equal(actual, expected)


@enabled_non_gcs_datasets
def test_safe_downcasting(ds: Dataset):
int_tensor = ds.create_tensor("int", dtype="uint8")
def test_safe_downcasting(local_ds):
int_tensor = local_ds.create_tensor("int", dtype="uint8")
int_tensor.append(0)
int_tensor.append(1)
int_tensor.extend([2, 3, 4])
Expand All @@ -247,7 +234,7 @@ def test_safe_downcasting(ds: Dataset):
with pytest.raises(TensorDtypeMismatchError):
int_tensor.append(np.array([1.0]))

float_tensor = ds.create_tensor("float", dtype="float32")
float_tensor = local_ds.create_tensor("float", dtype="float32")
float_tensor.append(0)
float_tensor.append(1)
float_tensor.extend([2, 3.0, 4.0])
Expand All @@ -259,9 +246,8 @@ def test_safe_downcasting(ds: Dataset):
assert len(float_tensor) == 10


@enabled_datasets
def test_scalar_samples(ds: Dataset):
tensor = ds.create_tensor("scalars")
def test_scalar_samples(local_ds):
tensor = local_ds.create_tensor("scalars")

assert tensor.meta.dtype is None

Expand Down Expand Up @@ -335,13 +321,12 @@ def test_scalar_samples(ds: Dataset):
assert len(tensor) == 22


@enabled_datasets
def test_sequence_samples(ds: Dataset):
tensor = ds.create_tensor("arrays")
def test_sequence_samples(local_ds):
tensor = local_ds.create_tensor("arrays")

tensor.append([1, 2, 3])
tensor.extend([[4, 5, 6]])
ds.clear_cache()
local_ds.clear_cache()

assert len(tensor) == 2
expected_list = [[1, 2, 3], [4, 5, 6]]
Expand All @@ -352,16 +337,15 @@ def test_sequence_samples(ds: Dataset):
assert_array_lists_equal(tensor.numpy(aslist=True), expected_list)


@enabled_datasets
def test_iterate_dataset(ds):
def test_iterate_dataset(local_ds):
labels = [1, 9, 7, 4]
ds.create_tensor("image")
ds.create_tensor("label")
local_ds.create_tensor("image")
local_ds.create_tensor("label")

ds.image.extend(np.ones((4, 28, 28)))
ds.label.extend(np.asarray(labels).reshape((4, 1)))
local_ds.image.extend(np.ones((4, 28, 28)))
local_ds.label.extend(np.asarray(labels).reshape((4, 1)))

for idx, sub_ds in enumerate(ds):
for idx, sub_ds in enumerate(local_ds):
img = sub_ds.image.numpy()
label = sub_ds.label.numpy()
np.testing.assert_array_equal(img, np.ones((28, 28)))
Expand Down Expand Up @@ -883,11 +867,11 @@ def test_ds_append(memory_ds, x_args, y_args, x_size):
@pytest.mark.parametrize(
"dest_args", [{}, {"sample_compression": "png"}, {"chunk_compression": "png"}]
)
@pytest.mark.parametrize("size", [(30, 40, 3), (5041, 3037, 3)])
@pytest.mark.parametrize("size", [(30, 40, 3), (1261, 759, 3)])
def test_append_with_tensor(src_args, dest_args, size):
ds1 = hub.dataset("mem://ds1")
ds2 = hub.dataset("mem://ds2")
ds1.create_tensor("x", **src_args)
ds1.create_tensor("x", **src_args, max_chunk_size=2 * MB)
x = np.random.randint(0, 256, size, dtype=np.uint8)
ds1.x.append(x)
ds2.create_tensor("y", **dest_args)
Expand Down
8 changes: 4 additions & 4 deletions hub/api/tests/test_api_tiling.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,11 +17,11 @@

def test_simple(memory_ds):
with memory_ds:
memory_ds.create_tensor("abc")
memory_ds.abc.extend(np.ones((3, 1003, 2001, 5)))
np.testing.assert_array_equal(memory_ds.abc.numpy(), np.ones((3, 1003, 2001, 5)))
memory_ds.create_tensor("abc", max_chunk_size=2 * MB)
memory_ds.abc.extend(np.ones((3, 253, 501, 5)))
np.testing.assert_array_equal(memory_ds.abc.numpy(), np.ones((3, 253, 501, 5)))
memory_ds.commit()
np.testing.assert_array_equal(memory_ds.abc.numpy(), np.ones((3, 1003, 2001, 5)))
np.testing.assert_array_equal(memory_ds.abc.numpy(), np.ones((3, 253, 501, 5)))


@compressions_paremetrized
Expand Down
28 changes: 12 additions & 16 deletions hub/api/tests/test_api_with_compression.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,9 +33,8 @@ def _populate_compressed_samples(tensor: Tensor, cat_path, flower_path, count=1)
)


@enabled_datasets
def test_populate_compressed_samples(ds: Dataset, cat_path, flower_path):
images = ds.create_tensor(
def test_populate_compressed_samples(local_ds, cat_path, flower_path):
images = local_ds.create_tensor(
TENSOR_KEY, htype="image", sample_compression="png", max_chunk_size=2 * MB
)

Expand Down Expand Up @@ -65,9 +64,8 @@ def test_populate_compressed_samples(ds: Dataset, cat_path, flower_path):
assert images.shape_interval.upper == (6, 900, 900, 4)


@enabled_datasets
def test_iterate_compressed_samples(ds: Dataset, cat_path, flower_path):
images = ds.create_tensor(TENSOR_KEY, htype="image", sample_compression="png")
def test_iterate_compressed_samples(local_ds, cat_path, flower_path):
images = local_ds.create_tensor(TENSOR_KEY, htype="image", sample_compression="png")

assert images.meta.dtype == "uint8"
assert images.meta.sample_compression == "png"
Expand All @@ -94,13 +92,12 @@ def test_iterate_compressed_samples(ds: Dataset, cat_path, flower_path):
assert x.dtype == "uint8"


@enabled_datasets
def test_uncompressed(ds: Dataset):
images = ds.create_tensor(TENSOR_KEY, sample_compression=None)
def test_uncompressed(local_ds):
images = local_ds.create_tensor(TENSOR_KEY, sample_compression=None)

images.append(np.ones((100, 100, 100)))
images.extend(np.ones((3, 101, 2, 1)))
ds.clear_cache()
local_ds.clear_cache()
np.testing.assert_array_equal(images[0].numpy(), np.ones((100, 100, 100)))
np.testing.assert_array_equal(images[1:4].numpy(), np.ones((3, 101, 2, 1)))

Expand Down Expand Up @@ -223,9 +220,8 @@ def test_chunkwise_compression(memory_ds, cat_path, flower_path):
np.testing.assert_array_equal(data[i], ds.labels[20 + i].numpy())


@enabled_datasets
@pytest.mark.parametrize("compression", hub.compression.AUDIO_COMPRESSIONS)
def test_audio(ds: Dataset, compression, audio_paths):
def test_audio(local_ds, compression, audio_paths):
path = audio_paths[compression]
if path.endswith(".mp3"):
audio = mp3_read_file_f32(path)
Expand All @@ -236,9 +232,9 @@ def test_audio(ds: Dataset, compression, audio_paths):
arr = np.frombuffer(audio.samples, dtype=np.float32).reshape(
audio.num_frames, audio.nchannels
)
ds.create_tensor("audio", htype="audio", sample_compression=compression)
with ds:
local_ds.create_tensor("audio", htype="audio", sample_compression=compression)
with local_ds:
for _ in range(10):
ds.audio.append(hub.read(path)) # type: ignore
local_ds.audio.append(hub.read(path)) # type: ignore
for i in range(10):
np.testing.assert_array_equal(ds.audio[i].numpy(), arr) # type: ignore
np.testing.assert_array_equal(local_ds.audio[i].numpy(), arr) # type: ignore
3 changes: 1 addition & 2 deletions hub/api/tests/test_json.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -181,8 +181,7 @@ def upload(stuff, ds):


@enabled_non_gcs_datasets
@pytest.mark.parametrize("compression", ["lz4", None])
def test_list_transform(ds, compression, scheduler="threaded"):
def test_list_transform(ds, scheduler="threaded"):
ds.create_tensor("list", htype="list")

items = [
Expand Down
7 changes: 3 additions & 4 deletions hub/api/tests/test_video.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,11 +12,10 @@
_USE_CFFI = True


@enabled_datasets
@pytest.mark.parametrize("compression", hub.compression.VIDEO_COMPRESSIONS)
def test_video(ds: Dataset, compression, video_paths):
def test_video(local_ds, compression, video_paths):
for i, path in enumerate(video_paths[compression]):
tensor = ds.create_tensor(
tensor = local_ds.create_tensor(
f"video_{i}", htype="video", sample_compression=compression
)
sample = hub.read(path)
Expand All @@ -32,7 +31,7 @@ def test_video(ds: Dataset, compression, video_paths):
elif compression == "avi":
assert sample.shape == (900, 270, 480, 3)
assert sample.shape[-1] == 3
with ds:
with local_ds:
for _ in range(5):
tensor.append(hub.read(path)) # type: ignore
tensor.extend([hub.read(path) for _ in range(5)]) # type: ignore
Expand Down
Loading

0 comments on commit 25f1fce

Please sign in to comment.