From 25f1fced3e3fdbbd16222fd363869a1b26e109eb Mon Sep 17 00:00:00 2001 From: Abhinav Tuli <42538472+AbhinavTuli@users.noreply.github.com> Date: Thu, 27 Jan 2022 12:31:14 +0530 Subject: [PATCH] [Al-1669] Reduce test duration (#1443) * reduce test_api size * reduce api+compression tests * more tests updated * reduce transform tests * fix * reduce tiling test sizes * further reduce test sizes * fix test * reduce more tests * correct array sizes * increase codecov --- hub/api/tests/test_api.py | 118 +++++++++------------ hub/api/tests/test_api_tiling.py | 8 +- hub/api/tests/test_api_with_compression.py | 28 +++-- hub/api/tests/test_json.py | 3 +- hub/api/tests/test_video.py | 7 +- hub/core/chunk/test_chunk_compressed.py | 18 ++-- hub/core/chunk/test_sample_compressed.py | 65 ++++++------ hub/core/chunk/test_uncompressed.py | 20 ++-- hub/core/tiling/test_optimizer.py | 2 +- hub/core/transform/test_transform.py | 111 +++---------------- 10 files changed, 141 insertions(+), 239 deletions(-) diff --git a/hub/api/tests/test_api.py b/hub/api/tests/test_api.py index c523007003..437826d629 100644 --- a/hub/api/tests/test_api.py +++ b/hub/api/tests/test_api.py @@ -7,7 +7,6 @@ from hub.tests.common import assert_array_lists_equal from hub.util.exceptions import ( TensorDtypeMismatchError, - TensorDoesNotExistError, TensorAlreadyExistsError, TensorGroupAlreadyExistsError, TensorInvalidSampleShapeError, @@ -18,11 +17,6 @@ from hub.constants import MB from click.testing import CliRunner -from hub.tests.dataset_fixtures import ( - enabled_datasets, - enabled_persistent_dataset_generators, - enabled_non_gcs_datasets, -) # need this for 32-bit and 64-bit systems to have correct tests @@ -55,16 +49,15 @@ def test_persist(ds_generator): assert ds_new.meta.version == hub.__version__ -@enabled_persistent_dataset_generators -def test_persist_with(ds_generator): - with ds_generator() as ds: +def test_persist_with(local_ds_generator): + with local_ds_generator() as ds: ds.create_tensor("image") ds.image.extend(np.ones((4, 224, 224, 3))) - ds_new = ds_generator() + ds_new = local_ds_generator() assert len(ds_new) == 0 # shouldn't be flushed yet - ds_new = ds_generator() + ds_new = local_ds_generator() assert len(ds_new) == 4 engine = ds_new.image.chunk_engine @@ -78,13 +71,12 @@ def test_persist_with(ds_generator): assert ds_new.meta.version == hub.__version__ -@enabled_persistent_dataset_generators -def test_persist_clear_cache(ds_generator): - ds = ds_generator() +def test_persist_clear_cache(local_ds_generator): + ds = local_ds_generator() ds.create_tensor("image") ds.image.extend(np.ones((4, 224, 224, 3))) ds.clear_cache() - ds_new = ds_generator() + ds_new = local_ds_generator() assert len(ds_new) == 4 assert ds_new.image.shape == (4, 224, 224, 3) @@ -92,33 +84,32 @@ def test_persist_clear_cache(ds_generator): np.testing.assert_array_equal(ds_new.image.numpy(), np.ones((4, 224, 224, 3))) -@enabled_datasets -def test_populate_dataset(ds): - assert ds.meta.tensors == [] - ds.create_tensor("image") - assert len(ds) == 0 - assert len(ds.image) == 0 +def test_populate_dataset(local_ds): + assert local_ds.meta.tensors == [] + local_ds.create_tensor("image") + assert len(local_ds) == 0 + assert len(local_ds.image) == 0 - ds.image.extend(np.ones((4, 28, 28))) - assert len(ds) == 4 - assert len(ds.image) == 4 + local_ds.image.extend(np.ones((4, 28, 28))) + assert len(local_ds) == 4 + assert len(local_ds.image) == 4 for _ in range(10): - ds.image.append(np.ones((28, 28))) - assert len(ds.image) == 14 + 
ds.image.append(np.ones((28, 28))) - assert len(ds.image) == 14 +
local_ds.image.append(np.ones((28, 28))) + assert len(local_ds.image) == 14 - ds.image.extend([np.ones((28, 28)), np.ones((28, 28))]) - assert len(ds.image) == 16 + local_ds.image.extend([np.ones((28, 28)), np.ones((28, 28))]) + assert len(local_ds.image) == 16 - assert ds.meta.tensors == [ + assert local_ds.meta.tensors == [ "image", ] - assert ds.meta.version == hub.__version__ + assert local_ds.meta.version == hub.__version__ def test_larger_data_memory(memory_ds): - memory_ds.create_tensor("image") - x = np.ones((4, 4096, 4096)) + memory_ds.create_tensor("image", max_chunk_size=2 * MB) + x = np.ones((4, 1024, 1024)) memory_ds.image.extend(x) assert len(memory_ds) == 4 assert memory_ds.image.shape == x.shape @@ -133,7 +124,7 @@ def test_larger_data_memory(memory_ds): slice(2, None), (0, slice(5, None), slice(None, 714)), (2, 100, 1007), - (slice(1, 3), [20, 1000, 2, 400], [-2, 3, 577, 4095]), + (slice(1, 3), [20, 1000, 2, 400], [-2, 3, 577, 1023]), ] for idx in idxs: np.testing.assert_array_equal(memory_ds.image[idx].numpy(), x[idx]) @@ -161,23 +152,21 @@ def test_stringify_with_path(local_ds): assert str(ds) == f"Dataset(path='{local_ds.path}', tensors=[])" -@enabled_non_gcs_datasets -def test_compute_fixed_tensor(ds): - ds.create_tensor("image") - ds.image.extend(np.ones((32, 28, 28))) - assert len(ds) == 32 - np.testing.assert_array_equal(ds.image.numpy(), np.ones((32, 28, 28))) +def test_fixed_tensor(local_ds): + local_ds.create_tensor("image") + local_ds.image.extend(np.ones((32, 28, 28))) + assert len(local_ds) == 32 + np.testing.assert_array_equal(local_ds.image.numpy(), np.ones((32, 28, 28))) -@enabled_non_gcs_datasets -def test_compute_dynamic_tensor(ds): - ds.create_tensor("image") +def test_dynamic_tensor(local_ds): + local_ds.create_tensor("image") a1 = np.ones((32, 28, 28)) a2 = np.ones((10, 36, 11)) a3 = np.ones((29, 10)) - image = ds.image + image = local_ds.image image.extend(a1) image.extend(a2) @@ -201,9 +190,8 @@ def test_compute_dynamic_tensor(ds): assert image.is_dynamic -@enabled_datasets -def test_empty_samples(ds: Dataset): - tensor = ds.create_tensor("with_empty") +def test_empty_samples(local_ds: Dataset): + tensor = local_ds.create_tensor("with_empty") a1 = np.arange(25 * 4 * 2).reshape(25, 4, 2) a2 = np.arange(5 * 10 * 50 * 2).reshape(5, 10, 50, 2) @@ -227,14 +215,13 @@ def test_empty_samples(ds: Dataset): assert_array_lists_equal(actual_list, expected_list) # test indexing individual empty samples with numpy while looping, this may seem redundant but this was failing before - for actual_sample, expected in zip(ds, expected_list): + for actual_sample, expected in zip(local_ds, expected_list): actual = actual_sample.with_empty.numpy() np.testing.assert_array_equal(actual, expected) -@enabled_non_gcs_datasets -def test_safe_downcasting(ds: Dataset): - int_tensor = ds.create_tensor("int", dtype="uint8") +def test_safe_downcasting(local_ds): + int_tensor = local_ds.create_tensor("int", dtype="uint8") int_tensor.append(0) int_tensor.append(1) int_tensor.extend([2, 3, 4]) @@ -247,7 +234,7 @@ def test_safe_downcasting(ds: Dataset): with pytest.raises(TensorDtypeMismatchError): int_tensor.append(np.array([1.0])) - float_tensor = ds.create_tensor("float", dtype="float32") + float_tensor = local_ds.create_tensor("float", dtype="float32") float_tensor.append(0) float_tensor.append(1) float_tensor.extend([2, 3.0, 4.0]) @@ -259,9 +246,8 @@ def test_safe_downcasting(ds: Dataset): assert len(float_tensor) == 10 -@enabled_datasets -def test_scalar_samples(ds: Dataset): - 
tensor = ds.create_tensor("scalars") +def test_scalar_samples(local_ds): + tensor = local_ds.create_tensor("scalars") assert tensor.meta.dtype is None @@ -335,13 +321,12 @@ def test_scalar_samples(ds: Dataset): assert len(tensor) == 22 -@enabled_datasets -def test_sequence_samples(ds: Dataset): - tensor = ds.create_tensor("arrays") +def test_sequence_samples(local_ds): + tensor = local_ds.create_tensor("arrays") tensor.append([1, 2, 3]) tensor.extend([[4, 5, 6]]) - ds.clear_cache() + local_ds.clear_cache() assert len(tensor) == 2 expected_list = [[1, 2, 3], [4, 5, 6]] @@ -352,16 +337,15 @@ def test_sequence_samples(ds: Dataset): assert_array_lists_equal(tensor.numpy(aslist=True), expected_list) -@enabled_datasets -def test_iterate_dataset(ds): +def test_iterate_dataset(local_ds): labels = [1, 9, 7, 4] - ds.create_tensor("image") - ds.create_tensor("label") + local_ds.create_tensor("image") + local_ds.create_tensor("label") - ds.image.extend(np.ones((4, 28, 28))) - ds.label.extend(np.asarray(labels).reshape((4, 1))) + local_ds.image.extend(np.ones((4, 28, 28))) + local_ds.label.extend(np.asarray(labels).reshape((4, 1))) - for idx, sub_ds in enumerate(ds): + for idx, sub_ds in enumerate(local_ds): img = sub_ds.image.numpy() label = sub_ds.label.numpy() np.testing.assert_array_equal(img, np.ones((28, 28))) @@ -883,11 +867,11 @@ def test_ds_append(memory_ds, x_args, y_args, x_size): @pytest.mark.parametrize( "dest_args", [{}, {"sample_compression": "png"}, {"chunk_compression": "png"}] ) -@pytest.mark.parametrize("size", [(30, 40, 3), (5041, 3037, 3)]) +@pytest.mark.parametrize("size", [(30, 40, 3), (1261, 759, 3)]) def test_append_with_tensor(src_args, dest_args, size): ds1 = hub.dataset("mem://ds1") ds2 = hub.dataset("mem://ds2") - ds1.create_tensor("x", **src_args) + ds1.create_tensor("x", **src_args, max_chunk_size=2 * MB) x = np.random.randint(0, 256, size, dtype=np.uint8) ds1.x.append(x) ds2.create_tensor("y", **dest_args) diff --git a/hub/api/tests/test_api_tiling.py b/hub/api/tests/test_api_tiling.py index ed167e6b31..362131b410 100644 --- a/hub/api/tests/test_api_tiling.py +++ b/hub/api/tests/test_api_tiling.py @@ -17,11 +17,11 @@ def test_simple(memory_ds): with memory_ds: - memory_ds.create_tensor("abc") - memory_ds.abc.extend(np.ones((3, 1003, 2001, 5))) - np.testing.assert_array_equal(memory_ds.abc.numpy(), np.ones((3, 1003, 2001, 5))) + memory_ds.create_tensor("abc", max_chunk_size=2 * MB) + memory_ds.abc.extend(np.ones((3, 253, 501, 5))) + np.testing.assert_array_equal(memory_ds.abc.numpy(), np.ones((3, 253, 501, 5))) memory_ds.commit() - np.testing.assert_array_equal(memory_ds.abc.numpy(), np.ones((3, 1003, 2001, 5))) + np.testing.assert_array_equal(memory_ds.abc.numpy(), np.ones((3, 253, 501, 5))) @compressions_paremetrized diff --git a/hub/api/tests/test_api_with_compression.py b/hub/api/tests/test_api_with_compression.py index 63ffcab749..484b9d0899 100644 --- a/hub/api/tests/test_api_with_compression.py +++ b/hub/api/tests/test_api_with_compression.py @@ -33,9 +33,8 @@ def _populate_compressed_samples(tensor: Tensor, cat_path, flower_path, count=1) ) -@enabled_datasets -def test_populate_compressed_samples(ds: Dataset, cat_path, flower_path): - images = ds.create_tensor( +def test_populate_compressed_samples(local_ds, cat_path, flower_path): + images = local_ds.create_tensor( TENSOR_KEY, htype="image", sample_compression="png", max_chunk_size=2 * MB ) @@ -65,9 +64,8 @@ def test_populate_compressed_samples(ds: Dataset, cat_path, flower_path): assert 
images.shape_interval.upper == (6, 900, 900, 4) -@enabled_datasets -def test_iterate_compressed_samples(ds: Dataset, cat_path, flower_path): - images = ds.create_tensor(TENSOR_KEY, htype="image", sample_compression="png") +def test_iterate_compressed_samples(local_ds, cat_path, flower_path): + images = local_ds.create_tensor(TENSOR_KEY, htype="image", sample_compression="png") assert images.meta.dtype == "uint8" assert images.meta.sample_compression == "png" @@ -94,13 +92,12 @@ def test_iterate_compressed_samples(ds: Dataset, cat_path, flower_path): assert x.dtype == "uint8" -@enabled_datasets -def test_uncompressed(ds: Dataset): - images = ds.create_tensor(TENSOR_KEY, sample_compression=None) +def test_uncompressed(local_ds): + images = local_ds.create_tensor(TENSOR_KEY, sample_compression=None) images.append(np.ones((100, 100, 100))) images.extend(np.ones((3, 101, 2, 1))) - ds.clear_cache() + local_ds.clear_cache() np.testing.assert_array_equal(images[0].numpy(), np.ones((100, 100, 100))) np.testing.assert_array_equal(images[1:4].numpy(), np.ones((3, 101, 2, 1))) @@ -223,9 +220,8 @@ def test_chunkwise_compression(memory_ds, cat_path, flower_path): np.testing.assert_array_equal(data[i], ds.labels[20 + i].numpy()) -@enabled_datasets @pytest.mark.parametrize("compression", hub.compression.AUDIO_COMPRESSIONS) -def test_audio(ds: Dataset, compression, audio_paths): +def test_audio(local_ds, compression, audio_paths): path = audio_paths[compression] if path.endswith(".mp3"): audio = mp3_read_file_f32(path) @@ -236,9 +232,9 @@ def test_audio(ds: Dataset, compression, audio_paths): arr = np.frombuffer(audio.samples, dtype=np.float32).reshape( audio.num_frames, audio.nchannels ) - ds.create_tensor("audio", htype="audio", sample_compression=compression) - with ds: + local_ds.create_tensor("audio", htype="audio", sample_compression=compression) + with local_ds: for _ in range(10): - ds.audio.append(hub.read(path)) # type: ignore + local_ds.audio.append(hub.read(path)) # type: ignore for i in range(10): - np.testing.assert_array_equal(ds.audio[i].numpy(), arr) # type: ignore + np.testing.assert_array_equal(local_ds.audio[i].numpy(), arr) # type: ignore diff --git a/hub/api/tests/test_json.py b/hub/api/tests/test_json.py index d41d7af350..385e4ab9ef 100644 --- a/hub/api/tests/test_json.py +++ b/hub/api/tests/test_json.py @@ -181,8 +181,7 @@ def upload(stuff, ds): @enabled_non_gcs_datasets -@pytest.mark.parametrize("compression", ["lz4", None]) -def test_list_transform(ds, compression, scheduler="threaded"): +def test_list_transform(ds, scheduler="threaded"): ds.create_tensor("list", htype="list") items = [ diff --git a/hub/api/tests/test_video.py b/hub/api/tests/test_video.py index b817c88ef3..c986222b3e 100644 --- a/hub/api/tests/test_video.py +++ b/hub/api/tests/test_video.py @@ -12,11 +12,10 @@ _USE_CFFI = True -@enabled_datasets @pytest.mark.parametrize("compression", hub.compression.VIDEO_COMPRESSIONS) -def test_video(ds: Dataset, compression, video_paths): +def test_video(local_ds, compression, video_paths): for i, path in enumerate(video_paths[compression]): - tensor = ds.create_tensor( + tensor = local_ds.create_tensor( f"video_{i}", htype="video", sample_compression=compression ) sample = hub.read(path) @@ -32,7 +31,7 @@ def test_video(ds: Dataset, compression, video_paths): elif compression == "avi": assert sample.shape == (900, 270, 480, 3) assert sample.shape[-1] == 3 - with ds: + with local_ds: for _ in range(5): tensor.append(hub.read(path)) # type: ignore tensor.extend([hub.read(path) 
for _ in range(5)]) # type: ignore diff --git a/hub/core/chunk/test_chunk_compressed.py b/hub/core/chunk/test_chunk_compressed.py index 3a5d433c93..265da1e1d0 100644 --- a/hub/core/chunk/test_chunk_compressed.py +++ b/hub/core/chunk/test_chunk_compressed.py @@ -12,8 +12,8 @@ compressions_paremetrized = pytest.mark.parametrize("compression", ["lz4", "png"]) common_args = { - "min_chunk_size": 16 * MB, - "max_chunk_size": 32 * MB, + "min_chunk_size": 1 * MB, + "max_chunk_size": 2 * MB, } @@ -34,7 +34,7 @@ def test_read_write_sequence(compression): common_args["compression"] = compression dtype = tensor_meta.dtype data_in = [ - np.random.randint(0, 255, size=(1000, 500)).astype(dtype) for _ in range(10) + np.random.randint(0, 255, size=(250, 125)).astype(dtype) for _ in range(10) ] data_in2 = data_in.copy() while data_in: @@ -58,15 +58,15 @@ def test_read_write_sequence_big(cat_path, compression, random): for i in range(50): if i % 10 == 0: data_in.append( - np.random.randint(0, 255, size=(6001, 3000, 3)).astype(dtype) * random + np.random.randint(0, 255, size=(1501, 750, 3)).astype(dtype) * random ) elif i % 3 == 0: data_in.append( - hub.read(cat_path) if random else np.zeros((900, 900, 3), dtype=dtype) + hub.read(cat_path) if random else np.zeros((225, 225, 3), dtype=dtype) ) else: data_in.append( - np.random.randint(0, 255, size=(1000, 500, 3)).astype(dtype) * random + np.random.randint(0, 255, size=(250, 125, 3)).astype(dtype) * random ) data_in2 = data_in.copy() tiles = [] @@ -111,7 +111,7 @@ def test_update(compression): common_args["tensor_meta"] = tensor_meta common_args["compression"] = compression dtype = tensor_meta.dtype - arr = np.random.randint(0, 255, size=(7, 300, 200, 3)).astype(dtype) + arr = np.random.randint(0, 255, size=(7, 75, 50, 3)).astype(dtype) data_in = list(arr) chunk = ChunkCompressedChunk(**common_args) chunk.extend_if_has_space(data_in) @@ -119,8 +119,8 @@ def test_update(compression): data_out = np.array([chunk.read_sample(i) for i in range(7)]) np.testing.assert_array_equal(data_out, data_in) - data_3 = np.random.randint(0, 255, size=(1400, 700, 3)).astype(dtype) - data_5 = np.random.randint(0, 255, size=(2000, 3000, 3)).astype(dtype) + data_3 = np.random.randint(0, 255, size=(175, 350, 3)).astype(dtype) + data_5 = np.random.randint(0, 255, size=(500, 750, 3)).astype(dtype) chunk.update_sample(3, data_3) chunk.update_sample(5, data_5) diff --git a/hub/core/chunk/test_sample_compressed.py b/hub/core/chunk/test_sample_compressed.py index c8efbcdfa8..5512a71e49 100644 --- a/hub/core/chunk/test_sample_compressed.py +++ b/hub/core/chunk/test_sample_compressed.py @@ -13,8 +13,8 @@ common_args = { - "min_chunk_size": 16 * MB, - "max_chunk_size": 32 * MB, + "min_chunk_size": 1 * MB, + "max_chunk_size": 2 * MB, } @@ -34,7 +34,7 @@ def test_read_write_sequence(compression): common_args["tensor_meta"] = tensor_meta common_args["compression"] = compression dtype = tensor_meta.dtype - data_in = [np.random.rand(1000, 500, 3).astype(dtype) for _ in range(10)] + data_in = [np.random.rand(250, 125, 3).astype(dtype) for _ in range(10)] data_in2 = data_in.copy() while data_in: chunk = SampleCompressedChunk(**common_args) @@ -48,8 +48,13 @@ def test_read_write_sequence(compression): @compressions_paremetrized def test_read_write_sequence_big(cat_path, compression): tensor_meta = create_tensor_meta() - common_args["tensor_meta"] = tensor_meta - common_args["compression"] = compression + common_args = { + "min_chunk_size": 16 * MB, + "max_chunk_size": 32 * MB, + "tensor_meta": 
tensor_meta, + "compression": compression, + } + dtype = tensor_meta.dtype data_in = [] for i in range(50): @@ -93,28 +98,28 @@ def test_read_write_sequence_big(cat_path, compression): data_in = data_in[num_samples:] -@compressions_paremetrized -def test_update(compression): - tensor_meta = create_tensor_meta() - common_args["tensor_meta"] = tensor_meta - common_args["compression"] = compression - dtype = tensor_meta.dtype - arr = np.random.rand(7, 100, 500, 3).astype(dtype) - data_in = list(arr) - chunk = SampleCompressedChunk(**common_args) - chunk.extend_if_has_space(data_in) - data_out = np.array([chunk.read_sample(i) for i in range(7)]) - np.testing.assert_array_equal(data_out, data_in) - - data_3 = np.random.rand(1400, 700, 3).astype(dtype) - data_5 = np.random.rand(6000, 3000, 3).astype(dtype) - - chunk.update_sample(3, data_3) - chunk.update_sample(5, data_5) - for i in range(7): - if i == 3: - np.testing.assert_array_equal(chunk.read_sample(i), data_3) - elif i == 5: - np.testing.assert_array_equal(chunk.read_sample(i), data_5) - else: - np.testing.assert_array_equal(chunk.read_sample(i), arr[i]) +# @compressions_paremetrized +# def test_update(compression): +# tensor_meta = create_tensor_meta() +# common_args["tensor_meta"] = tensor_meta +# common_args["compression"] = compression +# dtype = tensor_meta.dtype +# arr = np.random.rand(7, 25, 125, 3).astype(dtype) +# data_in = list(arr) +# chunk = SampleCompressedChunk(**common_args) +# chunk.extend_if_has_space(data_in) +# data_out = np.array([chunk.read_sample(i) for i in range(7)]) +# np.testing.assert_array_equal(data_out, data_in) + +# data_3 = np.random.rand(175, 350, 3).astype(dtype) +# data_5 = np.random.rand(1500, 750, 3).astype(dtype) + +# chunk.update_sample(3, data_3) +# chunk.update_sample(5, data_5) +# for i in range(7): +# if i == 3: +# np.testing.assert_array_equal(chunk.read_sample(i), data_3) +# elif i == 5: +# np.testing.assert_array_equal(chunk.read_sample(i), data_5) +# else: +# np.testing.assert_array_equal(chunk.read_sample(i), arr[i]) diff --git a/hub/core/chunk/test_uncompressed.py b/hub/core/chunk/test_uncompressed.py index 742b3a1c00..6532d9a7fb 100644 --- a/hub/core/chunk/test_uncompressed.py +++ b/hub/core/chunk/test_uncompressed.py @@ -11,8 +11,8 @@ common_args = { - "min_chunk_size": 16 * MB, - "max_chunk_size": 32 * MB, + "min_chunk_size": 1 * MB, + "max_chunk_size": 2 * MB, "compression": None, } @@ -31,7 +31,7 @@ def test_read_write_sequence(): tensor_meta = create_tensor_meta() common_args["tensor_meta"] = tensor_meta dtype = tensor_meta.dtype - data_in = [np.random.rand(500, 500).astype(dtype) for _ in range(10)] + data_in = [np.random.rand(125, 125).astype(dtype) for _ in range(10)] while data_in: chunk = UncompressedChunk(**common_args) num_samples = int(chunk.extend_if_has_space(data_in)) @@ -47,11 +47,11 @@ def test_read_write_sequence_big(cat_path): data_in = [] for i in range(50): if i % 10 == 0: - data_in.append(np.random.rand(3001, 3000, 3).astype(dtype)) + data_in.append(np.random.rand(751, 750, 3).astype(dtype)) elif i % 3 == 0: data_in.append(hub.read(cat_path)) else: - data_in.append(np.random.rand(500, 500, 3).astype(dtype)) + data_in.append(np.random.rand(125, 125, 3).astype(dtype)) data_in2 = data_in.copy() tiles = [] original_length = len(data_in) @@ -89,7 +89,7 @@ def test_read_write_numpy(): tensor_meta = create_tensor_meta() common_args["tensor_meta"] = tensor_meta dtype = tensor_meta.dtype - data_in = np.random.rand(10, 500, 500).astype(dtype) + data_in = np.random.rand(10, 
125, 125).astype(dtype) while len(data_in) > 0: chunk = UncompressedChunk(**common_args) num_samples = int(chunk.extend_if_has_space(data_in)) @@ -103,7 +103,7 @@ def test_read_write_numpy_big(): tensor_meta = create_tensor_meta() common_args["tensor_meta"] = tensor_meta dtype = tensor_meta.dtype - data_in = np.random.rand(2, 3000, 3000, 3).astype(dtype) + data_in = np.random.rand(2, 750, 750, 3).astype(dtype) prev_num_samples = None with pytest.raises(ValueError): while len(data_in) > 0: @@ -124,15 +124,15 @@ def test_update(): tensor_meta = create_tensor_meta() common_args["tensor_meta"] = tensor_meta dtype = tensor_meta.dtype - data_in = np.random.rand(7, 500, 500).astype(dtype) + data_in = np.random.rand(7, 125, 125).astype(dtype) chunk = UncompressedChunk(**common_args) chunk.extend_if_has_space(data_in) data_out = np.array([chunk.read_sample(i) for i in range(7)]) np.testing.assert_array_equal(data_out, data_in) - data_3 = np.random.rand(700, 700).astype(dtype) - data_5 = np.random.rand(3000, 3000).astype(dtype) + data_3 = np.random.rand(175, 175).astype(dtype) + data_5 = np.random.rand(375, 375).astype(dtype) chunk.update_sample(3, data_3) chunk.update_sample(5, data_5) diff --git a/hub/core/tiling/test_optimizer.py b/hub/core/tiling/test_optimizer.py index 04204acedb..9d0404e775 100644 --- a/hub/core/tiling/test_optimizer.py +++ b/hub/core/tiling/test_optimizer.py @@ -44,7 +44,7 @@ def test_tile_shape_compressed(compression, compressed_image_paths): @pytest.mark.parametrize("compression", ["jpeg", "png"]) def test_tile_shape_large_compressed(compression): - arr = np.random.randint(0, 256, (10000, 10000, 3), dtype=np.uint8) + arr = np.random.randint(0, 256, (1000, 1000, 3), dtype=np.uint8) bio = io.BytesIO() Image.fromarray(arr).save(bio, compression) bio.seek(0) diff --git a/hub/core/transform/test_transform.py b/hub/core/transform/test_transform.py index bcdb168f22..98a3d6bf98 100644 --- a/hub/core/transform/test_transform.py +++ b/hub/core/transform/test_transform.py @@ -152,8 +152,7 @@ def test_single_transform_hub_dataset(ds, scheduler): data_in.delete() -@enabled_datasets -def test_groups(ds): +def test_groups(local_ds): with CliRunner().isolated_filesystem(): with hub.dataset("./test/transform_hub_in_generic") as data_in: data_in.create_tensor("data/image") @@ -162,7 +161,7 @@ def test_groups(ds): data_in.data.image.append(i * np.ones((i, i))) data_in.data.label.append(i * np.ones((1,))) data_in = hub.dataset("./test/transform_hub_in_generic") - ds_out = ds + ds_out = local_ds ds_out.create_tensor("stuff/image") ds_out.create_tensor("stuff/label") @@ -185,8 +184,7 @@ def test_groups(ds): assert ds_out.image.shape_interval.upper == (99, 99, 99) -@enabled_datasets -def test_groups_2(ds): +def test_groups_2(local_ds): with CliRunner().isolated_filesystem(): with hub.dataset("./test/transform_hub_in_generic") as data_in: data_in.create_tensor("data/z/y/x/image") @@ -195,7 +193,7 @@ def test_groups_2(ds): data_in.data.z.y.x.image.append(i * np.ones((i, i))) data_in.data.z.y.x.label.append(i * np.ones((1,))) data_in = hub.dataset("./test/transform_hub_in_generic") - ds_out = ds + ds_out = local_ds ds_out.create_tensor("stuff/x/y/z/image") ds_out.create_tensor("stuff/x/y/z/label") @@ -219,10 +217,9 @@ def test_groups_2(ds): assert ds_out.x.y.z.image.shape_interval.upper == (99, 99, 99) -@enabled_non_gcs_datasets @parametrize_num_workers @all_schedulers -def test_single_transform_hub_dataset_htypes(ds, num_workers, scheduler): +def test_single_transform_hub_dataset_htypes(local_ds, 
num_workers, scheduler): data_in = hub.dataset("./test/single_transform_hub_dataset_htypes", overwrite=True) with data_in: data_in.create_tensor("image", htype="image", sample_compression="png") @@ -230,26 +227,9 @@ def test_single_transform_hub_dataset_htypes(ds, num_workers, scheduler): for i in range(1, 100): data_in.image.append(i * np.ones((i, i), dtype="uint8")) data_in.label.append(i * np.ones((1,), dtype="uint32")) - ds_out = ds + ds_out = local_ds ds_out.create_tensor("image") ds_out.create_tensor("label") - if ( - isinstance(remove_memory_cache(ds.storage), MemoryProvider) - and scheduler != "threaded" - and num_workers > 0 - ): - # any scheduler other than `threaded` will not work with a dataset stored in memory - # num_workers = 0 automatically does single threaded irrespective of the scheduler - with pytest.raises(InvalidOutputDatasetError): - fn2(copy=1, mul=2).eval( - data_in, - ds_out, - num_workers=num_workers, - progressbar=False, - scheduler=scheduler, - ) - data_in.delete() - return fn2(copy=1, mul=2).eval( data_in, ds_out, num_workers=num_workers, progressbar=False, scheduler=scheduler ) @@ -268,27 +248,12 @@ def test_single_transform_hub_dataset_htypes(ds, num_workers, scheduler): @all_schedulers -@enabled_non_gcs_datasets -def test_chain_transform_list_small(ds, scheduler): - ls = [i for i in range(100)] - ds_out = ds +def test_chain_transform_list_small(local_ds, scheduler): + ls = list(range(100)) + ds_out = local_ds ds_out.create_tensor("image") ds_out.create_tensor("label") pipeline = hub.compose([fn1(mul=5, copy=2), fn2(mul=3, copy=3)]) - if ( - isinstance(remove_memory_cache(ds.storage), MemoryProvider) - and scheduler != "threaded" - ): - # any scheduler other than `threaded` will not work with a dataset stored in memory - with pytest.raises(InvalidOutputDatasetError): - pipeline.eval( - ls, - ds_out, - num_workers=TRANSFORM_TEST_NUM_WORKERS, - progressbar=False, - scheduler=scheduler, - ) - return pipeline.eval( ls, ds_out, @@ -395,27 +360,11 @@ def test_add_to_non_empty_dataset(local_ds, scheduler, do_commit): @all_schedulers @all_compressions -@enabled_non_gcs_datasets -def test_transform_hub_read(ds, cat_path, sample_compression, scheduler): +def test_transform_hub_read(local_ds, cat_path, sample_compression, scheduler): data_in = [cat_path] * 10 - ds_out = ds + ds_out = local_ds ds_out.create_tensor("image", htype="image", sample_compression=sample_compression) - if ( - isinstance(remove_memory_cache(ds.storage), MemoryProvider) - and scheduler != "threaded" - ): - # any scheduler other than `threaded` will not work with a dataset stored in memory - with pytest.raises(InvalidOutputDatasetError): - read_image().eval( - data_in, - ds_out, - num_workers=TRANSFORM_TEST_NUM_WORKERS, - progressbar=False, - scheduler=scheduler, - ) - return - read_image().eval( data_in, ds_out, @@ -431,26 +380,11 @@ def test_transform_hub_read(ds, cat_path, sample_compression, scheduler): @all_schedulers @all_compressions -@enabled_non_gcs_datasets -def test_transform_hub_read_pipeline(ds, cat_path, sample_compression, scheduler): +def test_transform_hub_read_pipeline(local_ds, cat_path, sample_compression, scheduler): data_in = [cat_path] * 10 - ds_out = ds + ds_out = local_ds ds_out.create_tensor("image", htype="image", sample_compression=sample_compression) pipeline = hub.compose([read_image(), crop_image(copy=2)]) - if ( - isinstance(remove_memory_cache(ds.storage), MemoryProvider) - and scheduler != "threaded" - ): - # any scheduler other than `threaded` will not work with 
a dataset stored in memory - with pytest.raises(InvalidOutputDatasetError): - pipeline.eval( - data_in, - ds_out, - num_workers=TRANSFORM_TEST_NUM_WORKERS, - progressbar=False, - scheduler=scheduler, - ) - return pipeline.eval( data_in, ds_out, @@ -464,10 +398,9 @@ def test_transform_hub_read_pipeline(ds, cat_path, sample_compression, scheduler np.testing.assert_array_equal(ds_out.image[i].numpy(), ds_out.image[0].numpy()) -@enabled_non_gcs_datasets -def test_hub_like(ds, scheduler="threaded"): +def test_hub_like(local_ds, scheduler="threaded"): with CliRunner().isolated_filesystem(): - data_in = ds + data_in = local_ds with data_in: data_in.create_tensor("image", htype="image", sample_compression="png") data_in.create_tensor("label", htype="class_label") @@ -475,20 +408,6 @@ def test_hub_like(ds, scheduler="threaded"): data_in.image.append(i * np.ones((i, i), dtype="uint8")) data_in.label.append(i * np.ones((1,), dtype="uint32")) ds_out = hub.like("./transform_hub_like", data_in) - if ( - isinstance(remove_memory_cache(ds.storage), MemoryProvider) - and scheduler != "threaded" - ): - # any scheduler other than `threaded` will not work with a dataset stored in memory - with pytest.raises(InvalidOutputDatasetError): - fn2(copy=1, mul=2).eval( - data_in, - ds_out, - num_workers=TRANSFORM_TEST_NUM_WORKERS, - progressbar=False, - scheduler=scheduler, - ) - return fn2(copy=1, mul=2).eval( data_in, ds_out,
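Note on the recurring pattern in this patch: most tests shrink their sample arrays and pass max_chunk_size=2 * MB to create_tensor, so the multi-chunk and tiling code paths are still exercised while far less data is written. A minimal sketch of that pattern, using only calls that already appear in the diff; the "mem://example" path and the tensor name are illustrative, not part of the patch:

import numpy as np
import hub
from hub.constants import MB

# Illustrative sketch (assumptions: hub 2.x API as used in these tests).
ds = hub.dataset("mem://example")  # hypothetical in-memory dataset path
ds.create_tensor("image", max_chunk_size=2 * MB)  # cap chunk size instead of using huge arrays

# ~15 MB of float64 data, so it still spans several 2 MB chunks.
data = np.ones((3, 253, 501, 5))
ds.image.extend(data)
np.testing.assert_array_equal(ds.image.numpy(), data)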