activeloopai · farizrahman4u · Dec 29, 2021 · Nov 22, 2021 · Nov 22, 2021 · Nov 23, 2021
diff --git a/hub/api/tests/test_video.py b/hub/api/tests/test_video.py
@@ -8,18 +8,18 @@
 @enabled_datasets
 @pytest.mark.parametrize("compression", hub.compression.VIDEO_COMPRESSIONS)
 def test_video(ds: Dataset, compression, video_paths):
-    path = video_paths[compression]
-    ds.create_tensor("video", htype="video", sample_compression=compression)
-    sample = hub.read(path)
-    assert len(sample.shape) == 4
-    if compression in ("mp4", "mkv"):
-        assert sample.shape == (400, 360, 640, 3)
-    elif compression == "avi":
-        assert sample.shape == (900, 270, 480, 3)
-    assert sample.shape[-1] == 3
-    with ds:
-        for _ in range(5):
-            ds.video.append(hub.read(path))  # type: ignore
-        ds.video.extend([hub.read(path) for _ in range(5)])  # type: ignore
-    for i in range(10):
-        assert ds.video[i].numpy().shape == sample.shape  # type: ignore
+    for path in video_paths[compression]:
+        ds.create_tensor("video", htype="video", sample_compression=compression)
+        sample = hub.read(path)
+        assert len(sample.shape) == 4
+        if compression in ("mp4", "mkv"):
+            assert sample.shape == (400, 360, 640, 3)
+        elif compression == "avi":
+            assert sample.shape == (900, 270, 480, 3)
+        assert sample.shape[-1] == 3
+        with ds:
+            for _ in range(5):
+                ds.video.append(hub.read(path))  # type: ignore
+            ds.video.extend([hub.read(path) for _ in range(5)])  # type: ignore
+        for i in range(10):
+            assert ds.video[i].numpy().shape == sample.shape  # type: ignore
diff --git a/hub/core/compression.py b/hub/core/compression.py
@@ -796,9 +796,16 @@ def _decompress_video(
             command, stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.PIPE, bufsize=10 ** 8
         )
         raw_video = pipe.communicate(input=file)[0]  # type: ignore
-    return np.frombuffer(raw_video[: int(np.prod(shape))], dtype=np.uint8).reshape(
-        shape
-    )
+    nbytes = len(raw_video)
+    size = np.prod(shape)
+    if nbytes >= size:  # size is computed from fps and duration, might not be accurate.
+        return np.frombuffer(memoryview(raw_video)[:size], dtype=np.uint8).reshape(
+            shape
+        )
+    else:  # If size was overestimated, append blank frames to the end.
+        arr = np.zeros(shape, dtype=np.uint8)
+        arr.reshape(-1)[: len(raw_video)] = np.frombuffer(raw_video, dtype=np.uint8)
+        return arr
 
 
 def _read_video_shape(file: Union[bytes, memoryview, str]) -> Tuple[int, ...]:

diff --git a/hub/core/tests/test_compression.py b/hub/core/tests/test_compression.py
@@ -147,14 +147,14 @@ def test_audio(compression, audio_paths):
 
 @pytest.mark.parametrize("compression", VIDEO_COMPRESSIONS)
 def test_video(compression, video_paths):
-    path = video_paths[compression]
-    sample = hub.read(path)
-    arr = np.array(sample)
-    assert arr.shape[-1] == 3
-    assert arr.dtype == "uint8"
-    if compression not in ("mp4", "mkv"):
-        with open(path, "rb") as f:
-            assert sample.compressed_bytes(compression) == f.read()
+    for path in video_paths[compression]:
+        sample = hub.read(path)
+        arr = np.array(sample)
+        assert arr.shape[-1] == 3
+        assert arr.dtype == "uint8"
+        if compression not in ("mp4", "mkv"):
+            with open(path, "rb") as f:
+                assert sample.compressed_bytes(compression) == f.read()
 
 
 def test_apng(memory_ds):

diff --git a/hub/tests/path_fixtures.py b/hub/tests/path_fixtures.py
@@ -25,9 +25,6 @@
     is_opt_true,
 )
 import pytest
-import requests
-import shutil
-import tempfile
 import sys
 
 
@@ -37,21 +34,49 @@
 GCS = "gcs"
 HUB_CLOUD = "hub_cloud"
 
+_GIT_CLONE_CACHE_DIR = ".test_resources"  # relative directory where _git_clone() keeps cloned repos
 
-def _download_hub_test_images(tempdir):
-    cwd = os.getcwd()
-    os.chdir(tempdir)
-    try:
-        os.system(
-            "git clone https://www.github.com/activeloopai/hub-test-resources.git"
-        )
-        d = "hub-test-resources/images/jpeg"
-        return [os.path.join(tempdir, d, f) for f in os.listdir(d)]
-    finally:
-        os.chdir(cwd)
+_HUB_TEST_RESOURCES_URL = "https://www.github.com/activeloopai/hub-test-resources.git"  # images/jpeg and videos/mp4 are read from this repo
+_PILLOW_URL = "https://www.github.com/python-pillow/Pillow.git"  # its Tests/images directories are read for jpg/png paths
 
 
-def _download_pil_test_images(tempdir, ext=[".jpg", ".png"]):
+def _repo_name_from_git_url(url):
+    """Return the repository name encoded in a git clone URL."""
+    tail = posixpath.basename(url)  # last "/"-separated component of the URL
+    name, _, _ = tail.partition("@")  # keep only the text before the first "@"
+    # Drop a trailing ".git" suffix when there is one.
+    return name[:-4] if name.endswith(".git") else name
+
+
+def _git_clone(url):
+    """Clone ``url`` into the cache directory once; return the checkout path.
+
+    An existing checkout is reused. Raises ``AssertionError`` when the checkout
+    is missing after the attempt, or ``OSError`` when git cannot be launched.
+    """
+    import subprocess  # function-scope import: only this helper needs it
+    cached_dir = _GIT_CLONE_CACHE_DIR + "/" + _repo_name_from_git_url(url)
+    if not os.path.isdir(cached_dir):
+        os.makedirs(_GIT_CLONE_CACHE_DIR, exist_ok=True)
+        # Argument list + explicit target dir: no shell parsing, no os.chdir().
+        subprocess.run(["git", "clone", url, cached_dir], check=False)
+    assert os.path.isdir(cached_dir), "git clone failed for " + url
+    return cached_dir
+
+
+def _download_hub_test_images():
+    """Return the paths of all entries in the clone's ``images/jpeg`` directory."""
+    jpeg_dir = _git_clone(_HUB_TEST_RESOURCES_URL) + "/images/jpeg"
+    return [os.path.join(jpeg_dir, entry) for entry in os.listdir(jpeg_dir)]
+
+
+def _download_hub_test_videos():
+    """Return the paths of all entries in the clone's ``videos/mp4`` directory."""
+    mp4_dir = _git_clone(_HUB_TEST_RESOURCES_URL) + "/videos/mp4"
+    return [os.path.join(mp4_dir, entry) for entry in os.listdir(mp4_dir)]
+
+
+def _download_pil_test_images(ext=[".jpg", ".png"]):
     paths = {e: [] for e in ext}
     corrupt_file_keys = [
         "broken",
@@ -60,31 +85,30 @@ def _download_pil_test_images(tempdir, ext=[".jpg", ".png"]):
         "chunk_no_fctl",
         "syntax_num_frames_zero",
     ]
-    cwd = os.getcwd()
-    os.chdir(tempdir)
-    try:
-        os.system("git clone https://www.github.com/python-pillow/Pillow.git")
-        dirs = [
-            "Pillow/Tests/images",
-            "Pillow/Tests/images/apng",
-            "Pillow/Tests/images/imagedraw",
+
+    path = _git_clone(_PILLOW_URL)
+    dirs = [
+        path + x
+        for x in [
+            "/Tests/images",
+            "/Tests/images/apng",
+            "/Tests/images/imagedraw",
         ]
-        for d in dirs:
-            for f in os.listdir(d):
-                brk = False
-                for k in corrupt_file_keys:
-                    if k in f:
-                        brk = True
-                        break
-                if brk:
-                    continue
-                for e in ext:
-                    if f.lower().endswith(e):
-                        paths[e].append(os.path.join(tempdir, d, f))
-                        break
-        return paths
-    finally:
-        os.chdir(cwd)
+    ]
+    for d in dirs:
+        for f in os.listdir(d):
+            brk = False
+            for k in corrupt_file_keys:
+                if k in f:
+                    brk = True
+                    break
+            if brk:
+                continue
+            for e in ext:
+                if f.lower().endswith(e):
+                    paths[e].append(os.path.join(d, f))
+                    break
+    return paths
 
 
 def _get_path_composition_configs(request):
@@ -289,17 +313,12 @@ def compressed_image_paths():
 
     # Since we implement our own meta data reading for jpegs and pngs,
     # we test against images from PIL repo to cover all edge cases.
-    tmpdir = tempfile.mkdtemp()
-    pil_image_paths = _download_pil_test_images(tmpdir)
+    pil_image_paths = _download_pil_test_images()
     paths["jpeg"] += pil_image_paths[".jpg"]
     paths["png"] += pil_image_paths[".png"]
-    hub_test_images = _download_hub_test_images(tmpdir)
+    hub_test_images = _download_hub_test_images()
     paths["jpeg"] += hub_test_images
     yield paths
-    try:
-        shutil.rmtree(tmpdir)
-    except PermissionError:
-        pass
 
 
 @pytest.fixture
@@ -329,10 +348,15 @@ def audio_paths():
 
 @pytest.fixture
 def video_paths():
-    paths = {"mp4": "samplemp4.mp4", "mkv": "samplemkv.mkv", "avi": "sampleavi.avi"}
+    paths = {
+        "mp4": ["samplemp4.mp4"],
+        "mkv": ["samplemkv.mkv"],
+        "avi": ["sampleavi.avi"],
+    }
 
     parent = get_dummy_data_path("video")
     for k in paths:
-        paths[k] = os.path.join(parent, paths[k])
+        paths[k] = [os.path.join(parent, fname) for fname in paths[k]]
+    paths["mp4"] += _download_hub_test_videos()
 
     return paths