Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Video bug fix #1349

Merged
merged 14 commits into from
Dec 29, 2021
30 changes: 15 additions & 15 deletions hub/api/tests/test_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@
@enabled_datasets
@pytest.mark.parametrize("compression", hub.compression.VIDEO_COMPRESSIONS)
def test_video(ds: Dataset, compression, video_paths):
path = video_paths[compression]
ds.create_tensor("video", htype="video", sample_compression=compression)
sample = hub.read(path)
assert len(sample.shape) == 4
if compression in ("mp4", "mkv"):
assert sample.shape == (400, 360, 640, 3)
elif compression == "avi":
assert sample.shape == (900, 270, 480, 3)
assert sample.shape[-1] == 3
with ds:
for _ in range(5):
ds.video.append(hub.read(path)) # type: ignore
ds.video.extend([hub.read(path) for _ in range(5)]) # type: ignore
for i in range(10):
assert ds.video[i].numpy().shape == sample.shape # type: ignore
for path in video_paths[compression]:
ds.create_tensor("video", htype="video", sample_compression=compression)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wouldn't this create this tensor multiple times?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

sample = hub.read(path)
assert len(sample.shape) == 4
if compression in ("mp4", "mkv"):
assert sample.shape == (400, 360, 640, 3)
elif compression == "avi":
assert sample.shape == (900, 270, 480, 3)
assert sample.shape[-1] == 3
with ds:
for _ in range(5):
ds.video.append(hub.read(path)) # type: ignore
ds.video.extend([hub.read(path) for _ in range(5)]) # type: ignore
for i in range(10):
assert ds.video[i].numpy().shape == sample.shape # type: ignore
13 changes: 10 additions & 3 deletions hub/core/compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -796,9 +796,16 @@ def _decompress_video(
command, stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.PIPE, bufsize=10 ** 8
)
raw_video = pipe.communicate(input=file)[0] # type: ignore
return np.frombuffer(raw_video[: int(np.prod(shape))], dtype=np.uint8).reshape(
shape
)
nbytes = len(raw_video)
size = np.prod(shape)
if nbytes >= size: # size is computed from fps and duration, might not be accurate.
return np.frombuffer(memoryview(raw_video)[:size], dtype=np.uint8).reshape(
shape
)
else: # If size was overestimated, append blank frames to the end.
arr = np.zeros(shape, dtype=np.uint8)
AbhinavTuli marked this conversation as resolved.
Show resolved Hide resolved
arr.reshape(-1)[: len(raw_video)] = np.frombuffer(raw_video, dtype=np.uint8)
return arr


def _read_video_shape(file: Union[bytes, memoryview, str]) -> Tuple[int, ...]:
Expand Down
16 changes: 8 additions & 8 deletions hub/core/tests/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,14 +147,14 @@ def test_audio(compression, audio_paths):

@pytest.mark.parametrize("compression", VIDEO_COMPRESSIONS)
def test_video(compression, video_paths):
path = video_paths[compression]
sample = hub.read(path)
arr = np.array(sample)
assert arr.shape[-1] == 3
assert arr.dtype == "uint8"
if compression not in ("mp4", "mkv"):
with open(path, "rb") as f:
assert sample.compressed_bytes(compression) == f.read()
for path in video_paths[compression]:
sample = hub.read(path)
arr = np.array(sample)
assert arr.shape[-1] == 3
assert arr.dtype == "uint8"
if compression not in ("mp4", "mkv"):
with open(path, "rb") as f:
assert sample.compressed_bytes(compression) == f.read()


def test_apng(memory_ds):
Expand Down
120 changes: 72 additions & 48 deletions hub/tests/path_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,6 @@
is_opt_true,
)
import pytest
import requests
import shutil
import tempfile
import sys


Expand All @@ -37,21 +34,49 @@
GCS = "gcs"
HUB_CLOUD = "hub_cloud"

# Directory (relative to the current working directory) into which test
# resource repositories are cloned; an existing clone there is reused
# instead of being cloned again.
_GIT_CLONE_CACHE_DIR = ".test_resources"

def _download_hub_test_images(tempdir):
cwd = os.getcwd()
os.chdir(tempdir)
try:
os.system(
"git clone https://www.github.com/activeloopai/hub-test-resources.git"
)
d = "hub-test-resources/images/jpeg"
return [os.path.join(tempdir, d, f) for f in os.listdir(d)]
finally:
os.chdir(cwd)
# Git repositories cloned on demand to supply sample files for the tests:
# hub-test-resources provides images/jpeg and videos/mp4, Pillow provides
# its Tests/images directories.
_HUB_TEST_RESOURCES_URL = "https://www.github.com/activeloopai/hub-test-resources.git"
_PILLOW_URL = "https://www.github.com/python-pillow/Pillow.git"


def _download_pil_test_images(tempdir, ext=[".jpg", ".png"]):
def _repo_name_from_git_url(url):
repo_name = posixpath.split(url)[-1]
repo_name = repo_name.split("@", 1)[0]
if repo_name.endswith(".git"):
repo_name = repo_name[:-4]
return repo_name


def _git_clone(url):
    """Clone ``url`` into the local cache directory and return its path.

    The clone lives at ``_GIT_CLONE_CACHE_DIR/<repo name>``. If that
    directory already exists it is reused and nothing is cloned.

    Args:
        url: Git URL of the repository to clone.

    Returns:
        Path of the cloned repository (``_GIT_CLONE_CACHE_DIR`` joined
        with the repository name by ``/``).

    Raises:
        AssertionError: If the repository directory does not exist after
            the clone attempt.
    """
    import subprocess  # function-scope import keeps this block self-contained

    _repo_name = _repo_name_from_git_url(url)
    cached_dir = _GIT_CLONE_CACHE_DIR + "/" + _repo_name
    failure = ""
    if not os.path.isdir(cached_dir):
        # exist_ok avoids a crash if the cache directory appears between
        # an existence check and its creation.
        os.makedirs(_GIT_CLONE_CACHE_DIR, exist_ok=True)
        try:
            # An argument list (no shell) keeps the URL from being
            # shell-interpreted, and cwd= leaves the process-wide working
            # directory untouched.
            completed = subprocess.run(
                ["git", "clone", url], cwd=_GIT_CLONE_CACHE_DIR
            )
            if completed.returncode != 0:
                failure = f" (git exited with status {completed.returncode})"
        except OSError as err:
            # The git executable is missing or could not be started; the
            # assertion below reports it.
            failure = f" (could not run git: {err})"
    assert os.path.isdir(cached_dir), (
        f"git clone of {url} did not create {cached_dir}" + failure
    )
    return cached_dir


def _download_hub_test_images():
    """Return the paths of every entry in ``images/jpeg`` of the test repo.

    The hub-test-resources repository is obtained through ``_git_clone``;
    entries are returned in the order ``os.listdir`` reports them.
    """
    repo_root = _git_clone(_HUB_TEST_RESOURCES_URL)
    jpeg_dir = repo_root + "/images/jpeg"
    image_paths = []
    for entry in os.listdir(jpeg_dir):
        image_paths.append(os.path.join(jpeg_dir, entry))
    return image_paths


def _download_hub_test_videos():
    """Return the paths of every entry in ``videos/mp4`` of the test repo.

    The hub-test-resources repository is obtained through ``_git_clone``;
    entries are returned in the order ``os.listdir`` reports them.
    """
    repo_root = _git_clone(_HUB_TEST_RESOURCES_URL)
    mp4_dir = repo_root + "/videos/mp4"
    video_files = []
    for entry in os.listdir(mp4_dir):
        video_files.append(os.path.join(mp4_dir, entry))
    return video_files


def _download_pil_test_images(ext=[".jpg", ".png"]):
paths = {e: [] for e in ext}
corrupt_file_keys = [
"broken",
Expand All @@ -60,31 +85,30 @@ def _download_pil_test_images(tempdir, ext=[".jpg", ".png"]):
"chunk_no_fctl",
"syntax_num_frames_zero",
]
cwd = os.getcwd()
os.chdir(tempdir)
try:
os.system("git clone https://www.github.com/python-pillow/Pillow.git")
dirs = [
"Pillow/Tests/images",
"Pillow/Tests/images/apng",
"Pillow/Tests/images/imagedraw",

path = _git_clone(_PILLOW_URL)
dirs = [
path + x
for x in [
"/Tests/images",
"/Tests/images/apng",
"/Tests/images/imagedraw",
]
for d in dirs:
for f in os.listdir(d):
brk = False
for k in corrupt_file_keys:
if k in f:
brk = True
break
if brk:
continue
for e in ext:
if f.lower().endswith(e):
paths[e].append(os.path.join(tempdir, d, f))
break
return paths
finally:
os.chdir(cwd)
]
for d in dirs:
for f in os.listdir(d):
brk = False
for k in corrupt_file_keys:
if k in f:
brk = True
break
if brk:
continue
for e in ext:
if f.lower().endswith(e):
paths[e].append(os.path.join(d, f))
break
return paths


def _get_path_composition_configs(request):
Expand Down Expand Up @@ -289,17 +313,12 @@ def compressed_image_paths():

# Since we implement our own meta data reading for jpegs and pngs,
# we test against images from PIL repo to cover all edge cases.
tmpdir = tempfile.mkdtemp()
pil_image_paths = _download_pil_test_images(tmpdir)
pil_image_paths = _download_pil_test_images()
paths["jpeg"] += pil_image_paths[".jpg"]
paths["png"] += pil_image_paths[".png"]
hub_test_images = _download_hub_test_images(tmpdir)
hub_test_images = _download_hub_test_images()
paths["jpeg"] += hub_test_images
yield paths
try:
shutil.rmtree(tmpdir)
except PermissionError:
pass


@pytest.fixture
Expand Down Expand Up @@ -329,10 +348,15 @@ def audio_paths():

@pytest.fixture
def video_paths():
paths = {"mp4": "samplemp4.mp4", "mkv": "samplemkv.mkv", "avi": "sampleavi.avi"}
paths = {
"mp4": ["samplemp4.mp4"],
"mkv": ["samplemkv.mkv"],
"avi": ["sampleavi.avi"],
}

parent = get_dummy_data_path("video")
for k in paths:
paths[k] = os.path.join(parent, paths[k])
paths[k] = [os.path.join(parent, fname) for fname in paths[k]]
paths["mp4"] += _download_hub_test_videos()

return paths