From aa34aa0ec14dc31cece99563571a57f6483ca81c Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Tue, 28 Feb 2023 15:23:29 +0000
Subject: [PATCH 01/35] created class BlobLoader and moved all related
 functions to a separate file

---
 .../implicitron/dataset/json_index_dataset.py | 461 ++-------------
 pytorch3d/implicitron/dataset/load_blob.py    | 542 ++++++++++++++++++
 2 files changed, 576 insertions(+), 427 deletions(-)
 create mode 100644 pytorch3d/implicitron/dataset/load_blob.py

diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py
index 669f4e9b6..ac9daf02a 100644
--- a/pytorch3d/implicitron/dataset/json_index_dataset.py
+++ b/pytorch3d/implicitron/dataset/json_index_dataset.py
@@ -32,17 +32,16 @@

 import numpy as np
 import torch
-from PIL import Image
+from tqdm import tqdm
+
 from pytorch3d.implicitron.tools.config import registry, ReplaceableBase
-from pytorch3d.io import IO
+from pytorch3d.implicitron.dataset import types
+from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData
+from pytorch3d.implicitron.dataset.load_blob import BlobLoader
+from pytorch3d.implicitron.dataset.utils import is_known_frame_scalar
 from pytorch3d.renderer.camera_utils import join_cameras_as_batch
 from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras
 from pytorch3d.structures.pointclouds import Pointclouds
-from tqdm import tqdm
-
-from . import types
-from .dataset_base import DatasetBase, FrameData
-from .utils import is_known_frame_scalar

 logger = logging.getLogger(__name__)

@@ -53,6 +52,7 @@

 class FrameAnnotsEntry(TypedDict):
     subset: Optional[str]
+    # pyre-ignore
     frame_annotation: types.FrameAnnotation

 else:

@@ -60,6 +60,7 @@


 @registry.register
+# pyre-ignore
 class JsonIndexDataset(DatasetBase, ReplaceableBase):
     """
     A dataset with annotations in json files like the Common Objects in 3D
@@ -130,6 +131,7 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):

     frame_annotations_type: ClassVar[
         Type[types.FrameAnnotation]
+        # pyre-ignore
     ] = types.FrameAnnotation

     path_manager: Any = None
@@ -162,6 +164,7 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
     sort_frames: bool = False
     eval_batches: Any = None
     eval_batch_index: Any = None
+    loader: BlobLoader

     # frame_annots: List[FrameAnnotsEntry] = field(init=False)
     # seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False)
@@ -175,6 +178,23 @@ def __post_init__(self) -> None:
         self._load_subset_lists()
         self._filter_db()  # also computes sequence indices
         self._extract_and_set_eval_batches()
+
+        self.loader = BlobLoader(
+            self.dataset_root,
+            self.load_images,
+            self.load_depths,
+            self.load_depth_masks,
+            self.load_masks,
+            self.load_point_clouds,
+            self.max_points,
+            self.mask_images,
+            self.mask_depths,
+            self.image_height,
+            self.image_width,
+            self.box_crop,
+            self.box_crop_mask_thr,
+            self.box_crop_context,
+        )
         logger.info(str(self))

     def _extract_and_set_eval_batches(self):
@@ -207,12 +227,11 @@ def join(self, other_datasets: Iterable[DatasetBase]) -> None:
             # https://gist.github.com/treyhunner/f35292e676efa0be1728
             functools.reduce(
                 lambda a, b: {**a, **b},
-                [d.seq_annots for d in other_datasets],  # pyre-ignore[16]
+                [d.seq_annots for d in other_datasets],
             )
         )
         all_eval_batches = [
             self.eval_batches,
-            # pyre-ignore
             *[d.eval_batches for d in other_datasets],
         ]
         if not (
@@ -396,6 +415,7 @@ def __len__(self) -> int:

     def _get_frame_type(self, entry: FrameAnnotsEntry) -> Optional[str]:
         return entry["subset"]

+    #
pyre-ignore def get_all_train_cameras(self) -> CamerasBase: """ Returns the cameras corresponding to all the known frames. @@ -411,6 +431,7 @@ def get_all_train_cameras(self) -> CamerasBase: cameras.append(self[frame_idx].camera) return join_cameras_as_batch(cameras) + # pyre-ignore def __getitem__(self, index) -> FrameData: # pyre-ignore[16] if index >= len(self.frame_annots): @@ -438,238 +459,14 @@ def __getitem__(self, index) -> FrameData: # The rest of the fields are optional frame_data.frame_type = self._get_frame_type(self.frame_annots[index]) - ( - frame_data.fg_probability, - frame_data.mask_path, - frame_data.bbox_xywh, - clamp_bbox_xyxy, - frame_data.crop_bbox_xywh, - ) = self._load_crop_fg_probability(entry) - - scale = 1.0 - if self.load_images and entry.image is not None: - # original image size - frame_data.image_size_hw = _safe_as_tensor(entry.image.size, torch.long) - - ( - frame_data.image_rgb, - frame_data.image_path, - frame_data.mask_crop, - scale, - ) = self._load_crop_images( - entry, frame_data.fg_probability, clamp_bbox_xyxy - ) - - if self.load_depths and entry.depth is not None: - ( - frame_data.depth_map, - frame_data.depth_path, - frame_data.depth_mask, - ) = self._load_mask_depth(entry, clamp_bbox_xyxy, frame_data.fg_probability) - - if entry.viewpoint is not None: - frame_data.camera = self._get_pytorch3d_camera( - entry, - scale, - clamp_bbox_xyxy, - ) - - if self.load_point_clouds and point_cloud is not None: - pcl_path = self._fix_point_cloud_path(point_cloud.path) - frame_data.sequence_point_cloud = _load_pointcloud( - self._local_path(pcl_path), max_points=self.max_points - ) - frame_data.sequence_point_cloud_path = pcl_path - + frame_data = self.loader.load(frame_data, entry, point_cloud) return frame_data - def _fix_point_cloud_path(self, path: str) -> str: - """ - Fix up a point cloud path from the dataset. - Some files in Co3Dv2 have an accidental absolute path stored. - """ - unwanted_prefix = ( - "/large_experiments/p3/replay/datasets/co3d/co3d45k_220512/export_v23/" - ) - if path.startswith(unwanted_prefix): - path = path[len(unwanted_prefix) :] - return os.path.join(self.dataset_root, path) - - def _load_crop_fg_probability( - self, entry: types.FrameAnnotation - ) -> Tuple[ - Optional[torch.Tensor], - Optional[str], - Optional[torch.Tensor], - Optional[torch.Tensor], - Optional[torch.Tensor], - ]: - fg_probability = None - full_path = None - bbox_xywh = None - clamp_bbox_xyxy = None - crop_box_xywh = None - - if (self.load_masks or self.box_crop) and entry.mask is not None: - full_path = os.path.join(self.dataset_root, entry.mask.path) - mask = _load_mask(self._local_path(full_path)) - - if mask.shape[-2:] != entry.image.size: - raise ValueError( - f"bad mask size: {mask.shape[-2:]} vs {entry.image.size}!" 
- ) - - bbox_xywh = torch.tensor(_get_bbox_from_mask(mask, self.box_crop_mask_thr)) - - if self.box_crop: - clamp_bbox_xyxy = _clamp_box_to_image_bounds_and_round( - _get_clamp_bbox( - bbox_xywh, - image_path=entry.image.path, - box_crop_context=self.box_crop_context, - ), - image_size_hw=tuple(mask.shape[-2:]), - ) - crop_box_xywh = _bbox_xyxy_to_xywh(clamp_bbox_xyxy) - - mask = _crop_around_box(mask, clamp_bbox_xyxy, full_path) - - fg_probability, _, _ = self._resize_image(mask, mode="nearest") - - return fg_probability, full_path, bbox_xywh, clamp_bbox_xyxy, crop_box_xywh - - def _load_crop_images( - self, - entry: types.FrameAnnotation, - fg_probability: Optional[torch.Tensor], - clamp_bbox_xyxy: Optional[torch.Tensor], - ) -> Tuple[torch.Tensor, str, torch.Tensor, float]: - assert self.dataset_root is not None and entry.image is not None - path = os.path.join(self.dataset_root, entry.image.path) - image_rgb = _load_image(self._local_path(path)) - - if image_rgb.shape[-2:] != entry.image.size: - raise ValueError( - f"bad image size: {image_rgb.shape[-2:]} vs {entry.image.size}!" - ) - - if self.box_crop: - assert clamp_bbox_xyxy is not None - image_rgb = _crop_around_box(image_rgb, clamp_bbox_xyxy, path) - - image_rgb, scale, mask_crop = self._resize_image(image_rgb) - - if self.mask_images: - assert fg_probability is not None - image_rgb *= fg_probability - - return image_rgb, path, mask_crop, scale - - def _load_mask_depth( - self, - entry: types.FrameAnnotation, - clamp_bbox_xyxy: Optional[torch.Tensor], - fg_probability: Optional[torch.Tensor], - ) -> Tuple[torch.Tensor, str, torch.Tensor]: - entry_depth = entry.depth - assert entry_depth is not None - path = os.path.join(self.dataset_root, entry_depth.path) - depth_map = _load_depth(self._local_path(path), entry_depth.scale_adjustment) - - if self.box_crop: - assert clamp_bbox_xyxy is not None - depth_bbox_xyxy = _rescale_bbox( - clamp_bbox_xyxy, entry.image.size, depth_map.shape[-2:] - ) - depth_map = _crop_around_box(depth_map, depth_bbox_xyxy, path) - - depth_map, _, _ = self._resize_image(depth_map, mode="nearest") - - if self.mask_depths: - assert fg_probability is not None - depth_map *= fg_probability - - if self.load_depth_masks: - assert entry_depth.mask_path is not None - mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) - depth_mask = _load_depth_mask(self._local_path(mask_path)) - - if self.box_crop: - assert clamp_bbox_xyxy is not None - depth_mask_bbox_xyxy = _rescale_bbox( - clamp_bbox_xyxy, entry.image.size, depth_mask.shape[-2:] - ) - depth_mask = _crop_around_box( - depth_mask, depth_mask_bbox_xyxy, mask_path - ) - - depth_mask, _, _ = self._resize_image(depth_mask, mode="nearest") - else: - depth_mask = torch.ones_like(depth_map) - - return depth_map, path, depth_mask - - def _get_pytorch3d_camera( - self, - entry: types.FrameAnnotation, - scale: float, - clamp_bbox_xyxy: Optional[torch.Tensor], - ) -> PerspectiveCameras: - entry_viewpoint = entry.viewpoint - assert entry_viewpoint is not None - # principal point and focal length - principal_point = torch.tensor( - entry_viewpoint.principal_point, dtype=torch.float - ) - focal_length = torch.tensor(entry_viewpoint.focal_length, dtype=torch.float) - - half_image_size_wh_orig = ( - torch.tensor(list(reversed(entry.image.size)), dtype=torch.float) / 2.0 - ) - - # first, we convert from the dataset's NDC convention to pixels - format = entry_viewpoint.intrinsics_format - if format.lower() == "ndc_norm_image_bounds": - # this is e.g. 
currently used in CO3D for storing intrinsics - rescale = half_image_size_wh_orig - elif format.lower() == "ndc_isotropic": - rescale = half_image_size_wh_orig.min() - else: - raise ValueError(f"Unknown intrinsics format: {format}") - - # principal point and focal length in pixels - principal_point_px = half_image_size_wh_orig - principal_point * rescale - focal_length_px = focal_length * rescale - if self.box_crop: - assert clamp_bbox_xyxy is not None - principal_point_px -= clamp_bbox_xyxy[:2] - - # now, convert from pixels to PyTorch3D v0.5+ NDC convention - if self.image_height is None or self.image_width is None: - out_size = list(reversed(entry.image.size)) - else: - out_size = [self.image_width, self.image_height] - - half_image_size_output = torch.tensor(out_size, dtype=torch.float) / 2.0 - half_min_image_size_output = half_image_size_output.min() - - # rescaled principal point and focal length in ndc - principal_point = ( - half_image_size_output - principal_point_px * scale - ) / half_min_image_size_output - focal_length = focal_length_px * scale / half_min_image_size_output - - return PerspectiveCameras( - focal_length=focal_length[None], - principal_point=principal_point[None], - R=torch.tensor(entry_viewpoint.R, dtype=torch.float)[None], - T=torch.tensor(entry_viewpoint.T, dtype=torch.float)[None], - ) - def _load_frames(self) -> None: logger.info(f"Loading Co3D frames from {self.frame_annotations_file}.") local_file = self._local_path(self.frame_annotations_file) with gzip.open(local_file, "rt", encoding="utf8") as zipfile: + # pyre-ignore frame_annots_list = types.load_dataclass( zipfile, List[self.frame_annotations_type] ) @@ -684,6 +481,7 @@ def _load_sequences(self) -> None: logger.info(f"Loading Co3D sequences from {self.sequence_annotations_file}.") local_file = self._local_path(self.sequence_annotations_file) with gzip.open(local_file, "rt", encoding="utf8") as zipfile: + # pyre-ignore seq_annots = types.load_dataclass(zipfile, List[types.SequenceAnnotation]) if not seq_annots: raise ValueError("Empty sequences file!") @@ -853,35 +651,6 @@ def _invalidate_seq_to_idx(self) -> None: # pyre-ignore[16] self._seq_to_idx = seq_to_idx - def _resize_image( - self, image, mode="bilinear" - ) -> Tuple[torch.Tensor, float, torch.Tensor]: - image_height, image_width = self.image_height, self.image_width - if image_height is None or image_width is None: - # skip the resizing - imre_ = torch.from_numpy(image) - return imre_, 1.0, torch.ones_like(imre_[:1]) - # takes numpy array, returns pytorch tensor - minscale = min( - image_height / image.shape[-2], - image_width / image.shape[-1], - ) - imre = torch.nn.functional.interpolate( - torch.from_numpy(image)[None], - scale_factor=minscale, - mode=mode, - align_corners=False if mode == "bilinear" else None, - recompute_scale_factor=True, - )[0] - # pyre-fixme[19]: Expected 1 positional argument. - imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width) - imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre - # pyre-fixme[6]: For 2nd param expected `int` but got `Optional[int]`. - # pyre-fixme[6]: For 3rd param expected `int` but got `Optional[int]`. 
- mask = torch.zeros(1, self.image_height, self.image_width) - mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 - return imre_, minscale, mask - def _local_path(self, path: str) -> str: if self.path_manager is None: return path @@ -920,167 +689,5 @@ def _seq_name_to_seed(seq_name) -> int: return int(hashlib.sha1(seq_name.encode("utf-8")).hexdigest(), 16) -def _load_image(path) -> np.ndarray: - with Image.open(path) as pil_im: - im = np.array(pil_im.convert("RGB")) - im = im.transpose((2, 0, 1)) - im = im.astype(np.float32) / 255.0 - return im - - -def _load_16big_png_depth(depth_png) -> np.ndarray: - with Image.open(depth_png) as depth_pil: - # the image is stored with 16-bit depth but PIL reads it as I (32 bit). - # we cast it to uint16, then reinterpret as float16, then cast to float32 - depth = ( - np.frombuffer(np.array(depth_pil, dtype=np.uint16), dtype=np.float16) - .astype(np.float32) - .reshape((depth_pil.size[1], depth_pil.size[0])) - ) - return depth - - -def _load_1bit_png_mask(file: str) -> np.ndarray: - with Image.open(file) as pil_im: - mask = (np.array(pil_im.convert("L")) > 0.0).astype(np.float32) - return mask - - -def _load_depth_mask(path: str) -> np.ndarray: - if not path.lower().endswith(".png"): - raise ValueError('unsupported depth mask file name "%s"' % path) - m = _load_1bit_png_mask(path) - return m[None] # fake feature channel - - -def _load_depth(path, scale_adjustment) -> np.ndarray: - if not path.lower().endswith(".png"): - raise ValueError('unsupported depth file name "%s"' % path) - - d = _load_16big_png_depth(path) * scale_adjustment - d[~np.isfinite(d)] = 0.0 - return d[None] # fake feature channel - - -def _load_mask(path) -> np.ndarray: - with Image.open(path) as pil_im: - mask = np.array(pil_im) - mask = mask.astype(np.float32) / 255.0 - return mask[None] # fake feature channel - - -def _get_1d_bounds(arr) -> Tuple[int, int]: - nz = np.flatnonzero(arr) - return nz[0], nz[-1] + 1 - - -def _get_bbox_from_mask( - mask, thr, decrease_quant: float = 0.05 -) -> Tuple[int, int, int, int]: - # bbox in xywh - masks_for_box = np.zeros_like(mask) - while masks_for_box.sum() <= 1.0: - masks_for_box = (mask > thr).astype(np.float32) - thr -= decrease_quant - if thr <= 0.0: - warnings.warn(f"Empty masks_for_bbox (thr={thr}) => using full image.") - - x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) - y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) - - return x0, y0, x1 - x0, y1 - y0 - - -def _get_clamp_bbox( - bbox: torch.Tensor, - box_crop_context: float = 0.0, - image_path: str = "", -) -> torch.Tensor: - # box_crop_context: rate of expansion for bbox - # returns possibly expanded bbox xyxy as float - - bbox = bbox.clone() # do not edit bbox in place - - # increase box size - if box_crop_context > 0.0: - c = box_crop_context - bbox = bbox.float() - bbox[0] -= bbox[2] * c / 2 - bbox[1] -= bbox[3] * c / 2 - bbox[2] += bbox[2] * c - bbox[3] += bbox[3] * c - - if (bbox[2:] <= 1.0).any(): - raise ValueError( - f"squashed image {image_path}!! The bounding box contains no pixels." 
- ) - - bbox[2:] = torch.clamp(bbox[2:], 2) # set min height, width to 2 along both axes - bbox_xyxy = _bbox_xywh_to_xyxy(bbox, clamp_size=2) - - return bbox_xyxy - - -def _crop_around_box(tensor, bbox, impath: str = ""): - # bbox is xyxy, where the upper bound is corrected with +1 - bbox = _clamp_box_to_image_bounds_and_round( - bbox, - image_size_hw=tensor.shape[-2:], - ) - tensor = tensor[..., bbox[1] : bbox[3], bbox[0] : bbox[2]] - assert all(c > 0 for c in tensor.shape), f"squashed image {impath}" - return tensor - - -def _clamp_box_to_image_bounds_and_round( - bbox_xyxy: torch.Tensor, - image_size_hw: Tuple[int, int], -) -> torch.LongTensor: - bbox_xyxy = bbox_xyxy.clone() - bbox_xyxy[[0, 2]] = torch.clamp(bbox_xyxy[[0, 2]], 0, image_size_hw[-1]) - bbox_xyxy[[1, 3]] = torch.clamp(bbox_xyxy[[1, 3]], 0, image_size_hw[-2]) - if not isinstance(bbox_xyxy, torch.LongTensor): - bbox_xyxy = bbox_xyxy.round().long() - return bbox_xyxy # pyre-ignore [7] - - -def _rescale_bbox(bbox: torch.Tensor, orig_res, new_res) -> torch.Tensor: - assert bbox is not None - assert np.prod(orig_res) > 1e-8 - # average ratio of dimensions - rel_size = (new_res[0] / orig_res[0] + new_res[1] / orig_res[1]) / 2.0 - return bbox * rel_size - - -def _bbox_xyxy_to_xywh(xyxy: torch.Tensor) -> torch.Tensor: - wh = xyxy[2:] - xyxy[:2] - xywh = torch.cat([xyxy[:2], wh]) - return xywh - - -def _bbox_xywh_to_xyxy( - xywh: torch.Tensor, clamp_size: Optional[int] = None -) -> torch.Tensor: - xyxy = xywh.clone() - if clamp_size is not None: - xyxy[2:] = torch.clamp(xyxy[2:], clamp_size) - xyxy[2:] += xyxy[:2] - return xyxy - - def _safe_as_tensor(data, dtype): - if data is None: - return None - return torch.tensor(data, dtype=dtype) - - -# NOTE this cache is per-worker; they are implemented as processes. -# each batch is loaded and collated by a single worker; -# since sequences tend to co-occur within batches, this is useful. -@functools.lru_cache(maxsize=256) -def _load_pointcloud(pcl_path: Union[str, Path], max_points: int = 0) -> Pointclouds: - pcl = IO().load_pointcloud(pcl_path) - if max_points > 0: - pcl = pcl.subsample(max_points) - - return pcl + return torch.tensor(data, dtype=dtype) if data is not None else None diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/load_blob.py new file mode 100644 index 000000000..b10fb1267 --- /dev/null +++ b/pytorch3d/implicitron/dataset/load_blob.py @@ -0,0 +1,542 @@ +import functools +import os +import warnings + +import numpy as np +from PIL import Image +import torch +from typing import Any, Optional, Tuple + +from pytorch3d.implicitron.dataset import types +from pytorch3d.implicitron.dataset.dataset_base import FrameData +from pytorch3d.io import IO + + +class BlobLoader: + """ + A loader for correctly (according to setup) loading blobs for FrameData + + Args: + dataset_root: The root folder of the dataset; all the paths in jsons are + specified relative to this root (but not json paths themselves). + load_images: Enable loading the frame RGB data. + load_depths: Enable loading the frame depth maps. + load_depth_masks: Enable loading the frame depth map masks denoting the + depth values used for evaluation (the points consistent across views). + load_masks: Enable loading frame foreground masks. + load_point_clouds: Enable loading sequence-level point clouds. + max_points: Cap on the number of loaded points in the point cloud; + if reached, they are randomly sampled without replacement. 
+ mask_images: Whether to mask the images with the loaded foreground masks; + 0 value is used for background. + mask_depths: Whether to mask the depth maps with the loaded foreground + masks; 0 value is used for background. + image_height: The height of the returned images, masks, and depth maps; + aspect ratio is preserved during cropping/resizing. + image_width: The width of the returned images, masks, and depth maps; + aspect ratio is preserved during cropping/resizing. + box_crop: Enable cropping of the image around the bounding box inferred + from the foreground region of the loaded segmentation mask; masks + and depth maps are cropped accordingly; cameras are corrected. + box_crop_mask_thr: The threshold used to separate pixels into foreground + and background based on the foreground_probability mask; if no value + is greater than this threshold, the loader lowers it and repeats. + box_crop_context: The amount of additional padding added to each + dimension of the cropping bounding box, relative to box size. + """ + + path_manager: Any = None + + def __init__( + self, + dataset_root, + load_images, + load_depths, + load_depth_masks, + load_masks, + load_point_clouds, + max_points, + mask_images, + mask_depths, + image_height, + image_width, + box_crop, + box_crop_mask_thr, + box_crop_context, + ): + self.dataset_root = dataset_root + self.load_images = load_images + self.load_depths = load_depths + self.load_depth_masks = load_depth_masks + self.load_masks = load_masks + self.load_point_clouds = load_point_clouds + self.max_points = max_points + self.mask_images = mask_images + self.mask_depths = mask_depths + self.image_height = image_height + self.image_width = image_width + self.box_crop = box_crop + self.box_crop_mask_thr = box_crop_mask_thr + self.box_crop_context = box_crop_context + + def load( + self, + # pyre-ignore + frame_data: FrameData, + # pyre-ignore + entry: types.FrameAnnotation, + # pyre-ignore + point_cloud: types.PointCloudAnnotation, + ) -> FrameData: + """Main method for loader.""" + ( + frame_data.fg_probability, + frame_data.mask_path, + frame_data.bbox_xywh, + clamp_bbox_xyxy, + frame_data.crop_bbox_xywh, + ) = self._load_crop_fg_probability(entry) + + scale = 1.0 + if self.load_images and entry.image is not None: + # original image size + frame_data.image_size_hw = _safe_as_tensor(entry.image.size, torch.long) + + ( + frame_data.image_rgb, + frame_data.image_path, + frame_data.mask_crop, + scale, + ) = self._load_crop_images( + entry, frame_data.fg_probability, clamp_bbox_xyxy + ) + + if self.load_depths and entry.depth is not None: + ( + frame_data.depth_map, + frame_data.depth_path, + frame_data.depth_mask, + ) = self._load_mask_depth(entry, clamp_bbox_xyxy, frame_data.fg_probability) + + if entry.viewpoint is not None: + frame_data.camera = self._get_pytorch3d_camera( + entry, + scale, + clamp_bbox_xyxy, + ) + + if self.load_point_clouds and point_cloud is not None: + pcl_path = self._fix_point_cloud_path(point_cloud.path) + frame_data.sequence_point_cloud = _load_pointcloud( + self._local_path(pcl_path), max_points=self.max_points + ) + frame_data.sequence_point_cloud_path = pcl_path + return frame_data + + def _load_crop_fg_probability( + self, entry: types.FrameAnnotation + ) -> Tuple[ + Optional[torch.Tensor], + Optional[str], + Optional[torch.Tensor], + Optional[torch.Tensor], + Optional[torch.Tensor], + ]: + fg_probability = None + full_path = None + bbox_xywh = None + clamp_bbox_xyxy = None + crop_box_xywh = None + + if (self.load_masks or 
self.box_crop) and entry.mask is not None: + full_path = os.path.join(self.dataset_root, entry.mask.path) + mask = _load_mask(self._local_path(full_path)) + + if mask.shape[-2:] != entry.image.size: + raise ValueError( + f"bad mask size: {mask.shape[-2:]} vs {entry.image.size}!" + ) + + bbox_xywh = torch.tensor(_get_bbox_from_mask(mask, self.box_crop_mask_thr)) + + if self.box_crop: + clamp_bbox_xyxy = _clamp_box_to_image_bounds_and_round( + _get_clamp_bbox( + bbox_xywh, + image_path=entry.image.path, + box_crop_context=self.box_crop_context, + ), + image_size_hw=tuple(mask.shape[-2:]), + ) + crop_box_xywh = _bbox_xyxy_to_xywh(clamp_bbox_xyxy) + + mask = _crop_around_box(mask, clamp_bbox_xyxy, full_path) + + fg_probability, _, _ = self._resize_image(mask, mode="nearest") + + return fg_probability, full_path, bbox_xywh, clamp_bbox_xyxy, crop_box_xywh + + def _load_crop_images( + self, + entry: types.FrameAnnotation, + fg_probability: Optional[torch.Tensor], + clamp_bbox_xyxy: Optional[torch.Tensor], + ) -> Tuple[torch.Tensor, str, torch.Tensor, float]: + assert self.dataset_root is not None and entry.image is not None + path = os.path.join(self.dataset_root, entry.image.path) + image_rgb = _load_image(self._local_path(path)) + + if image_rgb.shape[-2:] != entry.image.size: + raise ValueError( + f"bad image size: {image_rgb.shape[-2:]} vs {entry.image.size}!" + ) + + if self.box_crop: + assert clamp_bbox_xyxy is not None + image_rgb = _crop_around_box(image_rgb, clamp_bbox_xyxy, path) + + image_rgb, scale, mask_crop = self._resize_image(image_rgb) + + if self.mask_images: + assert fg_probability is not None + image_rgb *= fg_probability + + return image_rgb, path, mask_crop, scale + + def _load_mask_depth( + self, + entry: types.FrameAnnotation, + clamp_bbox_xyxy: Optional[torch.Tensor], + fg_probability: Optional[torch.Tensor], + ) -> Tuple[torch.Tensor, str, torch.Tensor]: + entry_depth = entry.depth + assert entry_depth is not None + path = os.path.join(self.dataset_root, entry_depth.path) + depth_map = _load_depth(self._local_path(path), entry_depth.scale_adjustment) + + if self.box_crop: + assert clamp_bbox_xyxy is not None + depth_bbox_xyxy = _rescale_bbox( + clamp_bbox_xyxy, entry.image.size, depth_map.shape[-2:] + ) + depth_map = _crop_around_box(depth_map, depth_bbox_xyxy, path) + + depth_map, _, _ = self._resize_image(depth_map, mode="nearest") + + if self.mask_depths: + assert fg_probability is not None + depth_map *= fg_probability + + if self.load_depth_masks: + assert entry_depth.mask_path is not None + mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) + depth_mask = _load_depth_mask(self._local_path(mask_path)) + + if self.box_crop: + assert clamp_bbox_xyxy is not None + depth_mask_bbox_xyxy = _rescale_bbox( + clamp_bbox_xyxy, entry.image.size, depth_mask.shape[-2:] + ) + depth_mask = _crop_around_box( + depth_mask, depth_mask_bbox_xyxy, mask_path + ) + + depth_mask, _, _ = self._resize_image(depth_mask, mode="nearest") + else: + depth_mask = torch.ones_like(depth_map) + + return depth_map, path, depth_mask + + def _get_pytorch3d_camera( + self, + entry: types.FrameAnnotation, + scale: float, + clamp_bbox_xyxy: Optional[torch.Tensor], + ) -> PerspectiveCameras: # pyre-ignore + entry_viewpoint = entry.viewpoint + assert entry_viewpoint is not None + # principal point and focal length + principal_point = torch.tensor( + entry_viewpoint.principal_point, dtype=torch.float + ) + focal_length = torch.tensor(entry_viewpoint.focal_length, dtype=torch.float) + + 
half_image_size_wh_orig = ( + torch.tensor(list(reversed(entry.image.size)), dtype=torch.float) / 2.0 + ) + + # first, we convert from the dataset's NDC convention to pixels + format = entry_viewpoint.intrinsics_format + if format.lower() == "ndc_norm_image_bounds": + # this is e.g. currently used in CO3D for storing intrinsics + rescale = half_image_size_wh_orig + elif format.lower() == "ndc_isotropic": + rescale = half_image_size_wh_orig.min() + else: + raise ValueError(f"Unknown intrinsics format: {format}") + + # principal point and focal length in pixels + principal_point_px = half_image_size_wh_orig - principal_point * rescale + focal_length_px = focal_length * rescale + if self.box_crop: + assert clamp_bbox_xyxy is not None + principal_point_px -= clamp_bbox_xyxy[:2] + + # now, convert from pixels to PyTorch3D v0.5+ NDC convention + if self.image_height is None or self.image_width is None: + out_size = list(reversed(entry.image.size)) + else: + out_size = [self.image_width, self.image_height] + + half_image_size_output = torch.tensor(out_size, dtype=torch.float) / 2.0 + half_min_image_size_output = half_image_size_output.min() + + # rescaled principal point and focal length in ndc + principal_point = ( + half_image_size_output - principal_point_px * scale + ) / half_min_image_size_output + focal_length = focal_length_px * scale / half_min_image_size_output + + return PerspectiveCameras( + focal_length=focal_length[None], + principal_point=principal_point[None], + R=torch.tensor(entry_viewpoint.R, dtype=torch.float)[None], + T=torch.tensor(entry_viewpoint.T, dtype=torch.float)[None], + ) + + def _fix_point_cloud_path(self, path: str) -> str: + """ + Fix up a point cloud path from the dataset. + Some files in Co3Dv2 have an accidental absolute path stored. + """ + unwanted_prefix = ( + "/large_experiments/p3/replay/datasets/co3d/co3d45k_220512/export_v23/" + ) + if path.startswith(unwanted_prefix): + path = path[len(unwanted_prefix) :] + return os.path.join(self.dataset_root, path) + + def _local_path(self, path: str) -> str: + if self.path_manager is None: + return path + return self.path_manager.get_local_path(path) + + def _resize_image( + self, image, mode="bilinear" + ) -> Tuple[torch.Tensor, float, torch.Tensor]: + image_height, image_width = self.image_height, self.image_width + if image_height is None or image_width is None: + # skip the resizing + imre_ = torch.from_numpy(image) + return imre_, 1.0, torch.ones_like(imre_[:1]) + # takes numpy array, returns pytorch tensor + minscale = min( + image_height / image.shape[-2], + image_width / image.shape[-1], + ) + imre = torch.nn.functional.interpolate( + torch.from_numpy(image)[None], + scale_factor=minscale, + mode=mode, + align_corners=False if mode == "bilinear" else None, + recompute_scale_factor=True, + )[0] + # pyre-fixme[19]: Expected 1 positional argument. 
+ imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width) + imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre + mask = torch.zeros(1, self.image_height, self.image_width) + mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 + return imre_, minscale, mask + + +def _load_image(path) -> np.ndarray: + with Image.open(path) as pil_im: + im = np.array(pil_im.convert("RGB")) + im = im.transpose((2, 0, 1)) + im = im.astype(np.float32) / 255.0 + return im + + +def _load_mask(path) -> np.ndarray: + with Image.open(path) as pil_im: + mask = np.array(pil_im) + mask = mask.astype(np.float32) / 255.0 + return mask[None] # fake feature channel + + +def _get_bbox_from_mask( + mask, thr, decrease_quant: float = 0.05 +) -> Tuple[int, int, int, int]: + # bbox in xywh + masks_for_box = np.zeros_like(mask) + while masks_for_box.sum() <= 1.0: + masks_for_box = (mask > thr).astype(np.float32) + thr -= decrease_quant + if thr <= 0.0: + warnings.warn(f"Empty masks_for_bbox (thr={thr}) => using full image.") + + x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) + y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) + + return x0, y0, x1 - x0, y1 - y0 + + +def _crop_around_box(tensor, bbox, impath: str = ""): + # bbox is xyxy, where the upper bound is corrected with +1 + bbox = _clamp_box_to_image_bounds_and_round( + bbox, + image_size_hw=tensor.shape[-2:], + ) + tensor = tensor[..., bbox[1] : bbox[3], bbox[0] : bbox[2]] + assert all(c > 0 for c in tensor.shape), f"squashed image {impath}" + return tensor + + +def _clamp_box_to_image_bounds_and_round( + bbox_xyxy: torch.Tensor, + image_size_hw: Tuple[int, int], +) -> torch.LongTensor: + bbox_xyxy = bbox_xyxy.clone() + bbox_xyxy[[0, 2]] = torch.clamp(bbox_xyxy[[0, 2]], 0, image_size_hw[-1]) + bbox_xyxy[[1, 3]] = torch.clamp(bbox_xyxy[[1, 3]], 0, image_size_hw[-2]) + if not isinstance(bbox_xyxy, torch.LongTensor): + bbox_xyxy = bbox_xyxy.round().long() + return bbox_xyxy # pyre-ignore [7] + + +def _get_clamp_bbox( + bbox: torch.Tensor, + box_crop_context: float = 0.0, + image_path: str = "", +) -> torch.Tensor: + # box_crop_context: rate of expansion for bbox + # returns possibly expanded bbox xyxy as float + + bbox = bbox.clone() # do not edit bbox in place + + # increase box size + if box_crop_context > 0.0: + c = box_crop_context + bbox = bbox.float() + bbox[0] -= bbox[2] * c / 2 + bbox[1] -= bbox[3] * c / 2 + bbox[2] += bbox[2] * c + bbox[3] += bbox[3] * c + + if (bbox[2:] <= 1.0).any(): + raise ValueError( + f"squashed image {image_path}!! The bounding box contains no pixels." 
+ ) + + bbox[2:] = torch.clamp(bbox[2:], 2) # set min height, width to 2 along both axes + bbox_xyxy = _bbox_xywh_to_xyxy(bbox, clamp_size=2) + + return bbox_xyxy + + +def _bbox_xyxy_to_xywh(xyxy: torch.Tensor) -> torch.Tensor: + wh = xyxy[2:] - xyxy[:2] + xywh = torch.cat([xyxy[:2], wh]) + return xywh + + +def _resize_image( + self, image, mode="bilinear" +) -> Tuple[torch.Tensor, float, torch.Tensor]: + image_height, image_width = self.image_height, self.image_width + if image_height is None or image_width is None: + # skip the resizing + imre_ = torch.from_numpy(image) + return imre_, 1.0, torch.ones_like(imre_[:1]) + # takes numpy array, returns pytorch tensor + minscale = min( + image_height / image.shape[-2], + image_width / image.shape[-1], + ) + imre = torch.nn.functional.interpolate( + torch.from_numpy(image)[None], + scale_factor=minscale, + mode=mode, + align_corners=False if mode == "bilinear" else None, + recompute_scale_factor=True, + )[0] + # pyre-fixme[19]: Expected 1 positional argument. + imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width) + imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre + mask = torch.zeros(1, self.image_height, self.image_width) + mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 + return imre_, minscale, mask + + +def _load_depth(path, scale_adjustment) -> np.ndarray: + if not path.lower().endswith(".png"): + raise ValueError('unsupported depth file name "%s"' % path) + + d = _load_16big_png_depth(path) * scale_adjustment + d[~np.isfinite(d)] = 0.0 + return d[None] # fake feature channel + + +def _load_16big_png_depth(depth_png) -> np.ndarray: + with Image.open(depth_png) as depth_pil: + # the image is stored with 16-bit depth but PIL reads it as I (32 bit). + # we cast it to uint16, then reinterpret as float16, then cast to float32 + depth = ( + np.frombuffer(np.array(depth_pil, dtype=np.uint16), dtype=np.float16) + .astype(np.float32) + .reshape((depth_pil.size[1], depth_pil.size[0])) + ) + return depth + + +def _rescale_bbox(bbox: torch.Tensor, orig_res, new_res) -> torch.Tensor: + assert bbox is not None + assert np.prod(orig_res) > 1e-8 + # average ratio of dimensions + rel_size = (new_res[0] / orig_res[0] + new_res[1] / orig_res[1]) / 2.0 + return bbox * rel_size + + +def _load_1bit_png_mask(file: str) -> np.ndarray: + with Image.open(file) as pil_im: + mask = (np.array(pil_im.convert("L")) > 0.0).astype(np.float32) + return mask + + +def _load_depth_mask(path: str) -> np.ndarray: + if not path.lower().endswith(".png"): + raise ValueError('unsupported depth mask file name "%s"' % path) + m = _load_1bit_png_mask(path) + return m[None] # fake feature channel + + +def _get_1d_bounds(arr) -> Tuple[int, int]: + nz = np.flatnonzero(arr) + return nz[0], nz[-1] + 1 + + +def _bbox_xywh_to_xyxy( + xywh: torch.Tensor, clamp_size: Optional[int] = None +) -> torch.Tensor: + xyxy = xywh.clone() + if clamp_size is not None: + xyxy[2:] = torch.clamp(xyxy[2:], clamp_size) + xyxy[2:] += xyxy[:2] + return xyxy + + +def _safe_as_tensor(data, dtype): + return torch.tensor(data, dtype=dtype) if data is not None else None + + +# NOTE this cache is per-worker; they are implemented as processes. +# each batch is loaded and collated by a single worker; +# since sequences tend to co-occur within batches, this is useful. 
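# [annotation, not part of the patch] functools.lru_cache keys on the argument
# tuple (pcl_path, max_points), so repeated frames of one sequence reuse the
# already-loaded Pointclouds object instead of re-reading the file.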
+@functools.lru_cache(maxsize=256) +# pyre-ignore +def _load_pointcloud(pcl_path: Union[str, Path], max_points: int = 0) -> Pointclouds: + pcl = IO().load_pointcloud(pcl_path) + if max_points > 0: + pcl = pcl.subsample(max_points) + + return pcl From f745dfc941e9c5ed3e10e0a2664236b3124b3770 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 28 Feb 2023 15:39:42 +0000 Subject: [PATCH 02/35] added type hints and deleted chore pyre-ignore --- .../implicitron/dataset/json_index_dataset.py | 45 ++++++++----------- pytorch3d/implicitron/dataset/load_blob.py | 30 ++++++------- 2 files changed, 32 insertions(+), 43 deletions(-) diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py index ac9daf02a..9bec154c3 100644 --- a/pytorch3d/implicitron/dataset/json_index_dataset.py +++ b/pytorch3d/implicitron/dataset/json_index_dataset.py @@ -52,7 +52,6 @@ class FrameAnnotsEntry(TypedDict): subset: Optional[str] - # pyre-ignore frame_annotation: types.FrameAnnotation else: @@ -60,7 +59,6 @@ class FrameAnnotsEntry(TypedDict): @registry.register -# pyre-ignore class JsonIndexDataset(DatasetBase, ReplaceableBase): """ A dataset with annotations in json files like the Common Objects in 3D @@ -131,7 +129,6 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase): frame_annotations_type: ClassVar[ Type[types.FrameAnnotation] - # pyre-ignore ] = types.FrameAnnotation path_manager: Any = None @@ -164,7 +161,7 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase): sort_frames: bool = False eval_batches: Any = None eval_batch_index: Any = None - loader: BlobLoader + blob_loader: BlobLoader # frame_annots: List[FrameAnnotsEntry] = field(init=False) # seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False) @@ -179,21 +176,21 @@ def __post_init__(self) -> None: self._filter_db() # also computes sequence indices self._extract_and_set_eval_batches() - self.loader = BlobLoader( - self.dataset_root, - self.load_images, - self.load_depths, - self.load_depth_masks, - self.load_masks, - self.load_point_clouds, - self.max_points, - self.mask_images, - self.mask_depths, - self.image_height, - self.image_width, - self.box_crop, - self.box_crop_mask_thr, - self.box_crop_context, + self.blob_loader = BlobLoader( + dataset_root = self.dataset_root, + load_images = self.load_images, + load_depths = self.load_depths, + load_depth_masks = self.load_depth_masks, + load_masks = self.load_masks, + load_point_clouds = self.load_point_clouds, + max_points = self.max_points, + mask_images = self.mask_images, + mask_depths = self.mask_depths, + image_height = self.image_height, + image_width = self.image_width, + box_crop = self.box_crop, + box_crop_mask_thr = self.box_crop_mask_thr, + box_crop_context = self.box_crop_context, ) logger.info(str(self)) @@ -415,7 +412,6 @@ def __len__(self) -> int: def _get_frame_type(self, entry: FrameAnnotsEntry) -> Optional[str]: return entry["subset"] - # pyre-ignore def get_all_train_cameras(self) -> CamerasBase: """ Returns the cameras corresponding to all the known frames. 
@@ -431,7 +427,6 @@ def get_all_train_cameras(self) -> CamerasBase: cameras.append(self[frame_idx].camera) return join_cameras_as_batch(cameras) - # pyre-ignore def __getitem__(self, index) -> FrameData: # pyre-ignore[16] if index >= len(self.frame_annots): @@ -456,17 +451,14 @@ def __getitem__(self, index) -> FrameData: else None, ) - # The rest of the fields are optional + # Optional field frame_data.frame_type = self._get_frame_type(self.frame_annots[index]) - - frame_data = self.loader.load(frame_data, entry, point_cloud) - return frame_data + return self.blob_loader.load(frame_data, entry, point_cloud) def _load_frames(self) -> None: logger.info(f"Loading Co3D frames from {self.frame_annotations_file}.") local_file = self._local_path(self.frame_annotations_file) with gzip.open(local_file, "rt", encoding="utf8") as zipfile: - # pyre-ignore frame_annots_list = types.load_dataclass( zipfile, List[self.frame_annotations_type] ) @@ -481,7 +473,6 @@ def _load_sequences(self) -> None: logger.info(f"Loading Co3D sequences from {self.sequence_annotations_file}.") local_file = self._local_path(self.sequence_annotations_file) with gzip.open(local_file, "rt", encoding="utf8") as zipfile: - # pyre-ignore seq_annots = types.load_dataclass(zipfile, List[types.SequenceAnnotation]) if not seq_annots: raise ValueError("Empty sequences file!") diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/load_blob.py index b10fb1267..905351896 100644 --- a/pytorch3d/implicitron/dataset/load_blob.py +++ b/pytorch3d/implicitron/dataset/load_blob.py @@ -44,7 +44,6 @@ class BlobLoader: box_crop_context: The amount of additional padding added to each dimension of the cropping bounding box, relative to box size. """ - path_manager: Any = None def __init__( @@ -64,20 +63,20 @@ def __init__( box_crop_mask_thr, box_crop_context, ): - self.dataset_root = dataset_root - self.load_images = load_images - self.load_depths = load_depths - self.load_depth_masks = load_depth_masks - self.load_masks = load_masks - self.load_point_clouds = load_point_clouds - self.max_points = max_points - self.mask_images = mask_images - self.mask_depths = mask_depths - self.image_height = image_height - self.image_width = image_width - self.box_crop = box_crop - self.box_crop_mask_thr = box_crop_mask_thr - self.box_crop_context = box_crop_context + self.dataset_root: str = dataset_root + self.load_images: bool = load_images + self.load_depths: bool = load_depths + self.load_depth_masks: bool = load_depth_masks + self.load_masks: bool = load_masks + self.load_point_clouds: bool = load_point_clouds + self.max_points: int = max_points + self.mask_images: bool = mask_images + self.mask_depths: bool = mask_depths + self.image_height: int = image_height + self.image_width: int = image_width + self.box_crop: bool = box_crop + self.box_crop_mask_thr: float = box_crop_mask_thr + self.box_crop_context: float = box_crop_context def load( self, @@ -341,7 +340,6 @@ def _resize_image( align_corners=False if mode == "bilinear" else None, recompute_scale_factor=True, )[0] - # pyre-fixme[19]: Expected 1 positional argument. 
imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width) imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre mask = torch.zeros(1, self.image_height, self.image_width) From c3c5110364ae1d7e42ba63a97223d3410926d587 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 28 Feb 2023 16:16:37 +0000 Subject: [PATCH 03/35] linter --- .../implicitron/dataset/json_index_dataset.py | 36 +++++++++---------- pytorch3d/implicitron/dataset/load_blob.py | 10 ++++-- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py index 9bec154c3..0d5aa1796 100644 --- a/pytorch3d/implicitron/dataset/json_index_dataset.py +++ b/pytorch3d/implicitron/dataset/json_index_dataset.py @@ -15,7 +15,6 @@ import warnings from collections import defaultdict from itertools import islice -from pathlib import Path from typing import ( Any, ClassVar, @@ -30,18 +29,17 @@ Union, ) -import numpy as np import torch from tqdm import tqdm -from pytorch3d.implicitron.tools.config import registry, ReplaceableBase from pytorch3d.implicitron.dataset import types from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData from pytorch3d.implicitron.dataset.load_blob import BlobLoader from pytorch3d.implicitron.dataset.utils import is_known_frame_scalar + +from pytorch3d.implicitron.tools.config import registry, ReplaceableBase from pytorch3d.renderer.camera_utils import join_cameras_as_batch -from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras -from pytorch3d.structures.pointclouds import Pointclouds +from pytorch3d.renderer.cameras import CamerasBase logger = logging.getLogger(__name__) @@ -177,20 +175,20 @@ def __post_init__(self) -> None: self._extract_and_set_eval_batches() self.blob_loader = BlobLoader( - dataset_root = self.dataset_root, - load_images = self.load_images, - load_depths = self.load_depths, - load_depth_masks = self.load_depth_masks, - load_masks = self.load_masks, - load_point_clouds = self.load_point_clouds, - max_points = self.max_points, - mask_images = self.mask_images, - mask_depths = self.mask_depths, - image_height = self.image_height, - image_width = self.image_width, - box_crop = self.box_crop, - box_crop_mask_thr = self.box_crop_mask_thr, - box_crop_context = self.box_crop_context, + dataset_root=self.dataset_root, + load_images=self.load_images, + load_depths=self.load_depths, + load_depth_masks=self.load_depth_masks, + load_masks=self.load_masks, + load_point_clouds=self.load_point_clouds, + max_points=self.max_points, + mask_images=self.mask_images, + mask_depths=self.mask_depths, + image_height=self.image_height, + image_width=self.image_width, + box_crop=self.box_crop, + box_crop_mask_thr=self.box_crop_mask_thr, + box_crop_context=self.box_crop_context, ) logger.info(str(self)) diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/load_blob.py index 905351896..2d6d2d220 100644 --- a/pytorch3d/implicitron/dataset/load_blob.py +++ b/pytorch3d/implicitron/dataset/load_blob.py @@ -1,15 +1,18 @@ import functools import os import warnings +from pathlib import Path +from typing import Any, Optional, Tuple, Union import numpy as np -from PIL import Image import torch -from typing import Any, Optional, Tuple +from PIL import Image from pytorch3d.implicitron.dataset import types from pytorch3d.implicitron.dataset.dataset_base import FrameData from pytorch3d.io import IO +from pytorch3d.renderer.cameras import 
PerspectiveCameras +from pytorch3d.structures.pointclouds import Pointclouds class BlobLoader: @@ -44,6 +47,7 @@ class BlobLoader: box_crop_context: The amount of additional padding added to each dimension of the cropping bounding box, relative to box size. """ + path_manager: Any = None def __init__( @@ -371,7 +375,7 @@ def _get_bbox_from_mask( masks_for_box = (mask > thr).astype(np.float32) thr -= decrease_quant if thr <= 0.0: - warnings.warn(f"Empty masks_for_bbox (thr={thr}) => using full image.") + warnings.warn(f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1) x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) From 9b431bd5698050bfc5574881a569f2fb9cab5be7 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 28 Feb 2023 16:18:11 +0000 Subject: [PATCH 04/35] linter --- pytorch3d/implicitron/dataset/load_blob.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/load_blob.py index 2d6d2d220..0cad7b4f1 100644 --- a/pytorch3d/implicitron/dataset/load_blob.py +++ b/pytorch3d/implicitron/dataset/load_blob.py @@ -375,7 +375,9 @@ def _get_bbox_from_mask( masks_for_box = (mask > thr).astype(np.float32) thr -= decrease_quant if thr <= 0.0: - warnings.warn(f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1) + warnings.warn( + f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1 + ) x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) From 627e60fb4cf989c7ce0a75b1cb198cd5f99a027a Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 28 Feb 2023 17:02:36 +0000 Subject: [PATCH 05/35] deleted chore pyre-ignore --- pytorch3d/implicitron/dataset/load_blob.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/load_blob.py index 0cad7b4f1..9193a147d 100644 --- a/pytorch3d/implicitron/dataset/load_blob.py +++ b/pytorch3d/implicitron/dataset/load_blob.py @@ -537,7 +537,6 @@ def _safe_as_tensor(data, dtype): # each batch is loaded and collated by a single worker; # since sequences tend to co-occur within batches, this is useful. 
 @functools.lru_cache(maxsize=256)
-# pyre-ignore
 def _load_pointcloud(pcl_path: Union[str, Path], max_points: int = 0) -> Pointclouds:
     pcl = IO().load_pointcloud(pcl_path)
     if max_points > 0:

From 0aa27a6488afe16dbda6b667a34e802a627f2b77 Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Wed, 1 Mar 2023 09:49:02 +0000
Subject: [PATCH 06/35] renamed load_blob to blob_loader

---
 pytorch3d/implicitron/dataset/{load_blob.py => blob_loader.py} | 0
 pytorch3d/implicitron/dataset/json_index_dataset.py            | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename pytorch3d/implicitron/dataset/{load_blob.py => blob_loader.py} (100%)

diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/blob_loader.py
similarity index 100%
rename from pytorch3d/implicitron/dataset/load_blob.py
rename to pytorch3d/implicitron/dataset/blob_loader.py
diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py
index 0d5aa1796..2ad041bf7 100644
--- a/pytorch3d/implicitron/dataset/json_index_dataset.py
+++ b/pytorch3d/implicitron/dataset/json_index_dataset.py
@@ -34,7 +34,7 @@

 from pytorch3d.implicitron.dataset import types
 from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData
-from pytorch3d.implicitron.dataset.load_blob import BlobLoader
+from pytorch3d.implicitron.dataset.blob_loader import BlobLoader
 from pytorch3d.implicitron.dataset.utils import is_known_frame_scalar

 from pytorch3d.implicitron.tools.config import registry, ReplaceableBase

From 53823cf6d330af23046ec66a5ce52c17a0c038ec Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Wed, 1 Mar 2023 09:56:19 +0000
Subject: [PATCH 07/35] sending the whole seq_annotation to BlobLoader

---
 pytorch3d/implicitron/dataset/blob_loader.py        | 6 +++---
 pytorch3d/implicitron/dataset/json_index_dataset.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py
index 9193a147d..3c624a2ce 100644
--- a/pytorch3d/implicitron/dataset/blob_loader.py
+++ b/pytorch3d/implicitron/dataset/blob_loader.py
@@ -89,7 +89,7 @@ def load(
         # pyre-ignore
         entry: types.FrameAnnotation,
         # pyre-ignore
-        point_cloud: types.PointCloudAnnotation,
+        seq_annotation: types.SequenceAnnotation,
     ) -> FrameData:
         """Main method for loader."""
         (
@@ -128,8 +128,8 @@ def load(
             clamp_bbox_xyxy,
         )

-        if self.load_point_clouds and point_cloud is not None:
-            pcl_path = self._fix_point_cloud_path(point_cloud.path)
+        if self.load_point_clouds and seq_annotation.point_cloud is not None:
+            pcl_path = self._fix_point_cloud_path(seq_annotation.point_cloud.path)
             frame_data.sequence_point_cloud = _load_pointcloud(
                 self._local_path(pcl_path), max_points=self.max_points
             )
diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py
index 2ad041bf7..0ceb7dec0 100644
--- a/pytorch3d/implicitron/dataset/json_index_dataset.py
+++ b/pytorch3d/implicitron/dataset/json_index_dataset.py
@@ -451,7 +451,7 @@ def __getitem__(self, index) -> FrameData:

         # Optional field
         frame_data.frame_type = self._get_frame_type(self.frame_annots[index])
-        return self.blob_loader.load(frame_data, entry, point_cloud)
+        return self.blob_loader.load(frame_data, entry, self.seq_annots[entry.sequence_name])

     def _load_frames(self) -> None:
         logger.info(f"Loading Co3D frames from {self.frame_annotations_file}.")

From d6f13eb629d6607ebd60bfc1e026027309402fc9 Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Wed, 1 Mar 2023 10:00:09 +0000
Subject: [PATCH 08/35] made blob_loader a dataclass to avoid boilerplate

---
 pytorch3d/implicitron/dataset/blob_loader.py | 48 +++++++-------------
 1 file changed, 16 insertions(+), 32 deletions(-)

diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py
index 3c624a2ce..29c41837f 100644
--- a/pytorch3d/implicitron/dataset/blob_loader.py
+++ b/pytorch3d/implicitron/dataset/blob_loader.py
@@ -1,6 +1,7 @@
 import functools
 import os
 import warnings
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Optional, Tuple, Union

@@ -15,6 +16,7 @@
 from pytorch3d.structures.pointclouds import Pointclouds


+@dataclass
 class BlobLoader:
     """
     A loader for correctly (according to setup) loading blobs for FrameData
@@ -48,40 +50,22 @@ class BlobLoader:
         dimension of the cropping bounding box, relative to box size.
     """

+    dataset_root: str
+    load_images: bool
+    load_depths: bool
+    load_depth_masks: bool
+    load_masks: bool
+    load_point_clouds: bool
+    max_points: int
+    mask_images: bool
+    mask_depths: bool
+    image_height: int
+    image_width: int
+    box_crop: bool
+    box_crop_mask_thr: float
+    box_crop_context: float
    path_manager: Any = None

-    def __init__(
-        self,
-        dataset_root,
-        load_images,
-        load_depths,
-        load_depth_masks,
-        load_masks,
-        load_point_clouds,
-        max_points,
-        mask_images,
-        mask_depths,
-        image_height,
-        image_width,
-        box_crop,
-        box_crop_mask_thr,
-        box_crop_context,
-    ):
-        self.dataset_root: str = dataset_root
-        self.load_images: bool = load_images
-        self.load_depths: bool = load_depths
-        self.load_depth_masks: bool = load_depth_masks
-        self.load_masks: bool = load_masks
-        self.load_point_clouds: bool = load_point_clouds
-        self.max_points: int = max_points
-        self.mask_images: bool = mask_images
-        self.mask_depths: bool = mask_depths
-        self.image_height: int = image_height
-        self.image_width: int = image_width
-        self.box_crop: bool = box_crop
-        self.box_crop_mask_thr: float = box_crop_mask_thr
-        self.box_crop_context: float = box_crop_context
-
     def load(
         self,

From 86e64f77fb89b10acd51576620aeda709bd0505c Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Wed, 1 Mar 2023 10:02:21 +0000
Subject: [PATCH 09/35] documented that FrameData modification is done in
 place

---
 pytorch3d/implicitron/dataset/blob_loader.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py
index 29c41837f..48578927d 100644
--- a/pytorch3d/implicitron/dataset/blob_loader.py
+++ b/pytorch3d/implicitron/dataset/blob_loader.py
@@ -19,7 +19,8 @@
 class BlobLoader:
     """
-    A loader for correctly (according to setup) loading blobs for FrameData
+    A loader for correctly (according to setup) loading blobs for FrameData.
+    Beware that modification done in place

     Args:
         dataset_root: The root folder of the dataset; all the paths in jsons are
@@ -75,7 +76,9 @@ def load(
         # pyre-ignore
         entry: types.FrameAnnotation,
         # pyre-ignore
         seq_annotation: types.SequenceAnnotation,
     ) -> FrameData:
-        """Main method for loader."""
+        """Main method for loader.
+        FrameData modification done inplace
+        """
         (
             frame_data.fg_probability,
             frame_data.mask_path,
             frame_data.bbox_xywh,

From 2f1704939fb1795e7ad2e0eca1b18fb30d12fba4 Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Wed, 1 Mar 2023 10:19:14 +0000
Subject: [PATCH 10/35] split JsonIndexDataset args into 2 groups: Metadata-
 related and Blob-loading

---
 .../implicitron/dataset/json_index_dataset.py | 37 ++++++++++---------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py
--- a/pytorch3d/implicitron/dataset/json_index_dataset.py
+++ b/pytorch3d/implicitron/dataset/json_index_dataset.py
@@ -62,7 +62,7 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
     A dataset with annotations in json files like the Common Objects in 3D
     (CO3D) dataset.

-    Args:
+    Metadata-related args::
         frame_annotations_file: A zipped json file containing metadata of the
             frames in the dataset, serialized List[types.FrameAnnotation].
         sequence_annotations_file: A zipped json file containing metadata of the
@@ -80,6 +80,24 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
         pick_sequence: A list of sequence names to restrict the dataset to.
         exclude_sequence: A list of the names of the sequences to exclude.
         limit_category_to: Restrict the dataset to the given list of categories.
+        remove_empty_masks: Removes the frames with no active foreground pixels
+            in the segmentation mask after thresholding (see box_crop_mask_thr).
+        n_frames_per_sequence: If > 0, randomly samples #n_frames_per_sequence
+            frames in each sequences uniformly without replacement if it has
+            more frames than that; applied before other frame-level filters.
+        seed: The seed of the random generator sampling #n_frames_per_sequence
+            random frames per sequence.
-        sort_frames: Enable frame annotations sorting to group frames from the
-            same sequences together and order them by timestamps
-        eval_batches: A list of batches that form the evaluation set;
-            list of batch-sized lists of indices corresponding to __getitem__
-            of this class, thus it can be used directly as a batch sampler.
-        eval_batch_index:
-            ( Optional[List[List[Union[Tuple[str, int, str], Tuple[str, int]]]] )
-            A list of batches of frames described as (sequence_name, frame_idx)
-            that can form the evaluation set, `eval_batches` will be set from this.
-
     """

     frame_annotations_type: ClassVar[

From 527ec098e44c15f1386b607d34c7b9e760528813 Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Wed, 1 Mar 2023 12:08:46 +0000
Subject: [PATCH 11/35] code refactoring to delete unneeded pyre-ignore
 comments

---
 pytorch3d/implicitron/dataset/blob_loader.py  | 19 +++---
 .../implicitron/dataset/json_index_dataset.py | 58 ++++++-------------
 pytorch3d/implicitron/dataset/visualize.py    |  1 -
 3 files changed, 29 insertions(+), 49 deletions(-)

diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py
index 48578927d..fce26b255 100644
--- a/pytorch3d/implicitron/dataset/blob_loader.py
+++ b/pytorch3d/implicitron/dataset/blob_loader.py
@@ -1,3 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
 import functools
 import os
 import warnings
@@ -60,8 +66,8 @@ class BlobLoader:
     max_points: int
     mask_images: bool
     mask_depths: bool
-    image_height: int
-    image_width: int
+    image_height: Optional[int]
+    image_width: Optional[int]
     box_crop: bool
     box_crop_mask_thr: float
     box_crop_context: float
@@ -69,11 +75,8 @@

     def load(
         self,
-        # pyre-ignore
         frame_data: FrameData,
-        # pyre-ignore
         entry: types.FrameAnnotation,
-        # pyre-ignore
         seq_annotation: types.SequenceAnnotation,
     ) -> FrameData:
         """Main method for loader.
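The signature above returns the same FrameData it was given; per the note added in
PATCH 09, all loaded blobs are written into the passed-in object. A toy sketch of
that in-place contract (hypothetical names, not part of the patch):

from dataclasses import dataclass
from typing import Optional

@dataclass
class ToyFrameData:
    image_rgb: Optional[str] = None

def load(frame_data: ToyFrameData) -> ToyFrameData:
    # fields are filled on the argument itself; the return value
    # is the very same object, returned for call-chaining
    frame_data.image_rgb = "decoded"
    return frame_data

fd = ToyFrameData()
assert load(fd) is fd and fd.image_rgb == "decoded"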
@@ -242,7 +245,7 @@ def _get_pytorch3d_camera( entry: types.FrameAnnotation, scale: float, clamp_bbox_xyxy: Optional[torch.Tensor], - ) -> PerspectiveCameras: # pyre-ignore + ) -> PerspectiveCameras: entry_viewpoint = entry.viewpoint assert entry_viewpoint is not None # principal point and focal length @@ -331,9 +334,9 @@ def _resize_image( align_corners=False if mode == "bilinear" else None, recompute_scale_factor=True, )[0] - imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width) + imre_ = torch.zeros(image.shape[0], image_height, image_width) imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre - mask = torch.zeros(1, self.image_height, self.image_width) + mask = torch.zeros(1, image_height, image_width) mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 return imre_, minscale, mask diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py index 671161680..cf63b9b43 100644 --- a/pytorch3d/implicitron/dataset/json_index_dataset.py +++ b/pytorch3d/implicitron/dataset/json_index_dataset.py @@ -14,6 +14,7 @@ import random import warnings from collections import defaultdict +from dataclasses import field from itertools import islice from typing import ( Any, @@ -30,16 +31,16 @@ ) import torch -from tqdm import tqdm from pytorch3d.implicitron.dataset import types -from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData from pytorch3d.implicitron.dataset.blob_loader import BlobLoader +from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData from pytorch3d.implicitron.dataset.utils import is_known_frame_scalar from pytorch3d.implicitron.tools.config import registry, ReplaceableBase from pytorch3d.renderer.camera_utils import join_cameras_as_batch from pytorch3d.renderer.cameras import CamerasBase +from tqdm import tqdm logger = logging.getLogger(__name__) @@ -160,13 +161,14 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase): sort_frames: bool = False eval_batches: Any = None eval_batch_index: Any = None - blob_loader: BlobLoader - # frame_annots: List[FrameAnnotsEntry] = field(init=False) - # seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False) + subset_to_image_path: Any = None + # initialised in __post_init__ + blob_loader: BlobLoader = field(init=False) + frame_annots: List[FrameAnnotsEntry] = field(init=False) + seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False) + _seq_to_idx: Dict[str, List[int]] = field(init=False) def __post_init__(self) -> None: - # pyre-fixme[16]: `JsonIndexDataset` has no attribute `subset_to_image_path`. - self.subset_to_image_path = None self._load_frames() self._load_sequences() if self.sort_frames: @@ -206,7 +208,8 @@ def _extract_and_set_eval_batches(self): self.eval_batch_index ) - def join(self, other_datasets: Iterable[DatasetBase]) -> None: + # pyre-ignore + def join(self, other_datasets: Iterable["JsonIndexDataset"]) -> None: """ Join the dataset with other JsonIndexDataset objects. 
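A note on the `field(init=False)` members added in the hunk above: dataclasses
exclude such fields from the generated `__init__`, so `__post_init__` is
responsible for assigning them. A minimal self-contained sketch of the pattern
(hypothetical names, not part of the patch):

from dataclasses import dataclass, field
from typing import Dict

@dataclass
class ToyDataset:
    # ordinary constructor argument
    root: str
    # derived member: the generated __init__ takes no `index` argument,
    # so it must be assigned in __post_init__
    index: Dict[str, int] = field(init=False)

    def __post_init__(self) -> None:
        self.index = {self.root: 0}

assert ToyDataset(root="/data").index == {"/data": 0}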
@@ -216,9 +219,7 @@ def join(self, other_datasets: Iterable[DatasetBase]) -> None: """ if not all(isinstance(d, JsonIndexDataset) for d in other_datasets): raise ValueError("This function can only join a list of JsonIndexDataset") - # pyre-ignore[16] self.frame_annots.extend([fa for d in other_datasets for fa in d.frame_annots]) - # pyre-ignore[16] self.seq_annots.update( # https://gist.github.com/treyhunner/f35292e676efa0be1728 functools.reduce( @@ -266,7 +267,7 @@ def seq_frame_index_to_dataset_index( allow_missing_indices: bool = False, remove_missing_indices: bool = False, suppress_missing_index_warning: bool = True, - ) -> List[List[Union[Optional[int], int]]]: + ) -> Union[List[List[Optional[int]]], List[List[int]]]: """ Obtain indices into the dataset object given a list of frame ids. @@ -294,11 +295,9 @@ def seq_frame_index_to_dataset_index( """ _dataset_seq_frame_n_index = { seq: { - # pyre-ignore[16] self.frame_annots[idx]["frame_annotation"].frame_number: idx for idx in seq_idx } - # pyre-ignore[16] for seq, seq_idx in self._seq_to_idx.items() } @@ -321,7 +320,6 @@ def _get_dataset_idx( # Check that the loaded frame path is consistent # with the one stored in self.frame_annots. assert os.path.normpath( - # pyre-ignore[16] self.frame_annots[idx]["frame_annotation"].image.path ) == os.path.normpath( path @@ -338,9 +336,7 @@ def _get_dataset_idx( valid_dataset_idx = [ [b for b in batch if b is not None] for batch in dataset_idx ] - return [ # pyre-ignore[7] - batch for batch in valid_dataset_idx if len(batch) > 0 - ] + return [batch for batch in valid_dataset_idx if len(batch) > 0] return dataset_idx @@ -373,7 +369,7 @@ def subset_from_frame_index( # Deep copy the whole dataset except frame_annots, which are large so we # deep copy only the requested subset of frame_annots. 
- memo = {id(self.frame_annots): None} # pyre-ignore[16] + memo = {id(self.frame_annots): None} dataset_new = copy.deepcopy(self, memo) dataset_new.frame_annots = copy.deepcopy( [self.frame_annots[i] for i in valid_dataset_indices] @@ -401,11 +397,9 @@ def subset_from_frame_index( return dataset_new def __str__(self) -> str: - # pyre-ignore[16] return f"JsonIndexDataset #frames={len(self.frame_annots)}" def __len__(self) -> int: - # pyre-ignore[16] return len(self.frame_annots) def _get_frame_type(self, entry: FrameAnnotsEntry) -> Optional[str]: @@ -417,7 +411,6 @@ def get_all_train_cameras(self) -> CamerasBase: """ logger.info("Loading all train cameras.") cameras = [] - # pyre-ignore[16] for frame_idx, frame_annot in enumerate(tqdm(self.frame_annots)): frame_type = self._get_frame_type(frame_annot) if frame_type is None: @@ -427,12 +420,10 @@ def get_all_train_cameras(self) -> CamerasBase: return join_cameras_as_batch(cameras) def __getitem__(self, index) -> FrameData: - # pyre-ignore[16] if index >= len(self.frame_annots): raise IndexError(f"index {index} out of range {len(self.frame_annots)}") entry = self.frame_annots[index]["frame_annotation"] - # pyre-ignore[16] point_cloud = self.seq_annots[entry.sequence_name].point_cloud frame_data = FrameData( frame_number=_safe_as_tensor(entry.frame_number, torch.long), @@ -452,7 +443,9 @@ def __getitem__(self, index) -> FrameData: # Optional field frame_data.frame_type = self._get_frame_type(self.frame_annots[index]) - return self.blob_loader.load(frame_data, entry, self.seq_annots[entry.sequence_name]) + return self.blob_loader.load( + frame_data, entry, self.seq_annots[entry.sequence_name] + ) def _load_frames(self) -> None: logger.info(f"Loading Co3D frames from {self.frame_annotations_file}.") @@ -463,7 +456,6 @@ def _load_frames(self) -> None: ) if not frame_annots_list: raise ValueError("Empty dataset!") - # pyre-ignore[16] self.frame_annots = [ FrameAnnotsEntry(frame_annotation=a, subset=None) for a in frame_annots_list ] @@ -475,7 +467,6 @@ def _load_sequences(self) -> None: seq_annots = types.load_dataclass(zipfile, List[types.SequenceAnnotation]) if not seq_annots: raise ValueError("Empty sequences file!") - # pyre-ignore[16] self.seq_annots = {entry.sequence_name: entry for entry in seq_annots} def _load_subset_lists(self) -> None: @@ -491,7 +482,6 @@ def _load_subset_lists(self) -> None: for subset, frames in subset_to_seq_frame.items() for _, _, path in frames } - # pyre-ignore[16] for frame in self.frame_annots: frame["subset"] = frame_path_to_subset.get( frame["frame_annotation"].image.path, None @@ -504,7 +494,6 @@ def _load_subset_lists(self) -> None: def _sort_frames(self) -> None: # Sort frames to have them grouped by sequence, ordered by timestamp - # pyre-ignore[16] self.frame_annots = sorted( self.frame_annots, key=lambda f: ( @@ -516,7 +505,6 @@ def _sort_frames(self) -> None: def _filter_db(self) -> None: if self.remove_empty_masks: logger.info("Removing images with empty masks.") - # pyre-ignore[16] old_len = len(self.frame_annots) msg = "remove_empty_masks needs every MaskAnnotation.mass to be set." 
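The `memo = {id(self.frame_annots): None}` idiom kept above deserves a gloss:
`copy.deepcopy` consults its memo table by object id before copying anything, so
pre-seeding it substitutes the given replacement (here `None`) for that member
instead of recursing into it, while the rest of the object is still deep-copied.
A small sketch of the trick (hypothetical names, not part of the patch):

import copy

class Holder:
    def __init__(self) -> None:
        self.big = list(range(10000))  # member we want the copy to skip
        self.meta = {"category": "skateboard"}

h = Holder()
# map id(h.big) -> None: deepcopy returns None wherever h.big appears
h2 = copy.deepcopy(h, {id(h.big): None})
assert h2.big is None
assert h2.meta == h.meta and h2.meta is not h.meta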
@@ -557,7 +545,6 @@ def positive_mass(frame_annot: types.FrameAnnotation) -> bool:
         if len(self.limit_category_to) > 0:
             logger.info(f"Limiting dataset to categories: {self.limit_category_to}")
-            # pyre-ignore[16]
             self.seq_annots = {
                 name: entry
                 for name, entry in self.seq_annots.items()
@@ -595,7 +582,6 @@ def positive_mass(frame_annot: types.FrameAnnotation) -> bool:
         if self.n_frames_per_sequence > 0:
             logger.info(f"Taking max {self.n_frames_per_sequence} per sequence.")
             keep_idx = []
-            # pyre-ignore[16]
             for seq, seq_indices in self._seq_to_idx.items():
                 # infer the seed from the sequence name, this is reproducible
                 # and makes the selection differ for different sequences
@@ -625,20 +611,14 @@
     def _invalidate_indexes(self, filter_seq_annots: bool = False) -> None:
         self._invalidate_seq_to_idx()
         if filter_seq_annots:
-            # pyre-ignore[16]
             self.seq_annots = {
-                k: v
-                for k, v in self.seq_annots.items()
-                # pyre-ignore[16]
-                if k in self._seq_to_idx
+                k: v for k, v in self.seq_annots.items() if k in self._seq_to_idx
             }

     def _invalidate_seq_to_idx(self) -> None:
         seq_to_idx = defaultdict(list)
-        # pyre-ignore[16]
         for idx, entry in enumerate(self.frame_annots):
             seq_to_idx[entry["frame_annotation"].sequence_name].append(idx)
-        # pyre-ignore[16]
         self._seq_to_idx = seq_to_idx

     def _local_path(self, path: str) -> str:
@@ -653,7 +633,6 @@ def get_frame_numbers_and_timestamps(
         for idx in idxs:
             if (
                 subset_filter is not None
-                # pyre-fixme[16]: `JsonIndexDataset` has no attribute `frame_annots`.
                 and self.frame_annots[idx]["subset"] not in subset_filter
             ):
                 continue
@@ -666,7 +645,6 @@
     def category_to_sequence_names(self) -> Dict[str, List[str]]:
         c2seq = defaultdict(list)
-        # pyre-ignore
         for sequence_name, sa in self.seq_annots.items():
             c2seq[sa.category].append(sequence_name)
         return dict(c2seq)
diff --git a/pytorch3d/implicitron/dataset/visualize.py b/pytorch3d/implicitron/dataset/visualize.py
index 6d0be0362..284e903a0 100644
--- a/pytorch3d/implicitron/dataset/visualize.py
+++ b/pytorch3d/implicitron/dataset/visualize.py
@@ -44,7 +44,6 @@ def get_implicitron_sequence_pointcloud(
     sequence_entries = [
         ei
         for ei in sequence_entries
-        # pyre-ignore[16]
         if dataset.frame_annots[ei]["frame_annotation"].sequence_name == sequence_name
     ]

From 24b731b853b54f741a2f9377118e36d14821fa7c Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Mon, 6 Mar 2023 12:47:23 +0000
Subject: [PATCH 12/35] deleted unused function

---
 pytorch3d/implicitron/dataset/blob_loader.py | 28 --------------------
 1 file changed, 28 deletions(-)

diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py
index fce26b255..035e99a83 100644
--- a/pytorch3d/implicitron/dataset/blob_loader.py
+++ b/pytorch3d/implicitron/dataset/blob_loader.py
@@ -434,34 +434,6 @@ def _bbox_xyxy_to_xywh(xyxy: torch.Tensor) -> torch.Tensor:
     return xywh


-def _resize_image(
-    self, image, mode="bilinear"
-) -> Tuple[torch.Tensor, float, torch.Tensor]:
-    image_height, image_width = self.image_height, self.image_width
-    if image_height is None or image_width is None:
-        # skip the resizing
-        imre_ = torch.from_numpy(image)
-        return imre_, 1.0, torch.ones_like(imre_[:1])
-    # takes numpy array, returns pytorch tensor
-    minscale = min(
-        image_height / image.shape[-2],
-        image_width / image.shape[-1],
-    )
-    imre = torch.nn.functional.interpolate(
-        torch.from_numpy(image)[None],
-        scale_factor=minscale,
-        mode=mode,
-        align_corners=False if mode == "bilinear" else None,
-        recompute_scale_factor=True,
-    )[0]
-    # pyre-fixme[19]: Expected 1 positional argument.
-    imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width)
-    imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre
-    mask = torch.zeros(1, self.image_height, self.image_width)
-    mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0
-    return imre_, minscale, mask
-
-
 def _load_depth(path, scale_adjustment) -> np.ndarray:
     if not path.lower().endswith(".png"):
         raise ValueError('unsupported depth file name "%s"' % path)

From f484a12501b7d13027fe98707c1be8ece3546153 Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Mon, 6 Mar 2023 12:47:51 +0000
Subject: [PATCH 13/35] BlobLoader tests boilerplate

---
 tests/implicitron/test_bbox.py        |  2 +-
 tests/implicitron/test_blob_loader.py | 89 +++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 1 deletion(-)
 create mode 100644 tests/implicitron/test_blob_loader.py

diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py
index 999dfc924..7d214d857 100644
--- a/tests/implicitron/test_bbox.py
+++ b/tests/implicitron/test_bbox.py
@@ -9,7 +9,7 @@
 import numpy as np
 import torch
-from pytorch3d.implicitron.dataset.json_index_dataset import (
+from pytorch3d.implicitron.dataset.blob_loader import (
     _bbox_xywh_to_xyxy,
     _bbox_xyxy_to_xywh,
     _get_bbox_from_mask,
diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py
new file mode 100644
index 000000000..0e6bf6936
--- /dev/null
+++ b/tests/implicitron/test_blob_loader.py
@@ -0,0 +1,89 @@
+import contextlib
+import unittest
+
+import numpy as np
+
+import torch
+from pytorch3d.implicitron.dataset.blob_loader import (
+    _bbox_xywh_to_xyxy,
+    _bbox_xyxy_to_xywh,
+    _get_bbox_from_mask,
+)
+from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
+from pytorch3d.implicitron.dataset.blob_loader import BlobLoader
+from tests.common_testing import TestCaseMixin
+from pytorch3d.implicitron.tools.config import expand_args_fields
+from pytorch3d.implicitron.tools.config import get_default_args
+
+
+class TestBlobLoader(TestCaseMixin, unittest.TestCase):
+    def setUp(self):
+        torch.manual_seed(42)
+        self.blob_loader = BlobLoader()
+
+        category = "skateboard"
+        stack = contextlib.ExitStack()
+        dataset_root, path_manager = stack.enter_context(get_skateboard_data())
+        self.addCleanup(stack.close)
+        frame_file = os.path.join(dataset_root, category, "frame_annotations.jgz")
+        sequence_file = os.path.join(dataset_root, category, "sequence_annotations.jgz")
+        self.image_size = 256
+
+        expand_args_fields(JsonIndexDataset)
+
+        self.datasets = JsonIndexDataset(
+            frame_annotations_file=frame_file,
+            sequence_annotations_file=sequence_file,
+            dataset_root=dataset_root,
+            image_height=self.image_size,
+            image_width=self.image_size,
+            box_crop=True,
+            load_point_clouds=True,
+            path_manager=path_manager,
+        )
+
+    def test_BlobLoader_args(self):
+        # test that BlobLoader works with get_default_args
+        get_default_args(BlobLoader)
+
+    def test_load_crop_fg_probability(self):
+        pass
+
+    def test_load_crop_images(self):
+        pass
+
+    def test_load_mask_depth(self):
+        pass
+
+    def test_fix_point_cloud_path(self):
+        pass
+
+    def test_resize_image(self):
+        pass
+
+    def test_crop_around_box(self):
+        pass
+
+    def test_clamp_box_to_image_bounds_and_round(self):
+        pass
+
+    def test_get_clamp_bbox(self):
+        pass
+
+    def test_load_depth(self):
+        pass
+
+    def test_load_16big_png_depth(self):
+        pass
+
+    def test_rescale_bbox(self):
+        pass
+
+    def test_load_1bit_png_mask(self):
+ pass + + def test_load_depth_mask(self): + pass + + def test_get_1d_bounds(self): + pass From b8674eaa4c6645bcceae089dcc2d12dee730f657 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 7 Mar 2023 13:11:45 +0000 Subject: [PATCH 14/35] tests WIP (not tested) --- tests/implicitron/test_bbox.py | 43 +++++++++ tests/implicitron/test_blob_loader.py | 124 +++++++++++++++++++------- 2 files changed, 136 insertions(+), 31 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 7d214d857..ddbcd6bd1 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -13,6 +13,11 @@ _bbox_xywh_to_xyxy, _bbox_xyxy_to_xywh, _get_bbox_from_mask, + _crop_around_box, + _clamp_box_to_image_bounds_and_round, + _get_clamp_bbox, + _rescale_bbox, + _get_1d_bounds, ) from tests.common_testing import TestCaseMixin @@ -76,3 +81,41 @@ def test_mask_to_bbox(self): expected_bbox_xywh = [2, 1, 2, 1] bbox_xywh = _get_bbox_from_mask(mask, 0.5) self.assertClose(bbox_xywh, expected_bbox_xywh) + + def test_crop_around_box(self): + bbox = (0, 1, 2, 2) # (x_min, y_min, x_max, y_max) + image = torch.LongTensor( + [ + [0, 0, 10, 20], + [10, 20, 5, 1], + [10, 20, 1, 1], + [5, 4, 0, 1], + ] + ) + cropped = _crop_around_box(image, bbox) + self.assertClose(cropped, image[0:2, 1:2]) + + def test_clamp_box_to_image_bounds_and_round(self): + bbox = torch.LongTensor([0, 1, 10, 12]) + image_size = (5, 6) + clamped_bbox = _clamp_box_to_image_bounds_and_round(bbox) + self.assertClose(clamped_bbox == [0, 1, 5, 6]) + + def test_get_clamp_bbox(self): + bbox_xywh = torch.LongTensor([1, 1, 4, 5]) + clamped_bbox_xyxy = _get_clamp_bbox(bbox, box_crop_context=2) + # size multiplied by 2 and added coordinates + self.assertClose(clamped_bbox_xyxy == torch.LongTensor([0, 1, 9, 11])) + + def test_rescale_bbox(self): + bbox = torch.LongTensor([0, 1, 3, 4]) + original_resolution = (4, 4) # + new_resolution = (8, 8) + rescaled_bbox = _rescale_bbox(bbox, original_resolution, new_resolution) + self.assertClose(bbox * 2 == rescaled_bbox) + + def test_get_1d_bounds(self): + array = [0, 1, 2] + bounds = _get_1d_bounds(array) + # make nonzero 1d bounds of image + assert bounds == [1, 2] diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 0e6bf6936..da3326421 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -5,9 +5,12 @@ import torch from pytorch3d.implicitron.dataset.blob_loader import ( - _bbox_xywh_to_xyxy, - _bbox_xyxy_to_xywh, - _get_bbox_from_mask, + _load_image, + _load_mask, + _load_depth, + _load_16big_png_depth, + _load_1bit_png_mask, + _load_depth_mask, ) from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset from pytorch3d.implicitron.dataset.blob_loader import BlobLoader @@ -41,49 +44,108 @@ def setUp(self): load_point_clouds=True, path_manager=path_manager, ) + self.entry = self.datasets.frame_annots[index]["frame_annotation"] def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args get_default_args(BlobLoader) - def test_load_crop_fg_probability(self): - pass - - def test_load_crop_images(self): - pass + def test_load_pipeline(self): + ( + fg_probability, + mask_path, + bbox_xywh, + clamp_bbox_xyxy, + crop_bbox_xywh, + ) = self.datasets.loader._load_crop_fg_probability(entry) + + assert fg_probability + assert mask_path + assert bbox_xywh + assert clamp_bbox_xyxy + assert crop_bbox_xywh + ( + image_rgb, + image_path, + mask_crop, + scale, 
+ ) = self.dataset.loader._load_crop_images( + self.entry, fg_probability, clamp_bbox_xyxy, + ) + assert image_rgb + assert image_path + assert mask_crop, + assert scale, + ( + depth_map, + depth_path, + depth_mask, + ) = self.dataset.loader._load_mask_depth( + self.entry, clamp_bbox_xyxy, fg_probability, + ) + assert depth_map + assert depth_path + assert depth_mask - def test_load_mask_depth(self): - pass + camera = self.dataset.loader._get_pytorch3d_camera( + self.entry, scale, clamp_bbox_xyxy, + ) + assert camera def test_fix_point_cloud_path(self): - pass + """Some files in Co3Dv2 have an accidental absolute path stored.""" + original_path = 'some_file_path' + modified_path = self.dataset.loader._fix_point_cloud_path(original_path) + assert original_path in modified_path + assert self.dataset.loader.dataset_root in modified_path def test_resize_image(self): - pass - - def test_crop_around_box(self): - pass - - def test_clamp_box_to_image_bounds_and_round(self): - pass - - def test_get_clamp_bbox(self): - pass + image = None + image_rgb, scale, mask_crop = self.dataset.loader._resize_image(image) + assert image_rgb.shape == (self.dataset.loader.width, self.dataset.loader.height) + assert scale == 1 + assert masc_crop.shape == (self.dataset.loader.width, self.dataset.loader.height) + + def test_load_image(self): + image = _load_image(self.entry.image.path) + assert image.dtype == np.float32 + assert torch.max(image) <= 1.0 + assert torch.min(image) >= 0.0 + + def test_load_mask(self): + mask = _load_mask(self.entry.mask.path) + assert mask.dtype == np.float32 + assert torch.max(mask) <= 1.0 + assert torch.min(mask) >= 0.0 def test_load_depth(self): - pass + entry_depth = self.entry.depth + # path = os.path.join(self.dataset_root, entry_depth.path) + path = entry_depth.path + depth_map = _load_depth(path, entry_depth.scale_adjustment) + assert depth_map.dtype == np.float32 + assert depth_map.shape def test_load_16big_png_depth(self): - pass - - def test_rescale_bbox(self): - pass + entry_depth = self.entry.depth + # path = os.path.join(self.dataset_root, entry_depth.path) + path = entry_depth.path + depth_map = _load_16big_png_depth(path) + assert depth_map.dtype == np.float32 + assert depth_map.shape def test_load_1bit_png_mask(self): - pass + entry_depth = self.entry.depth + # mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) + mask_path = entry_depth.mask_path + mask = _load_16big_png_depth(mask_path) + assert mask.dtype == np.float32 + assert mask.shape def test_load_depth_mask(self): - pass - - def test_get_1d_bounds(self): - pass + entry_depth = self.entry.depth + # mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) + mask_path = entry_depth.mask_path + mask = _load_depth_mask(mask_path) + assert mask.dtype == np.float32 + assert mask.shape From faeffcf3aa61716640fca15fe25e260fd524e953 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 09:58:42 +0000 Subject: [PATCH 15/35] tests typos and errors WIP --- tests/implicitron/test_bbox.py | 12 ++++++------ tests/implicitron/test_blob_loader.py | 7 ++++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index ddbcd6bd1..1e351d049 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -83,7 +83,7 @@ def test_mask_to_bbox(self): self.assertClose(bbox_xywh, expected_bbox_xywh) def test_crop_around_box(self): - bbox = (0, 1, 2, 2) # (x_min, y_min, x_max, y_max) + bbox = 
torxh.LongTensor([0, 1, 2, 2]) # (x_min, y_min, x_max, y_max) image = torch.LongTensor( [ [0, 0, 10, 20], @@ -98,24 +98,24 @@ def test_crop_around_box(self): def test_clamp_box_to_image_bounds_and_round(self): bbox = torch.LongTensor([0, 1, 10, 12]) image_size = (5, 6) - clamped_bbox = _clamp_box_to_image_bounds_and_round(bbox) + clamped_bbox = _clamp_box_to_image_bounds_and_round(bbox, image_size) self.assertClose(clamped_bbox == [0, 1, 5, 6]) def test_get_clamp_bbox(self): bbox_xywh = torch.LongTensor([1, 1, 4, 5]) - clamped_bbox_xyxy = _get_clamp_bbox(bbox, box_crop_context=2) + clamped_bbox_xyxy = _get_clamp_bbox(bbox_xywh, box_crop_context=2) # size multiplied by 2 and added coordinates self.assertClose(clamped_bbox_xyxy == torch.LongTensor([0, 1, 9, 11])) def test_rescale_bbox(self): bbox = torch.LongTensor([0, 1, 3, 4]) - original_resolution = (4, 4) # + original_resolution = (4, 4) new_resolution = (8, 8) rescaled_bbox = _rescale_bbox(bbox, original_resolution, new_resolution) - self.assertClose(bbox * 2 == rescaled_bbox) + self.assertClose(bbox * 2, rescaled_bbox) def test_get_1d_bounds(self): array = [0, 1, 2] bounds = _get_1d_bounds(array) # make nonzero 1d bounds of image - assert bounds == [1, 2] + assert bounds == [1, 3] diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index da3326421..692ecbd62 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -18,11 +18,12 @@ from pytorch3d.implicitron.tools.config import expand_args_fields from pytorch3d.implicitron.tools.config import get_default_args +from tests.implicitron.common_resources import get_skateboard_data + class TestBlobLoader(TestCaseMixin, unittest.TestCase): def setUp(self): torch.manual_seed(42) - self.blob_loader = BlobLoader() category = "skateboard" stack = contextlib.ExitStack() @@ -74,8 +75,8 @@ def test_load_pipeline(self): ) assert image_rgb assert image_path - assert mask_crop, - assert scale, + assert mask_crop + assert scale ( depth_map, depth_path, From bc24e29d7640773e0892288b919b3e1f851ec37d Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 10:23:19 +0000 Subject: [PATCH 16/35] tests typos and errors WIP --- tests/implicitron/test_bbox.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 1e351d049..5381e709e 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -83,7 +83,7 @@ def test_mask_to_bbox(self): self.assertClose(bbox_xywh, expected_bbox_xywh) def test_crop_around_box(self): - bbox = torxh.LongTensor([0, 1, 2, 2]) # (x_min, y_min, x_max, y_max) + bbox = torch.LongTensor([0, 1, 2, 2]) # (x_min, y_min, x_max, y_max) image = torch.LongTensor( [ [0, 0, 10, 20], @@ -95,27 +95,31 @@ def test_crop_around_box(self): cropped = _crop_around_box(image, bbox) self.assertClose(cropped, image[0:2, 1:2]) + + def test_clamp_box_to_image_bounds_and_round(self): bbox = torch.LongTensor([0, 1, 10, 12]) image_size = (5, 6) clamped_bbox = _clamp_box_to_image_bounds_and_round(bbox, image_size) - self.assertClose(clamped_bbox == [0, 1, 5, 6]) + self.assertClose(clamped_bbox, [0, 1, 5, 6]) def test_get_clamp_bbox(self): bbox_xywh = torch.LongTensor([1, 1, 4, 5]) clamped_bbox_xyxy = _get_clamp_bbox(bbox_xywh, box_crop_context=2) # size multiplied by 2 and added coordinates - self.assertClose(clamped_bbox_xyxy == torch.LongTensor([0, 1, 9, 11])) + self.assertClose(clamped_bbox_xyxy, 
torch.LongTensor([0, 1, 9, 11])) def test_rescale_bbox(self): bbox = torch.LongTensor([0, 1, 3, 4]) original_resolution = (4, 4) new_resolution = (8, 8) rescaled_bbox = _rescale_bbox(bbox, original_resolution, new_resolution) + print(rescaled_bbox) self.assertClose(bbox * 2, rescaled_bbox) def test_get_1d_bounds(self): array = [0, 1, 2] bounds = _get_1d_bounds(array) # make nonzero 1d bounds of image + print(bounds) assert bounds == [1, 3] From e9c59693ed78dfb036db3e056724b88252f6fbe7 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 11:03:16 +0000 Subject: [PATCH 17/35] solved error and typos for test_bbox --- tests/implicitron/test_bbox.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 5381e709e..89b624199 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -83,7 +83,7 @@ def test_mask_to_bbox(self): self.assertClose(bbox_xywh, expected_bbox_xywh) def test_crop_around_box(self): - bbox = torch.LongTensor([0, 1, 2, 2]) # (x_min, y_min, x_max, y_max) + bbox = torch.LongTensor([0, 1, 2, 3]) # (x_min, y_min, x_max, y_max) image = torch.LongTensor( [ [0, 0, 10, 20], @@ -93,33 +93,30 @@ def test_crop_around_box(self): ] ) cropped = _crop_around_box(image, bbox) - self.assertClose(cropped, image[0:2, 1:2]) - - + self.assertClose(cropped, image[1:3, 0:2]) def test_clamp_box_to_image_bounds_and_round(self): bbox = torch.LongTensor([0, 1, 10, 12]) image_size = (5, 6) + expected_clamped_bbox = torch.LongTensor([0, 1, image_size[1], image_size[0]]) clamped_bbox = _clamp_box_to_image_bounds_and_round(bbox, image_size) - self.assertClose(clamped_bbox, [0, 1, 5, 6]) + self.assertClose(clamped_bbox, expected_clamped_bbox) def test_get_clamp_bbox(self): bbox_xywh = torch.LongTensor([1, 1, 4, 5]) clamped_bbox_xyxy = _get_clamp_bbox(bbox_xywh, box_crop_context=2) # size multiplied by 2 and added coordinates - self.assertClose(clamped_bbox_xyxy, torch.LongTensor([0, 1, 9, 11])) + self.assertClose(clamped_bbox_xyxy, torch.Tensor([-3, -4, 9, 11])) def test_rescale_bbox(self): - bbox = torch.LongTensor([0, 1, 3, 4]) + bbox = torch.Tensor([0.0, 1.0, 3.0, 4.0]) original_resolution = (4, 4) - new_resolution = (8, 8) + new_resolution = (8, 8) # twice bigger rescaled_bbox = _rescale_bbox(bbox, original_resolution, new_resolution) - print(rescaled_bbox) self.assertClose(bbox * 2, rescaled_bbox) def test_get_1d_bounds(self): array = [0, 1, 2] bounds = _get_1d_bounds(array) # make nonzero 1d bounds of image - print(bounds) - assert bounds == [1, 3] + self.assertClose(bounds, [1, 3]) From 44cfcfb9f243c16f6153617166eb28461705f1cc Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 13:28:54 +0000 Subject: [PATCH 18/35] updating test_blob_loader WIP --- tests/implicitron/test_blob_loader.py | 78 +++++++++++++++------------ 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 692ecbd62..d54754d88 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -1,3 +1,5 @@ +import os +import math import contextlib import unittest @@ -14,6 +16,7 @@ ) from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset from pytorch3d.implicitron.dataset.blob_loader import BlobLoader +from pytorch3d.renderer.cameras import PerspectiveCameras from tests.common_testing import TestCaseMixin from 
pytorch3d.implicitron.tools.config import expand_args_fields from pytorch3d.implicitron.tools.config import get_default_args @@ -27,7 +30,7 @@ def setUp(self): category = "skateboard" stack = contextlib.ExitStack() - dataset_root, path_manager = stack.enter_context(get_skateboard_data()) + self.dataset_root, self.path_manager = stack.enter_context(get_skateboard_data()) self.addCleanup(stack.close) frame_file = os.path.join(dataset_root, category, "frame_annotations.jgz") sequence_file = os.path.join(dataset_root, category, "sequence_annotations.jgz") @@ -35,17 +38,18 @@ def setUp(self): expand_args_fields(JsonIndexDataset) - self.datasets = JsonIndexDataset( + self.dataset = JsonIndexDataset( frame_annotations_file=frame_file, sequence_annotations_file=sequence_file, - dataset_root=dataset_root, + dataset_root=self.dataset_root, image_height=self.image_size, image_width=self.image_size, box_crop=True, load_point_clouds=True, - path_manager=path_manager, + path_manager=self.path_manager, ) - self.entry = self.datasets.frame_annots[index]["frame_annotation"] + index = 7000 + self.entry = self.dataset.frame_annots[index]["frame_annotation"] def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args @@ -58,57 +62,66 @@ def test_load_pipeline(self): bbox_xywh, clamp_bbox_xyxy, crop_bbox_xywh, - ) = self.datasets.loader._load_crop_fg_probability(entry) + ) = self.dataset.blob_loader._load_crop_fg_probability(self.entry) - assert fg_probability + assert torch.is_tensor(fg_probability) assert mask_path assert bbox_xywh - assert clamp_bbox_xyxy - assert crop_bbox_xywh + assert torch.is_tensor(clamp_bbox_xyxy) + assert torch.is_tensor(crop_bbox_xywh) ( image_rgb, image_path, mask_crop, scale, - ) = self.dataset.loader._load_crop_images( + ) = self.dataset.blob_loader._load_crop_images( self.entry, fg_probability, clamp_bbox_xyxy, ) - assert image_rgb + assert torch.is_tensor(image_rgb) assert image_path - assert mask_crop + assert torch.is_tensor(mask_crop) assert scale ( depth_map, depth_path, depth_mask, - ) = self.dataset.loader._load_mask_depth( + ) = self.dataset.blob_loader._load_mask_depth( self.entry, clamp_bbox_xyxy, fg_probability, ) - assert depth_map - assert depth_path - assert depth_mask + assert torch.is_tensor(depth_map) + assert torch.is_tensor(depth_path) + assert torch.is_tensor(depth_mask) - camera = self.dataset.loader._get_pytorch3d_camera( + camera = self.dataset.blob_loader._get_pytorch3d_camera( self.entry, scale, clamp_bbox_xyxy, ) - assert camera + assert type(camera) == PerspectiveCameras def test_fix_point_cloud_path(self): """Some files in Co3Dv2 have an accidental absolute path stored.""" original_path = 'some_file_path' - modified_path = self.dataset.loader._fix_point_cloud_path(original_path) + modified_path = self.dataset.blob_loader._fix_point_cloud_path(original_path) assert original_path in modified_path - assert self.dataset.loader.dataset_root in modified_path + assert self.dataset.blob_loader.dataset_root in modified_path def test_resize_image(self): - image = None - image_rgb, scale, mask_crop = self.dataset.loader._resize_image(image) - assert image_rgb.shape == (self.dataset.loader.width, self.dataset.loader.height) - assert scale == 1 - assert masc_crop.shape == (self.dataset.loader.width, self.dataset.loader.height) + path = os.path.join(self.dataset_root, self.entry.image.path) + local_path = self.path_manager.get_local_path(path) + image = _load_image(local_path) + image_rgb, scale, mask_crop = 
self.dataset.blob_loader._resize_image(image) + + original_shape = image.shape[-2:] + expected_shape = (self.dataset.blob_loader.image_width, self.dataset.blob_loader.image_height) + expected_scale = expected_shape[0] / original_shape[0] + + assert scale == expected_scale + assert image_rgb.shape[-2:] == expected_shape + assert mask_crop.shape[-2:] == expected_shape def test_load_image(self): - image = _load_image(self.entry.image.path) + path = os.path.join(self.dataset_root, self.entry.image.path) + local_path = self.path_manager.get_local_path(path) + image = _load_image(local_path) assert image.dtype == np.float32 assert torch.max(image) <= 1.0 assert torch.min(image) >= 0.0 @@ -120,32 +133,27 @@ def test_load_mask(self): assert torch.min(mask) >= 0.0 def test_load_depth(self): - entry_depth = self.entry.depth - # path = os.path.join(self.dataset_root, entry_depth.path) + path = os.path.join(self.dataset_root, entry_depth.path) path = entry_depth.path depth_map = _load_depth(path, entry_depth.scale_adjustment) assert depth_map.dtype == np.float32 assert depth_map.shape def test_load_16big_png_depth(self): - entry_depth = self.entry.depth - # path = os.path.join(self.dataset_root, entry_depth.path) - path = entry_depth.path + path = os.path.join(self.dataset_root, self.entry.depth.path) depth_map = _load_16big_png_depth(path) assert depth_map.dtype == np.float32 assert depth_map.shape def test_load_1bit_png_mask(self): - entry_depth = self.entry.depth - # mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) + mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) mask_path = entry_depth.mask_path mask = _load_16big_png_depth(mask_path) assert mask.dtype == np.float32 assert mask.shape def test_load_depth_mask(self): - entry_depth = self.entry.depth - # mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) + mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) mask_path = entry_depth.mask_path mask = _load_depth_mask(mask_path) assert mask.dtype == np.float32 From 11def0a8b452a1479d63fe9ba665f2adc6687553 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 14:50:11 +0000 Subject: [PATCH 19/35] blob loader tests ready for review --- tests/implicitron/test_bbox.py | 9 +- tests/implicitron/test_blob_loader.py | 119 ++++++++++++++++---------- 2 files changed, 81 insertions(+), 47 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 89b624199..8dffd751d 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -12,13 +12,14 @@ from pytorch3d.implicitron.dataset.blob_loader import ( _bbox_xywh_to_xyxy, _bbox_xyxy_to_xywh, - _get_bbox_from_mask, - _crop_around_box, _clamp_box_to_image_bounds_and_round, + _crop_around_box, + _get_1d_bounds, + _get_bbox_from_mask, _get_clamp_bbox, _rescale_bbox, - _get_1d_bounds, ) + from tests.common_testing import TestCaseMixin @@ -83,7 +84,7 @@ def test_mask_to_bbox(self): self.assertClose(bbox_xywh, expected_bbox_xywh) def test_crop_around_box(self): - bbox = torch.LongTensor([0, 1, 2, 3]) # (x_min, y_min, x_max, y_max) + bbox = torch.LongTensor([0, 1, 2, 3]) # (x_min, y_min, x_max, y_max) image = torch.LongTensor( [ [0, 0, 10, 20], diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index d54754d88..461b2109c 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -1,25 +1,24 @@ -import os -import math import contextlib +import os 
import unittest import numpy as np import torch from pytorch3d.implicitron.dataset.blob_loader import ( - _load_image, - _load_mask, - _load_depth, _load_16big_png_depth, _load_1bit_png_mask, + _load_depth, _load_depth_mask, + _load_image, + _load_mask, + BlobLoader, ) from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset -from pytorch3d.implicitron.dataset.blob_loader import BlobLoader +from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args from pytorch3d.renderer.cameras import PerspectiveCameras + from tests.common_testing import TestCaseMixin -from pytorch3d.implicitron.tools.config import expand_args_fields -from pytorch3d.implicitron.tools.config import get_default_args from tests.implicitron.common_resources import get_skateboard_data @@ -30,23 +29,28 @@ def setUp(self): category = "skateboard" stack = contextlib.ExitStack() - self.dataset_root, self.path_manager = stack.enter_context(get_skateboard_data()) + self.dataset_root, self.path_manager = stack.enter_context( + get_skateboard_data() + ) self.addCleanup(stack.close) - frame_file = os.path.join(dataset_root, category, "frame_annotations.jgz") - sequence_file = os.path.join(dataset_root, category, "sequence_annotations.jgz") - self.image_size = 256 + frame_file = os.path.join(self.dataset_root, category, "frame_annotations.jgz") + sequence_file = os.path.join( + self.dataset_root, category, "sequence_annotations.jgz" + ) + self.image_height = 768 + self.image_width = 512 expand_args_fields(JsonIndexDataset) self.dataset = JsonIndexDataset( - frame_annotations_file=frame_file, - sequence_annotations_file=sequence_file, - dataset_root=self.dataset_root, - image_height=self.image_size, - image_width=self.image_size, - box_crop=True, - load_point_clouds=True, - path_manager=self.path_manager, + frame_annotations_file=frame_file, + sequence_annotations_file=sequence_file, + dataset_root=self.dataset_root, + image_height=self.image_height, + image_width=self.image_width, + box_crop=True, + load_point_clouds=True, + path_manager=self.path_manager, ) index = 7000 self.entry = self.dataset.frame_annots[index]["frame_annotation"] @@ -64,42 +68,68 @@ def test_load_pipeline(self): crop_bbox_xywh, ) = self.dataset.blob_loader._load_crop_fg_probability(self.entry) - assert torch.is_tensor(fg_probability) assert mask_path - assert bbox_xywh + assert torch.is_tensor(fg_probability) + assert torch.is_tensor(bbox_xywh) assert torch.is_tensor(clamp_bbox_xyxy) assert torch.is_tensor(crop_bbox_xywh) + # assert bboxes shape + assert fg_probability.shape == torch.Shape( + [1, self.image_height, self.image_width] + ) + assert bbox_xywh.shape == torch.Shape([4]) + assert clamp_bbox_xyxy == torch.Shape([4]) + assert crop_bbox_xywh.shape == torch.Shape([4]) ( image_rgb, image_path, mask_crop, scale, ) = self.dataset.blob_loader._load_crop_images( - self.entry, fg_probability, clamp_bbox_xyxy, + self.entry, + fg_probability, + clamp_bbox_xyxy, ) assert torch.is_tensor(image_rgb) assert image_path assert torch.is_tensor(mask_crop) assert scale + # assert image and mask shapes + assert image_rgb.shape == torch.Shape([3, self.image_height, self.image_width]) + assert mask_crop.shape == torch.Shape( + [1, self.image_height, self.image_width], + ) + ( depth_map, depth_path, depth_mask, ) = self.dataset.blob_loader._load_mask_depth( - self.entry, clamp_bbox_xyxy, fg_probability, + self.entry, + clamp_bbox_xyxy, + fg_probability, ) assert torch.is_tensor(depth_map) - assert torch.is_tensor(depth_path) + 
assert depth_path assert torch.is_tensor(depth_mask) + # assert image and mask shapes + assert depth_map.shape == torch.Shape( + [1, self.image_height, self.image_width], + ) + assert depth_mask.shape == torch.Shape( + [1, self.image_height, self.image_width], + ) camera = self.dataset.blob_loader._get_pytorch3d_camera( - self.entry, scale, clamp_bbox_xyxy, - ) + self.entry, + scale, + clamp_bbox_xyxy, + ) assert type(camera) == PerspectiveCameras def test_fix_point_cloud_path(self): """Some files in Co3Dv2 have an accidental absolute path stored.""" - original_path = 'some_file_path' + original_path = "some_file_path" modified_path = self.dataset.blob_loader._fix_point_cloud_path(original_path) assert original_path in modified_path assert self.dataset.blob_loader.dataset_root in modified_path @@ -111,8 +141,13 @@ def test_resize_image(self): image_rgb, scale, mask_crop = self.dataset.blob_loader._resize_image(image) original_shape = image.shape[-2:] - expected_shape = (self.dataset.blob_loader.image_width, self.dataset.blob_loader.image_height) - expected_scale = expected_shape[0] / original_shape[0] + expected_shape = ( + self.image_height, + self.image_width, + ) + expected_scale = min( + expected_shape[0] / original_shape[0], expected_shape[1] / original_shape[1] + ) assert scale == expected_scale assert image_rgb.shape[-2:] == expected_shape @@ -123,19 +158,19 @@ def test_load_image(self): local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) assert image.dtype == np.float32 - assert torch.max(image) <= 1.0 - assert torch.min(image) >= 0.0 + assert np.max(image) <= 1.0 + assert np.min(image) >= 0.0 def test_load_mask(self): - mask = _load_mask(self.entry.mask.path) + path = os.path.join(self.dataset_root, self.entry.mask.path) + mask = _load_mask(path) assert mask.dtype == np.float32 - assert torch.max(mask) <= 1.0 - assert torch.min(mask) >= 0.0 + assert np.max(mask) <= 1.0 + assert np.min(mask) >= 0.0 def test_load_depth(self): - path = os.path.join(self.dataset_root, entry_depth.path) - path = entry_depth.path - depth_map = _load_depth(path, entry_depth.scale_adjustment) + path = os.path.join(self.dataset_root, self.entry.depth.path) + depth_map = _load_depth(path, self.entry.depth.scale_adjustment) assert depth_map.dtype == np.float32 assert depth_map.shape @@ -147,14 +182,12 @@ def test_load_16big_png_depth(self): def test_load_1bit_png_mask(self): mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) - mask_path = entry_depth.mask_path - mask = _load_16big_png_depth(mask_path) + mask = _load_1bit_png_mask(mask_path) assert mask.dtype == np.float32 - assert mask.shape + assert len(mask.shape) == 3 def test_load_depth_mask(self): mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) - mask_path = entry_depth.mask_path mask = _load_depth_mask(mask_path) assert mask.dtype == np.float32 - assert mask.shape + assert len(mask.shape) == 3 From bc52382a7991c69107645c0a91e5ea6dd7511f25 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 14:54:25 +0000 Subject: [PATCH 20/35] typo --- tests/implicitron/test_blob_loader.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 461b2109c..cb2976011 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -74,12 +74,12 @@ def test_load_pipeline(self): assert torch.is_tensor(clamp_bbox_xyxy) assert 
torch.is_tensor(crop_bbox_xywh) # assert bboxes shape - assert fg_probability.shape == torch.Shape( + assert fg_probability.shape == torch.Size( [1, self.image_height, self.image_width] ) - assert bbox_xywh.shape == torch.Shape([4]) - assert clamp_bbox_xyxy == torch.Shape([4]) - assert crop_bbox_xywh.shape == torch.Shape([4]) + assert bbox_xywh.shape == torch.Size([4]) + assert clamp_bbox_xyxy == torch.Size([4]) + assert crop_bbox_xywh.shape == torch.Size([4]) ( image_rgb, image_path, @@ -95,8 +95,8 @@ def test_load_pipeline(self): assert torch.is_tensor(mask_crop) assert scale # assert image and mask shapes - assert image_rgb.shape == torch.Shape([3, self.image_height, self.image_width]) - assert mask_crop.shape == torch.Shape( + assert image_rgb.shape == torch.Size([3, self.image_height, self.image_width]) + assert mask_crop.shape == torch.Size( [1, self.image_height, self.image_width], ) @@ -113,10 +113,10 @@ def test_load_pipeline(self): assert depth_path assert torch.is_tensor(depth_mask) # assert image and mask shapes - assert depth_map.shape == torch.Shape( + assert depth_map.shape == torch.Size( [1, self.image_height, self.image_width], ) - assert depth_mask.shape == torch.Shape( + assert depth_mask.shape == torch.Size( [1, self.image_height, self.image_width], ) From 01493775ea0d2c55069fc6348ce80aaa56cbf104 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 14:58:43 +0000 Subject: [PATCH 21/35] typo --- tests/implicitron/test_blob_loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index cb2976011..5f694b897 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -78,7 +78,7 @@ def test_load_pipeline(self): [1, self.image_height, self.image_width] ) assert bbox_xywh.shape == torch.Size([4]) - assert clamp_bbox_xyxy == torch.Size([4]) + assert clamp_bbox_xyxy.shape == torch.Size([4]) assert crop_bbox_xywh.shape == torch.Size([4]) ( image_rgb, @@ -184,7 +184,7 @@ def test_load_1bit_png_mask(self): mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) mask = _load_1bit_png_mask(mask_path) assert mask.dtype == np.float32 - assert len(mask.shape) == 3 + assert len(mask.shape) == 2 def test_load_depth_mask(self): mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) From 3bcbd018cd04941a1541d58c724cd266803ae768 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 15:23:39 +0000 Subject: [PATCH 22/35] linter --- tests/implicitron/test_blob_loader.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 5f694b897..96d8fac60 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -96,9 +96,7 @@ def test_load_pipeline(self): assert scale # assert image and mask shapes assert image_rgb.shape == torch.Size([3, self.image_height, self.image_width]) - assert mask_crop.shape == torch.Size( - [1, self.image_height, self.image_width], - ) + assert mask_crop.shape == torch.Size([1, self.image_height, self.image_width]) ( depth_map, @@ -113,12 +111,8 @@ def test_load_pipeline(self): assert depth_path assert torch.is_tensor(depth_mask) # assert image and mask shapes - assert depth_map.shape == torch.Size( - [1, self.image_height, self.image_width], - ) - assert depth_mask.shape == torch.Size( - [1, self.image_height, self.image_width], 
- ) + assert depth_map.shape == torch.Size([1, self.image_height, self.image_width]) + assert depth_mask.shape == torch.Size([1, self.image_height, self.image_width]) camera = self.dataset.blob_loader._get_pytorch3d_camera( self.entry, @@ -178,7 +172,7 @@ def test_load_16big_png_depth(self): path = os.path.join(self.dataset_root, self.entry.depth.path) depth_map = _load_16big_png_depth(path) assert depth_map.dtype == np.float32 - assert depth_map.shape + assert len(depth_map.shape) == 2 def test_load_1bit_png_mask(self): mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) From 269cffa9ca8327cabb90104407df1c285e3405e3 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 15:35:02 +0000 Subject: [PATCH 23/35] all entry tests run thru all frames --- tests/implicitron/test_blob_loader.py | 68 +++++++++++++++------------ 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 96d8fac60..619586f04 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -52,21 +52,38 @@ def setUp(self): load_point_clouds=True, path_manager=self.path_manager, ) - index = 7000 - self.entry = self.dataset.frame_annots[index]["frame_annotation"] def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args get_default_args(BlobLoader) - def test_load_pipeline(self): + def test_fix_point_cloud_path(self): + """Some files in Co3Dv2 have an accidental absolute path stored.""" + original_path = "some_file_path" + modified_path = self.dataset.blob_loader._fix_point_cloud_path(original_path) + assert original_path in modified_path + assert self.dataset.blob_loader.dataset_root in modified_path + + def test_entry_loading_functions(self): + for index in range(len(self.dataset.frame_annots)): + entry = self.dataset.frame_annots[index]["frame_annotation"] + self.load_test(entry) + self._resize_image_test(entry) + self._load_image_test(entry) + self._load_mask_test(entry) + self._load_depth_test(entry) + self._load_16big_png_depth_test(entry) + self._load_1bit_png_mask_test(entry) + self._load_depth_mask_test(entry) + + def load_test(self, entry): ( fg_probability, mask_path, bbox_xywh, clamp_bbox_xyxy, crop_bbox_xywh, - ) = self.dataset.blob_loader._load_crop_fg_probability(self.entry) + ) = self.dataset.blob_loader._load_crop_fg_probability(entry) assert mask_path assert torch.is_tensor(fg_probability) @@ -86,7 +103,7 @@ def test_load_pipeline(self): mask_crop, scale, ) = self.dataset.blob_loader._load_crop_images( - self.entry, + entry, fg_probability, clamp_bbox_xyxy, ) @@ -103,7 +120,7 @@ def test_load_pipeline(self): depth_path, depth_mask, ) = self.dataset.blob_loader._load_mask_depth( - self.entry, + entry, clamp_bbox_xyxy, fg_probability, ) @@ -115,21 +132,14 @@ def test_load_pipeline(self): assert depth_mask.shape == torch.Size([1, self.image_height, self.image_width]) camera = self.dataset.blob_loader._get_pytorch3d_camera( - self.entry, + entry, scale, clamp_bbox_xyxy, ) assert type(camera) == PerspectiveCameras - def test_fix_point_cloud_path(self): - """Some files in Co3Dv2 have an accidental absolute path stored.""" - original_path = "some_file_path" - modified_path = self.dataset.blob_loader._fix_point_cloud_path(original_path) - assert original_path in modified_path - assert self.dataset.blob_loader.dataset_root in modified_path - - def test_resize_image(self): - path = os.path.join(self.dataset_root, self.entry.image.path) + 
def _resize_image_test(self, entry): + path = os.path.join(self.dataset_root, entry.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) image_rgb, scale, mask_crop = self.dataset.blob_loader._resize_image(image) @@ -147,41 +157,41 @@ def test_resize_image(self): assert image_rgb.shape[-2:] == expected_shape assert mask_crop.shape[-2:] == expected_shape - def test_load_image(self): - path = os.path.join(self.dataset_root, self.entry.image.path) + def _load_image_test(self, entry): + path = os.path.join(self.dataset_root, entry.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) assert image.dtype == np.float32 assert np.max(image) <= 1.0 assert np.min(image) >= 0.0 - def test_load_mask(self): - path = os.path.join(self.dataset_root, self.entry.mask.path) + def _load_mask_test(self, entry): + path = os.path.join(self.dataset_root, entry.mask.path) mask = _load_mask(path) assert mask.dtype == np.float32 assert np.max(mask) <= 1.0 assert np.min(mask) >= 0.0 - def test_load_depth(self): - path = os.path.join(self.dataset_root, self.entry.depth.path) - depth_map = _load_depth(path, self.entry.depth.scale_adjustment) + def _load_depth_test(self, entry): + path = os.path.join(self.dataset_root, entry.depth.path) + depth_map = _load_depth(path, entry.depth.scale_adjustment) assert depth_map.dtype == np.float32 assert depth_map.shape - def test_load_16big_png_depth(self): - path = os.path.join(self.dataset_root, self.entry.depth.path) + def _load_16big_png_depth_test(self, entry): + path = os.path.join(self.dataset_root, entry.depth.path) depth_map = _load_16big_png_depth(path) assert depth_map.dtype == np.float32 assert len(depth_map.shape) == 2 - def test_load_1bit_png_mask(self): - mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) + def _load_1bit_png_mask_test(self, entry): + mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) mask = _load_1bit_png_mask(mask_path) assert mask.dtype == np.float32 assert len(mask.shape) == 2 - def test_load_depth_mask(self): - mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) + def _load_depth_mask_test(self, entry): + mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) mask = _load_depth_mask(mask_path) assert mask.dtype == np.float32 assert len(mask.shape) == 3 From f930d71488ed978b7ca71525567e4f94ed721fc6 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 09:38:08 +0000 Subject: [PATCH 24/35] assert .. == .. to self.assertEqual(.., ..) 
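
unittest's assertEqual is preferred over bare `assert x == y` in tests: on
failure it prints both operands (e.g. the two mismatching torch.Size values),
and it keeps working under `python -O`, which strips plain assert statements.
A hypothetical failure illustrates the difference:

    self.assertEqual(depth_map.shape, torch.Size([1, 768, 512]))
    # AssertionError: torch.Size([1, 771, 512]) != torch.Size([1, 768, 512])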
--- tests/implicitron/test_blob_loader.py | 46 +++++++++++++-------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 619586f04..059244c67 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -91,12 +91,10 @@ def load_test(self, entry): assert torch.is_tensor(clamp_bbox_xyxy) assert torch.is_tensor(crop_bbox_xywh) # assert bboxes shape - assert fg_probability.shape == torch.Size( - [1, self.image_height, self.image_width] - ) - assert bbox_xywh.shape == torch.Size([4]) - assert clamp_bbox_xyxy.shape == torch.Size([4]) - assert crop_bbox_xywh.shape == torch.Size([4]) + self.assertEqual(fg_probability.shape, torch.Size([1, self.image_height, self.image_width])) + self.assertEqual(bbox_xywh.shape, torch.Size([4])) + self.assertEqual(clamp_bbox_xyxy.shape, torch.Size([4])) + self.assertEqual(crop_bbox_xywh.shape, torch.Size([4])) ( image_rgb, image_path, @@ -112,8 +110,8 @@ def load_test(self, entry): assert torch.is_tensor(mask_crop) assert scale # assert image and mask shapes - assert image_rgb.shape == torch.Size([3, self.image_height, self.image_width]) - assert mask_crop.shape == torch.Size([1, self.image_height, self.image_width]) + self.assertEqual(image_rgb.shape, torch.Size([3, self.image_height, self.image_width])) + self.assertEqual(mask_crop.shape, torch.Size([1, self.image_height, self.image_width]) ( depth_map, @@ -128,15 +126,15 @@ def load_test(self, entry): assert depth_path assert torch.is_tensor(depth_mask) # assert image and mask shapes - assert depth_map.shape == torch.Size([1, self.image_height, self.image_width]) - assert depth_mask.shape == torch.Size([1, self.image_height, self.image_width]) + self.assertEqual(depth_map.shape, torch.Size([1, self.image_height, self.image_width])) + self.assertEqual(depth_mask.shape, torch.Size([1, self.image_height, self.image_width])) camera = self.dataset.blob_loader._get_pytorch3d_camera( entry, scale, clamp_bbox_xyxy, ) - assert type(camera) == PerspectiveCameras + self.assertEqual(type(camera), PerspectiveCameras) def _resize_image_test(self, entry): path = os.path.join(self.dataset_root, entry.image.path) @@ -153,45 +151,45 @@ def _resize_image_test(self, entry): expected_shape[0] / original_shape[0], expected_shape[1] / original_shape[1] ) - assert scale == expected_scale - assert image_rgb.shape[-2:] == expected_shape - assert mask_crop.shape[-2:] == expected_shape + self.assertEqual(scale, expected_scale) + self.assertEqual(image_rgb.shape[-2:], expected_shape) + self.assertEqual(mask_crop.shape[-2:], expected_shape) def _load_image_test(self, entry): path = os.path.join(self.dataset_root, entry.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) - assert image.dtype == np.float32 + self.assertEqual(image.dtype, np.float32) assert np.max(image) <= 1.0 assert np.min(image) >= 0.0 def _load_mask_test(self, entry): path = os.path.join(self.dataset_root, entry.mask.path) mask = _load_mask(path) - assert mask.dtype == np.float32 + self.assertEqual(mask.dtype, np.float32) assert np.max(mask) <= 1.0 assert np.min(mask) >= 0.0 def _load_depth_test(self, entry): path = os.path.join(self.dataset_root, entry.depth.path) depth_map = _load_depth(path, entry.depth.scale_adjustment) - assert depth_map.dtype == np.float32 - assert depth_map.shape + self.assertEqual(depth_map.dtype, np.float32) + self.assertEqual(len(depth_map.shape), 2) def 
_load_16big_png_depth_test(self, entry): path = os.path.join(self.dataset_root, entry.depth.path) depth_map = _load_16big_png_depth(path) - assert depth_map.dtype == np.float32 - assert len(depth_map.shape) == 2 + self.assertEqual(depth_map.dtype, np.float32) + self.assertEqual(len(depth_map.shape), 2) def _load_1bit_png_mask_test(self, entry): mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) mask = _load_1bit_png_mask(mask_path) - assert mask.dtype == np.float32 - assert len(mask.shape) == 2 + self.assertEqual(mask.dtype, np.float32) + self.assertEqual(len(mask.shape), 2) def _load_depth_mask_test(self, entry): mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) mask = _load_depth_mask(mask_path) - assert mask.dtype == np.float32 - assert len(mask.shape) == 3 + self.assertEqual(mask.dtype, np.float32) + self.assertEqual(len(mask.shape), 3) From dc7a70280eed0715ddf8ab04267f883e7a4de8e4 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 09:42:24 +0000 Subject: [PATCH 25/35] testing only on 1 frame --- tests/implicitron/test_blob_loader.py | 63 ++++++++++----------------- 1 file changed, 23 insertions(+), 40 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 059244c67..a03e91537 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -52,6 +52,8 @@ def setUp(self): load_point_clouds=True, path_manager=self.path_manager, ) + index = 7000 + self.entry = self.dataset.frame_annots[index]["frame_annotation"] def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args @@ -64,26 +66,14 @@ def test_fix_point_cloud_path(self): assert original_path in modified_path assert self.dataset.blob_loader.dataset_root in modified_path - def test_entry_loading_functions(self): - for index in range(len(self.dataset.frame_annots)): - entry = self.dataset.frame_annots[index]["frame_annotation"] - self.load_test(entry) - self._resize_image_test(entry) - self._load_image_test(entry) - self._load_mask_test(entry) - self._load_depth_test(entry) - self._load_16big_png_depth_test(entry) - self._load_1bit_png_mask_test(entry) - self._load_depth_mask_test(entry) - - def load_test(self, entry): + def test_load(self): ( fg_probability, mask_path, bbox_xywh, clamp_bbox_xyxy, crop_bbox_xywh, - ) = self.dataset.blob_loader._load_crop_fg_probability(entry) + ) = self.dataset.blob_loader._load_crop_fg_probability(self.entry) assert mask_path assert torch.is_tensor(fg_probability) @@ -96,15 +86,8 @@ def load_test(self, entry): self.assertEqual(clamp_bbox_xyxy.shape, torch.Size([4])) self.assertEqual(crop_bbox_xywh.shape, torch.Size([4])) ( - image_rgb, - image_path, - mask_crop, - scale, - ) = self.dataset.blob_loader._load_crop_images( - entry, - fg_probability, - clamp_bbox_xyxy, - ) + image_rgb, image_path, mask_crop, scale, + ) = self.dataset.blob_loader._load_crop_images(self.entry, fg_probability, clamp_bbox_xyxy) assert torch.is_tensor(image_rgb) assert image_path assert torch.is_tensor(mask_crop) @@ -118,7 +101,7 @@ def load_test(self, entry): depth_path, depth_mask, ) = self.dataset.blob_loader._load_mask_depth( - entry, + self.entry, clamp_bbox_xyxy, fg_probability, ) @@ -130,14 +113,14 @@ def load_test(self, entry): self.assertEqual(depth_mask.shape, torch.Size([1, self.image_height, self.image_width])) camera = self.dataset.blob_loader._get_pytorch3d_camera( - entry, + self.entry, scale, clamp_bbox_xyxy, ) self.assertEqual(type(camera), 
PerspectiveCameras) - def _resize_image_test(self, entry): - path = os.path.join(self.dataset_root, entry.image.path) + def test_resize_image(self): + path = os.path.join(self.dataset_root, self.entry.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) image_rgb, scale, mask_crop = self.dataset.blob_loader._resize_image(image) @@ -155,41 +138,41 @@ def _resize_image_test(self, entry): self.assertEqual(image_rgb.shape[-2:], expected_shape) self.assertEqual(mask_crop.shape[-2:], expected_shape) - def _load_image_test(self, entry): - path = os.path.join(self.dataset_root, entry.image.path) + def test_load_image(self): + path = os.path.join(self.dataset_root, self.entry.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) self.assertEqual(image.dtype, np.float32) assert np.max(image) <= 1.0 assert np.min(image) >= 0.0 - def _load_mask_test(self, entry): - path = os.path.join(self.dataset_root, entry.mask.path) + def test_load_mask(self): + path = os.path.join(self.dataset_root, self.entry.mask.path) mask = _load_mask(path) self.assertEqual(mask.dtype, np.float32) assert np.max(mask) <= 1.0 assert np.min(mask) >= 0.0 - def _load_depth_test(self, entry): - path = os.path.join(self.dataset_root, entry.depth.path) - depth_map = _load_depth(path, entry.depth.scale_adjustment) + def test_load_depth(self): + path = os.path.join(self.dataset_root, self.entry.depth.path) + depth_map = _load_depth(path, self.entry.depth.scale_adjustment) self.assertEqual(depth_map.dtype, np.float32) self.assertEqual(len(depth_map.shape), 2) - def _load_16big_png_depth_test(self, entry): - path = os.path.join(self.dataset_root, entry.depth.path) + def test_load_16big_png_depth(self): + path = os.path.join(self.dataset_root, self.entry.depth.path) depth_map = _load_16big_png_depth(path) self.assertEqual(depth_map.dtype, np.float32) self.assertEqual(len(depth_map.shape), 2) - def _load_1bit_png_mask_test(self, entry): - mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) + def test_load_1bit_png_mask(self): + mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) mask = _load_1bit_png_mask(mask_path) self.assertEqual(mask.dtype, np.float32) self.assertEqual(len(mask.shape), 2) - def _load_depth_mask_test(self, entry): - mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) + def test_load_depth_mask(self): + mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) mask = _load_depth_mask(mask_path) self.assertEqual(mask.dtype, np.float32) self.assertEqual(len(mask.shape), 3) From fcd8d8b45947c40370a2f93254da9e9169357a38 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 09:57:39 +0000 Subject: [PATCH 26/35] instead of loading whole dataset, loading only single frame annots --- tests/implicitron/test_blob_loader.py | 39 +++++++++++++-------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index a03e91537..b878dc0d0 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -14,6 +14,7 @@ _load_mask, BlobLoader, ) +from pytorch3d.implicitron.dataset import types from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args from pytorch3d.renderer.cameras import PerspectiveCameras @@ -40,20 +41,16 @@ def setUp(self): 
self.image_height = 768 self.image_width = 512 - expand_args_fields(JsonIndexDataset) - - self.dataset = JsonIndexDataset( - frame_annotations_file=frame_file, - sequence_annotations_file=sequence_file, - dataset_root=self.dataset_root, - image_height=self.image_height, - image_width=self.image_width, - box_crop=True, - load_point_clouds=True, - path_manager=self.path_manager, - ) - index = 7000 - self.entry = self.dataset.frame_annots[index]["frame_annotation"] + expand_args_fields(BlobLoader) + self.blob_loader = BlobLoader() + + # loading single frame annotation of dataset (see JsonIndexDataset._load_frames()) + local_file = self.path_manager.get_local_path(frame_file) + with gzip.open(local_file, "rt", encoding="utf8") as zipfile: + frame_annots_list = types.load_dataclass(zipfile, List[self.frame_annotations_type]) + + index = 0 + self.entry = FrameAnnotsEntry(frame_annotation=frame_annots_list[index], subset=None) def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args @@ -62,9 +59,9 @@ def test_BlobLoader_args(self): def test_fix_point_cloud_path(self): """Some files in Co3Dv2 have an accidental absolute path stored.""" original_path = "some_file_path" - modified_path = self.dataset.blob_loader._fix_point_cloud_path(original_path) + modified_path = self.blob_loader._fix_point_cloud_path(original_path) assert original_path in modified_path - assert self.dataset.blob_loader.dataset_root in modified_path + assert self.blob_loader.dataset_root in modified_path def test_load(self): ( @@ -73,7 +70,7 @@ def test_load(self): bbox_xywh, clamp_bbox_xyxy, crop_bbox_xywh, - ) = self.dataset.blob_loader._load_crop_fg_probability(self.entry) + ) = self.blob_loader._load_crop_fg_probability(self.entry) assert mask_path assert torch.is_tensor(fg_probability) @@ -87,7 +84,7 @@ def test_load(self): self.assertEqual(crop_bbox_xywh.shape, torch.Size([4])) ( image_rgb, image_path, mask_crop, scale, - ) = self.dataset.blob_loader._load_crop_images(self.entry, fg_probability, clamp_bbox_xyxy) + ) = self.blob_loader._load_crop_images(self.entry, fg_probability, clamp_bbox_xyxy) assert torch.is_tensor(image_rgb) assert image_path assert torch.is_tensor(mask_crop) @@ -100,7 +97,7 @@ def test_load(self): depth_map, depth_path, depth_mask, - ) = self.dataset.blob_loader._load_mask_depth( + ) = self.blob_loader._load_mask_depth( self.entry, clamp_bbox_xyxy, fg_probability, @@ -112,7 +109,7 @@ def test_load(self): self.assertEqual(depth_map.shape, torch.Size([1, self.image_height, self.image_width])) self.assertEqual(depth_mask.shape, torch.Size([1, self.image_height, self.image_width])) - camera = self.dataset.blob_loader._get_pytorch3d_camera( + camera = self.blob_loader._get_pytorch3d_camera( self.entry, scale, clamp_bbox_xyxy, @@ -123,7 +120,7 @@ def test_resize_image(self): path = os.path.join(self.dataset_root, self.entry.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) - image_rgb, scale, mask_crop = self.dataset.blob_loader._resize_image(image) + image_rgb, scale, mask_crop = self.blob_loader._resize_image(image) original_shape = image.shape[-2:] expected_shape = ( From c3bd722507a8bca9eb9dfa0828ee3e32fe005bb0 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 10:15:38 +0000 Subject: [PATCH 27/35] added default values to BlobLoader to ease initialisation --- pytorch3d/implicitron/dataset/blob_loader.py | 28 ++++++++++---------- tests/implicitron/test_blob_loader.py | 2 +- 2 files changed, 15 insertions(+), 15 
deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 035e99a83..362d1c459 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -57,20 +57,20 @@ class BlobLoader: dimension of the cropping bounding box, relative to box size. """ - dataset_root: str - load_images: bool - load_depths: bool - load_depth_masks: bool - load_masks: bool - load_point_clouds: bool - max_points: int - mask_images: bool - mask_depths: bool - image_height: Optional[int] - image_width: Optional[int] - box_crop: bool - box_crop_mask_thr: float - box_crop_context: float + dataset_root: str = "" + load_images: bool = True + load_depths: bool = True + load_depth_masks: bool = True + load_masks: bool = True + load_point_clouds: bool = False + max_points: int = 0 + mask_images: bool = False + mask_depths: bool = False + image_height: Optional[int] = 800 + image_width: Optional[int] = 800 + box_crop: bool = True + box_crop_mask_thr: float = 0.4 + box_crop_context: float = 0.3 path_manager: Any = None def load( diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index b878dc0d0..cdbb2d9c5 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -91,7 +91,7 @@ def test_load(self): assert scale # assert image and mask shapes self.assertEqual(image_rgb.shape, torch.Size([3, self.image_height, self.image_width])) - self.assertEqual(mask_crop.shape, torch.Size([1, self.image_height, self.image_width]) + self.assertEqual(mask_crop.shape, torch.Size([1, self.image_height, self.image_width])) ( depth_map, From cb34c0134465bc8e2b5e463e5f123afa224920bf Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 10:59:18 +0000 Subject: [PATCH 28/35] mackink tests on single loaded frame --- tests/implicitron/test_blob_loader.py | 89 +++++++++++++++------------ 1 file changed, 50 insertions(+), 39 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index cdbb2d9c5..209830bbe 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -1,10 +1,13 @@ import contextlib +import gzip import os import unittest +from typing import List import numpy as np - import torch + +from pytorch3d.implicitron.dataset import types from pytorch3d.implicitron.dataset.blob_loader import ( _load_16big_png_depth, _load_1bit_png_mask, @@ -14,13 +17,10 @@ _load_mask, BlobLoader, ) -from pytorch3d.implicitron.dataset import types -from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args from pytorch3d.renderer.cameras import PerspectiveCameras from tests.common_testing import TestCaseMixin - from tests.implicitron.common_resources import get_skateboard_data @@ -34,23 +34,24 @@ def setUp(self): get_skateboard_data() ) self.addCleanup(stack.close) - frame_file = os.path.join(self.dataset_root, category, "frame_annotations.jgz") - sequence_file = os.path.join( - self.dataset_root, category, "sequence_annotations.jgz" - ) self.image_height = 768 self.image_width = 512 - expand_args_fields(BlobLoader) - self.blob_loader = BlobLoader() + self.blob_loader = BlobLoader( + image_height=self.image_height, + image_width=self.image_width, + dataset_root=self.dataset_root, + path_manager=self.path_manager, + ) # loading single frame annotation of dataset (see 
JsonIndexDataset._load_frames()) + frame_file = os.path.join(self.dataset_root, category, "frame_annotations.jgz") local_file = self.path_manager.get_local_path(frame_file) with gzip.open(local_file, "rt", encoding="utf8") as zipfile: - frame_annots_list = types.load_dataclass(zipfile, List[self.frame_annotations_type]) - - index = 0 - self.entry = FrameAnnotsEntry(frame_annotation=frame_annots_list[index], subset=None) + frame_annots_list = types.load_dataclass( + zipfile, List[types.FrameAnnotation] + ) + self.frame_annotation = frame_annots_list[0] def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args @@ -70,7 +71,7 @@ def test_load(self): bbox_xywh, clamp_bbox_xyxy, crop_bbox_xywh, - ) = self.blob_loader._load_crop_fg_probability(self.entry) + ) = self.blob_loader._load_crop_fg_probability(self.frame_annotation) assert mask_path assert torch.is_tensor(fg_probability) @@ -78,27 +79,29 @@ def test_load(self): assert torch.is_tensor(clamp_bbox_xyxy) assert torch.is_tensor(crop_bbox_xywh) # assert bboxes shape - self.assertEqual(fg_probability.shape, torch.Size([1, self.image_height, self.image_width])) + self.assertEqual( + fg_probability.shape, torch.Size([1, self.image_height, self.image_width]) + ) self.assertEqual(bbox_xywh.shape, torch.Size([4])) self.assertEqual(clamp_bbox_xyxy.shape, torch.Size([4])) self.assertEqual(crop_bbox_xywh.shape, torch.Size([4])) - ( - image_rgb, image_path, mask_crop, scale, - ) = self.blob_loader._load_crop_images(self.entry, fg_probability, clamp_bbox_xyxy) + (image_rgb, image_path, mask_crop, scale,) = self.blob_loader._load_crop_images( + self.frame_annotation, fg_probability, clamp_bbox_xyxy + ) assert torch.is_tensor(image_rgb) assert image_path assert torch.is_tensor(mask_crop) assert scale # assert image and mask shapes - self.assertEqual(image_rgb.shape, torch.Size([3, self.image_height, self.image_width])) - self.assertEqual(mask_crop.shape, torch.Size([1, self.image_height, self.image_width])) + self.assertEqual( + image_rgb.shape, torch.Size([3, self.image_height, self.image_width]) + ) + self.assertEqual( + mask_crop.shape, torch.Size([1, self.image_height, self.image_width]) + ) - ( - depth_map, - depth_path, - depth_mask, - ) = self.blob_loader._load_mask_depth( - self.entry, + (depth_map, depth_path, depth_mask,) = self.blob_loader._load_mask_depth( + self.frame_annotation, clamp_bbox_xyxy, fg_probability, ) @@ -106,18 +109,22 @@ def test_load(self): assert depth_path assert torch.is_tensor(depth_mask) # assert image and mask shapes - self.assertEqual(depth_map.shape, torch.Size([1, self.image_height, self.image_width])) - self.assertEqual(depth_mask.shape, torch.Size([1, self.image_height, self.image_width])) + self.assertEqual( + depth_map.shape, torch.Size([1, self.image_height, self.image_width]) + ) + self.assertEqual( + depth_mask.shape, torch.Size([1, self.image_height, self.image_width]) + ) camera = self.blob_loader._get_pytorch3d_camera( - self.entry, + self.frame_annotation, scale, clamp_bbox_xyxy, ) self.assertEqual(type(camera), PerspectiveCameras) def test_resize_image(self): - path = os.path.join(self.dataset_root, self.entry.image.path) + path = os.path.join(self.dataset_root, self.frame_annotation.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) image_rgb, scale, mask_crop = self.blob_loader._resize_image(image) @@ -136,7 +143,7 @@ def test_resize_image(self): self.assertEqual(mask_crop.shape[-2:], expected_shape) def test_load_image(self): - path 
= os.path.join(self.dataset_root, self.entry.image.path) + path = os.path.join(self.dataset_root, self.frame_annotation.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) self.assertEqual(image.dtype, np.float32) @@ -144,32 +151,36 @@ def test_load_image(self): assert np.min(image) >= 0.0 def test_load_mask(self): - path = os.path.join(self.dataset_root, self.entry.mask.path) + path = os.path.join(self.dataset_root, self.frame_annotation.mask.path) mask = _load_mask(path) self.assertEqual(mask.dtype, np.float32) assert np.max(mask) <= 1.0 assert np.min(mask) >= 0.0 def test_load_depth(self): - path = os.path.join(self.dataset_root, self.entry.depth.path) - depth_map = _load_depth(path, self.entry.depth.scale_adjustment) + path = os.path.join(self.dataset_root, self.frame_annotation.depth.path) + depth_map = _load_depth(path, self.frame_annotation.depth.scale_adjustment) self.assertEqual(depth_map.dtype, np.float32) - self.assertEqual(len(depth_map.shape), 2) + self.assertEqual(len(depth_map.shape), 3) def test_load_16big_png_depth(self): - path = os.path.join(self.dataset_root, self.entry.depth.path) + path = os.path.join(self.dataset_root, self.frame_annotation.depth.path) depth_map = _load_16big_png_depth(path) self.assertEqual(depth_map.dtype, np.float32) self.assertEqual(len(depth_map.shape), 2) def test_load_1bit_png_mask(self): - mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) + mask_path = os.path.join( + self.dataset_root, self.frame_annotation.depth.mask_path + ) mask = _load_1bit_png_mask(mask_path) self.assertEqual(mask.dtype, np.float32) self.assertEqual(len(mask.shape), 2) def test_load_depth_mask(self): - mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) + mask_path = os.path.join( + self.dataset_root, self.frame_annotation.depth.mask_path + ) mask = _load_depth_mask(mask_path) self.assertEqual(mask.dtype, np.float32) self.assertEqual(len(mask.shape), 3) From 04b7d1591cf38c9957c19dd2ba3da53aeb023715 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 15:29:08 +0000 Subject: [PATCH 29/35] made _resize_image separate function (will ease use in pixar replay) --- pytorch3d/implicitron/dataset/blob_loader.py | 75 ++++++++++++-------- tests/implicitron/test_blob_loader.py | 5 +- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 362d1c459..bedbc070e 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -165,7 +165,12 @@ def _load_crop_fg_probability( mask = _crop_around_box(mask, clamp_bbox_xyxy, full_path) - fg_probability, _, _ = self._resize_image(mask, mode="nearest") + fg_probability, _, _ = _resize_image( + mask, + image_height=self.image_height, + image_width=self.image_width, + mode="nearest", + ) return fg_probability, full_path, bbox_xywh, clamp_bbox_xyxy, crop_box_xywh @@ -188,7 +193,9 @@ def _load_crop_images( assert clamp_bbox_xyxy is not None image_rgb = _crop_around_box(image_rgb, clamp_bbox_xyxy, path) - image_rgb, scale, mask_crop = self._resize_image(image_rgb) + image_rgb, scale, mask_crop = _resize_image( + image_rgb, image_height=self.image_height, image_width=self.image_width + ) if self.mask_images: assert fg_probability is not None @@ -214,7 +221,12 @@ def _load_mask_depth( ) depth_map = _crop_around_box(depth_map, depth_bbox_xyxy, path) - depth_map, _, _ = 
self._resize_image(depth_map, mode="nearest") + depth_map, _, _ = _resize_image( + depth_map, + image_height=self.image_height, + image_width=self.image_width, + mode="nearest", + ) if self.mask_depths: assert fg_probability is not None @@ -234,7 +246,12 @@ def _load_mask_depth( depth_mask, depth_mask_bbox_xyxy, mask_path ) - depth_mask, _, _ = self._resize_image(depth_mask, mode="nearest") + depth_mask, _, _ = _resize_image( + depth_mask, + image_height=self.image_height, + image_width=self.image_width, + mode="nearest", + ) else: depth_mask = torch.ones_like(depth_map) @@ -314,31 +331,31 @@ def _local_path(self, path: str) -> str: return path return self.path_manager.get_local_path(path) - def _resize_image( - self, image, mode="bilinear" - ) -> Tuple[torch.Tensor, float, torch.Tensor]: - image_height, image_width = self.image_height, self.image_width - if image_height is None or image_width is None: - # skip the resizing - imre_ = torch.from_numpy(image) - return imre_, 1.0, torch.ones_like(imre_[:1]) - # takes numpy array, returns pytorch tensor - minscale = min( - image_height / image.shape[-2], - image_width / image.shape[-1], - ) - imre = torch.nn.functional.interpolate( - torch.from_numpy(image)[None], - scale_factor=minscale, - mode=mode, - align_corners=False if mode == "bilinear" else None, - recompute_scale_factor=True, - )[0] - imre_ = torch.zeros(image.shape[0], image_height, image_width) - imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre - mask = torch.zeros(1, image_height, image_width) - mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 - return imre_, minscale, mask + +def _resize_image( + self, image, image_height, image_width, mode="bilinear" +) -> Tuple[torch.Tensor, float, torch.Tensor]: + if image_height is None or image_width is None: + # skip the resizing + imre_ = torch.from_numpy(image) + return imre_, 1.0, torch.ones_like(imre_[:1]) + # takes numpy array, returns pytorch tensor + minscale = min( + image_height / image.shape[-2], + image_width / image.shape[-1], + ) + imre = torch.nn.functional.interpolate( + torch.from_numpy(image)[None], + scale_factor=minscale, + mode=mode, + align_corners=False if mode == "bilinear" else None, + recompute_scale_factor=True, + )[0] + imre_ = torch.zeros(image.shape[0], image_height, image_width) + imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre + mask = torch.zeros(1, image_height, image_width) + mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 + return imre_, minscale, mask def _load_image(path) -> np.ndarray: diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 209830bbe..5d432ba69 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -15,6 +15,7 @@ _load_depth_mask, _load_image, _load_mask, + _resize_image, BlobLoader, ) from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args @@ -127,7 +128,9 @@ def test_resize_image(self): path = os.path.join(self.dataset_root, self.frame_annotation.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) - image_rgb, scale, mask_crop = self.blob_loader._resize_image(image) + image_rgb, scale, mask_crop = _resize_image( + image, image_height=self.image_height, image_width=self.image_width + ) original_shape = image.shape[-2:] expected_shape = ( From 76f45aa27dc5c99c576e9242b480267ceeb55db6 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 15:31:22 +0000 Subject: [PATCH 30/35] type in function arguments 
--- pytorch3d/implicitron/dataset/blob_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index bedbc070e..ce15f116a 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -333,7 +333,7 @@ def _local_path(self, path: str) -> str: def _resize_image( - self, image, image_height, image_width, mode="bilinear" + image, image_height, image_width, mode="bilinear" ) -> Tuple[torch.Tensor, float, torch.Tensor]: if image_height is None or image_width is None: # skip the resizing From e5d3a2b08c3e0c1ab2496ef55d9e3a012428ea7f Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 15:44:12 +0000 Subject: [PATCH 31/35] moved tests for _resize_image to test_bbox --- tests/implicitron/test_bbox.py | 18 ++++++++++++++++++ tests/implicitron/test_blob_loader.py | 24 +----------------------- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 8dffd751d..4f518dfee 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -18,6 +18,7 @@ _get_bbox_from_mask, _get_clamp_bbox, _rescale_bbox, + _resize_image, ) from tests.common_testing import TestCaseMixin @@ -121,3 +122,20 @@ def test_get_1d_bounds(self): bounds = _get_1d_bounds(array) # make nonzero 1d bounds of image self.assertClose(bounds, [1, 3]) + + def test_resize_image(self): + image = torch.rand(3, 300, 500) # rgb image 300x500 + expected_shape = (150, 250) + + resized_image, scale, mask_crop = _resize_image( + image, image_height=expected_shape[0], image_width=expected_shape[1] + ) + + original_shape = image.shape[-2:] + expected_scale = min( + expected_shape[0] / original_shape[0], expected_shape[1] / original_shape[1] + ) + + self.assertEqual(scale, expected_scale) + self.assertEqual(resized_image.shape[-2:], expected_shape) + self.assertEqual(mask_crop.shape[-2:], expected_shape) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 5d432ba69..5634854e9 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -15,10 +15,9 @@ _load_depth_mask, _load_image, _load_mask, - _resize_image, BlobLoader, ) -from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args +from pytorch3d.implicitron.tools.config import get_default_args from pytorch3d.renderer.cameras import PerspectiveCameras from tests.common_testing import TestCaseMixin @@ -124,27 +123,6 @@ def test_load(self): ) self.assertEqual(type(camera), PerspectiveCameras) - def test_resize_image(self): - path = os.path.join(self.dataset_root, self.frame_annotation.image.path) - local_path = self.path_manager.get_local_path(path) - image = _load_image(local_path) - image_rgb, scale, mask_crop = _resize_image( - image, image_height=self.image_height, image_width=self.image_width - ) - - original_shape = image.shape[-2:] - expected_shape = ( - self.image_height, - self.image_width, - ) - expected_scale = min( - expected_shape[0] / original_shape[0], expected_shape[1] / original_shape[1] - ) - - self.assertEqual(scale, expected_scale) - self.assertEqual(image_rgb.shape[-2:], expected_shape) - self.assertEqual(mask_crop.shape[-2:], expected_shape) - def test_load_image(self): path = os.path.join(self.dataset_root, self.frame_annotation.image.path) local_path = self.path_manager.get_local_path(path) From 
1ba1a3a6896ee947a7791df704c39ca436eff377 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 15:46:17 +0000 Subject: [PATCH 32/35] np array instead of tensor to resize_image --- tests/implicitron/test_bbox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 4f518dfee..48a8421bb 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -124,7 +124,7 @@ def test_get_1d_bounds(self): self.assertClose(bounds, [1, 3]) def test_resize_image(self): - image = torch.rand(3, 300, 500) # rgb image 300x500 + image = np.random.rand(3, 300, 500) # rgb image 300x500 expected_shape = (150, 250) resized_image, scale, mask_crop = _resize_image( From cd9aa5ccd0bb3899cee14058277d99b7fc2daffa Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Mon, 13 Mar 2023 10:59:25 +0000 Subject: [PATCH 33/35] setting up default scale value to correct one --- pytorch3d/implicitron/dataset/blob_loader.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index ce15f116a..2d77e6c08 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -90,7 +90,10 @@ def load( frame_data.crop_bbox_xywh, ) = self._load_crop_fg_probability(entry) - scale = 1.0 + scale = min( + self.image_height / entry.image.size[0], + self.image_width / entry.image.size[1], + ) if self.load_images and entry.image is not None: # original image size frame_data.image_size_hw = _safe_as_tensor(entry.image.size, torch.long) From ce9fd400d1ca3a2b7b767cda31ddc530d68eee1a Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 14 Mar 2023 10:48:26 +0000 Subject: [PATCH 34/35] renamed funciton to load_ to make more obvious inplace modification --- pytorch3d/implicitron/dataset/blob_loader.py | 3 +-- pytorch3d/implicitron/dataset/json_index_dataset.py | 3 ++- tests/implicitron/test_blob_loader.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 2d77e6c08..6d0dc7fa4 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -73,7 +73,7 @@ class BlobLoader: box_crop_context: float = 0.3 path_manager: Any = None - def load( + def load_( self, frame_data: FrameData, entry: types.FrameAnnotation, @@ -127,7 +127,6 @@ def load( self._local_path(pcl_path), max_points=self.max_points ) frame_data.sequence_point_cloud_path = pcl_path - return frame_data def _load_crop_fg_probability( self, entry: types.FrameAnnotation diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py index cf63b9b43..636630680 100644 --- a/pytorch3d/implicitron/dataset/json_index_dataset.py +++ b/pytorch3d/implicitron/dataset/json_index_dataset.py @@ -443,9 +443,10 @@ def __getitem__(self, index) -> FrameData: # Optional field frame_data.frame_type = self._get_frame_type(self.frame_annots[index]) - return self.blob_loader.load( + self.blob_loader.load_( frame_data, entry, self.seq_annots[entry.sequence_name] ) + return frame_data def _load_frames(self) -> None: logger.info(f"Loading Co3D frames from {self.frame_annotations_file}.") diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 5634854e9..fd8d8fd81 100644 --- 
a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -64,7 +64,7 @@ def test_fix_point_cloud_path(self): assert original_path in modified_path assert self.blob_loader.dataset_root in modified_path - def test_load(self): + def test_load_(self): ( fg_probability, mask_path, From 46d39ed325dd8d56989c7723347d366431b35d48 Mon Sep 17 00:00:00 2001 From: Ildar SALAKHIEV Date: Tue, 14 Mar 2023 16:22:15 +0000 Subject: [PATCH 35/35] Update pytorch3d/implicitron/dataset/blob_loader.py Co-authored-by: Jeremy Reizenstein <669761+bottler@users.noreply.github.com> --- pytorch3d/implicitron/dataset/blob_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 6d0dc7fa4..9d7ffb35d 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -26,7 +26,7 @@ class BlobLoader: """ A loader for correctly (according to setup) loading blobs for FrameData. - Beware that modification done in place + This is used in the implementation of some dataset objects. Args: dataset_root: The root folder of the dataset; all the paths in jsons are