diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ffbe496 --- /dev/null +++ b/.gitignore @@ -0,0 +1,132 @@ +# custom +results* +pretrained_model/ + +### https://raw.github.com/github/gitignore/50e42aa1064d004a5c99eaa72a2d8054a0d8de55/Python.gitignore + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + + diff --git a/README.md b/README.md new file mode 100644 index 0000000..2cf6501 --- /dev/null +++ b/README.md @@ -0,0 +1,141 @@ +# Monocular Depth Estimation for [NYU2](https://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html) + +Pytorch re-implementation of the below paper. + +- Python 3.6.8 +- PyTorch 1.6.0 + +Please see [requirements.txt](./docker/requirements.txt) for the other libraries' versions. + + +## Paper + +**Revisiting Single Image Depth Estimation: Toward Higher Resolution Maps with Accurate Object Boundaries** + +*Junjie Hu, Mete Ozay, Yan Zhang, Takayuki Okatani* + +WACV2019 + +[[arXiv]](https://arxiv.org/abs/1803.08673) [[Original repo]](https://github.com/JunjH/Revisiting_Single_Depth_Estimation) + +![](./figs/revisiting_paper_fig2.png) +![](./figs/revisiting_paper_fig5.png) + +Results in the paper: + +![](./figs/revisiting_paper_metrics.png) + +*The figures are from the paper. + +## Results in this repository + +We use ResNet-50 for the main results. Please see [the config file](./configs/default.yaml) for the other parameters. 
+
+### Training loss
+
+![](./figs/revisiting_plot_loss.png)
+
+### Quantitative results
+
+MAE | MSE | RMSE | ABS_REL | LG10 | DELTA1 | DELTA2 | DELTA3
+-- | -- | -- | -- | -- | -- | -- | --
+0.3388 | 0.3150 | 0.5613 | 0.1283 | 0.0548 | 0.8407 | 0.9673 | 0.9907
+
+![](./figs/revisiting_plot_metrics1.png)
+![](./figs/revisiting_plot_metrics2.png)
+
+### Qualitative results
+
+![](./figs/revisiting_qualitative_main.jpeg)
+
+### Others
+
+Results for the other metrics and for the ablation study are available [here](./docs/RESULTS.md).
+
+## Preparation
+
+### Dataset: NYU v2
+
+```bash
+sh scripts/prepare_nyu2.sh
+```
+
+[This script](./scripts/prepare_nyu2.sh) uses the download link from [J. Hu's repository](https://github.com/JunjH/Revisiting_Single_Depth_Estimation).
+
+
+### Installation
+
+```bash
+docker-compose build
+docker-compose run dev
+```
+
+- docker-compose 1.28.3
+- Docker 20.10.2
+
+ref. [[Enabling GPU access with Compose]](https://docs.docker.com/compose/gpu-support/)
+
+
+### Installation w/ nvidia-docker
+
+```bash
+nvidia-docker build -t {IMAGE_NAME} ./docker
+nvidia-docker run -it -v `pwd`:/work -v $HOME/data/nyu2/data:/work/data --name {CONTAINER_NAME} {IMAGE_NAME}
+```
+
+Please mount your working directory at `/work` and the dataset directory at `/work/data`.
+
+### Installation w/o Docker
+
+```bash
+pip install -r ./docker/requirements.txt
+```
+
+Note that the libraries will be installed directly into your environment.
+
+Please place the dataset in `./data` in your working directory, or change the dataset path in your config file.
+
+
+## Run
+
+### Train
+```bash
+python tools/train.py
+```
+
+Option | Description
+--- | ---
+`--config [config path]` | Optional config file path. `configs/default.yaml` is loaded by default; the specified file overrides the default values.
+`--out-dir [outdir path]` | Output directory path. (default: `results`)
+`--resume [ckpt path]` | Checkpoint file path to resume training from.
+
+To override config values from the command line, append them at the end as a dotlist.
+
+```bash
+python tools/train.py --config [config path] SOLVER.NUM_WORKERS=8 SOLVER.EPOCH=5
+```
+
+### Test
+```bash
+python tools/test.py [ckpt path]
+```
+
+Option | Description
+--- | ---
+`--config [config path]` | Optional config file path. Use the same config that was used for training.
+`--show-dir [outdir path]` | Directory for saving prediction visualizations. Specify this option only if you want to save them.
+
+### Other tools
+Please see [tools/README.md](./tools/README.md).
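+## Loss function
+
+The training objective in `src/engine.py` is a weighted sum of a depth term, a gradient term, and a surface-normal term, controlled by `LOSS.ALPHA`, `LOSS.LAMBDA`, and `LOSS.MU` in the config. The snippet below is a minimal sketch of how these terms are combined; the function name `total_loss` and the precomputed Sobel gradients `grad_out` / `grad_gt` (shape `[B, 2, H, W]`) are illustrative stand-ins, not part of the actual API.
+
+```python
+import torch
+import torch.nn as nn
+
+
+def total_loss(output, depth, grad_out, grad_gt, alpha=0.5, lam=1.0, mu=1.0):
+    """Sketch of the loss in src/engine.py: depth + LAMBDA * grad + MU * normal."""
+    cos = nn.CosineSimilarity(dim=1, eps=0)
+
+    # depth term: log-L1 between predicted and ground-truth depth
+    loss_depth = torch.log(torch.abs(output - depth) + alpha).mean()
+
+    # gradient term: log-L1 on the x/y depth gradients
+    loss_dx = torch.log(torch.abs(grad_out[:, 0:1] - grad_gt[:, 0:1]) + alpha).mean()
+    loss_dy = torch.log(torch.abs(grad_out[:, 1:2] - grad_gt[:, 1:2]) + alpha).mean()
+
+    # normal term: 1 - cosine similarity of surface normals built from the gradients
+    ones = torch.ones_like(depth)
+    normal_gt = torch.cat((-grad_gt[:, 0:1], -grad_gt[:, 1:2], ones), 1)
+    normal_out = torch.cat((-grad_out[:, 0:1], -grad_out[:, 1:2], ones), 1)
+    loss_normal = torch.abs(1 - cos(normal_out, normal_gt)).mean()
+
+    return loss_depth + lam * (loss_dx + loss_dy) + mu * loss_normal
+```
+
+Setting `LOSS.LAMBDA` or `LOSS.MU` to 0 disables the corresponding term, which is how the ablation configs in `configs/exps/` (e.g. `loss_d_only.yaml`) are defined.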
+
+
+## Credit
+
+```
+@inproceedings{Hu2019RevisitingSI,
+  title={Revisiting Single Image Depth Estimation: Toward Higher Resolution Maps With Accurate Object Boundaries},
+  author={Junjie Hu and Mete Ozay and Yan Zhang and Takayuki Okatani},
+  booktitle={2019 IEEE Winter Conference on Applications of Computer Vision (WACV)},
+  year={2019}
+}
+```
diff --git a/configs/default.yaml b/configs/default.yaml
new file mode 100644
index 0000000..2b4ed79
--- /dev/null
+++ b/configs/default.yaml
@@ -0,0 +1,41 @@
+DATASET:
+  TRAIN_CSV: './data/nyu2_train.csv'
+  TEST_CSV: './data/nyu2_test.csv'
+MODEL:
+  # model type: ['resnet', 'densenet', 'senet']
+  NAME: 'resnet'
+SOLVER:
+  BASE_LR: 0.0001
+  BATCHSIZE: 8
+  NUM_WORKERS: 4
+  MOMENTUM: 0.9
+  WEIGHT_DECAY: 0.0001
+  LR_STEP_SIZE: 5
+  LR_GAMMA: 0.1
+  EPOCH: 20
+  SAVE_INTERVAL: 1
+DATA:
+  NORMALIZE_MEAN: [0.485, 0.456, 0.406]
+  NORMALIZE_STD: [0.229, 0.224, 0.225]
+  PCA_LIGHTING: 0.1
+  PCA_EIGVAL: [0.2175, 0.0188, 0.0045]
+  PCA_EIGVEC: [[-0.5675, 0.7192, 0.4009],
+               [-0.5808, -0.0045, -0.8140],
+               [-0.5836, -0.6948, 0.4203]]
+  SCALE_SIZE_MIN: 240
+  RANDOM_ROT_DEGREE: 5
+  CENTER_CROP_SIZE: [304, 228]
+  OUTPUT_SIZE: [152, 114]
+  RANDOM_BRIGHTNESS: 0.4
+  RANDOM_CONTRAST: 0.4
+  RANDOM_SATURATION: 0.4
+LOSS:
+  ALPHA: 0.5
+  LAMBDA: 1
+  MU: 1
+SEED: 1
+TEST:
+  BATCHSIZE: 1
+  THRESHOLD_EDGE: 0.25
+DEVICE: 'cuda'
+OUTPUT_DIR: 'results'
diff --git a/configs/exps/densenet.yaml b/configs/exps/densenet.yaml
new file mode 100644
index 0000000..64d205e
--- /dev/null
+++ b/configs/exps/densenet.yaml
@@ -0,0 +1,3 @@
+MODEL:
+  NAME: 'densenet'
+OUTPUT_DIR: 'results_densenet'
diff --git a/configs/exps/loss_d_g.yaml b/configs/exps/loss_d_g.yaml
new file mode 100644
index 0000000..ddbc814
--- /dev/null
+++ b/configs/exps/loss_d_g.yaml
@@ -0,0 +1,4 @@
+LOSS:
+  LAMBDA: 1
+  MU: 0
+OUTPUT_DIR: 'results_loss_d_g'
diff --git a/configs/exps/loss_d_n.yaml b/configs/exps/loss_d_n.yaml
new file mode 100644
index 0000000..a83c2ab
--- /dev/null
+++ b/configs/exps/loss_d_n.yaml
@@ -0,0 +1,4 @@
+LOSS:
+  LAMBDA: 0
+  MU: 1
+OUTPUT_DIR: 'results_loss_d_n'
diff --git a/configs/exps/loss_d_only.yaml b/configs/exps/loss_d_only.yaml
new file mode 100644
index 0000000..c670f70
--- /dev/null
+++ b/configs/exps/loss_d_only.yaml
@@ -0,0 +1,4 @@
+LOSS:
+  LAMBDA: 0
+  MU: 0
+OUTPUT_DIR: 'results_loss_d_only'
diff --git a/configs/exps/senet.yaml b/configs/exps/senet.yaml
new file mode 100644
index 0000000..a681ecd
--- /dev/null
+++ b/configs/exps/senet.yaml
@@ -0,0 +1,3 @@
+MODEL:
+  NAME: 'senet'
+OUTPUT_DIR: 'results_senet'
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 0000000..c946195
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,9 @@
+version: "2.3"
+services:
+  dev:
+    runtime: nvidia
+    build:
+      context: ./docker
+    volumes:
+      - .:/work
+      - $HOME/data/nyu2/data:/work/data
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..cdaa8af
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,24 @@
+FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    sudo \
+    git \
+    zip \
+    libopencv-dev \
+    build-essential libssl-dev libbz2-dev libreadline-dev libsqlite3-dev curl \
+    wget && \
+    rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+ENV PYENV_ROOT /home/root/.pyenv
+ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH
+RUN curl -L https://raw.githubusercontent.com/yyuu/pyenv-installer/master/bin/pyenv-installer | bash
+
+ENV PYTHON_VERSION 3.6.8
+RUN pyenv install
${PYTHON_VERSION} && pyenv global ${PYTHON_VERSION} + +COPY requirements.txt /tmp/requirements.txt +RUN pip install -r /tmp/requirements.txt + +ENV PYTHONPATH $PYTHONPATH:/work + +WORKDIR /work diff --git a/docker/requirements.txt b/docker/requirements.txt new file mode 100644 index 0000000..e75f63c --- /dev/null +++ b/docker/requirements.txt @@ -0,0 +1,9 @@ +ipython==7.16.1 +matplotlib==3.3.4 +omegaconf==2.0.6 +pandas==1.1.5 +scipy==1.5.4 +tensorboardX==1.4 +torch==1.6.0 +torchvision==0.7.0 +tqdm==4.59.0 diff --git a/figs/revisiting_paper_fig2.png b/figs/revisiting_paper_fig2.png new file mode 100644 index 0000000..ac5c04e Binary files /dev/null and b/figs/revisiting_paper_fig2.png differ diff --git a/figs/revisiting_paper_fig5.png b/figs/revisiting_paper_fig5.png new file mode 100644 index 0000000..ece7189 Binary files /dev/null and b/figs/revisiting_paper_fig5.png differ diff --git a/figs/revisiting_paper_metrics.png b/figs/revisiting_paper_metrics.png new file mode 100644 index 0000000..0c3ffa9 Binary files /dev/null and b/figs/revisiting_paper_metrics.png differ diff --git a/figs/revisiting_plot_loss.png b/figs/revisiting_plot_loss.png new file mode 100644 index 0000000..3b35aae Binary files /dev/null and b/figs/revisiting_plot_loss.png differ diff --git a/figs/revisiting_plot_loss_all.png b/figs/revisiting_plot_loss_all.png new file mode 100644 index 0000000..a59104b Binary files /dev/null and b/figs/revisiting_plot_loss_all.png differ diff --git a/figs/revisiting_plot_metrics1.png b/figs/revisiting_plot_metrics1.png new file mode 100644 index 0000000..33d394b Binary files /dev/null and b/figs/revisiting_plot_metrics1.png differ diff --git a/figs/revisiting_plot_metrics2.png b/figs/revisiting_plot_metrics2.png new file mode 100644 index 0000000..ecc7aa6 Binary files /dev/null and b/figs/revisiting_plot_metrics2.png differ diff --git a/figs/revisiting_plot_metrics_all.png b/figs/revisiting_plot_metrics_all.png new file mode 100644 index 0000000..2c96bb4 Binary files /dev/null and b/figs/revisiting_plot_metrics_all.png differ diff --git a/figs/revisiting_qualitative_all.jpeg b/figs/revisiting_qualitative_all.jpeg new file mode 100644 index 0000000..150c79e Binary files /dev/null and b/figs/revisiting_qualitative_all.jpeg differ diff --git a/figs/revisiting_qualitative_main.jpeg b/figs/revisiting_qualitative_main.jpeg new file mode 100644 index 0000000..d68a5f6 Binary files /dev/null and b/figs/revisiting_qualitative_main.jpeg differ diff --git a/scripts/prepare_nyu2.sh b/scripts/prepare_nyu2.sh new file mode 100755 index 0000000..35fa1e3 --- /dev/null +++ b/scripts/prepare_nyu2.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +DATA_ROOT=$HOME/data +DATA_DIR=${DATA_ROOT}/nyu2 + +if [ -d ${DATA_DIR} ];then + echo "${DATA_DIR} already exists. Try again after removing it." + echo "Aborted." 
+ exit 1 +fi + +mkdir -p ${DATA_DIR} +cd ${DATA_DIR} + +wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=\ +$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate\ + 'https://docs.google.com/uc?export=download&id=1WoOZOBpOWfmwe7bknWS5PMUCLBPFKTOw'\ + -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')\ +&id=1WoOZOBpOWfmwe7bknWS5PMUCLBPFKTOw" -O nyu2.zip +rm -f /tmp/cookies.txt + +unzip nyu2.zip + +rm -f nyu2.zip diff --git a/src/data/__init__.py b/src/data/__init__.py new file mode 100644 index 0000000..4607aef --- /dev/null +++ b/src/data/__init__.py @@ -0,0 +1 @@ +from .dataset import build_data_loader, get_test_loader diff --git a/src/data/dataset.py b/src/data/dataset.py new file mode 100644 index 0000000..2f0e6c1 --- /dev/null +++ b/src/data/dataset.py @@ -0,0 +1,97 @@ +from typing import Tuple + +import pandas as pd +from omegaconf import DictConfig +from torch.utils.data import Dataset, DataLoader +from torchvision import transforms + +from .transforms import * + + +class Nyu2Dataset(Dataset): + + def __init__(self, csv_file: str, transform=None): + self.paths = pd.read_csv(csv_file, header=None, + names=['image', 'depth']) + self.transform = transform + + def __getitem__(self, idx: int) -> dict: + + image = Image.open(self.paths['image'][idx]) + depth = Image.open(self.paths['depth'][idx]) + sample = {'image': image, 'depth': depth} + + if self.transform: + sample = self.transform(sample) + + return sample + + def __len__(self): + return len(self.paths) + + +def get_train_loader(config: DictConfig) -> DataLoader: + train_transform = transforms.Compose( + [ + Scale(config.DATA.SCALE_SIZE_MIN), + RandomHorizontalFlip(), + RandomRotate(config.DATA.RANDOM_ROT_DEGREE), + CenterCrop( + config.DATA.CENTER_CROP_SIZE, + config.DATA.OUTPUT_SIZE), + ToTensor(), + Lighting(config.DATA.PCA_LIGHTING, + torch.Tensor(config.DATA.PCA_EIGVAL), + torch.Tensor(config.DATA.PCA_EIGVEC)), + ColorJitter( + brightness=config.DATA.RANDOM_BRIGHTNESS, + contrast=config.DATA.RANDOM_CONTRAST, + saturation=config.DATA.RANDOM_SATURATION, + ), + Normalize(config.DATA.NORMALIZE_MEAN, + config.DATA.NORMALIZE_STD) + + ] + ) + train_dataset = Nyu2Dataset( + csv_file=config.DATASET.TRAIN_CSV, + transform=train_transform) + train_loader = DataLoader( + train_dataset, + config.SOLVER.BATCHSIZE, + shuffle=True, + num_workers=config.SOLVER.NUM_WORKERS, + pin_memory=False) + + return train_loader + + +def get_test_loader(config: DictConfig) -> DataLoader: + test_transform = transforms.Compose( + [ + Scale(config.DATA.SCALE_SIZE_MIN), + CenterCrop( + config.DATA.CENTER_CROP_SIZE, + config.DATA.CENTER_CROP_SIZE), + ToTensor(is_test=True), + Normalize(config.DATA.NORMALIZE_MEAN, + config.DATA.NORMALIZE_STD) + ] + ) + test_dataset = Nyu2Dataset( + csv_file=config.DATASET.TEST_CSV, + transform=test_transform) + test_loader = DataLoader( + test_dataset, + config.TEST.BATCHSIZE, + shuffle=False, + num_workers=config.SOLVER.NUM_WORKERS, + pin_memory=False) + + return test_loader + + +def build_data_loader(config: DictConfig) -> Tuple[DataLoader, DataLoader]: + train_loader = get_train_loader(config) + test_loader = get_test_loader(config) + return train_loader, test_loader diff --git a/src/data/transforms.py b/src/data/transforms.py new file mode 100644 index 0000000..179cd5f --- /dev/null +++ b/src/data/transforms.py @@ -0,0 +1,354 @@ +# copied from https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/nyu_transform.py + +import 
torch +import numpy as np +from PIL import Image, ImageOps +import collections + +try: + import accimage +except ImportError: + accimage = None +import random +import scipy.ndimage as ndimage + +import pdb + + +def _is_pil_image(img): + if accimage is not None: + return isinstance(img, (Image.Image, accimage.Image)) + else: + return isinstance(img, Image.Image) + + +def _is_numpy_image(img): + return isinstance(img, np.ndarray) and (img.ndim in {2, 3}) + + +class RandomRotate(object): + """Random rotation of the image from -angle to angle (in degrees) + This is useful for dataAugmentation, especially for geometric problems such as FlowEstimation + angle: max angle of the rotation + interpolation order: Default: 2 (bilinear) + reshape: Default: false. If set to true, image size will be set to keep every pixel in the image. + diff_angle: Default: 0. Must stay less than 10 degrees, or linear approximation of flowmap will be off. + """ + + def __init__(self, angle, diff_angle=0, order=2, reshape=False): + self.angle = angle + self.reshape = reshape + self.order = order + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + + applied_angle = random.uniform(-self.angle, self.angle) + angle1 = applied_angle + angle1_rad = angle1 * np.pi / 180 + + image = ndimage.interpolation.rotate( + image, angle1, reshape=self.reshape, order=self.order) + depth = ndimage.interpolation.rotate( + depth, angle1, reshape=self.reshape, order=self.order) + + image = Image.fromarray(image) + depth = Image.fromarray(depth) + + return {'image': image, 'depth': depth} + + +class RandomHorizontalFlip(object): + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + + if not _is_pil_image(image): + raise TypeError( + 'img should be PIL Image. Got {}'.format(type(image))) + if not _is_pil_image(depth): + raise TypeError( + 'img should be PIL Image. Got {}'.format(type(depth))) + + if random.random() < 0.5: + image = image.transpose(Image.FLIP_LEFT_RIGHT) + depth = depth.transpose(Image.FLIP_LEFT_RIGHT) + + return {'image': image, 'depth': depth} + + +class Scale(object): + """ Rescales the inputs and target arrays to the given 'size'. + 'size' will be the size of the smaller edge. + For example, if height > width, then image will be + rescaled to (size * height / width, size) + size: size of the smaller edge + interpolation order: Default: 2 (bilinear) + """ + + def __init__(self, size): + self.size = size + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + + image = self.changeScale(image, self.size) + depth = self.changeScale(depth, self.size, Image.NEAREST) + + return {'image': image, 'depth': depth} + + def changeScale(self, img, size, interpolation=Image.BILINEAR): + + if not _is_pil_image(img): + raise TypeError( + 'img should be PIL Image. 
Got {}'.format(type(img))) + if not (isinstance(size, int) or (isinstance(size, collections.Iterable) and len(size) == 2)): + raise TypeError('Got inappropriate size arg: {}'.format(size)) + + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +class CenterCrop(object): + def __init__(self, size_image, size_depth): + self.size_image = size_image + self.size_depth = size_depth + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + + image = self.centerCrop(image, self.size_image) + depth = self.centerCrop(depth, self.size_image) + + ow, oh = self.size_depth + depth = depth.resize((ow, oh)) + + return {'image': image, 'depth': depth} + + def centerCrop(self, image, size): + w1, h1 = image.size + + tw, th = size + + if w1 == tw and h1 == th: + return image + + x1 = int(round((w1 - tw) / 2.)) + y1 = int(round((h1 - th) / 2.)) + + image = image.crop((x1, y1, tw + x1, th + y1)) + + return image + + +class ToTensor(object): + """Convert a ``PIL.Image`` or ``numpy.ndarray`` to tensor. + Converts a PIL.Image or numpy.ndarray (H x W x C) in the range + [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]. + """ + + def __init__(self, is_test=False): + self.is_test = is_test + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + """ + Args: + pic (PIL.Image or numpy.ndarray): Image to be converted to tensor. + Returns: + Tensor: Converted image. + """ + # ground truth depth of training samples is stored in 8-bit while test samples are saved in 16 bit + image = self.to_tensor(image) + if self.is_test: + depth = self.to_tensor(depth).float() / 1000 + else: + depth = self.to_tensor(depth).float() * 10 + return {'image': image, 'depth': depth} + + def to_tensor(self, pic): + if not (_is_pil_image(pic) or _is_numpy_image(pic)): + raise TypeError( + 'pic should be PIL Image or ndarray. 
Got {}'.format(type(pic))) + + if isinstance(pic, np.ndarray): + img = torch.from_numpy(pic.transpose((2, 0, 1))) + + return img.float().div(255) + + if accimage is not None and isinstance(pic, accimage.Image): + nppic = np.zeros( + [pic.channels, pic.height, pic.width], dtype=np.float32) + pic.copyto(nppic) + return torch.from_numpy(nppic) + + # handle PIL Image + if pic.mode == 'I': + img = torch.from_numpy(np.array(pic, np.int32, copy=False)) + elif pic.mode == 'I;16': + img = torch.from_numpy(np.array(pic, np.int16, copy=False)) + else: + img = torch.ByteTensor( + torch.ByteStorage.from_buffer(pic.tobytes())) + # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK + if pic.mode == 'YCbCr': + nchannel = 3 + elif pic.mode == 'I;16': + nchannel = 1 + else: + nchannel = len(pic.mode) + img = img.view(pic.size[1], pic.size[0], nchannel) + # put it from HWC to CHW format + # yikes, this transpose takes 80% of the loading time/CPU + img = img.transpose(0, 1).transpose(0, 2).contiguous() + if isinstance(img, torch.ByteTensor): + return img.float().div(255) + else: + return img + + +class Lighting(object): + + def __init__(self, alphastd, eigval, eigvec): + self.alphastd = alphastd + self.eigval = eigval + self.eigvec = eigvec + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + if self.alphastd == 0: + return image + + alpha = image.new().resize_(3).normal_(0, self.alphastd) + rgb = self.eigvec.type_as(image).clone() \ + .mul(alpha.view(1, 3).expand(3, 3)) \ + .mul(self.eigval.view(1, 3).expand(3, 3)) \ + .sum(1).squeeze() + + image = image.add(rgb.view(3, 1, 1).expand_as(image)) + + return {'image': image, 'depth': depth} + + +class Grayscale(object): + + def __call__(self, img): + gs = img.clone() + gs[0].mul_(0.299).add_(gs[1], alpha=0.587).add_(gs[2], alpha=0.114) + gs[1].copy_(gs[0]) + gs[2].copy_(gs[0]) + return gs + + +class Saturation(object): + + def __init__(self, var): + self.var = var + + def __call__(self, img): + gs = Grayscale()(img) + alpha = random.uniform(-self.var, self.var) + return img.lerp(gs, alpha) + + +class Brightness(object): + + def __init__(self, var): + self.var = var + + def __call__(self, img): + gs = img.new().resize_as_(img).zero_() + alpha = random.uniform(-self.var, self.var) + + return img.lerp(gs, alpha) + + +class Contrast(object): + + def __init__(self, var): + self.var = var + + def __call__(self, img): + gs = Grayscale()(img) + gs.fill_(gs.mean()) + alpha = random.uniform(-self.var, self.var) + return img.lerp(gs, alpha) + + +class RandomOrder(object): + """ Composes several transforms together in random order. + """ + + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + + if self.transforms is None: + return {'image': image, 'depth': depth} + order = torch.randperm(len(self.transforms)) + for i in order: + image = self.transforms[i](image) + + return {'image': image, 'depth': depth} + + +class ColorJitter(RandomOrder): + + def __init__(self, brightness=0.4, contrast=0.4, saturation=0.4): + self.transforms = [] + if brightness != 0: + self.transforms.append(Brightness(brightness)) + if contrast != 0: + self.transforms.append(Contrast(contrast)) + if saturation != 0: + self.transforms.append(Saturation(saturation)) + + +class Normalize(object): + def __init__(self, mean, std): + self.mean = mean + self.std = std + + def __call__(self, sample): + """ + Args: + tensor (Tensor): Tensor image of size (C, H, W) to be normalized. 
+ Returns: + Tensor: Normalized image. + """ + image, depth = sample['image'], sample['depth'] + + image = self.normalize(image, self.mean, self.std) + + return {'image': image, 'depth': depth} + + def normalize(self, tensor, mean, std): + """Normalize a tensor image with mean and standard deviation. + See ``Normalize`` for more details. + Args: + tensor (Tensor): Tensor image of size (C, H, W) to be normalized. + mean (sequence): Sequence of means for R, G, B channels respecitvely. + std (sequence): Sequence of standard deviations for R, G, B channels + respecitvely. + Returns: + Tensor: Normalized image. + """ + + # TODO: make efficient + for t, m, s in zip(tensor, mean, std): + t.sub_(m).div_(s) + return tensor diff --git a/src/engine.py b/src/engine.py new file mode 100644 index 0000000..5d7a32d --- /dev/null +++ b/src/engine.py @@ -0,0 +1,205 @@ +import os +import time +from typing import Optional, Dict + +import matplotlib.pyplot as plt +import numpy as np +import torch +import torch.nn as nn +import torch.nn.parallel +from omegaconf import DictConfig +from tensorboardX import SummaryWriter +from torch.utils.data import DataLoader +from torch.optim import Optimizer +from tqdm import tqdm + +from src.sobel import Sobel +from src.metrics import evaluate_depth_metrics, evaluate_edge_metrics + + +class AverageMeter(object): + def __init__(self): + self.value = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, value, n=1): + self.value = value + self.sum += value * n + self.count += n + self.avg = self.sum / self.count + + +def train(model: nn.Module, + data_loader: DataLoader, + optimizer: Optimizer, + loss_config: DictConfig, + epoch: int, + device: str = 'cuda', + tblogger: Optional[SummaryWriter] = None): + """ref. https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/train.py""" + + model.train() + + # func for loss + cos = nn.CosineSimilarity(dim=1, eps=0) + get_gradient = Sobel().to(device) + + # init + batch_time = AverageMeter() + losses = AverageMeter() + losses_depth = AverageMeter() + losses_normal = AverageMeter() + losses_grad = AverageMeter() + end = time.time() + for i, batch in enumerate(data_loader): + + # prepare + image, depth = batch['image'], batch['depth'] + image = image.to(device) + depth = depth.to(device) + optimizer.zero_grad() + + # forward + output = model(image) + + # loss: depth + loss_depth = torch.log(torch.abs(output - depth) + loss_config.ALPHA).mean() + + # loss: grad + depth_grad = get_gradient(depth) + output_grad = get_gradient(output) + depth_grad_dx = depth_grad[:, 0, :, :].contiguous().view_as(depth) + depth_grad_dy = depth_grad[:, 1, :, :].contiguous().view_as(depth) + output_grad_dx = output_grad[:, 0, :, :].contiguous().view_as(depth) + output_grad_dy = output_grad[:, 1, :, :].contiguous().view_as(depth) + + loss_dx = torch.log(torch.abs(output_grad_dx - depth_grad_dx) + loss_config.ALPHA).mean() + loss_dy = torch.log(torch.abs(output_grad_dy - depth_grad_dy) + loss_config.ALPHA).mean() + + # loss: normal + ones = torch.ones(depth.size(0), 1, depth.size(2), depth.size(3), requires_grad=True).to(device) + depth_normal = torch.cat((-depth_grad_dx, -depth_grad_dy, ones), 1) + output_normal = torch.cat((-output_grad_dx, -output_grad_dy, ones), 1) + + loss_normal = torch.abs(1 - cos(output_normal, depth_normal)).mean() + + # loss + loss = loss_depth \ + + loss_config.LAMBDA * (loss_dx + loss_dy) \ + + loss_config.MU * loss_normal + + # update + bs = image.size(0) + losses.update(loss.item(), bs) + 
losses_depth.update(loss_depth.item(), bs) + losses_normal.update(loss_normal.item(), bs) + losses_grad.update((loss_dx + loss_dy).item(), bs) + + # step + loss.backward() + optimizer.step() + + # time + batch_time.update(time.time() - end) + end = time.time() + + # log + print(f'epoch {epoch}[{i}/{len(data_loader)}], ' + f'time {batch_time.value:.3f} ({batch_time.sum:.3f}), ' + f'loss {losses.value:.4f} ({losses.avg:.4f}), ' + f'l_d {losses_depth.value:.4f} ({losses_depth.avg:.4f}), ' + f'l_g {losses_grad.value:.4f} ({losses_grad.avg:.4f}), ' + f'l_n {losses_normal.value:.4f} ({losses_normal.avg:.4f}), ') + + if tblogger is not None: + tblogger.add_scalar('train/loss', losses.avg, epoch + 1) + tblogger.add_scalar('train/l_d', losses_depth.avg, epoch + 1) + tblogger.add_scalar('train/l_g', losses_grad.avg, epoch + 1) + tblogger.add_scalar('train/l_n', losses_normal.avg, epoch + 1) + + +def test(model: nn.Module, + data_loader: DataLoader, + threshold_edge: float = 0.25, + device: str = 'cuda', + epoch: Optional[int] = None, + tblogger: Optional[SummaryWriter] = None, + show_dir: Optional[str] = None): + model.eval() + get_gradient = Sobel().to(device) + if show_dir is not None: + os.makedirs(show_dir, exist_ok=False) + + metrics: Dict[str, AverageMeter] = { + 'MSE': AverageMeter(), + 'MAE': AverageMeter(), + 'ABS_REL': AverageMeter(), + 'LG10': AverageMeter(), + 'DELTA1': AverageMeter(), + 'DELTA2': AverageMeter(), + 'DELTA3': AverageMeter(), + 'EDGE_ACCURACY': AverageMeter(), + 'EDGE_PRECISION': AverageMeter(), + 'EDGE_RECALL': AverageMeter(), + 'EDGE_F1SCORE': AverageMeter(), + } + with torch.no_grad(): + for i, batch in enumerate(tqdm(data_loader)): + + # prepare + image, depth = batch['image'], batch['depth'] + image = image.to(device) + depth = depth.to(device) + + # forward + output = model(image) + output = torch.nn.functional.interpolate(output, size=[depth.size(2), depth.size(3)], + mode='bilinear', align_corners=True) + + # show output + if show_dir is not None: + for j, out_i in enumerate(output): + filename = f'vis_{i * data_loader.batch_size + j:05}.jpg' + plt.imshow(out_i.view(out_i.size(1), out_i.size(2)).data.cpu().numpy()) + plt.axis('off') + plt.savefig(os.path.join(show_dir, filename), + bbox_inches='tight', pad_inches=0) + plt.close() + + # calc metrics + d_metrics = evaluate_depth_metrics(output, depth) + + # forward for edge + depth_grad_xy = get_gradient(depth) + output_grad_xy = get_gradient(output) + + # calc edge metrics + e_metrics = evaluate_edge_metrics(output_grad_xy, depth_grad_xy, + threshold=threshold_edge) + + # update + bs = image.size(0) + metrics['MSE'].update(d_metrics.mse, bs) + metrics['MAE'].update(d_metrics.mae, bs) + metrics['ABS_REL'].update(d_metrics.abs_rel, bs) + metrics['LG10'].update(d_metrics.lg10, bs) + metrics['DELTA1'].update(d_metrics.delta1, bs) + metrics['DELTA2'].update(d_metrics.delta2, bs) + metrics['DELTA3'].update(d_metrics.delta3, bs) + metrics['EDGE_ACCURACY'].update(e_metrics.accuracy, bs) + metrics['EDGE_PRECISION'].update(e_metrics.precision, bs) + metrics['EDGE_RECALL'].update(e_metrics.recall, bs) + metrics['EDGE_F1SCORE'].update(e_metrics.f1_score, bs) + + rmse = np.sqrt(metrics['MSE'].avg) + + for k, v in metrics.items(): + print(k, v.avg, sep='\t') + print('RMSE', rmse, sep='\t') + + if tblogger is not None: + for k, v in metrics.items(): + tblogger.add_scalar(f'val/{k}_avg', v.avg, epoch + 1) + tblogger.add_scalar('val/RMSE_avg', rmse, epoch + 1) diff --git a/src/metrics.py b/src/metrics.py new file mode 100644 index 
0000000..0b6fb21 --- /dev/null +++ b/src/metrics.py @@ -0,0 +1,148 @@ +# ref. https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/util.py + +import dataclasses +import math +from typing import Tuple + +import torch +import numpy as np +from torch import Tensor + + +@dataclasses.dataclass +class DepthMetrics(object): + mse: float = 0. + mae: float = 0. + abs_rel: float = 0. + lg10: float = 0. + delta1: float = 0. + delta2: float = 0. + delta3: float = 0. + + +@dataclasses.dataclass +class EdgeMetrics(object): + accuracy: float = 0. + precision: float = 0. + recall: float = 0. + f1_score: float = 0. + + +def evaluate_edge_metrics(output_grad_xy: Tensor, depth_grad_xy: Tensor, + threshold: float = 0.25) -> EdgeMetrics: + + # calc edge valid + depth_edge = torch.sqrt( + torch.pow(depth_grad_xy[:, 0, :, :], 2) + torch.pow(depth_grad_xy[:, 1, :, :], 2)) + depth_edge_valid: Tensor = (depth_edge > threshold) + + output_edge = torch.sqrt( + torch.pow(output_grad_xy[:, 0, :, :], 2) + torch.pow(output_grad_xy[:, 1, :, :], 2)) + output_edge_valid: Tensor = (output_edge > threshold) + + # count true pixels + n_equal = np.sum(torch.eq(depth_edge_valid, output_edge_valid).float().data.cpu().numpy()) + n_equal_pos = np.sum((depth_edge_valid * output_edge_valid).float().data.cpu().numpy()) + + # calc metrics + n_total = depth_grad_xy.size(2) * depth_grad_xy.size(3) + accuracy = n_equal / n_total + n_out_pos = (np.sum(output_edge_valid.data.cpu().numpy())) + precision = n_equal_pos / n_out_pos if n_out_pos else 0 + recall = n_equal_pos / (np.sum(depth_edge_valid.data.cpu().numpy())) + f1_score = (2 * precision * recall) / (precision + recall) if precision + recall else 0 + + metrics = EdgeMetrics( + accuracy=accuracy, + precision=precision, + recall=recall, + f1_score=f1_score + ) + return metrics + + +def evaluate_depth_metrics(output: Tensor, target: Tensor) -> DepthMetrics: + + _output, _target, nan_mask, n_valid_element = set_nan_to_zero(output, target) + + if n_valid_element.data.cpu().numpy(): + + # calc diff + diff_matrix = torch.abs(_output - _target) + + # mse, mae + mse = torch.sum(torch.pow(diff_matrix, 2)) / n_valid_element + mae = torch.sum(diff_matrix) / n_valid_element + + # abs rel + real_matrix = torch.div(diff_matrix, _target) + real_matrix[nan_mask] = 0 + abs_rel = torch.sum(real_matrix) / n_valid_element + + # lg10 + lg10_matrix = torch.abs(calc_lg10(_output) - calc_lg10(_target)) + lg10_matrix[nan_mask] = 0 + lg10 = torch.sum(lg10_matrix) / n_valid_element + + # delta + y_over_z = torch.div(_output, _target) + z_over_y = torch.div(_target, _output) + max_ratio = max_of_two(y_over_z, z_over_y) + delta1 = torch.sum( + torch.le(max_ratio, 1.25).float()) / n_valid_element + delta2 = torch.sum( + torch.le(max_ratio, math.pow(1.25, 2)).float()) / n_valid_element + delta3 = torch.sum( + torch.le(max_ratio, math.pow(1.25, 3)).float()) / n_valid_element + + metrics = DepthMetrics( + mse=float(mse.data.cpu().numpy()), + mae=float(mae.data.cpu().numpy()), + abs_rel=float(abs_rel.data.cpu().numpy()), + lg10=float(lg10.data.cpu().numpy()), + delta1=float(delta1.data.cpu().numpy()), + delta2=float(delta2.data.cpu().numpy()), + delta3=float(delta3.data.cpu().numpy()) + ) + + else: + metrics = DepthMetrics() + + return metrics + + +def calc_lg10(x: Tensor) -> Tensor: + return torch.div(torch.log(x), math.log(10)) + + +def max_of_two(x: Tensor, y: Tensor) -> Tensor: + z = x.clone() + mask_y_larger = torch.lt(x, y) + z[mask_y_larger.detach()] = y[mask_y_larger.detach()] + return z 
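+
+# NOTE: the NaN-handling helpers below rely on the IEEE-754 rule that NaN != NaN:
+# torch.eq(x, x) marks valid depth values and torch.ne(x, x) marks missing (NaN) ones.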
+ + +def get_n_valid(x: Tensor) -> Tensor: + return torch.sum(torch.eq(x, x).float()) + + +def get_n_nan_element(x: Tensor) -> Tensor: + return torch.sum(torch.ne(x, x).float()) + + +def get_nan_mask(x: Tensor) -> Tensor: + return torch.ne(x, x) + + +def set_nan_to_zero(input: Tensor, target: Tensor + ) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + nan_mask = get_nan_mask(target) + n_valid_element = get_n_valid(target) + + _input = input.clone() + _target = target.clone() + + _input[nan_mask] = 0 + _target[nan_mask] = 0 + + return _input, _target, nan_mask, n_valid_element diff --git a/src/models/__init__.py b/src/models/__init__.py new file mode 100644 index 0000000..a83272a --- /dev/null +++ b/src/models/__init__.py @@ -0,0 +1 @@ +from .models import build_model diff --git a/src/models/densenet.py b/src/models/densenet.py new file mode 100644 index 0000000..26c7893 --- /dev/null +++ b/src/models/densenet.py @@ -0,0 +1,160 @@ +# copied from https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/models/densenet.py + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.model_zoo as model_zoo +from collections import OrderedDict +import pdb +import copy +from torchvision import utils +import numpy as np + +__all__ = ['DenseNet', 'densenet121', + 'densenet169', 'densenet201', 'densenet161'] + + +model_urls = { + 'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth', + 'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth', + 'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth', + 'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth', +} + + + +def densenet161(pretrained=False, **kwargs): + r"""Densenet-161 model from + `"Densely Connected Convolutional Networks" `_ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = DenseNet(num_init_features=96, growth_rate=48, block_config=(6, 12, 36, 24), + **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['densenet161'], 'pretrained_model/encoder')) + return model + + +class _DenseLayer(nn.Sequential): + + def __init__(self, num_input_features, growth_rate, bn_size, drop_rate): + super(_DenseLayer, self).__init__() + self.add_module('norm.1', nn.BatchNorm2d(num_input_features)), + self.add_module('relu.1', nn.ReLU(inplace=True)), + self.add_module('conv.1', nn.Conv2d(num_input_features, bn_size * + growth_rate, kernel_size=1, stride=1, bias=False)), + self.add_module('norm.2', nn.BatchNorm2d(bn_size * growth_rate)), + self.add_module('relu.2', nn.ReLU(inplace=True)), + self.add_module('conv.2', nn.Conv2d(bn_size * growth_rate, growth_rate, + kernel_size=3, stride=1, padding=1, bias=False)), + self.drop_rate = drop_rate + + def forward(self, x): + new_features = super(_DenseLayer, self).forward(x) + if self.drop_rate > 0: + new_features = F.dropout( + new_features, p=self.drop_rate, training=self.training) + return torch.cat([x, new_features], 1) + + +class _DenseBlock(nn.Sequential): + + def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate): + super(_DenseBlock, self).__init__() + for i in range(num_layers): + layer = _DenseLayer(num_input_features + i * + growth_rate, growth_rate, bn_size, drop_rate) + self.add_module('denselayer%d' % (i + 1), layer) + + +class _Transition(nn.Sequential): + + def __init__(self, num_input_features, num_output_features): + super(_Transition, self).__init__() + 
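+        # transition between dense blocks: BN -> ReLU -> 1x1 conv (channel reduction) -> 2x2 average pooling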
self.add_module('norm', nn.BatchNorm2d(num_input_features)) + self.add_module('relu', nn.ReLU(inplace=True)) + self.add_module('conv', nn.Conv2d(num_input_features, num_output_features, + kernel_size=1, stride=1, bias=False)) + self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2)) + + + +class _DenseLayer(nn.Sequential): + + def __init__(self, num_input_features, growth_rate, bn_size, drop_rate): + super(_DenseLayer, self).__init__() + self.add_module('norm.1', nn.BatchNorm2d(num_input_features)), + self.add_module('relu.1', nn.ReLU(inplace=True)), + self.add_module('conv.1', nn.Conv2d(num_input_features, bn_size * + growth_rate, kernel_size=1, stride=1, bias=False)), + self.add_module('norm.2', nn.BatchNorm2d(bn_size * growth_rate)), + self.add_module('relu.2', nn.ReLU(inplace=True)), + self.add_module('conv.2', nn.Conv2d(bn_size * growth_rate, growth_rate, + kernel_size=3, stride=1, padding=1, bias=False)), + self.drop_rate = drop_rate + + def forward(self, x): + new_features = super(_DenseLayer, self).forward(x) + if self.drop_rate > 0: + new_features = F.dropout( + new_features, p=self.drop_rate, training=self.training) + return torch.cat([x, new_features], 1) + + +class _DenseBlock(nn.Sequential): + + def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate): + super(_DenseBlock, self).__init__() + for i in range(num_layers): + layer = _DenseLayer(num_input_features + i * + growth_rate, growth_rate, bn_size, drop_rate) + self.add_module('denselayer%d' % (i + 1), layer) + + +class DenseNet(nn.Module): + + def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), + num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000): + + super(DenseNet, self).__init__() + + # First convolution + self.features = nn.Sequential(OrderedDict([ + ('conv0', nn.Conv2d(3, num_init_features, + kernel_size=7, stride=2, padding=3, bias=False)), + ('norm0', nn.BatchNorm2d(num_init_features)), + ('relu0', nn.ReLU(inplace=True)), + ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)), + ])) + + # Each denseblock + num_features = num_init_features + for i, num_layers in enumerate(block_config): + block = _DenseBlock(num_layers=num_layers, num_input_features=num_features, + bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate) + self.features.add_module('denseblock%d' % (i + 1), block) + num_features = num_features + num_layers * growth_rate + if i != len(block_config) - 1: + trans = _Transition( + num_input_features=num_features, num_output_features=num_features // 2) + self.features.add_module('transition%d' % (i + 1), trans) + num_features = num_features // 2 + # print(str(i), num_features) + + # Final batch norm + self.features.add_module('norm5', nn.BatchNorm2d(num_features)) + self.num_features = num_features + + # Linear layer + self.classifier = nn.Linear(num_features, num_classes) + + + def forward(self, x): + features = self.features(x) + out = F.relu(features, inplace=True) + out = F.avg_pool2d(out, kernel_size=7, stride=1).view( + features.size(0), -1) + out = self.classifier(out) + return out, self.num_features + diff --git a/src/models/models.py b/src/models/models.py new file mode 100644 index 0000000..c355f2c --- /dev/null +++ b/src/models/models.py @@ -0,0 +1,33 @@ +from typing import Optional + +import torch.nn as nn +from omegaconf import DictConfig + +from . 
import modules, net, resnet, densenet, senet + + +def build_model(config: DictConfig, model_state_dict: Optional[dict] = None) -> nn.Module: + model_type = config.MODEL.NAME + + if model_type == 'resnet': + original_model = resnet.resnet50(pretrained=True) + encoder = modules.E_resnet(original_model) + model = net.model(encoder, num_features=2048, block_channel=[256, 512, 1024, 2048]) + elif model_type == 'densenet': + original_model = densenet.densenet161(pretrained=True) + encoder = modules.E_densenet(original_model) + model = net.model(encoder, num_features=2208, block_channel=[192, 384, 1056, 2208]) + elif model_type == 'senet': + original_model = senet.senet154(pretrained='imagenet') + encoder = modules.E_senet(original_model) + model = net.model(encoder, num_features=2048, block_channel=[256, 512, 1024, 2048]) + else: + raise NotImplementedError + + model.to(config.DEVICE) + + # load + if model_state_dict is not None: + model.load_state_dict(model_state_dict) + + return model diff --git a/src/models/modules.py b/src/models/modules.py new file mode 100644 index 0000000..c0cc905 --- /dev/null +++ b/src/models/modules.py @@ -0,0 +1,211 @@ +# copied from https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/models/modules.py + +import torch +import torch.nn.functional as F +import torch.nn as nn + + +class _UpProjection(nn.Sequential): + + def __init__(self, num_input_features, num_output_features): + super(_UpProjection, self).__init__() + + self.conv1 = nn.Conv2d(num_input_features, num_output_features, + kernel_size=5, stride=1, padding=2, bias=False) + self.bn1 = nn.BatchNorm2d(num_output_features) + self.relu = nn.ReLU(inplace=True) + self.conv1_2 = nn.Conv2d(num_output_features, num_output_features, + kernel_size=3, stride=1, padding=1, bias=False) + self.bn1_2 = nn.BatchNorm2d(num_output_features) + + self.conv2 = nn.Conv2d(num_input_features, num_output_features, + kernel_size=5, stride=1, padding=2, bias=False) + self.bn2 = nn.BatchNorm2d(num_output_features) + + def forward(self, x, size): + x = F.interpolate(x, size=size, mode='bilinear', align_corners=True) + x_conv1 = self.relu(self.bn1(self.conv1(x))) + bran1 = self.bn1_2(self.conv1_2(x_conv1)) + bran2 = self.bn2(self.conv2(x)) + + out = self.relu(bran1 + bran2) + + return out + + +class E_resnet(nn.Module): + + def __init__(self, original_model, num_features=2048): + super(E_resnet, self).__init__() + self.conv1 = original_model.conv1 + self.bn1 = original_model.bn1 + self.relu = original_model.relu + self.maxpool = original_model.maxpool + + self.layer1 = original_model.layer1 + self.layer2 = original_model.layer2 + self.layer3 = original_model.layer3 + self.layer4 = original_model.layer4 + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x_block1 = self.layer1(x) + x_block2 = self.layer2(x_block1) + x_block3 = self.layer3(x_block2) + x_block4 = self.layer4(x_block3) + + return x_block1, x_block2, x_block3, x_block4 + + +class E_densenet(nn.Module): + + def __init__(self, original_model, num_features=2208): + super(E_densenet, self).__init__() + self.features = original_model.features + + def forward(self, x): + x01 = self.features[0](x) + x02 = self.features[1](x01) + x03 = self.features[2](x02) + x04 = self.features[3](x03) + + x_block1 = self.features[4](x04) + x_block1 = self.features[5][0](x_block1) + x_block1 = self.features[5][1](x_block1) + x_block1 = self.features[5][2](x_block1) + x_tran1 = self.features[5][3](x_block1) + + x_block2 
= self.features[6](x_tran1) + x_block2 = self.features[7][0](x_block2) + x_block2 = self.features[7][1](x_block2) + x_block2 = self.features[7][2](x_block2) + x_tran2 = self.features[7][3](x_block2) + + x_block3 = self.features[8](x_tran2) + x_block3 = self.features[9][0](x_block3) + x_block3 = self.features[9][1](x_block3) + x_block3 = self.features[9][2](x_block3) + x_tran3 = self.features[9][3](x_block3) + + x_block4 = self.features[10](x_tran3) + x_block4 = F.relu(self.features[11](x_block4)) + + return x_block1, x_block2, x_block3, x_block4 + + +class E_senet(nn.Module): + + def __init__(self, original_model, num_features=2048): + super(E_senet, self).__init__() + self.base = nn.Sequential(*list(original_model.children())[:-3]) + + def forward(self, x): + x = self.base[0](x) + x_block1 = self.base[1](x) + x_block2 = self.base[2](x_block1) + x_block3 = self.base[3](x_block2) + x_block4 = self.base[4](x_block3) + + return x_block1, x_block2, x_block3, x_block4 + + +class D(nn.Module): + + def __init__(self, num_features=2048): + super(D, self).__init__() + self.conv = nn.Conv2d(num_features, num_features // + 2, kernel_size=1, stride=1, bias=False) + num_features = num_features // 2 + self.bn = nn.BatchNorm2d(num_features) + + self.up1 = _UpProjection( + num_input_features=num_features, num_output_features=num_features // 2) + num_features = num_features // 2 + + self.up2 = _UpProjection( + num_input_features=num_features, num_output_features=num_features // 2) + num_features = num_features // 2 + + self.up3 = _UpProjection( + num_input_features=num_features, num_output_features=num_features // 2) + num_features = num_features // 2 + + self.up4 = _UpProjection( + num_input_features=num_features, num_output_features=num_features // 2) + num_features = num_features // 2 + + def forward(self, x_block1, x_block2, x_block3, x_block4): + x_d0 = F.relu(self.bn(self.conv(x_block4))) + x_d1 = self.up1(x_d0, [x_block3.size(2), x_block3.size(3)]) + x_d2 = self.up2(x_d1, [x_block2.size(2), x_block2.size(3)]) + x_d3 = self.up3(x_d2, [x_block1.size(2), x_block1.size(3)]) + x_d4 = self.up4(x_d3, [x_block1.size(2) * 2, x_block1.size(3) * 2]) + + return x_d4 + + +class MFF(nn.Module): + + def __init__(self, block_channel, num_features=64): + super(MFF, self).__init__() + + self.up1 = _UpProjection( + num_input_features=block_channel[0], num_output_features=16) + + self.up2 = _UpProjection( + num_input_features=block_channel[1], num_output_features=16) + + self.up3 = _UpProjection( + num_input_features=block_channel[2], num_output_features=16) + + self.up4 = _UpProjection( + num_input_features=block_channel[3], num_output_features=16) + + self.conv = nn.Conv2d( + num_features, num_features, kernel_size=5, stride=1, padding=2, bias=False) + self.bn = nn.BatchNorm2d(num_features) + + def forward(self, x_block1, x_block2, x_block3, x_block4, size): + x_m1 = self.up1(x_block1, size) + x_m2 = self.up2(x_block2, size) + x_m3 = self.up3(x_block3, size) + x_m4 = self.up4(x_block4, size) + + x = self.bn(self.conv(torch.cat((x_m1, x_m2, x_m3, x_m4), 1))) + x = F.relu(x) + + return x + + +class R(nn.Module): + def __init__(self, block_channel): + super(R, self).__init__() + + num_features = 64 + block_channel[3] // 32 + self.conv0 = nn.Conv2d(num_features, num_features, + kernel_size=5, stride=1, padding=2, bias=False) + self.bn0 = nn.BatchNorm2d(num_features) + + self.conv1 = nn.Conv2d(num_features, num_features, + kernel_size=5, stride=1, padding=2, bias=False) + self.bn1 = nn.BatchNorm2d(num_features) + + 
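+        # the final 5x5 convolution maps the fused features to a single-channel depth map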
self.conv2 = nn.Conv2d( + num_features, 1, kernel_size=5, stride=1, padding=2, bias=True) + + def forward(self, x): + x0 = self.conv0(x) + x0 = self.bn0(x0) + x0 = F.relu(x0) + + x1 = self.conv1(x0) + x1 = self.bn1(x1) + x1 = F.relu(x1) + + x2 = self.conv2(x1) + + return x2 diff --git a/src/models/net.py b/src/models/net.py new file mode 100644 index 0000000..0b2c714 --- /dev/null +++ b/src/models/net.py @@ -0,0 +1,24 @@ +# copied from https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/models/net.py + +import torch +import torch.nn as nn +from . import modules + + +class model(nn.Module): + def __init__(self, Encoder, num_features, block_channel): + + super(model, self).__init__() + + self.E = Encoder + self.D = modules.D(num_features) + self.MFF = modules.MFF(block_channel) + self.R = modules.R(block_channel) + + def forward(self, x): + x_block1, x_block2, x_block3, x_block4 = self.E(x) + x_decoder = self.D(x_block1, x_block2, x_block3, x_block4) + x_mff = self.MFF(x_block1, x_block2, x_block3, x_block4,[x_decoder.size(2),x_decoder.size(3)]) + out = self.R(torch.cat((x_decoder, x_mff), 1)) + + return out diff --git a/src/models/resnet.py b/src/models/resnet.py new file mode 100644 index 0000000..c6fff08 --- /dev/null +++ b/src/models/resnet.py @@ -0,0 +1,208 @@ +# copied from https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/models/resnet.py + +import torch.nn as nn +import math +import torch.utils.model_zoo as model_zoo + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152'] + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1): + "3x3 convolution with padding" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = 
self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000): + self.inplanes = 64 + super(ResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.avgpool = nn.AvgPool2d(7, stride=1) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + + return x + +def resnet18(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) + return model + + +def resnet34(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) + return model + + +def resnet50(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet50'], 'pretrained_model/encoder')) + return model + + +def resnet101(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) + return model + + +def resnet152(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. 
+ Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) + return model diff --git a/src/models/senet.py b/src/models/senet.py new file mode 100644 index 0000000..e54cbf8 --- /dev/null +++ b/src/models/senet.py @@ -0,0 +1,452 @@ +# copied from https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/models/senet.py + +""" +ResNet code gently borrowed from +https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py +""" + +from collections import OrderedDict +import math +import torch +import torch.nn.functional as F +import torch.nn as nn +from torch.utils import model_zoo +import copy +import numpy as np + +__all__ = ['SENet', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', + 'se_resnext50_32x4d', 'se_resnext101_32x4d'] + +pretrained_settings = { + 'senet154': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + 'num_classes': 1000 + } + }, + 'se_resnet50': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + 'num_classes': 1000 + } + }, + 'se_resnet101': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + 'num_classes': 1000 + } + }, + 'se_resnet152': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + 'num_classes': 1000 + } + }, + 'se_resnext50_32x4d': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + 'num_classes': 1000 + } + }, + 'se_resnext101_32x4d': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + 'num_classes': 1000 + } + }, +} + + +class SEModule(nn.Module): + + def __init__(self, channels, reduction): + super(SEModule, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1, + padding=0) + self.relu = nn.ReLU(inplace=True) + self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1, + padding=0) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + module_input = x + x = self.avg_pool(x) + x = self.fc1(x) + x = self.relu(x) + x = self.fc2(x) + x = self.sigmoid(x) + return module_input * x + + +class Bottleneck(nn.Module): + """ + Base class for bottlenecks that implements `forward()` method. 
+ """ + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out = self.se_module(out) + residual + out = self.relu(out) + + return out + + +class SEBottleneck(Bottleneck): + """ + Bottleneck for SENet154. + """ + expansion = 4 + + def __init__(self, inplanes, planes, groups, reduction, stride=1, + downsample=None): + super(SEBottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes * 2) + self.conv2 = nn.Conv2d(planes * 2, planes * 4, kernel_size=3, + stride=stride, padding=1, groups=groups, + bias=False) + self.bn2 = nn.BatchNorm2d(planes * 4) + self.conv3 = nn.Conv2d(planes * 4, planes * 4, kernel_size=1, + bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.se_module = SEModule(planes * 4, reduction=reduction) + self.downsample = downsample + self.stride = stride + + +class SEResNetBottleneck(Bottleneck): + """ + ResNet bottleneck with a Squeeze-and-Excitation module. It follows Caffe + implementation and uses `stride=stride` in `conv1` and not in `conv2` + (the latter is used in the torchvision implementation of ResNet). + """ + expansion = 4 + + def __init__(self, inplanes, planes, groups, reduction, stride=1, + downsample=None): + super(SEResNetBottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False, + stride=stride) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, + groups=groups, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.se_module = SEModule(planes * 4, reduction=reduction) + self.downsample = downsample + self.stride = stride + + +class SEResNeXtBottleneck(Bottleneck): + """ + ResNeXt bottleneck type C with a Squeeze-and-Excitation module. + """ + expansion = 4 + + def __init__(self, inplanes, planes, groups, reduction, stride=1, + downsample=None, base_width=4): + super(SEResNeXtBottleneck, self).__init__() + # width = math.floor(planes * (base_width / 64)) * groups + # pdb.set_trace() + width = int(planes * base_width / 64) * groups + self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False, + stride=1) + self.bn1 = nn.BatchNorm2d(width) + self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride, + padding=1, groups=groups, bias=False) + self.bn2 = nn.BatchNorm2d(width) + self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.se_module = SEModule(planes * 4, reduction=reduction) + self.downsample = downsample + self.stride = stride + + +class SENet(nn.Module): + + def __init__(self, block, layers, groups, reduction, dropout_p=0.2, + inplanes=128, input_3x3=True, downsample_kernel_size=3, + downsample_padding=1, num_classes=1000): + """ + Parameters + ---------- + block (nn.Module): Bottleneck class. + - For SENet154: SEBottleneck + - For SE-ResNet models: SEResNetBottleneck + - For SE-ResNeXt models: SEResNeXtBottleneck + layers (list of ints): Number of residual blocks for 4 layers of the + network (layer1...layer4). 
+ groups (int): Number of groups for the 3x3 convolution in each + bottleneck block. + - For SENet154: 64 + - For SE-ResNet models: 1 + - For SE-ResNeXt models: 32 + reduction (int): Reduction ratio for Squeeze-and-Excitation modules. + - For all models: 16 + dropout_p (float or None): Drop probability for the Dropout layer. + If `None` the Dropout layer is not used. + - For SENet154: 0.2 + - For SE-ResNet models: None + - For SE-ResNeXt models: None + inplanes (int): Number of input channels for layer1. + - For SENet154: 128 + - For SE-ResNet models: 64 + - For SE-ResNeXt models: 64 + input_3x3 (bool): If `True`, use three 3x3 convolutions instead of + a single 7x7 convolution in layer0. + - For SENet154: True + - For SE-ResNet models: False + - For SE-ResNeXt models: False + downsample_kernel_size (int): Kernel size for downsampling convolutions + in layer2, layer3 and layer4. + - For SENet154: 3 + - For SE-ResNet models: 1 + - For SE-ResNeXt models: 1 + downsample_padding (int): Padding for downsampling convolutions in + layer2, layer3 and layer4. + - For SENet154: 1 + - For SE-ResNet models: 0 + - For SE-ResNeXt models: 0 + num_classes (int): Number of outputs in `last_linear` layer. + - For all models: 1000 + """ + super(SENet, self).__init__() + self.inplanes = inplanes + if input_3x3: + layer0_modules = [ + ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1, + bias=False)), + ('bn1', nn.BatchNorm2d(64)), + ('relu1', nn.ReLU(inplace=True)), + ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1, + bias=False)), + ('bn2', nn.BatchNorm2d(64)), + ('relu2', nn.ReLU(inplace=True)), + ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1, + bias=False)), + ('bn3', nn.BatchNorm2d(inplanes)), + ('relu3', nn.ReLU(inplace=True)), + ] + else: + layer0_modules = [ + ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2, + padding=3, bias=False)), + ('bn1', nn.BatchNorm2d(inplanes)), + ('relu1', nn.ReLU(inplace=True)), + ] + # To preserve compatibility with Caffe weights `ceil_mode=True` + # is used instead of `padding=1`. 
+ layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2, + ceil_mode=True))) + self.layer0 = nn.Sequential(OrderedDict(layer0_modules)) + self.layer1 = self._make_layer( + block, + planes=64, + blocks=layers[0], + groups=groups, + reduction=reduction, + downsample_kernel_size=1, + downsample_padding=0 + ) + self.layer2 = self._make_layer( + block, + planes=128, + blocks=layers[1], + stride=2, + groups=groups, + reduction=reduction, + downsample_kernel_size=downsample_kernel_size, + downsample_padding=downsample_padding + ) + self.layer3 = self._make_layer( + block, + planes=256, + blocks=layers[2], + stride=2, + groups=groups, + reduction=reduction, + downsample_kernel_size=downsample_kernel_size, + downsample_padding=downsample_padding + ) + self.layer4 = self._make_layer( + block, + planes=512, + blocks=layers[3], + stride=2, + groups=groups, + reduction=reduction, + downsample_kernel_size=downsample_kernel_size, + downsample_padding=downsample_padding + ) + self.avg_pool = nn.AvgPool2d(7, stride=1) + self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None + self.last_linear = nn.Linear(512 * block.expansion, num_classes) + + + def _make_layer(self, block, planes, blocks, groups, reduction, stride=1, + downsample_kernel_size=1, downsample_padding=0): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=downsample_kernel_size, stride=stride, + padding=downsample_padding, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, groups, reduction, stride, + downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes, groups, reduction)) + + return nn.Sequential(*layers) + + + def features(self, x): + x = self.layer0(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + return x + + + def logits(self, x): + x = self.avg_pool(x) + if self.dropout is not None: + x = self.dropout(x) + x = x.view(x.size(0), -1) + x = self.last_linear(x) + return x + + def forward(self, x,x_): + x = self.features(x) + x = self.logits(x) + return x + +def initialize_pretrained_model(model, num_classes, settings): + assert num_classes == settings['num_classes'], \ + 'num_classes should be {}, but is {}'.format( + settings['num_classes'], num_classes) + model.load_state_dict(model_zoo.load_url(settings['url'], 'pretrained_model/encoder')) + model.input_space = settings['input_space'] + model.input_size = settings['input_size'] + model.input_range = settings['input_range'] + model.mean = settings['mean'] + model.std = settings['std'] + + +def senet154(num_classes=1000, pretrained='imagenet'): + model = SENet(SEBottleneck, [3, 8, 36, 3], groups=64, reduction=16, + dropout_p=0.2, num_classes=num_classes) + if pretrained is not None: + settings = pretrained_settings['senet154'][pretrained] + initialize_pretrained_model(model, num_classes, settings) + return model + + +def se_resnet50(num_classes=1000, pretrained='imagenet'): + model = SENet(SEResNetBottleneck, [3, 4, 6, 3], groups=1, reduction=16, + dropout_p=None, inplanes=64, input_3x3=False, + downsample_kernel_size=1, downsample_padding=0, + num_classes=num_classes) + if pretrained is not None: + settings = pretrained_settings['se_resnet50'][pretrained] + initialize_pretrained_model(model, num_classes, settings) + return model + + +def 
se_resnet101(num_classes=1000, pretrained='imagenet'): + model = SENet(SEResNetBottleneck, [3, 4, 23, 3], groups=1, reduction=16, + dropout_p=None, inplanes=64, input_3x3=False, + downsample_kernel_size=1, downsample_padding=0, + num_classes=num_classes) + if pretrained is not None: + settings = pretrained_settings['se_resnet101'][pretrained] + initialize_pretrained_model(model, num_classes, settings) + return model + + +def se_resnet152(num_classes=1000, pretrained='imagenet'): + model = SENet(SEResNetBottleneck, [3, 8, 36, 3], groups=1, reduction=16, + dropout_p=None, inplanes=64, input_3x3=False, + downsample_kernel_size=1, downsample_padding=0, + num_classes=num_classes) + if pretrained is not None: + settings = pretrained_settings['se_resnet152'][pretrained] + initialize_pretrained_model(model, num_classes, settings) + return model + + +def se_resnext50_32x4d(num_classes=1000, pretrained='imagenet'): + model = SENet(SEResNeXtBottleneck, [3, 4, 6, 3], groups=32, reduction=16, + dropout_p=None, inplanes=64, input_3x3=False, + downsample_kernel_size=1, downsample_padding=0, + num_classes=num_classes) + if pretrained is not None: + settings = pretrained_settings['se_resnext50_32x4d'][pretrained] + initialize_pretrained_model(model, num_classes, settings) + return model + + +def se_resnext101_32x4d(num_classes=1000, pretrained='imagenet'): + model = SENet(SEResNeXtBottleneck, [3, 4, 23, 3], groups=32, reduction=16, + dropout_p=None, inplanes=64, input_3x3=False, + downsample_kernel_size=1, downsample_padding=0, + num_classes=num_classes) + if pretrained is not None: + settings = pretrained_settings['se_resnext101_32x4d'][pretrained] + initialize_pretrained_model(model, num_classes, settings) + return model diff --git a/src/sobel.py b/src/sobel.py new file mode 100644 index 0000000..57642e9 --- /dev/null +++ b/src/sobel.py @@ -0,0 +1,26 @@ +# copied from https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/sobel.py + +import torch +import torch.nn as nn +import numpy as np + + +class Sobel(nn.Module): + def __init__(self): + super(Sobel, self).__init__() + self.edge_conv = nn.Conv2d(1, 2, kernel_size=3, stride=1, padding=1, bias=False) + edge_kx = np.array([[1, 0, -1], [2, 0, -2], [1, 0, -1]]) + edge_ky = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]]) + edge_k = np.stack((edge_kx, edge_ky)) + + edge_k = torch.from_numpy(edge_k).float().view(2, 1, 3, 3) + self.edge_conv.weight = nn.Parameter(edge_k) + + for param in self.parameters(): + param.requires_grad = False + + def forward(self, x): + out = self.edge_conv(x) + out = out.contiguous().view(-1, 2, x.size(2), x.size(3)) + + return out diff --git a/src/utils.py b/src/utils.py new file mode 100644 index 0000000..01ed834 --- /dev/null +++ b/src/utils.py @@ -0,0 +1,66 @@ +import dataclasses +import random +from typing import Optional, Tuple, List + +import torch +import numpy as np +import torch.nn as nn +from omegaconf import OmegaConf, DictConfig +from torch.optim.optimizer import Optimizer +from torch.optim.lr_scheduler import _LRScheduler + +from .models import build_model + + +def load_config(cfg_path: Optional[str] = None, + default_cfg_path: str = 'configs/default.yaml', + update_dotlist: Optional[List[str]] = None) -> DictConfig: + + config = OmegaConf.load(default_cfg_path) + if cfg_path is not None: + optional_config = OmegaConf.load(cfg_path) + config = OmegaConf.merge(config, optional_config) + if update_dotlist is not None: + update_config = OmegaConf.from_dotlist(update_dotlist) + config = 
OmegaConf.merge(config, update_config)
+
+    OmegaConf.set_readonly(config, True)
+
+    return config
+
+
+def print_config(config: DictConfig) -> None:
+    print(OmegaConf.to_yaml(config))
+
+
+def make_deterministic(seed):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+
+def prepare_training_modules(config: DictConfig, resume_from: Optional[str] = None
+                             ) -> Tuple[int, nn.Module, Optimizer, _LRScheduler]:
+
+    model: nn.Module = build_model(config)
+    optimizer = torch.optim.Adam(model.parameters(),
+                                 lr=config.SOLVER.BASE_LR,
+                                 weight_decay=config.SOLVER.WEIGHT_DECAY)
+    scheduler = torch.optim.lr_scheduler.StepLR(
+        optimizer,
+        config.SOLVER.LR_STEP_SIZE,
+        config.SOLVER.LR_GAMMA,
+    )
+
+    # resume
+    start_epoch = 0
+    if resume_from is not None:
+        ckpt: dict = torch.load(resume_from)
+        model.load_state_dict(ckpt['model_state_dict'])
+        optimizer.load_state_dict(ckpt['optimizer_state_dict'])
+        scheduler.load_state_dict(ckpt['scheduler_state_dict'])
+        start_epoch = ckpt['epoch']
+
+    return start_epoch, model, optimizer, scheduler
diff --git a/tools/README.md b/tools/README.md
new file mode 100644
index 0000000..1d7b6f5
--- /dev/null
+++ b/tools/README.md
@@ -0,0 +1,45 @@
+# Tools description
+
+### train.py
+
+```bash
+python tools/train.py
+```
+
+Option | Description
+--- | ---
+`--config [config path]` | Optional config file path. The `configs/default.yaml` is loaded by default. The specified file overwrites the default configs.
+`--out-dir [outdir path]` | Output directory path. (default: `results`)
+`--resume [ckpt path]` | Resuming checkpoint file path.
+
+### test.py
+
+```bash
+python tools/test.py [ckpt path]
+```
+
+Option | Description
+--- | ---
+`--config [config path]` | Optional config file path used when training.
+`--show-dir [outdir path]` | Path to save the prediction visualizations. Specify this only if you want to save them.
+
+
+If you want to override the config with command-line args, put them at the end in dotlist form.
+
+```bash
+python tools/train.py --config [config path] SOLVER.NUM_WORKERS=8 SOLVER.EPOCH=5
+```
+
+### visualize_nyu2_test_gt.py
+
+Visualize the dataset ground truth in the same way as the prediction visualizations, so the two can be compared.
+
+```bash
+python tools/visualize_nyu2_test_gt.py
+```
+
+Option | Description
+--- | ---
+`--outdir [outdir path]` | Output directory path. (default: `vis_gt`)
+`--config [config path]` | Optional config file path.
+`--debug` | Debug mode (visualize only one sample).
diff --git a/tools/test.py b/tools/test.py
new file mode 100644
index 0000000..092fe9d
--- /dev/null
+++ b/tools/test.py
@@ -0,0 +1,51 @@
+import argparse
+from typing import Optional
+
+import torch
+from omegaconf import DictConfig
+
+from src.data import get_test_loader
+from src.models import build_model
+from src.engine import test
+from src.utils import load_config, make_deterministic
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description='Test a predictor')
+    parser.add_argument('ckpt', type=str, help='the checkpoint file')
+    parser.add_argument('--config', type=str, default=None, help='train config file path')
+    parser.add_argument('--show-dir', type=str, default=None,
+                        help='Please specify the directory if you want to save predicted figures.')
+    parser.add_argument('opts', default=None, nargs=argparse.REMAINDER,
+                        help='Overwrite configs. (ex.
SOLVER.NUM_WORKERS=8)') + return parser.parse_args() + + +def main(config: DictConfig, ckpt: dict, show_dir: Optional[str] = None): + + # seed + if config.SEED is not None: + make_deterministic(seed=config.SEED) + + # data + test_loader = get_test_loader(config) + + # model + model = build_model(config, model_state_dict=ckpt['model_state_dict']) + + # test + test(model=model, + data_loader=test_loader, + device=config.DEVICE, + threshold_edge=config.TEST.THRESHOLD_EDGE, + show_dir=show_dir) + + +if __name__ == "__main__": + args = parse_args() + + # load config, ckpt + config = load_config(args.config, update_dotlist=args.opts) + ckpt: dict = torch.load(args.ckpt) + + main(config, ckpt, args.show_dir) diff --git a/tools/train.py b/tools/train.py new file mode 100644 index 0000000..62524d7 --- /dev/null +++ b/tools/train.py @@ -0,0 +1,86 @@ +import argparse +import os +from typing import Optional + +import torch +from omegaconf import DictConfig +from tensorboardX import SummaryWriter + +from src.data import build_data_loader +from src.engine import train, test +from src.utils import load_config, print_config, make_deterministic, prepare_training_modules + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description='Train a predictor') + parser.add_argument('--config', type=str, default=None, help='train config file path') + parser.add_argument('--out-dir', type=str, default=None, help='the dir to save logs and models') + parser.add_argument('--resume', type=str, default=None, help='the checkpoint file to resume from') + parser.add_argument('opts', default=None, nargs=argparse.REMAINDER, + help='Overwrite configs. (ex. SOLVER.NUM_WORKERS=8)') + return parser.parse_args() + + +def main(config: DictConfig, + out_dir: str = 'results', + resume_from: Optional[str] = None): + + # seed + if config.SEED is not None: + make_deterministic(seed=config.SEED) + + # data + train_loader, test_loader = build_data_loader(config) + + # training modules + start_epoch, model, optimizer, scheduler = \ + prepare_training_modules(config, resume_from=resume_from) + + # output setting + os.makedirs(out_dir, exist_ok=True) + tblogger = SummaryWriter(out_dir) + + # train + for epoch in range(start_epoch, config.SOLVER.EPOCH): + lr = scheduler.get_last_lr()[0] + print(f'#### Epoch{epoch}, lr: {lr} ####') + tblogger.add_scalar('train/lr', lr, epoch) + + train(model=model, + data_loader=train_loader, + optimizer=optimizer, + loss_config=config.LOSS, + epoch=epoch, + device=config.DEVICE, + tblogger=tblogger) + test(model=model, + data_loader=test_loader, + epoch=epoch, + device=config.DEVICE, + tblogger=tblogger, + threshold_edge=config.TEST.THRESHOLD_EDGE) + scheduler.step() + + # save + ckpt = {'epoch': epoch + 1, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'scheduler_state_dict': scheduler.state_dict()} + torch.save(ckpt, os.path.join(out_dir, f"snapshot.ckpt")) + if (epoch + 1) % config.SOLVER.SAVE_INTERVAL == 0: + torch.save(ckpt, os.path.join(out_dir, f"epoch_{epoch + 1}.ckpt")) + + +if __name__ == "__main__": + args = parse_args() + + # load config + config = load_config(args.config, update_dotlist=args.opts) + print_config(config) + + if args.out_dir is not None: + out_dir = args.out_dir + else: + out_dir = config.OUTPUT_DIR + + main(config, out_dir, args.resume) diff --git a/tools/visualize_nyu2_test_gt.py b/tools/visualize_nyu2_test_gt.py new file mode 100644 index 0000000..4c9c833 --- /dev/null +++ 
b/tools/visualize_nyu2_test_gt.py @@ -0,0 +1,73 @@ +import argparse +import os + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from omegaconf import DictConfig +from PIL import Image +from tqdm import tqdm + +from src.utils import load_config + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description='Visualize Test GT') + parser.add_argument('--outdir', type=str, default='vis_gt', help='Output directory') + parser.add_argument('--config', type=str, default=None) + parser.add_argument('--debug', action='store_true') + return parser.parse_args() + + +def change_scale(depth: Image.Image, scale_size_min=240, mode=Image.NEAREST): + w, h = depth.size + if w < h: + ow = scale_size_min + oh = int(ow * h / w) + else: + oh = scale_size_min + ow = int(oh * w / h) + return depth.resize((ow, oh), mode) + + +def center_crop(depth: Image.Image, size=(304, 228)): + center_crop_w, center_crop_h = size + w1, h1 = depth.size + x1 = int(round((w1 - center_crop_w) / 2.)) + y1 = int(round((h1 - center_crop_h) / 2.)) + return depth.crop((x1, y1, x1 + center_crop_w, y1 + center_crop_h)) + + +def main(config: DictConfig, + outdir: str = './visualized_gt', + debug: bool = False): + os.makedirs(outdir, exist_ok=False) + test_df = pd.read_csv(config.DATASET.TEST_CSV, header=None, + names=['image', 'depth']) + + for i, row in tqdm(test_df.iterrows(), total=len(test_df)): + depth = Image.open(row['depth']) + depth1 = change_scale(depth, + scale_size_min=config.DATA.SCALE_SIZE_MIN, + mode=Image.NEAREST) + depth2 = center_crop(depth1, + size=config.DATA.CENTER_CROP_SIZE) + + depth_array = np.array(depth2) + plt.imshow(depth_array) + plt.axis('off') + plt.savefig(os.path.join(outdir, f'vis_gt_{i:05}.jpg'), + bbox_inches='tight', pad_inches=0) + plt.close() + + if debug: + print('### DEBUG MODE ###') + print(depth.size, depth1.size, depth2.size, sep='\n') + print('exit.') + break + + +if __name__ == '__main__': + args = parse_args() + config = load_config(args.config) + main(config, outdir=args.outdir, debug=args.debug)
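
As a quick sanity check of the preprocessing in `visualize_nyu2_test_gt.py`, here is a minimal sketch of how `change_scale` and `center_crop` transform one depth map. It is illustrative only: it assumes the helpers are importable from `tools/` at the project root, uses the functions' default sizes (shorter side 240, crop 304x228), and takes 640x480 as the native NYU v2 frame size.

```python
from PIL import Image

# Assumption: tools/ is importable from the project root.
from tools.visualize_nyu2_test_gt import change_scale, center_crop

# Stand-in for a single NYU v2 depth map (native resolution 640x480).
depth = Image.new('I', (640, 480))

# Resize so the shorter side becomes 240 px; NEAREST avoids interpolating depth values.
depth_scaled = change_scale(depth, scale_size_min=240, mode=Image.NEAREST)   # -> 320x240

# Center-crop to 304x228, the crop size used for the test images.
depth_cropped = center_crop(depth_scaled, size=(304, 228))                   # -> 304x228

print(depth_scaled.size, depth_cropped.size)   # (320, 240) (304, 228)
```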