diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ffbe496 --- /dev/null +++ b/.gitignore @@ -0,0 +1,132 @@ +# custom +results* +pretrained_model/ + +### https://raw.github.com/github/gitignore/50e42aa1064d004a5c99eaa72a2d8054a0d8de55/Python.gitignore + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + + diff --git a/README.md b/README.md new file mode 100644 index 0000000..2cf6501 --- /dev/null +++ b/README.md @@ -0,0 +1,141 @@ +# Monocular Depth Estimation for [NYU2](https://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html) + +Pytorch re-implementation of the below paper. + +- Python 3.6.8 +- PyTorch 1.6.0 + +Please see [requirements.txt](./docker/requirements.txt) for the other libraries' versions. + + +## Paper + +**Revisiting Single Image Depth Estimation: Toward Higher Resolution Maps with Accurate Object Boundaries** + +*Junjie Hu, Mete Ozay, Yan Zhang, Takayuki Okatani* + +WACV2019 + +[[arXiv]](https://arxiv.org/abs/1803.08673) [[Original repo]](https://github.com/JunjH/Revisiting_Single_Depth_Estimation) + +![](./figs/revisiting_paper_fig2.png) +![](./figs/revisiting_paper_fig5.png) + +Results in the paper: + +![](./figs/revisiting_paper_metrics.png) + +*The figures are from the paper. + +## Results in this repository + +We use ResNet-50 for the main results. Please see [the config file](./configs/default.yaml) for the other parameters. 
+
+### Training loss
+
+![](./figs/revisiting_plot_loss.png)
+
+### Quantitative results
+
+MAE | MSE | RMSE | ABS_REL | LG10 | DELTA1 | DELTA2 | DELTA3
+-- | -- | -- | -- | -- | -- | -- | --
+0.3388 | 0.3150 | 0.5613 | 0.1283 | 0.0548 | 0.8407 | 0.9673 | 0.9907
+
+![](./figs/revisiting_plot_metrics1.png)
+![](./figs/revisiting_plot_metrics2.png)
+
+### Qualitative results
+
+![](./figs/revisiting_qualitative_main.jpeg)
+
+### Others
+
+Results for the other metrics and for the ablation study are available [here](./docs/RESULTS.md).
+
+## Preparation
+
+### Dataset: NYU v2
+
+```bash
+sh scripts/prepare_nyu2.sh
+```
+
+[This script](./scripts/prepare_nyu2.sh) uses the download link from [J. Hu's repository](https://github.com/JunjH/Revisiting_Single_Depth_Estimation).
+
+
+### Installation
+
+```bash
+docker-compose build
+docker-compose run dev
+```
+
+- docker-compose 1.28.3
+- Docker 20.10.2
+
+ref. [[Enabling GPU access with Compose]](https://docs.docker.com/compose/gpu-support/)
+
+
+### Installation w/ nvidia-docker
+
+```bash
+nvidia-docker build -t {IMAGE_NAME} ./docker
+nvidia-docker run -it -v `pwd`:/work -v $HOME/data/nyu2/data:/work/data --name {CONTAINER_NAME} {IMAGE_NAME}
+```
+
+Please mount your working directory at `/work` and the dataset directory at `/work/data`.
+
+### Installation w/o Docker
+
+```bash
+pip install -r ./docker/requirements.txt
+```
+
+Note that the libraries will be installed directly into your environment.
+
+Please place the dataset in `./data` in your working directory, or change the dataset path in your config file.
+
+
+## Run
+
+### Train
+```bash
+python tools/train.py
+```
+
+Option | Description
+--- | ---
+`--config [config path]` | Optional config file path. `configs/default.yaml` is loaded by default; the specified file overrides the default values.
+`--out-dir [outdir path]` | Output directory path. (default: `results`)
+`--resume [ckpt path]` | Checkpoint file path to resume training from.
+
+To override config values from the command line, append them at the end as a dotlist.
+
+```bash
+python tools/train.py --config [config path] SOLVER.NUM_WORKERS=8 SOLVER.EPOCH=5
+```
+
+### Test
+```bash
+python tools/test.py [ckpt path]
+```
+
+Option | Description
+--- | ---
+`--config [config path]` | Optional config file path. Use the same config that was used for training.
+`--show-dir [outdir path]` | Directory for saving prediction visualizations. Specify this option only if you want to save them.
+
+### Other tools
+Please see [tools/README.md](./tools/README.md).
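+## Loss function
+
+The training objective in `src/engine.py` is a weighted sum of a depth term, a gradient term, and a surface-normal term, controlled by `LOSS.ALPHA`, `LOSS.LAMBDA`, and `LOSS.MU` in the config. The snippet below is a minimal sketch of how these terms are combined; the function name `total_loss` and the precomputed Sobel gradients `grad_out` / `grad_gt` (shape `[B, 2, H, W]`) are illustrative stand-ins, not part of the actual API.
+
+```python
+import torch
+import torch.nn as nn
+
+
+def total_loss(output, depth, grad_out, grad_gt, alpha=0.5, lam=1.0, mu=1.0):
+    """Sketch of the loss in src/engine.py: depth + LAMBDA * grad + MU * normal."""
+    cos = nn.CosineSimilarity(dim=1, eps=0)
+
+    # depth term: log-L1 between predicted and ground-truth depth
+    loss_depth = torch.log(torch.abs(output - depth) + alpha).mean()
+
+    # gradient term: log-L1 on the x/y depth gradients
+    loss_dx = torch.log(torch.abs(grad_out[:, 0:1] - grad_gt[:, 0:1]) + alpha).mean()
+    loss_dy = torch.log(torch.abs(grad_out[:, 1:2] - grad_gt[:, 1:2]) + alpha).mean()
+
+    # normal term: 1 - cosine similarity of surface normals built from the gradients
+    ones = torch.ones_like(depth)
+    normal_gt = torch.cat((-grad_gt[:, 0:1], -grad_gt[:, 1:2], ones), 1)
+    normal_out = torch.cat((-grad_out[:, 0:1], -grad_out[:, 1:2], ones), 1)
+    loss_normal = torch.abs(1 - cos(normal_out, normal_gt)).mean()
+
+    return loss_depth + lam * (loss_dx + loss_dy) + mu * loss_normal
+```
+
+Setting `LOSS.LAMBDA` or `LOSS.MU` to 0 disables the corresponding term, which is how the ablation configs in `configs/exps/` (e.g. `loss_d_only.yaml`) are defined.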
+
+
+## Credit
+
+```
+@inproceedings{Hu2019RevisitingSI,
+  title={Revisiting Single Image Depth Estimation: Toward Higher Resolution Maps With Accurate Object Boundaries},
+  author={Junjie Hu and Mete Ozay and Yan Zhang and Takayuki Okatani},
+  booktitle={2019 IEEE Winter Conference on Applications of Computer Vision (WACV)},
+  year={2019}
+}
+```
diff --git a/configs/default.yaml b/configs/default.yaml
new file mode 100644
index 0000000..2b4ed79
--- /dev/null
+++ b/configs/default.yaml
@@ -0,0 +1,41 @@
+DATASET:
+  TRAIN_CSV: './data/nyu2_train.csv'
+  TEST_CSV: './data/nyu2_test.csv'
+MODEL:
+  # model type: ['resnet', 'densenet', 'senet']
+  NAME: 'resnet'
+SOLVER:
+  BASE_LR: 0.0001
+  BATCHSIZE: 8
+  NUM_WORKERS: 4
+  MOMENTUM: 0.9
+  WEIGHT_DECAY: 0.0001
+  LR_STEP_SIZE: 5
+  LR_GAMMA: 0.1
+  EPOCH: 20
+  SAVE_INTERVAL: 1
+DATA:
+  NORMALIZE_MEAN: [0.485, 0.456, 0.406]
+  NORMALIZE_STD: [0.229, 0.224, 0.225]
+  PCA_LIGHTING: 0.1
+  PCA_EIGVAL: [0.2175, 0.0188, 0.0045]
+  PCA_EIGVEC: [[-0.5675, 0.7192, 0.4009],
+               [-0.5808, -0.0045, -0.8140],
+               [-0.5836, -0.6948, 0.4203]]
+  SCALE_SIZE_MIN: 240
+  RANDOM_ROT_DEGREE: 5
+  CENTER_CROP_SIZE: [304, 228]
+  OUTPUT_SIZE: [152, 114]
+  RANDOM_BRIGHTNESS: 0.4
+  RANDOM_CONTRAST: 0.4
+  RANDOM_SATURATION: 0.4
+LOSS:
+  ALPHA: 0.5
+  LAMBDA: 1
+  MU: 1
+SEED: 1
+TEST:
+  BATCHSIZE: 1
+  THRESHOLD_EDGE: 0.25
+DEVICE: 'cuda'
+OUTPUT_DIR: 'results'
diff --git a/configs/exps/densenet.yaml b/configs/exps/densenet.yaml
new file mode 100644
index 0000000..64d205e
--- /dev/null
+++ b/configs/exps/densenet.yaml
@@ -0,0 +1,3 @@
+MODEL:
+  NAME: 'densenet'
+OUTPUT_DIR: 'results_densenet'
diff --git a/configs/exps/loss_d_g.yaml b/configs/exps/loss_d_g.yaml
new file mode 100644
index 0000000..ddbc814
--- /dev/null
+++ b/configs/exps/loss_d_g.yaml
@@ -0,0 +1,4 @@
+LOSS:
+  LAMBDA: 1
+  MU: 0
+OUTPUT_DIR: 'results_loss_d_g'
diff --git a/configs/exps/loss_d_n.yaml b/configs/exps/loss_d_n.yaml
new file mode 100644
index 0000000..a83c2ab
--- /dev/null
+++ b/configs/exps/loss_d_n.yaml
@@ -0,0 +1,4 @@
+LOSS:
+  LAMBDA: 0
+  MU: 1
+OUTPUT_DIR: 'results_loss_d_n'
diff --git a/configs/exps/loss_d_only.yaml b/configs/exps/loss_d_only.yaml
new file mode 100644
index 0000000..c670f70
--- /dev/null
+++ b/configs/exps/loss_d_only.yaml
@@ -0,0 +1,4 @@
+LOSS:
+  LAMBDA: 0
+  MU: 0
+OUTPUT_DIR: 'results_loss_d_only'
diff --git a/configs/exps/senet.yaml b/configs/exps/senet.yaml
new file mode 100644
index 0000000..a681ecd
--- /dev/null
+++ b/configs/exps/senet.yaml
@@ -0,0 +1,3 @@
+MODEL:
+  NAME: 'senet'
+OUTPUT_DIR: 'results_senet'
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 0000000..c946195
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,9 @@
+version: "2.3"
+services:
+  dev:
+    runtime: nvidia
+    build:
+      context: ./docker
+    volumes:
+      - .:/work
+      - $HOME/data/nyu2/data:/work/data
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..cdaa8af
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,24 @@
+FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    sudo \
+    git \
+    zip \
+    libopencv-dev \
+    build-essential libssl-dev libbz2-dev libreadline-dev libsqlite3-dev curl \
+    wget && \
+    rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
+
+ENV PYENV_ROOT /home/root/.pyenv
+ENV PATH $PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH
+RUN curl -L https://raw.githubusercontent.com/yyuu/pyenv-installer/master/bin/pyenv-installer | bash
+
+ENV PYTHON_VERSION 3.6.8
+RUN pyenv install
${PYTHON_VERSION} && pyenv global ${PYTHON_VERSION} + +COPY requirements.txt /tmp/requirements.txt +RUN pip install -r /tmp/requirements.txt + +ENV PYTHONPATH $PYTHONPATH:/work + +WORKDIR /work diff --git a/docker/requirements.txt b/docker/requirements.txt new file mode 100644 index 0000000..e75f63c --- /dev/null +++ b/docker/requirements.txt @@ -0,0 +1,9 @@ +ipython==7.16.1 +matplotlib==3.3.4 +omegaconf==2.0.6 +pandas==1.1.5 +scipy==1.5.4 +tensorboardX==1.4 +torch==1.6.0 +torchvision==0.7.0 +tqdm==4.59.0 diff --git a/figs/revisiting_paper_fig2.png b/figs/revisiting_paper_fig2.png new file mode 100644 index 0000000..ac5c04e Binary files /dev/null and b/figs/revisiting_paper_fig2.png differ diff --git a/figs/revisiting_paper_fig5.png b/figs/revisiting_paper_fig5.png new file mode 100644 index 0000000..ece7189 Binary files /dev/null and b/figs/revisiting_paper_fig5.png differ diff --git a/figs/revisiting_paper_metrics.png b/figs/revisiting_paper_metrics.png new file mode 100644 index 0000000..0c3ffa9 Binary files /dev/null and b/figs/revisiting_paper_metrics.png differ diff --git a/figs/revisiting_plot_loss.png b/figs/revisiting_plot_loss.png new file mode 100644 index 0000000..3b35aae Binary files /dev/null and b/figs/revisiting_plot_loss.png differ diff --git a/figs/revisiting_plot_loss_all.png b/figs/revisiting_plot_loss_all.png new file mode 100644 index 0000000..a59104b Binary files /dev/null and b/figs/revisiting_plot_loss_all.png differ diff --git a/figs/revisiting_plot_metrics1.png b/figs/revisiting_plot_metrics1.png new file mode 100644 index 0000000..33d394b Binary files /dev/null and b/figs/revisiting_plot_metrics1.png differ diff --git a/figs/revisiting_plot_metrics2.png b/figs/revisiting_plot_metrics2.png new file mode 100644 index 0000000..ecc7aa6 Binary files /dev/null and b/figs/revisiting_plot_metrics2.png differ diff --git a/figs/revisiting_plot_metrics_all.png b/figs/revisiting_plot_metrics_all.png new file mode 100644 index 0000000..2c96bb4 Binary files /dev/null and b/figs/revisiting_plot_metrics_all.png differ diff --git a/figs/revisiting_qualitative_all.jpeg b/figs/revisiting_qualitative_all.jpeg new file mode 100644 index 0000000..150c79e Binary files /dev/null and b/figs/revisiting_qualitative_all.jpeg differ diff --git a/figs/revisiting_qualitative_main.jpeg b/figs/revisiting_qualitative_main.jpeg new file mode 100644 index 0000000..d68a5f6 Binary files /dev/null and b/figs/revisiting_qualitative_main.jpeg differ diff --git a/scripts/prepare_nyu2.sh b/scripts/prepare_nyu2.sh new file mode 100755 index 0000000..35fa1e3 --- /dev/null +++ b/scripts/prepare_nyu2.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +DATA_ROOT=$HOME/data +DATA_DIR=${DATA_ROOT}/nyu2 + +if [ -d ${DATA_DIR} ];then + echo "${DATA_DIR} already exists. Try again after removing it." + echo "Aborted." 
+ exit 1 +fi + +mkdir -p ${DATA_DIR} +cd ${DATA_DIR} + +wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=\ +$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate\ + 'https://docs.google.com/uc?export=download&id=1WoOZOBpOWfmwe7bknWS5PMUCLBPFKTOw'\ + -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')\ +&id=1WoOZOBpOWfmwe7bknWS5PMUCLBPFKTOw" -O nyu2.zip +rm -f /tmp/cookies.txt + +unzip nyu2.zip + +rm -f nyu2.zip diff --git a/src/data/__init__.py b/src/data/__init__.py new file mode 100644 index 0000000..4607aef --- /dev/null +++ b/src/data/__init__.py @@ -0,0 +1 @@ +from .dataset import build_data_loader, get_test_loader diff --git a/src/data/dataset.py b/src/data/dataset.py new file mode 100644 index 0000000..2f0e6c1 --- /dev/null +++ b/src/data/dataset.py @@ -0,0 +1,97 @@ +from typing import Tuple + +import pandas as pd +from omegaconf import DictConfig +from torch.utils.data import Dataset, DataLoader +from torchvision import transforms + +from .transforms import * + + +class Nyu2Dataset(Dataset): + + def __init__(self, csv_file: str, transform=None): + self.paths = pd.read_csv(csv_file, header=None, + names=['image', 'depth']) + self.transform = transform + + def __getitem__(self, idx: int) -> dict: + + image = Image.open(self.paths['image'][idx]) + depth = Image.open(self.paths['depth'][idx]) + sample = {'image': image, 'depth': depth} + + if self.transform: + sample = self.transform(sample) + + return sample + + def __len__(self): + return len(self.paths) + + +def get_train_loader(config: DictConfig) -> DataLoader: + train_transform = transforms.Compose( + [ + Scale(config.DATA.SCALE_SIZE_MIN), + RandomHorizontalFlip(), + RandomRotate(config.DATA.RANDOM_ROT_DEGREE), + CenterCrop( + config.DATA.CENTER_CROP_SIZE, + config.DATA.OUTPUT_SIZE), + ToTensor(), + Lighting(config.DATA.PCA_LIGHTING, + torch.Tensor(config.DATA.PCA_EIGVAL), + torch.Tensor(config.DATA.PCA_EIGVEC)), + ColorJitter( + brightness=config.DATA.RANDOM_BRIGHTNESS, + contrast=config.DATA.RANDOM_CONTRAST, + saturation=config.DATA.RANDOM_SATURATION, + ), + Normalize(config.DATA.NORMALIZE_MEAN, + config.DATA.NORMALIZE_STD) + + ] + ) + train_dataset = Nyu2Dataset( + csv_file=config.DATASET.TRAIN_CSV, + transform=train_transform) + train_loader = DataLoader( + train_dataset, + config.SOLVER.BATCHSIZE, + shuffle=True, + num_workers=config.SOLVER.NUM_WORKERS, + pin_memory=False) + + return train_loader + + +def get_test_loader(config: DictConfig) -> DataLoader: + test_transform = transforms.Compose( + [ + Scale(config.DATA.SCALE_SIZE_MIN), + CenterCrop( + config.DATA.CENTER_CROP_SIZE, + config.DATA.CENTER_CROP_SIZE), + ToTensor(is_test=True), + Normalize(config.DATA.NORMALIZE_MEAN, + config.DATA.NORMALIZE_STD) + ] + ) + test_dataset = Nyu2Dataset( + csv_file=config.DATASET.TEST_CSV, + transform=test_transform) + test_loader = DataLoader( + test_dataset, + config.TEST.BATCHSIZE, + shuffle=False, + num_workers=config.SOLVER.NUM_WORKERS, + pin_memory=False) + + return test_loader + + +def build_data_loader(config: DictConfig) -> Tuple[DataLoader, DataLoader]: + train_loader = get_train_loader(config) + test_loader = get_test_loader(config) + return train_loader, test_loader diff --git a/src/data/transforms.py b/src/data/transforms.py new file mode 100644 index 0000000..179cd5f --- /dev/null +++ b/src/data/transforms.py @@ -0,0 +1,354 @@ +# copied from https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/nyu_transform.py + +import 
torch +import numpy as np +from PIL import Image, ImageOps +import collections + +try: + import accimage +except ImportError: + accimage = None +import random +import scipy.ndimage as ndimage + +import pdb + + +def _is_pil_image(img): + if accimage is not None: + return isinstance(img, (Image.Image, accimage.Image)) + else: + return isinstance(img, Image.Image) + + +def _is_numpy_image(img): + return isinstance(img, np.ndarray) and (img.ndim in {2, 3}) + + +class RandomRotate(object): + """Random rotation of the image from -angle to angle (in degrees) + This is useful for dataAugmentation, especially for geometric problems such as FlowEstimation + angle: max angle of the rotation + interpolation order: Default: 2 (bilinear) + reshape: Default: false. If set to true, image size will be set to keep every pixel in the image. + diff_angle: Default: 0. Must stay less than 10 degrees, or linear approximation of flowmap will be off. + """ + + def __init__(self, angle, diff_angle=0, order=2, reshape=False): + self.angle = angle + self.reshape = reshape + self.order = order + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + + applied_angle = random.uniform(-self.angle, self.angle) + angle1 = applied_angle + angle1_rad = angle1 * np.pi / 180 + + image = ndimage.interpolation.rotate( + image, angle1, reshape=self.reshape, order=self.order) + depth = ndimage.interpolation.rotate( + depth, angle1, reshape=self.reshape, order=self.order) + + image = Image.fromarray(image) + depth = Image.fromarray(depth) + + return {'image': image, 'depth': depth} + + +class RandomHorizontalFlip(object): + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + + if not _is_pil_image(image): + raise TypeError( + 'img should be PIL Image. Got {}'.format(type(image))) + if not _is_pil_image(depth): + raise TypeError( + 'img should be PIL Image. Got {}'.format(type(depth))) + + if random.random() < 0.5: + image = image.transpose(Image.FLIP_LEFT_RIGHT) + depth = depth.transpose(Image.FLIP_LEFT_RIGHT) + + return {'image': image, 'depth': depth} + + +class Scale(object): + """ Rescales the inputs and target arrays to the given 'size'. + 'size' will be the size of the smaller edge. + For example, if height > width, then image will be + rescaled to (size * height / width, size) + size: size of the smaller edge + interpolation order: Default: 2 (bilinear) + """ + + def __init__(self, size): + self.size = size + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + + image = self.changeScale(image, self.size) + depth = self.changeScale(depth, self.size, Image.NEAREST) + + return {'image': image, 'depth': depth} + + def changeScale(self, img, size, interpolation=Image.BILINEAR): + + if not _is_pil_image(img): + raise TypeError( + 'img should be PIL Image. 
Got {}'.format(type(img))) + if not (isinstance(size, int) or (isinstance(size, collections.Iterable) and len(size) == 2)): + raise TypeError('Got inappropriate size arg: {}'.format(size)) + + if isinstance(size, int): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +class CenterCrop(object): + def __init__(self, size_image, size_depth): + self.size_image = size_image + self.size_depth = size_depth + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + + image = self.centerCrop(image, self.size_image) + depth = self.centerCrop(depth, self.size_image) + + ow, oh = self.size_depth + depth = depth.resize((ow, oh)) + + return {'image': image, 'depth': depth} + + def centerCrop(self, image, size): + w1, h1 = image.size + + tw, th = size + + if w1 == tw and h1 == th: + return image + + x1 = int(round((w1 - tw) / 2.)) + y1 = int(round((h1 - th) / 2.)) + + image = image.crop((x1, y1, tw + x1, th + y1)) + + return image + + +class ToTensor(object): + """Convert a ``PIL.Image`` or ``numpy.ndarray`` to tensor. + Converts a PIL.Image or numpy.ndarray (H x W x C) in the range + [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]. + """ + + def __init__(self, is_test=False): + self.is_test = is_test + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + """ + Args: + pic (PIL.Image or numpy.ndarray): Image to be converted to tensor. + Returns: + Tensor: Converted image. + """ + # ground truth depth of training samples is stored in 8-bit while test samples are saved in 16 bit + image = self.to_tensor(image) + if self.is_test: + depth = self.to_tensor(depth).float() / 1000 + else: + depth = self.to_tensor(depth).float() * 10 + return {'image': image, 'depth': depth} + + def to_tensor(self, pic): + if not (_is_pil_image(pic) or _is_numpy_image(pic)): + raise TypeError( + 'pic should be PIL Image or ndarray. 
Got {}'.format(type(pic))) + + if isinstance(pic, np.ndarray): + img = torch.from_numpy(pic.transpose((2, 0, 1))) + + return img.float().div(255) + + if accimage is not None and isinstance(pic, accimage.Image): + nppic = np.zeros( + [pic.channels, pic.height, pic.width], dtype=np.float32) + pic.copyto(nppic) + return torch.from_numpy(nppic) + + # handle PIL Image + if pic.mode == 'I': + img = torch.from_numpy(np.array(pic, np.int32, copy=False)) + elif pic.mode == 'I;16': + img = torch.from_numpy(np.array(pic, np.int16, copy=False)) + else: + img = torch.ByteTensor( + torch.ByteStorage.from_buffer(pic.tobytes())) + # PIL image mode: 1, L, P, I, F, RGB, YCbCr, RGBA, CMYK + if pic.mode == 'YCbCr': + nchannel = 3 + elif pic.mode == 'I;16': + nchannel = 1 + else: + nchannel = len(pic.mode) + img = img.view(pic.size[1], pic.size[0], nchannel) + # put it from HWC to CHW format + # yikes, this transpose takes 80% of the loading time/CPU + img = img.transpose(0, 1).transpose(0, 2).contiguous() + if isinstance(img, torch.ByteTensor): + return img.float().div(255) + else: + return img + + +class Lighting(object): + + def __init__(self, alphastd, eigval, eigvec): + self.alphastd = alphastd + self.eigval = eigval + self.eigvec = eigvec + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + if self.alphastd == 0: + return image + + alpha = image.new().resize_(3).normal_(0, self.alphastd) + rgb = self.eigvec.type_as(image).clone() \ + .mul(alpha.view(1, 3).expand(3, 3)) \ + .mul(self.eigval.view(1, 3).expand(3, 3)) \ + .sum(1).squeeze() + + image = image.add(rgb.view(3, 1, 1).expand_as(image)) + + return {'image': image, 'depth': depth} + + +class Grayscale(object): + + def __call__(self, img): + gs = img.clone() + gs[0].mul_(0.299).add_(gs[1], alpha=0.587).add_(gs[2], alpha=0.114) + gs[1].copy_(gs[0]) + gs[2].copy_(gs[0]) + return gs + + +class Saturation(object): + + def __init__(self, var): + self.var = var + + def __call__(self, img): + gs = Grayscale()(img) + alpha = random.uniform(-self.var, self.var) + return img.lerp(gs, alpha) + + +class Brightness(object): + + def __init__(self, var): + self.var = var + + def __call__(self, img): + gs = img.new().resize_as_(img).zero_() + alpha = random.uniform(-self.var, self.var) + + return img.lerp(gs, alpha) + + +class Contrast(object): + + def __init__(self, var): + self.var = var + + def __call__(self, img): + gs = Grayscale()(img) + gs.fill_(gs.mean()) + alpha = random.uniform(-self.var, self.var) + return img.lerp(gs, alpha) + + +class RandomOrder(object): + """ Composes several transforms together in random order. + """ + + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, sample): + image, depth = sample['image'], sample['depth'] + + if self.transforms is None: + return {'image': image, 'depth': depth} + order = torch.randperm(len(self.transforms)) + for i in order: + image = self.transforms[i](image) + + return {'image': image, 'depth': depth} + + +class ColorJitter(RandomOrder): + + def __init__(self, brightness=0.4, contrast=0.4, saturation=0.4): + self.transforms = [] + if brightness != 0: + self.transforms.append(Brightness(brightness)) + if contrast != 0: + self.transforms.append(Contrast(contrast)) + if saturation != 0: + self.transforms.append(Saturation(saturation)) + + +class Normalize(object): + def __init__(self, mean, std): + self.mean = mean + self.std = std + + def __call__(self, sample): + """ + Args: + tensor (Tensor): Tensor image of size (C, H, W) to be normalized. 
+ Returns: + Tensor: Normalized image. + """ + image, depth = sample['image'], sample['depth'] + + image = self.normalize(image, self.mean, self.std) + + return {'image': image, 'depth': depth} + + def normalize(self, tensor, mean, std): + """Normalize a tensor image with mean and standard deviation. + See ``Normalize`` for more details. + Args: + tensor (Tensor): Tensor image of size (C, H, W) to be normalized. + mean (sequence): Sequence of means for R, G, B channels respecitvely. + std (sequence): Sequence of standard deviations for R, G, B channels + respecitvely. + Returns: + Tensor: Normalized image. + """ + + # TODO: make efficient + for t, m, s in zip(tensor, mean, std): + t.sub_(m).div_(s) + return tensor diff --git a/src/engine.py b/src/engine.py new file mode 100644 index 0000000..5d7a32d --- /dev/null +++ b/src/engine.py @@ -0,0 +1,205 @@ +import os +import time +from typing import Optional, Dict + +import matplotlib.pyplot as plt +import numpy as np +import torch +import torch.nn as nn +import torch.nn.parallel +from omegaconf import DictConfig +from tensorboardX import SummaryWriter +from torch.utils.data import DataLoader +from torch.optim import Optimizer +from tqdm import tqdm + +from src.sobel import Sobel +from src.metrics import evaluate_depth_metrics, evaluate_edge_metrics + + +class AverageMeter(object): + def __init__(self): + self.value = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, value, n=1): + self.value = value + self.sum += value * n + self.count += n + self.avg = self.sum / self.count + + +def train(model: nn.Module, + data_loader: DataLoader, + optimizer: Optimizer, + loss_config: DictConfig, + epoch: int, + device: str = 'cuda', + tblogger: Optional[SummaryWriter] = None): + """ref. https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/train.py""" + + model.train() + + # func for loss + cos = nn.CosineSimilarity(dim=1, eps=0) + get_gradient = Sobel().to(device) + + # init + batch_time = AverageMeter() + losses = AverageMeter() + losses_depth = AverageMeter() + losses_normal = AverageMeter() + losses_grad = AverageMeter() + end = time.time() + for i, batch in enumerate(data_loader): + + # prepare + image, depth = batch['image'], batch['depth'] + image = image.to(device) + depth = depth.to(device) + optimizer.zero_grad() + + # forward + output = model(image) + + # loss: depth + loss_depth = torch.log(torch.abs(output - depth) + loss_config.ALPHA).mean() + + # loss: grad + depth_grad = get_gradient(depth) + output_grad = get_gradient(output) + depth_grad_dx = depth_grad[:, 0, :, :].contiguous().view_as(depth) + depth_grad_dy = depth_grad[:, 1, :, :].contiguous().view_as(depth) + output_grad_dx = output_grad[:, 0, :, :].contiguous().view_as(depth) + output_grad_dy = output_grad[:, 1, :, :].contiguous().view_as(depth) + + loss_dx = torch.log(torch.abs(output_grad_dx - depth_grad_dx) + loss_config.ALPHA).mean() + loss_dy = torch.log(torch.abs(output_grad_dy - depth_grad_dy) + loss_config.ALPHA).mean() + + # loss: normal + ones = torch.ones(depth.size(0), 1, depth.size(2), depth.size(3), requires_grad=True).to(device) + depth_normal = torch.cat((-depth_grad_dx, -depth_grad_dy, ones), 1) + output_normal = torch.cat((-output_grad_dx, -output_grad_dy, ones), 1) + + loss_normal = torch.abs(1 - cos(output_normal, depth_normal)).mean() + + # loss + loss = loss_depth \ + + loss_config.LAMBDA * (loss_dx + loss_dy) \ + + loss_config.MU * loss_normal + + # update + bs = image.size(0) + losses.update(loss.item(), bs) + 
losses_depth.update(loss_depth.item(), bs) + losses_normal.update(loss_normal.item(), bs) + losses_grad.update((loss_dx + loss_dy).item(), bs) + + # step + loss.backward() + optimizer.step() + + # time + batch_time.update(time.time() - end) + end = time.time() + + # log + print(f'epoch {epoch}[{i}/{len(data_loader)}], ' + f'time {batch_time.value:.3f} ({batch_time.sum:.3f}), ' + f'loss {losses.value:.4f} ({losses.avg:.4f}), ' + f'l_d {losses_depth.value:.4f} ({losses_depth.avg:.4f}), ' + f'l_g {losses_grad.value:.4f} ({losses_grad.avg:.4f}), ' + f'l_n {losses_normal.value:.4f} ({losses_normal.avg:.4f}), ') + + if tblogger is not None: + tblogger.add_scalar('train/loss', losses.avg, epoch + 1) + tblogger.add_scalar('train/l_d', losses_depth.avg, epoch + 1) + tblogger.add_scalar('train/l_g', losses_grad.avg, epoch + 1) + tblogger.add_scalar('train/l_n', losses_normal.avg, epoch + 1) + + +def test(model: nn.Module, + data_loader: DataLoader, + threshold_edge: float = 0.25, + device: str = 'cuda', + epoch: Optional[int] = None, + tblogger: Optional[SummaryWriter] = None, + show_dir: Optional[str] = None): + model.eval() + get_gradient = Sobel().to(device) + if show_dir is not None: + os.makedirs(show_dir, exist_ok=False) + + metrics: Dict[str, AverageMeter] = { + 'MSE': AverageMeter(), + 'MAE': AverageMeter(), + 'ABS_REL': AverageMeter(), + 'LG10': AverageMeter(), + 'DELTA1': AverageMeter(), + 'DELTA2': AverageMeter(), + 'DELTA3': AverageMeter(), + 'EDGE_ACCURACY': AverageMeter(), + 'EDGE_PRECISION': AverageMeter(), + 'EDGE_RECALL': AverageMeter(), + 'EDGE_F1SCORE': AverageMeter(), + } + with torch.no_grad(): + for i, batch in enumerate(tqdm(data_loader)): + + # prepare + image, depth = batch['image'], batch['depth'] + image = image.to(device) + depth = depth.to(device) + + # forward + output = model(image) + output = torch.nn.functional.interpolate(output, size=[depth.size(2), depth.size(3)], + mode='bilinear', align_corners=True) + + # show output + if show_dir is not None: + for j, out_i in enumerate(output): + filename = f'vis_{i * data_loader.batch_size + j:05}.jpg' + plt.imshow(out_i.view(out_i.size(1), out_i.size(2)).data.cpu().numpy()) + plt.axis('off') + plt.savefig(os.path.join(show_dir, filename), + bbox_inches='tight', pad_inches=0) + plt.close() + + # calc metrics + d_metrics = evaluate_depth_metrics(output, depth) + + # forward for edge + depth_grad_xy = get_gradient(depth) + output_grad_xy = get_gradient(output) + + # calc edge metrics + e_metrics = evaluate_edge_metrics(output_grad_xy, depth_grad_xy, + threshold=threshold_edge) + + # update + bs = image.size(0) + metrics['MSE'].update(d_metrics.mse, bs) + metrics['MAE'].update(d_metrics.mae, bs) + metrics['ABS_REL'].update(d_metrics.abs_rel, bs) + metrics['LG10'].update(d_metrics.lg10, bs) + metrics['DELTA1'].update(d_metrics.delta1, bs) + metrics['DELTA2'].update(d_metrics.delta2, bs) + metrics['DELTA3'].update(d_metrics.delta3, bs) + metrics['EDGE_ACCURACY'].update(e_metrics.accuracy, bs) + metrics['EDGE_PRECISION'].update(e_metrics.precision, bs) + metrics['EDGE_RECALL'].update(e_metrics.recall, bs) + metrics['EDGE_F1SCORE'].update(e_metrics.f1_score, bs) + + rmse = np.sqrt(metrics['MSE'].avg) + + for k, v in metrics.items(): + print(k, v.avg, sep='\t') + print('RMSE', rmse, sep='\t') + + if tblogger is not None: + for k, v in metrics.items(): + tblogger.add_scalar(f'val/{k}_avg', v.avg, epoch + 1) + tblogger.add_scalar('val/RMSE_avg', rmse, epoch + 1) diff --git a/src/metrics.py b/src/metrics.py new file mode 100644 index 
0000000..0b6fb21 --- /dev/null +++ b/src/metrics.py @@ -0,0 +1,148 @@ +# ref. https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/util.py + +import dataclasses +import math +from typing import Tuple + +import torch +import numpy as np +from torch import Tensor + + +@dataclasses.dataclass +class DepthMetrics(object): + mse: float = 0. + mae: float = 0. + abs_rel: float = 0. + lg10: float = 0. + delta1: float = 0. + delta2: float = 0. + delta3: float = 0. + + +@dataclasses.dataclass +class EdgeMetrics(object): + accuracy: float = 0. + precision: float = 0. + recall: float = 0. + f1_score: float = 0. + + +def evaluate_edge_metrics(output_grad_xy: Tensor, depth_grad_xy: Tensor, + threshold: float = 0.25) -> EdgeMetrics: + + # calc edge valid + depth_edge = torch.sqrt( + torch.pow(depth_grad_xy[:, 0, :, :], 2) + torch.pow(depth_grad_xy[:, 1, :, :], 2)) + depth_edge_valid: Tensor = (depth_edge > threshold) + + output_edge = torch.sqrt( + torch.pow(output_grad_xy[:, 0, :, :], 2) + torch.pow(output_grad_xy[:, 1, :, :], 2)) + output_edge_valid: Tensor = (output_edge > threshold) + + # count true pixels + n_equal = np.sum(torch.eq(depth_edge_valid, output_edge_valid).float().data.cpu().numpy()) + n_equal_pos = np.sum((depth_edge_valid * output_edge_valid).float().data.cpu().numpy()) + + # calc metrics + n_total = depth_grad_xy.size(2) * depth_grad_xy.size(3) + accuracy = n_equal / n_total + n_out_pos = (np.sum(output_edge_valid.data.cpu().numpy())) + precision = n_equal_pos / n_out_pos if n_out_pos else 0 + recall = n_equal_pos / (np.sum(depth_edge_valid.data.cpu().numpy())) + f1_score = (2 * precision * recall) / (precision + recall) if precision + recall else 0 + + metrics = EdgeMetrics( + accuracy=accuracy, + precision=precision, + recall=recall, + f1_score=f1_score + ) + return metrics + + +def evaluate_depth_metrics(output: Tensor, target: Tensor) -> DepthMetrics: + + _output, _target, nan_mask, n_valid_element = set_nan_to_zero(output, target) + + if n_valid_element.data.cpu().numpy(): + + # calc diff + diff_matrix = torch.abs(_output - _target) + + # mse, mae + mse = torch.sum(torch.pow(diff_matrix, 2)) / n_valid_element + mae = torch.sum(diff_matrix) / n_valid_element + + # abs rel + real_matrix = torch.div(diff_matrix, _target) + real_matrix[nan_mask] = 0 + abs_rel = torch.sum(real_matrix) / n_valid_element + + # lg10 + lg10_matrix = torch.abs(calc_lg10(_output) - calc_lg10(_target)) + lg10_matrix[nan_mask] = 0 + lg10 = torch.sum(lg10_matrix) / n_valid_element + + # delta + y_over_z = torch.div(_output, _target) + z_over_y = torch.div(_target, _output) + max_ratio = max_of_two(y_over_z, z_over_y) + delta1 = torch.sum( + torch.le(max_ratio, 1.25).float()) / n_valid_element + delta2 = torch.sum( + torch.le(max_ratio, math.pow(1.25, 2)).float()) / n_valid_element + delta3 = torch.sum( + torch.le(max_ratio, math.pow(1.25, 3)).float()) / n_valid_element + + metrics = DepthMetrics( + mse=float(mse.data.cpu().numpy()), + mae=float(mae.data.cpu().numpy()), + abs_rel=float(abs_rel.data.cpu().numpy()), + lg10=float(lg10.data.cpu().numpy()), + delta1=float(delta1.data.cpu().numpy()), + delta2=float(delta2.data.cpu().numpy()), + delta3=float(delta3.data.cpu().numpy()) + ) + + else: + metrics = DepthMetrics() + + return metrics + + +def calc_lg10(x: Tensor) -> Tensor: + return torch.div(torch.log(x), math.log(10)) + + +def max_of_two(x: Tensor, y: Tensor) -> Tensor: + z = x.clone() + mask_y_larger = torch.lt(x, y) + z[mask_y_larger.detach()] = y[mask_y_larger.detach()] + return z 
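+
+# NOTE: the NaN-handling helpers below rely on the IEEE-754 rule that NaN != NaN:
+# torch.eq(x, x) marks valid depth values and torch.ne(x, x) marks missing (NaN) ones.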
+ + +def get_n_valid(x: Tensor) -> Tensor: + return torch.sum(torch.eq(x, x).float()) + + +def get_n_nan_element(x: Tensor) -> Tensor: + return torch.sum(torch.ne(x, x).float()) + + +def get_nan_mask(x: Tensor) -> Tensor: + return torch.ne(x, x) + + +def set_nan_to_zero(input: Tensor, target: Tensor + ) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + nan_mask = get_nan_mask(target) + n_valid_element = get_n_valid(target) + + _input = input.clone() + _target = target.clone() + + _input[nan_mask] = 0 + _target[nan_mask] = 0 + + return _input, _target, nan_mask, n_valid_element diff --git a/src/models/__init__.py b/src/models/__init__.py new file mode 100644 index 0000000..a83272a --- /dev/null +++ b/src/models/__init__.py @@ -0,0 +1 @@ +from .models import build_model diff --git a/src/models/densenet.py b/src/models/densenet.py new file mode 100644 index 0000000..26c7893 --- /dev/null +++ b/src/models/densenet.py @@ -0,0 +1,160 @@ +# copied from https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/models/densenet.py + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.model_zoo as model_zoo +from collections import OrderedDict +import pdb +import copy +from torchvision import utils +import numpy as np + +__all__ = ['DenseNet', 'densenet121', + 'densenet169', 'densenet201', 'densenet161'] + + +model_urls = { + 'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth', + 'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth', + 'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth', + 'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth', +} + + + +def densenet161(pretrained=False, **kwargs): + r"""Densenet-161 model from + `"Densely Connected Convolutional Networks" `_ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = DenseNet(num_init_features=96, growth_rate=48, block_config=(6, 12, 36, 24), + **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['densenet161'], 'pretrained_model/encoder')) + return model + + +class _DenseLayer(nn.Sequential): + + def __init__(self, num_input_features, growth_rate, bn_size, drop_rate): + super(_DenseLayer, self).__init__() + self.add_module('norm.1', nn.BatchNorm2d(num_input_features)), + self.add_module('relu.1', nn.ReLU(inplace=True)), + self.add_module('conv.1', nn.Conv2d(num_input_features, bn_size * + growth_rate, kernel_size=1, stride=1, bias=False)), + self.add_module('norm.2', nn.BatchNorm2d(bn_size * growth_rate)), + self.add_module('relu.2', nn.ReLU(inplace=True)), + self.add_module('conv.2', nn.Conv2d(bn_size * growth_rate, growth_rate, + kernel_size=3, stride=1, padding=1, bias=False)), + self.drop_rate = drop_rate + + def forward(self, x): + new_features = super(_DenseLayer, self).forward(x) + if self.drop_rate > 0: + new_features = F.dropout( + new_features, p=self.drop_rate, training=self.training) + return torch.cat([x, new_features], 1) + + +class _DenseBlock(nn.Sequential): + + def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate): + super(_DenseBlock, self).__init__() + for i in range(num_layers): + layer = _DenseLayer(num_input_features + i * + growth_rate, growth_rate, bn_size, drop_rate) + self.add_module('denselayer%d' % (i + 1), layer) + + +class _Transition(nn.Sequential): + + def __init__(self, num_input_features, num_output_features): + super(_Transition, self).__init__() + 
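+        # transition between dense blocks: BN -> ReLU -> 1x1 conv (channel reduction) -> 2x2 average pooling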
self.add_module('norm', nn.BatchNorm2d(num_input_features)) + self.add_module('relu', nn.ReLU(inplace=True)) + self.add_module('conv', nn.Conv2d(num_input_features, num_output_features, + kernel_size=1, stride=1, bias=False)) + self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2)) + + + +class _DenseLayer(nn.Sequential): + + def __init__(self, num_input_features, growth_rate, bn_size, drop_rate): + super(_DenseLayer, self).__init__() + self.add_module('norm.1', nn.BatchNorm2d(num_input_features)), + self.add_module('relu.1', nn.ReLU(inplace=True)), + self.add_module('conv.1', nn.Conv2d(num_input_features, bn_size * + growth_rate, kernel_size=1, stride=1, bias=False)), + self.add_module('norm.2', nn.BatchNorm2d(bn_size * growth_rate)), + self.add_module('relu.2', nn.ReLU(inplace=True)), + self.add_module('conv.2', nn.Conv2d(bn_size * growth_rate, growth_rate, + kernel_size=3, stride=1, padding=1, bias=False)), + self.drop_rate = drop_rate + + def forward(self, x): + new_features = super(_DenseLayer, self).forward(x) + if self.drop_rate > 0: + new_features = F.dropout( + new_features, p=self.drop_rate, training=self.training) + return torch.cat([x, new_features], 1) + + +class _DenseBlock(nn.Sequential): + + def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate): + super(_DenseBlock, self).__init__() + for i in range(num_layers): + layer = _DenseLayer(num_input_features + i * + growth_rate, growth_rate, bn_size, drop_rate) + self.add_module('denselayer%d' % (i + 1), layer) + + +class DenseNet(nn.Module): + + def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), + num_init_features=64, bn_size=4, drop_rate=0, num_classes=1000): + + super(DenseNet, self).__init__() + + # First convolution + self.features = nn.Sequential(OrderedDict([ + ('conv0', nn.Conv2d(3, num_init_features, + kernel_size=7, stride=2, padding=3, bias=False)), + ('norm0', nn.BatchNorm2d(num_init_features)), + ('relu0', nn.ReLU(inplace=True)), + ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)), + ])) + + # Each denseblock + num_features = num_init_features + for i, num_layers in enumerate(block_config): + block = _DenseBlock(num_layers=num_layers, num_input_features=num_features, + bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate) + self.features.add_module('denseblock%d' % (i + 1), block) + num_features = num_features + num_layers * growth_rate + if i != len(block_config) - 1: + trans = _Transition( + num_input_features=num_features, num_output_features=num_features // 2) + self.features.add_module('transition%d' % (i + 1), trans) + num_features = num_features // 2 + # print(str(i), num_features) + + # Final batch norm + self.features.add_module('norm5', nn.BatchNorm2d(num_features)) + self.num_features = num_features + + # Linear layer + self.classifier = nn.Linear(num_features, num_classes) + + + def forward(self, x): + features = self.features(x) + out = F.relu(features, inplace=True) + out = F.avg_pool2d(out, kernel_size=7, stride=1).view( + features.size(0), -1) + out = self.classifier(out) + return out, self.num_features + diff --git a/src/models/models.py b/src/models/models.py new file mode 100644 index 0000000..c355f2c --- /dev/null +++ b/src/models/models.py @@ -0,0 +1,33 @@ +from typing import Optional + +import torch.nn as nn +from omegaconf import DictConfig + +from . 
import modules, net, resnet, densenet, senet + + +def build_model(config: DictConfig, model_state_dict: Optional[dict] = None) -> nn.Module: + model_type = config.MODEL.NAME + + if model_type == 'resnet': + original_model = resnet.resnet50(pretrained=True) + encoder = modules.E_resnet(original_model) + model = net.model(encoder, num_features=2048, block_channel=[256, 512, 1024, 2048]) + elif model_type == 'densenet': + original_model = densenet.densenet161(pretrained=True) + encoder = modules.E_densenet(original_model) + model = net.model(encoder, num_features=2208, block_channel=[192, 384, 1056, 2208]) + elif model_type == 'senet': + original_model = senet.senet154(pretrained='imagenet') + encoder = modules.E_senet(original_model) + model = net.model(encoder, num_features=2048, block_channel=[256, 512, 1024, 2048]) + else: + raise NotImplementedError + + model.to(config.DEVICE) + + # load + if model_state_dict is not None: + model.load_state_dict(model_state_dict) + + return model diff --git a/src/models/modules.py b/src/models/modules.py new file mode 100644 index 0000000..c0cc905 --- /dev/null +++ b/src/models/modules.py @@ -0,0 +1,211 @@ +# copied from https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/models/modules.py + +import torch +import torch.nn.functional as F +import torch.nn as nn + + +class _UpProjection(nn.Sequential): + + def __init__(self, num_input_features, num_output_features): + super(_UpProjection, self).__init__() + + self.conv1 = nn.Conv2d(num_input_features, num_output_features, + kernel_size=5, stride=1, padding=2, bias=False) + self.bn1 = nn.BatchNorm2d(num_output_features) + self.relu = nn.ReLU(inplace=True) + self.conv1_2 = nn.Conv2d(num_output_features, num_output_features, + kernel_size=3, stride=1, padding=1, bias=False) + self.bn1_2 = nn.BatchNorm2d(num_output_features) + + self.conv2 = nn.Conv2d(num_input_features, num_output_features, + kernel_size=5, stride=1, padding=2, bias=False) + self.bn2 = nn.BatchNorm2d(num_output_features) + + def forward(self, x, size): + x = F.interpolate(x, size=size, mode='bilinear', align_corners=True) + x_conv1 = self.relu(self.bn1(self.conv1(x))) + bran1 = self.bn1_2(self.conv1_2(x_conv1)) + bran2 = self.bn2(self.conv2(x)) + + out = self.relu(bran1 + bran2) + + return out + + +class E_resnet(nn.Module): + + def __init__(self, original_model, num_features=2048): + super(E_resnet, self).__init__() + self.conv1 = original_model.conv1 + self.bn1 = original_model.bn1 + self.relu = original_model.relu + self.maxpool = original_model.maxpool + + self.layer1 = original_model.layer1 + self.layer2 = original_model.layer2 + self.layer3 = original_model.layer3 + self.layer4 = original_model.layer4 + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x_block1 = self.layer1(x) + x_block2 = self.layer2(x_block1) + x_block3 = self.layer3(x_block2) + x_block4 = self.layer4(x_block3) + + return x_block1, x_block2, x_block3, x_block4 + + +class E_densenet(nn.Module): + + def __init__(self, original_model, num_features=2208): + super(E_densenet, self).__init__() + self.features = original_model.features + + def forward(self, x): + x01 = self.features[0](x) + x02 = self.features[1](x01) + x03 = self.features[2](x02) + x04 = self.features[3](x03) + + x_block1 = self.features[4](x04) + x_block1 = self.features[5][0](x_block1) + x_block1 = self.features[5][1](x_block1) + x_block1 = self.features[5][2](x_block1) + x_tran1 = self.features[5][3](x_block1) + + x_block2 
= self.features[6](x_tran1) + x_block2 = self.features[7][0](x_block2) + x_block2 = self.features[7][1](x_block2) + x_block2 = self.features[7][2](x_block2) + x_tran2 = self.features[7][3](x_block2) + + x_block3 = self.features[8](x_tran2) + x_block3 = self.features[9][0](x_block3) + x_block3 = self.features[9][1](x_block3) + x_block3 = self.features[9][2](x_block3) + x_tran3 = self.features[9][3](x_block3) + + x_block4 = self.features[10](x_tran3) + x_block4 = F.relu(self.features[11](x_block4)) + + return x_block1, x_block2, x_block3, x_block4 + + +class E_senet(nn.Module): + + def __init__(self, original_model, num_features=2048): + super(E_senet, self).__init__() + self.base = nn.Sequential(*list(original_model.children())[:-3]) + + def forward(self, x): + x = self.base[0](x) + x_block1 = self.base[1](x) + x_block2 = self.base[2](x_block1) + x_block3 = self.base[3](x_block2) + x_block4 = self.base[4](x_block3) + + return x_block1, x_block2, x_block3, x_block4 + + +class D(nn.Module): + + def __init__(self, num_features=2048): + super(D, self).__init__() + self.conv = nn.Conv2d(num_features, num_features // + 2, kernel_size=1, stride=1, bias=False) + num_features = num_features // 2 + self.bn = nn.BatchNorm2d(num_features) + + self.up1 = _UpProjection( + num_input_features=num_features, num_output_features=num_features // 2) + num_features = num_features // 2 + + self.up2 = _UpProjection( + num_input_features=num_features, num_output_features=num_features // 2) + num_features = num_features // 2 + + self.up3 = _UpProjection( + num_input_features=num_features, num_output_features=num_features // 2) + num_features = num_features // 2 + + self.up4 = _UpProjection( + num_input_features=num_features, num_output_features=num_features // 2) + num_features = num_features // 2 + + def forward(self, x_block1, x_block2, x_block3, x_block4): + x_d0 = F.relu(self.bn(self.conv(x_block4))) + x_d1 = self.up1(x_d0, [x_block3.size(2), x_block3.size(3)]) + x_d2 = self.up2(x_d1, [x_block2.size(2), x_block2.size(3)]) + x_d3 = self.up3(x_d2, [x_block1.size(2), x_block1.size(3)]) + x_d4 = self.up4(x_d3, [x_block1.size(2) * 2, x_block1.size(3) * 2]) + + return x_d4 + + +class MFF(nn.Module): + + def __init__(self, block_channel, num_features=64): + super(MFF, self).__init__() + + self.up1 = _UpProjection( + num_input_features=block_channel[0], num_output_features=16) + + self.up2 = _UpProjection( + num_input_features=block_channel[1], num_output_features=16) + + self.up3 = _UpProjection( + num_input_features=block_channel[2], num_output_features=16) + + self.up4 = _UpProjection( + num_input_features=block_channel[3], num_output_features=16) + + self.conv = nn.Conv2d( + num_features, num_features, kernel_size=5, stride=1, padding=2, bias=False) + self.bn = nn.BatchNorm2d(num_features) + + def forward(self, x_block1, x_block2, x_block3, x_block4, size): + x_m1 = self.up1(x_block1, size) + x_m2 = self.up2(x_block2, size) + x_m3 = self.up3(x_block3, size) + x_m4 = self.up4(x_block4, size) + + x = self.bn(self.conv(torch.cat((x_m1, x_m2, x_m3, x_m4), 1))) + x = F.relu(x) + + return x + + +class R(nn.Module): + def __init__(self, block_channel): + super(R, self).__init__() + + num_features = 64 + block_channel[3] // 32 + self.conv0 = nn.Conv2d(num_features, num_features, + kernel_size=5, stride=1, padding=2, bias=False) + self.bn0 = nn.BatchNorm2d(num_features) + + self.conv1 = nn.Conv2d(num_features, num_features, + kernel_size=5, stride=1, padding=2, bias=False) + self.bn1 = nn.BatchNorm2d(num_features) + + 
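+        # the final 5x5 convolution maps the fused features to a single-channel depth map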
self.conv2 = nn.Conv2d( + num_features, 1, kernel_size=5, stride=1, padding=2, bias=True) + + def forward(self, x): + x0 = self.conv0(x) + x0 = self.bn0(x0) + x0 = F.relu(x0) + + x1 = self.conv1(x0) + x1 = self.bn1(x1) + x1 = F.relu(x1) + + x2 = self.conv2(x1) + + return x2 diff --git a/src/models/net.py b/src/models/net.py new file mode 100644 index 0000000..0b2c714 --- /dev/null +++ b/src/models/net.py @@ -0,0 +1,24 @@ +# copied from https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/models/net.py + +import torch +import torch.nn as nn +from . import modules + + +class model(nn.Module): + def __init__(self, Encoder, num_features, block_channel): + + super(model, self).__init__() + + self.E = Encoder + self.D = modules.D(num_features) + self.MFF = modules.MFF(block_channel) + self.R = modules.R(block_channel) + + def forward(self, x): + x_block1, x_block2, x_block3, x_block4 = self.E(x) + x_decoder = self.D(x_block1, x_block2, x_block3, x_block4) + x_mff = self.MFF(x_block1, x_block2, x_block3, x_block4,[x_decoder.size(2),x_decoder.size(3)]) + out = self.R(torch.cat((x_decoder, x_mff), 1)) + + return out diff --git a/src/models/resnet.py b/src/models/resnet.py new file mode 100644 index 0000000..c6fff08 --- /dev/null +++ b/src/models/resnet.py @@ -0,0 +1,208 @@ +# copied from https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/models/resnet.py + +import torch.nn as nn +import math +import torch.utils.model_zoo as model_zoo + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152'] + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1): + "3x3 convolution with padding" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = 
self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000): + self.inplanes = 64 + super(ResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.avgpool = nn.AvgPool2d(7, stride=1) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + + return x + +def resnet18(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) + return model + + +def resnet34(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) + return model + + +def resnet50(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet50'], 'pretrained_model/encoder')) + return model + + +def resnet101(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) + return model + + +def resnet152(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. 
+ Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) + return model diff --git a/src/models/senet.py b/src/models/senet.py new file mode 100644 index 0000000..e54cbf8 --- /dev/null +++ b/src/models/senet.py @@ -0,0 +1,452 @@ +# copied from https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/models/senet.py + +""" +ResNet code gently borrowed from +https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py +""" + +from collections import OrderedDict +import math +import torch +import torch.nn.functional as F +import torch.nn as nn +from torch.utils import model_zoo +import copy +import numpy as np + +__all__ = ['SENet', 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', + 'se_resnext50_32x4d', 'se_resnext101_32x4d'] + +pretrained_settings = { + 'senet154': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + 'num_classes': 1000 + } + }, + 'se_resnet50': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + 'num_classes': 1000 + } + }, + 'se_resnet101': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + 'num_classes': 1000 + } + }, + 'se_resnet152': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + 'num_classes': 1000 + } + }, + 'se_resnext50_32x4d': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + 'num_classes': 1000 + } + }, + 'se_resnext101_32x4d': { + 'imagenet': { + 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth', + 'input_space': 'RGB', + 'input_size': [3, 224, 224], + 'input_range': [0, 1], + 'mean': [0.485, 0.456, 0.406], + 'std': [0.229, 0.224, 0.225], + 'num_classes': 1000 + } + }, +} + + +class SEModule(nn.Module): + + def __init__(self, channels, reduction): + super(SEModule, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1, + padding=0) + self.relu = nn.ReLU(inplace=True) + self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1, + padding=0) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + module_input = x + x = self.avg_pool(x) + x = self.fc1(x) + x = self.relu(x) + x = self.fc2(x) + x = self.sigmoid(x) + return module_input * x + + +class Bottleneck(nn.Module): + """ + Base class for bottlenecks that implements `forward()` method. 
+ """ + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out = self.se_module(out) + residual + out = self.relu(out) + + return out + + +class SEBottleneck(Bottleneck): + """ + Bottleneck for SENet154. + """ + expansion = 4 + + def __init__(self, inplanes, planes, groups, reduction, stride=1, + downsample=None): + super(SEBottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes * 2) + self.conv2 = nn.Conv2d(planes * 2, planes * 4, kernel_size=3, + stride=stride, padding=1, groups=groups, + bias=False) + self.bn2 = nn.BatchNorm2d(planes * 4) + self.conv3 = nn.Conv2d(planes * 4, planes * 4, kernel_size=1, + bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.se_module = SEModule(planes * 4, reduction=reduction) + self.downsample = downsample + self.stride = stride + + +class SEResNetBottleneck(Bottleneck): + """ + ResNet bottleneck with a Squeeze-and-Excitation module. It follows Caffe + implementation and uses `stride=stride` in `conv1` and not in `conv2` + (the latter is used in the torchvision implementation of ResNet). + """ + expansion = 4 + + def __init__(self, inplanes, planes, groups, reduction, stride=1, + downsample=None): + super(SEResNetBottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False, + stride=stride) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, + groups=groups, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.se_module = SEModule(planes * 4, reduction=reduction) + self.downsample = downsample + self.stride = stride + + +class SEResNeXtBottleneck(Bottleneck): + """ + ResNeXt bottleneck type C with a Squeeze-and-Excitation module. + """ + expansion = 4 + + def __init__(self, inplanes, planes, groups, reduction, stride=1, + downsample=None, base_width=4): + super(SEResNeXtBottleneck, self).__init__() + # width = math.floor(planes * (base_width / 64)) * groups + # pdb.set_trace() + width = int(planes * base_width / 64) * groups + self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False, + stride=1) + self.bn1 = nn.BatchNorm2d(width) + self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride, + padding=1, groups=groups, bias=False) + self.bn2 = nn.BatchNorm2d(width) + self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + self.se_module = SEModule(planes * 4, reduction=reduction) + self.downsample = downsample + self.stride = stride + + +class SENet(nn.Module): + + def __init__(self, block, layers, groups, reduction, dropout_p=0.2, + inplanes=128, input_3x3=True, downsample_kernel_size=3, + downsample_padding=1, num_classes=1000): + """ + Parameters + ---------- + block (nn.Module): Bottleneck class. + - For SENet154: SEBottleneck + - For SE-ResNet models: SEResNetBottleneck + - For SE-ResNeXt models: SEResNeXtBottleneck + layers (list of ints): Number of residual blocks for 4 layers of the + network (layer1...layer4). 
+ groups (int): Number of groups for the 3x3 convolution in each + bottleneck block. + - For SENet154: 64 + - For SE-ResNet models: 1 + - For SE-ResNeXt models: 32 + reduction (int): Reduction ratio for Squeeze-and-Excitation modules. + - For all models: 16 + dropout_p (float or None): Drop probability for the Dropout layer. + If `None` the Dropout layer is not used. + - For SENet154: 0.2 + - For SE-ResNet models: None + - For SE-ResNeXt models: None + inplanes (int): Number of input channels for layer1. + - For SENet154: 128 + - For SE-ResNet models: 64 + - For SE-ResNeXt models: 64 + input_3x3 (bool): If `True`, use three 3x3 convolutions instead of + a single 7x7 convolution in layer0. + - For SENet154: True + - For SE-ResNet models: False + - For SE-ResNeXt models: False + downsample_kernel_size (int): Kernel size for downsampling convolutions + in layer2, layer3 and layer4. + - For SENet154: 3 + - For SE-ResNet models: 1 + - For SE-ResNeXt models: 1 + downsample_padding (int): Padding for downsampling convolutions in + layer2, layer3 and layer4. + - For SENet154: 1 + - For SE-ResNet models: 0 + - For SE-ResNeXt models: 0 + num_classes (int): Number of outputs in `last_linear` layer. + - For all models: 1000 + """ + super(SENet, self).__init__() + self.inplanes = inplanes + if input_3x3: + layer0_modules = [ + ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1, + bias=False)), + ('bn1', nn.BatchNorm2d(64)), + ('relu1', nn.ReLU(inplace=True)), + ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1, + bias=False)), + ('bn2', nn.BatchNorm2d(64)), + ('relu2', nn.ReLU(inplace=True)), + ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1, + bias=False)), + ('bn3', nn.BatchNorm2d(inplanes)), + ('relu3', nn.ReLU(inplace=True)), + ] + else: + layer0_modules = [ + ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2, + padding=3, bias=False)), + ('bn1', nn.BatchNorm2d(inplanes)), + ('relu1', nn.ReLU(inplace=True)), + ] + # To preserve compatibility with Caffe weights `ceil_mode=True` + # is used instead of `padding=1`. 
+ layer0_modules.append(('pool', nn.MaxPool2d(3, stride=2, + ceil_mode=True))) + self.layer0 = nn.Sequential(OrderedDict(layer0_modules)) + self.layer1 = self._make_layer( + block, + planes=64, + blocks=layers[0], + groups=groups, + reduction=reduction, + downsample_kernel_size=1, + downsample_padding=0 + ) + self.layer2 = self._make_layer( + block, + planes=128, + blocks=layers[1], + stride=2, + groups=groups, + reduction=reduction, + downsample_kernel_size=downsample_kernel_size, + downsample_padding=downsample_padding + ) + self.layer3 = self._make_layer( + block, + planes=256, + blocks=layers[2], + stride=2, + groups=groups, + reduction=reduction, + downsample_kernel_size=downsample_kernel_size, + downsample_padding=downsample_padding + ) + self.layer4 = self._make_layer( + block, + planes=512, + blocks=layers[3], + stride=2, + groups=groups, + reduction=reduction, + downsample_kernel_size=downsample_kernel_size, + downsample_padding=downsample_padding + ) + self.avg_pool = nn.AvgPool2d(7, stride=1) + self.dropout = nn.Dropout(dropout_p) if dropout_p is not None else None + self.last_linear = nn.Linear(512 * block.expansion, num_classes) + + + def _make_layer(self, block, planes, blocks, groups, reduction, stride=1, + downsample_kernel_size=1, downsample_padding=0): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=downsample_kernel_size, stride=stride, + padding=downsample_padding, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, groups, reduction, stride, + downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes, groups, reduction)) + + return nn.Sequential(*layers) + + + def features(self, x): + x = self.layer0(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + return x + + + def logits(self, x): + x = self.avg_pool(x) + if self.dropout is not None: + x = self.dropout(x) + x = x.view(x.size(0), -1) + x = self.last_linear(x) + return x + + def forward(self, x,x_): + x = self.features(x) + x = self.logits(x) + return x + +def initialize_pretrained_model(model, num_classes, settings): + assert num_classes == settings['num_classes'], \ + 'num_classes should be {}, but is {}'.format( + settings['num_classes'], num_classes) + model.load_state_dict(model_zoo.load_url(settings['url'], 'pretrained_model/encoder')) + model.input_space = settings['input_space'] + model.input_size = settings['input_size'] + model.input_range = settings['input_range'] + model.mean = settings['mean'] + model.std = settings['std'] + + +def senet154(num_classes=1000, pretrained='imagenet'): + model = SENet(SEBottleneck, [3, 8, 36, 3], groups=64, reduction=16, + dropout_p=0.2, num_classes=num_classes) + if pretrained is not None: + settings = pretrained_settings['senet154'][pretrained] + initialize_pretrained_model(model, num_classes, settings) + return model + + +def se_resnet50(num_classes=1000, pretrained='imagenet'): + model = SENet(SEResNetBottleneck, [3, 4, 6, 3], groups=1, reduction=16, + dropout_p=None, inplanes=64, input_3x3=False, + downsample_kernel_size=1, downsample_padding=0, + num_classes=num_classes) + if pretrained is not None: + settings = pretrained_settings['se_resnet50'][pretrained] + initialize_pretrained_model(model, num_classes, settings) + return model + + +def 
se_resnet101(num_classes=1000, pretrained='imagenet'): + model = SENet(SEResNetBottleneck, [3, 4, 23, 3], groups=1, reduction=16, + dropout_p=None, inplanes=64, input_3x3=False, + downsample_kernel_size=1, downsample_padding=0, + num_classes=num_classes) + if pretrained is not None: + settings = pretrained_settings['se_resnet101'][pretrained] + initialize_pretrained_model(model, num_classes, settings) + return model + + +def se_resnet152(num_classes=1000, pretrained='imagenet'): + model = SENet(SEResNetBottleneck, [3, 8, 36, 3], groups=1, reduction=16, + dropout_p=None, inplanes=64, input_3x3=False, + downsample_kernel_size=1, downsample_padding=0, + num_classes=num_classes) + if pretrained is not None: + settings = pretrained_settings['se_resnet152'][pretrained] + initialize_pretrained_model(model, num_classes, settings) + return model + + +def se_resnext50_32x4d(num_classes=1000, pretrained='imagenet'): + model = SENet(SEResNeXtBottleneck, [3, 4, 6, 3], groups=32, reduction=16, + dropout_p=None, inplanes=64, input_3x3=False, + downsample_kernel_size=1, downsample_padding=0, + num_classes=num_classes) + if pretrained is not None: + settings = pretrained_settings['se_resnext50_32x4d'][pretrained] + initialize_pretrained_model(model, num_classes, settings) + return model + + +def se_resnext101_32x4d(num_classes=1000, pretrained='imagenet'): + model = SENet(SEResNeXtBottleneck, [3, 4, 23, 3], groups=32, reduction=16, + dropout_p=None, inplanes=64, input_3x3=False, + downsample_kernel_size=1, downsample_padding=0, + num_classes=num_classes) + if pretrained is not None: + settings = pretrained_settings['se_resnext101_32x4d'][pretrained] + initialize_pretrained_model(model, num_classes, settings) + return model diff --git a/src/sobel.py b/src/sobel.py new file mode 100644 index 0000000..57642e9 --- /dev/null +++ b/src/sobel.py @@ -0,0 +1,26 @@ +# copied from https://github.com/JunjH/Revisiting_Single_Depth_Estimation/blob/master/sobel.py + +import torch +import torch.nn as nn +import numpy as np + + +class Sobel(nn.Module): + def __init__(self): + super(Sobel, self).__init__() + self.edge_conv = nn.Conv2d(1, 2, kernel_size=3, stride=1, padding=1, bias=False) + edge_kx = np.array([[1, 0, -1], [2, 0, -2], [1, 0, -1]]) + edge_ky = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]]) + edge_k = np.stack((edge_kx, edge_ky)) + + edge_k = torch.from_numpy(edge_k).float().view(2, 1, 3, 3) + self.edge_conv.weight = nn.Parameter(edge_k) + + for param in self.parameters(): + param.requires_grad = False + + def forward(self, x): + out = self.edge_conv(x) + out = out.contiguous().view(-1, 2, x.size(2), x.size(3)) + + return out diff --git a/src/utils.py b/src/utils.py new file mode 100644 index 0000000..01ed834 --- /dev/null +++ b/src/utils.py @@ -0,0 +1,66 @@ +import dataclasses +import random +from typing import Optional, Tuple, List + +import torch +import numpy as np +import torch.nn as nn +from omegaconf import OmegaConf, DictConfig +from torch.optim.optimizer import Optimizer +from torch.optim.lr_scheduler import _LRScheduler + +from .models import build_model + + +def load_config(cfg_path: Optional[str] = None, + default_cfg_path: str = 'configs/default.yaml', + update_dotlist: Optional[List[str]] = None) -> DictConfig: + + config = OmegaConf.load(default_cfg_path) + if cfg_path is not None: + optional_config = OmegaConf.load(cfg_path) + config = OmegaConf.merge(config, optional_config) + if update_dotlist is not None: + update_config = OmegaConf.from_dotlist(update_dotlist) + config = 
OmegaConf.merge(config, update_config)
+
+    OmegaConf.set_readonly(config, True)
+
+    return config
+
+
+def print_config(config: DictConfig) -> None:
+    print(OmegaConf.to_yaml(config))
+
+
+def make_deterministic(seed):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+
+def prepare_training_modules(config: DictConfig, resume_from: Optional[str] = None
+                             ) -> Tuple[int, nn.Module, Optimizer, _LRScheduler]:
+
+    model: nn.Module = build_model(config)
+    optimizer = torch.optim.Adam(model.parameters(),
+                                 lr=config.SOLVER.BASE_LR,
+                                 weight_decay=config.SOLVER.WEIGHT_DECAY)
+    scheduler = torch.optim.lr_scheduler.StepLR(
+        optimizer,
+        config.SOLVER.LR_STEP_SIZE,
+        config.SOLVER.LR_GAMMA,
+    )
+
+    # resume
+    start_epoch = 0
+    if resume_from is not None:
+        ckpt: dict = torch.load(resume_from)
+        model.load_state_dict(ckpt['model_state_dict'])
+        optimizer.load_state_dict(ckpt['optimizer_state_dict'])
+        scheduler.load_state_dict(ckpt['scheduler_state_dict'])
+        start_epoch = ckpt['epoch']
+
+    return start_epoch, model, optimizer, scheduler
diff --git a/tools/README.md b/tools/README.md
new file mode 100644
index 0000000..1d7b6f5
--- /dev/null
+++ b/tools/README.md
@@ -0,0 +1,45 @@
+# Tools description
+
+### train.py
+
+```bash
+python tools/train.py
+```
+
+Option | Description
+--- | ---
+`--config [config path]` | Optional config file path. The `configs/default.yaml` is loaded by default. The specified file overwrites the default configs.
+`--out-dir [outdir path]` | Output directory path. (default: `results`)
+`--resume [ckpt path]` | Resuming checkpoint file path.
+
+### test.py
+
+```bash
+python tools/test.py [ckpt path]
+```
+
+Option | Description
+--- | ---
+`--config [config path]` | Optional config file path used when training.
+`--show-dir [outdir path]` | Path to save the prediction visualizations. Specify this only if you want to save them.
+
+
+If you want to override the config with command-line args, put them at the end in dotlist form.
+
+```bash
+python tools/train.py --config [config path] SOLVER.NUM_WORKERS=8 SOLVER.EPOCH=5
+```
+
+### visualize_nyu2_test_gt.py
+
+Visualize the dataset ground truth in the same way as the prediction visualizations, so the two can be compared.
+
+```bash
+python tools/visualize_nyu2_test_gt.py
+```
+
+Option | Description
+--- | ---
+`--outdir [outdir path]` | Output directory path. (default: `vis_gt`)
+`--config [config path]` | Optional config file path.
+`--debug` | Debug mode (visualize only one sample).
diff --git a/tools/test.py b/tools/test.py
new file mode 100644
index 0000000..092fe9d
--- /dev/null
+++ b/tools/test.py
@@ -0,0 +1,51 @@
+import argparse
+from typing import Optional
+
+import torch
+from omegaconf import DictConfig
+
+from src.data import get_test_loader
+from src.models import build_model
+from src.engine import test
+from src.utils import load_config, make_deterministic
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description='Test a predictor')
+    parser.add_argument('ckpt', type=str, help='the checkpoint file')
+    parser.add_argument('--config', type=str, default=None, help='train config file path')
+    parser.add_argument('--show-dir', type=str, default=None,
+                        help='Please specify the directory if you want to save predicted figures.')
+    parser.add_argument('opts', default=None, nargs=argparse.REMAINDER,
+                        help='Overwrite configs. (ex.
SOLVER.NUM_WORKERS=8)') + return parser.parse_args() + + +def main(config: DictConfig, ckpt: dict, show_dir: Optional[str] = None): + + # seed + if config.SEED is not None: + make_deterministic(seed=config.SEED) + + # data + test_loader = get_test_loader(config) + + # model + model = build_model(config, model_state_dict=ckpt['model_state_dict']) + + # test + test(model=model, + data_loader=test_loader, + device=config.DEVICE, + threshold_edge=config.TEST.THRESHOLD_EDGE, + show_dir=show_dir) + + +if __name__ == "__main__": + args = parse_args() + + # load config, ckpt + config = load_config(args.config, update_dotlist=args.opts) + ckpt: dict = torch.load(args.ckpt) + + main(config, ckpt, args.show_dir) diff --git a/tools/train.py b/tools/train.py new file mode 100644 index 0000000..62524d7 --- /dev/null +++ b/tools/train.py @@ -0,0 +1,86 @@ +import argparse +import os +from typing import Optional + +import torch +from omegaconf import DictConfig +from tensorboardX import SummaryWriter + +from src.data import build_data_loader +from src.engine import train, test +from src.utils import load_config, print_config, make_deterministic, prepare_training_modules + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description='Train a predictor') + parser.add_argument('--config', type=str, default=None, help='train config file path') + parser.add_argument('--out-dir', type=str, default=None, help='the dir to save logs and models') + parser.add_argument('--resume', type=str, default=None, help='the checkpoint file to resume from') + parser.add_argument('opts', default=None, nargs=argparse.REMAINDER, + help='Overwrite configs. (ex. SOLVER.NUM_WORKERS=8)') + return parser.parse_args() + + +def main(config: DictConfig, + out_dir: str = 'results', + resume_from: Optional[str] = None): + + # seed + if config.SEED is not None: + make_deterministic(seed=config.SEED) + + # data + train_loader, test_loader = build_data_loader(config) + + # training modules + start_epoch, model, optimizer, scheduler = \ + prepare_training_modules(config, resume_from=resume_from) + + # output setting + os.makedirs(out_dir, exist_ok=True) + tblogger = SummaryWriter(out_dir) + + # train + for epoch in range(start_epoch, config.SOLVER.EPOCH): + lr = scheduler.get_last_lr()[0] + print(f'#### Epoch{epoch}, lr: {lr} ####') + tblogger.add_scalar('train/lr', lr, epoch) + + train(model=model, + data_loader=train_loader, + optimizer=optimizer, + loss_config=config.LOSS, + epoch=epoch, + device=config.DEVICE, + tblogger=tblogger) + test(model=model, + data_loader=test_loader, + epoch=epoch, + device=config.DEVICE, + tblogger=tblogger, + threshold_edge=config.TEST.THRESHOLD_EDGE) + scheduler.step() + + # save + ckpt = {'epoch': epoch + 1, + 'model_state_dict': model.state_dict(), + 'optimizer_state_dict': optimizer.state_dict(), + 'scheduler_state_dict': scheduler.state_dict()} + torch.save(ckpt, os.path.join(out_dir, f"snapshot.ckpt")) + if (epoch + 1) % config.SOLVER.SAVE_INTERVAL == 0: + torch.save(ckpt, os.path.join(out_dir, f"epoch_{epoch + 1}.ckpt")) + + +if __name__ == "__main__": + args = parse_args() + + # load config + config = load_config(args.config, update_dotlist=args.opts) + print_config(config) + + if args.out_dir is not None: + out_dir = args.out_dir + else: + out_dir = config.OUTPUT_DIR + + main(config, out_dir, args.resume) diff --git a/tools/visualize_nyu2_test_gt.py b/tools/visualize_nyu2_test_gt.py new file mode 100644 index 0000000..4c9c833 --- /dev/null +++ 
b/tools/visualize_nyu2_test_gt.py @@ -0,0 +1,73 @@ +import argparse +import os + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from omegaconf import DictConfig +from PIL import Image +from tqdm import tqdm + +from src.utils import load_config + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description='Visualize Test GT') + parser.add_argument('--outdir', type=str, default='vis_gt', help='Output directory') + parser.add_argument('--config', type=str, default=None) + parser.add_argument('--debug', action='store_true') + return parser.parse_args() + + +def change_scale(depth: Image.Image, scale_size_min=240, mode=Image.NEAREST): + w, h = depth.size + if w < h: + ow = scale_size_min + oh = int(ow * h / w) + else: + oh = scale_size_min + ow = int(oh * w / h) + return depth.resize((ow, oh), mode) + + +def center_crop(depth: Image.Image, size=(304, 228)): + center_crop_w, center_crop_h = size + w1, h1 = depth.size + x1 = int(round((w1 - center_crop_w) / 2.)) + y1 = int(round((h1 - center_crop_h) / 2.)) + return depth.crop((x1, y1, x1 + center_crop_w, y1 + center_crop_h)) + + +def main(config: DictConfig, + outdir: str = './visualized_gt', + debug: bool = False): + os.makedirs(outdir, exist_ok=False) + test_df = pd.read_csv(config.DATASET.TEST_CSV, header=None, + names=['image', 'depth']) + + for i, row in tqdm(test_df.iterrows(), total=len(test_df)): + depth = Image.open(row['depth']) + depth1 = change_scale(depth, + scale_size_min=config.DATA.SCALE_SIZE_MIN, + mode=Image.NEAREST) + depth2 = center_crop(depth1, + size=config.DATA.CENTER_CROP_SIZE) + + depth_array = np.array(depth2) + plt.imshow(depth_array) + plt.axis('off') + plt.savefig(os.path.join(outdir, f'vis_gt_{i:05}.jpg'), + bbox_inches='tight', pad_inches=0) + plt.close() + + if debug: + print('### DEBUG MODE ###') + print(depth.size, depth1.size, depth2.size, sep='\n') + print('exit.') + break + + +if __name__ == '__main__': + args = parse_args() + config = load_config(args.config) + main(config, outdir=args.outdir, debug=args.debug)
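
As a quick sanity check of the preprocessing in `visualize_nyu2_test_gt.py`, here is a minimal sketch of how `change_scale` and `center_crop` transform one depth map. It is illustrative only: it assumes the helpers are importable from `tools/` at the project root, uses the functions' default sizes (shorter side 240, crop 304x228), and takes 640x480 as the native NYU v2 frame size.

```python
from PIL import Image

# Assumption: tools/ is importable from the project root.
from tools.visualize_nyu2_test_gt import change_scale, center_crop

# Stand-in for a single NYU v2 depth map (native resolution 640x480).
depth = Image.new('I', (640, 480))

# Resize so the shorter side becomes 240 px; NEAREST avoids interpolating depth values.
depth_scaled = change_scale(depth, scale_size_min=240, mode=Image.NEAREST)   # -> 320x240

# Center-crop to 304x228, the crop size used for the test images.
depth_cropped = center_crop(depth_scaled, size=(304, 228))                   # -> 304x228

print(depth_scaled.size, depth_cropped.size)   # (320, 240) (304, 228)
```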