# train_config_unet3D.yaml (forked from Sherry-SR/edgeDL)
# use a fixed random seed so that two runs of the code produce the same outcome
manual_seed: 0
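# Sketch (assumption, not from this repo): full reproducibility usually means
# seeding every RNG in play, roughly
#   import random, numpy as np, torch
#   random.seed(0); np.random.seed(0); torch.manual_seed(0)
# GPU kernels may still be nondeterministic unless cuDNN is put in
# deterministic mode.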
# model configuration
model:
  # model class
  name: UNet3D
  # number of input channels to the model
  in_channels: 1
  # number of output channels
  out_channels: 6
  # determines the order of operators in a single layer (crg - Conv3d+ReLU+GroupNorm)
  layer_order: crg
  # feature maps scale factor
  f_maps: 32
  # number of groups in the groupnorm
  num_groups: 8
  # apply element-wise nn.Sigmoid after the final 1x1 convolution, otherwise apply nn.Softmax
  final_sigmoid: false
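  # Sketch (assumption): in typical 3D U-Net implementations final_sigmoid
  # picks the activation applied after the last 1x1 convolution, e.g.
  #   import torch.nn as nn
  #   final_activation = nn.Sigmoid() if final_sigmoid else nn.Softmax(dim=1)
  # With out_channels: 6 and final_sigmoid: false, predictions are a
  # channel-wise softmax over the 6 classes.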
# trainer configuration
trainer:
  # path to the checkpoint directory
  checkpoint_dir: /mnt/lustre/shenrui/project/edgeDL/checkpoints/pelvis/unet3d
  # path to the latest checkpoint; if provided, training resumes from that checkpoint
  resume: null
  # how many iterations between validations
  validate_after_iters: 20
  # how many iterations between tensorboard logging events
  log_after_iters: 20
  # how many iterations to evaluate during each validation run
  validate_iters: 20
  # max number of epochs
  epochs: 50
  # max number of iterations
  iters: 10000
  # a model with a higher eval score is considered better
  eval_score_higher_is_better: true
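  # Sketch (assumption): a typical way this flag is consumed when deciding
  # whether to save a new best checkpoint:
  #   is_best = (score > best_score) if eval_score_higher_is_better \
  #             else (score < best_score)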
# optimizer configuration
optimizer:
  # initial learning rate
  learning_rate: 0.0001
  # weight decay
  weight_decay: 0.0001
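  # Sketch (assumption): the optimizer class is not named in this config; if it
  # is Adam (a common default for U-Net training), these values map to
  #   import torch
  #   optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=0.0001)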
# loss function configuration
loss:
  # loss function to be used during training
  name: DiceLoss
  # a manual rescaling weight given to each class
  loss_weight: null
  # a target value that is ignored and does not contribute to the input gradient
  ignore_index: null
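  # Note (assumption): DiceLoss conventionally minimises 1 - Dice, with
  #   Dice = 2 * |P n T| / (|P| + |T|)
  # computed per channel between the predicted probabilities P and the target T,
  # so a perfect overlap gives loss 0.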
# evaluation metric configuration
eval_metric:
  name: DiceCoefficient
  # a target label that is ignored during metric evaluation
  ignore_index: null
# learning rate scheduler configuration
lr_scheduler:
  name: MultiStepLR
  milestones: [10, 30, 60]
  gamma: 0.2
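  # Sketch: in PyTorch this corresponds to
  #   from torch.optim.lr_scheduler import MultiStepLR
  #   scheduler = MultiStepLR(optimizer, milestones=[10, 30, 60], gamma=0.2)
  # i.e. the learning rate is multiplied by 0.2 at each milestone; assuming the
  # scheduler steps once per epoch, the last milestone is never reached with
  # epochs: 50.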
# data loaders configuration
loaders:
  # train patch size given to the network (adapt to fit your GPU memory; generally, the bigger the patch the better)
  train_patch: [64, 128, 128]
  # train stride between patches
  train_stride: [16, 32, 32]
  # validation patch size (can be bigger than the train patch since there is no backprop)
  val_patch: [64, 128, 128]
  # validation stride (validation patches don't need to overlap)
  val_stride: [64, 128, 128]
  # clip intensity values to this range
  clip_val: [-1000, 2000]
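  # Note: with a stride smaller than the patch size, training patches overlap;
  # along an axis of length L the number of patch positions is
  #   floor((L - patch) / stride) + 1
  # e.g. a 256-voxel axis with patch 128 and stride 32 gives (256-128)/32 + 1 = 5.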
  # paths to the training datasets
  train_path:
    - '/mnt/lustre/shenrui/data/pelvis_resampled/dataset_train_temp.txt'
  # paths to the validation datasets
  val_path:
    - '/mnt/lustre/shenrui/data/pelvis_resampled/dataset_val_temp.txt'
  # how many subprocesses to use for data loading
  num_workers: 8
  # batch size used during training
  batch_size: 1
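  # Sketch (assumption; the dataset object is hypothetical): these values
  # typically feed torch.utils.data.DataLoader, e.g.
  #   from torch.utils.data import DataLoader
  #   loader = DataLoader(train_dataset, batch_size=1, num_workers=8, shuffle=True)
  # batch_size: 1 is common for 3D patches of this size, which dominate GPU memory.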
  # data transformations/augmentations
  transformer:
    train:
      raw:
        - name: ClipNormalize
        - name: RandomFlip
        - name: RandomRotate90
        - name: RandomRotate
          # rotate in the ZY plane only
          axes: [[2, 1]]
          angle_spectrum: 15
          mode: reflect
        - name: ElasticDeformation
          spline_order: 3
        - name: ToTensor
          expand_dims: true
      label:
        - name: RandomFlip
        - name: RandomRotate90
        - name: RandomRotate
          # rotate in the ZY plane only
          axes: [[2, 1]]
          angle_spectrum: 15
          mode: reflect
        - name: ElasticDeformation
          spline_order: 0
        - name: ToTensor
          expand_dims: false
          dtype: 'long'
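      # note: the label pipeline mirrors the geometric transforms of the raw
      # pipeline so that image and mask stay aligned; spline_order: 0
      # (nearest-neighbour) keeps labels integer-valued, while the raw image
      # uses spline_order: 3 for smooth interpolation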
    test:
      raw:
        - name: ClipNormalize
        - name: ToTensor
          expand_dims: true
      label:
        - name: ToTensor
          expand_dims: false
          dtype: 'long'
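# Sketch (assumption): a minimal way to consume this file with PyYAML:
#   import yaml
#   with open('train_config_unet3D.yaml') as f:
#       config = yaml.safe_load(f)
#   print(config['model']['name'])  # -> UNet3D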