Skip to content

Commit

Permalink
added dropout to layers, didn't seem to help, might need more epochs
Browse files Browse the repository at this point in the history
  • Loading branch information
goldbattle committed Mar 15, 2022
1 parent 3161198 commit abf7b1b
Show file tree
Hide file tree
Showing 9 changed files with 83 additions and 69 deletions.
4 changes: 3 additions & 1 deletion ReadMe.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ Right now there are 8908 images in the [files_trainable](https://github.com/comm
It seems to perform ok after >20 epochs, but it still struggles with fine detail.
Training started at 4:53pm on March 13, 2022 and reached epoch 33 at 8:55pm (7 minutes per epoch) on a 1080Ti card.
It would be interesting to perform evaluation only on "confident" network returns.
Average loss of 0.0694 on test and 0.0549 on training data after 100 epochs
Average loss of 0.0694 on test and 0.0549 on training data after 100 epochs.
If dropout is used, the average loss is 0.1060 on test and 0.0960 on training data after 100 epochs.


Input picture (left), groundtruth (top right), and prediction (bottom right)
![](docs/example_pred.png)
Expand Down
Binary file modified docs/example_pred.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/example_probs.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion src/data/Comma10kDataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ Comma10kDataset::Comma10kDataset(std::string pathroot, ModeDataSplit mode, bool

// Random order (ensure same random shuffle on both)
// https://stackoverflow.com/a/16968342
if (randomize) {
if (mode == Comma10kDataset::ModeDataSplit::kTrain && randomize) {
unsigned int seed = std::time(NULL);
std::srand(seed);
std::random_shuffle(paths_rgb.begin(), paths_rgb.end());
Expand Down
125 changes: 65 additions & 60 deletions src/net_seg_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,12 @@ int main(int argc, char *argv[]) {

// Finally convert it to a unique pointer dataloader
auto dataset_mapped = dataset.map(torch::data::transforms::Stack<>());
auto data_loader = torch::data::make_data_loader(std::move(dataset_mapped), torch::data::DataLoaderOptions().batch_size(1).workers(6));
auto sampler = torch::data::samplers::SequentialSampler(dataset.size().value());
auto options = torch::data::DataLoaderOptions().enforce_ordering(true).batch_size(1).workers(10);
auto data_loader = torch::data::make_data_loader(std::move(dataset_mapped), sampler, options);

// Loop through our batches of training data
bool visualize = true;
double loss_sum = 0.0;
size_t loss_ct = 0;
size_t batch_idx = 0;
Expand Down Expand Up @@ -108,66 +111,68 @@ int main(int argc, char *argv[]) {
std::cout << items_curr << "/" << items_total << " | loss = " << loss.item<float>() << " | loss_avg = " << loss_avg << " (" << loss_ct
<< " samples)" << std::endl;

// Softmax the output to get our total class probabilities [N, classes, H, W]
// Thus across all classes, our probabilities should sum to 1
auto output_probs = torch::softmax(output, 1);

// Plot the first image, need to change to opencv format [H,W,C]
// Note that we arg max the softmax network output, then need to add a dimension
// We scale up the 0..1 range back to the 0..255 that opencv expects (later cast to int)
torch::Tensor cv_input = 255.0 * batch.data[0].permute({1, 2, 0}).clone().cpu();
torch::Tensor cv_label = batch.target[0].permute({1, 2, 0}).clone().cpu();
torch::Tensor cv_output = torch::unsqueeze(output_probs[0].argmax(0), 0).permute({1, 2, 0}).clone().cpu();

// Convert them all to 0..255 ranges
cv_input = cv_input.to(torch::kInt8);
cv_label = cv_label.to(torch::kInt8);
cv_output = cv_output.to(torch::kInt8);

// Point the cv::Mats to the transformed locations in memory
cv::Mat img_input(cv::Size((int)cv_input.size(1), (int)cv_input.size(0)), CV_8UC3, cv_input.data_ptr<int8_t>());
cv::Mat img_label(cv::Size((int)cv_label.size(1), (int)cv_label.size(0)), CV_8UC1, cv_label.data_ptr<int8_t>());
cv::Mat img_output(cv::Size((int)cv_output.size(1), (int)cv_output.size(0)), CV_8UC1, cv_output.data_ptr<int8_t>());

// Convert labeled images to color
cv::cvtColor(img_label, img_label, cv::COLOR_GRAY2BGR);
cv::cvtColor(img_output, img_output, cv::COLOR_GRAY2BGR);
// img_label = 255.0 / (double)n_classes * img_label;
// img_output = 255.0 / (double)n_classes * img_output;

// Change both to be colored like the comma10k
img_label.forEach<cv::Vec3b>([&](cv::Vec3b &px, const int *pos) -> void { px = dataset.map_id2hex[(char)px[0]]; });
img_output.forEach<cv::Vec3b>([&](cv::Vec3b &px, const int *pos) -> void { px = dataset.map_id2hex[(char)px[0]]; });

// Finally stack and display in a window
cv::Mat outimg1, outimg2, outimg3;
cv::hconcat(img_input, img_label, outimg1);
cv::hconcat(img_input, img_output, outimg2);
cv::vconcat(outimg1, outimg2, outimg3);
cv::imshow("prediction", outimg3);

// Next we will visualize our probability distributions [N, classes, H, W]
torch::Tensor cv_probs = output_probs[0].clone().cpu();
cv_probs = cv_probs.to(torch::kFloat32);
cv::Mat outimg4 = cv::Mat(cv::Size(n_classes * (int)cv_input.size(1), (int)cv_input.size(0)), CV_8UC3, cv::Scalar(0, 0, 0));
assert((size_t)output_probs.size(0) == 1);
assert((size_t)cv_probs.size(0) == n_classes);
for (int n = 0; n < (int)n_classes; n++) {
cv::Mat imgtmp(cv::Size((int)cv_probs.size(2), (int)cv_probs.size(1)), CV_32FC1, cv_probs[n].data_ptr<float>());
imgtmp = 255 * imgtmp;
imgtmp.convertTo(imgtmp, CV_8UC1);
cv::Mat imgtmp_color;
cv::applyColorMap(imgtmp, imgtmp_color, cv::COLORMAP_JET);
imgtmp_color.copyTo(outimg4(cv::Rect(n * (int)cv_input.size(1), 0, imgtmp.cols, imgtmp.rows)));
// Visualize if we need to
if (visualize) {
// Softmax the output to get our total class probabilities [N, classes, H, W]
// Thus across all classes, our probabilities should sum to 1
auto output_probs = torch::softmax(output, 1);

// Plot the first image, need to change to opencv format [H,W,C]
// Note that we arg max the softmax network output, then need to add a dimension
// We scale up the 0..1 range back to the 0..255 that opencv expects (later cast to int)
torch::Tensor cv_input = 255.0 * batch.data[0].permute({1, 2, 0}).clone().cpu();
torch::Tensor cv_label = batch.target[0].permute({1, 2, 0}).clone().cpu();
torch::Tensor cv_output = torch::unsqueeze(output_probs[0].argmax(0), 0).permute({1, 2, 0}).clone().cpu();

// Convert them all to 0..255 ranges
cv_input = cv_input.to(torch::kInt8);
cv_label = cv_label.to(torch::kInt8);
cv_output = cv_output.to(torch::kInt8);

// Point the cv::Mats to the transformed locations in memory
cv::Mat img_input(cv::Size((int)cv_input.size(1), (int)cv_input.size(0)), CV_8UC3, cv_input.data_ptr<int8_t>());
cv::Mat img_label(cv::Size((int)cv_label.size(1), (int)cv_label.size(0)), CV_8UC1, cv_label.data_ptr<int8_t>());
cv::Mat img_output(cv::Size((int)cv_output.size(1), (int)cv_output.size(0)), CV_8UC1, cv_output.data_ptr<int8_t>());

// Convert labeled images to color
cv::cvtColor(img_label, img_label, cv::COLOR_GRAY2BGR);
cv::cvtColor(img_output, img_output, cv::COLOR_GRAY2BGR);
// img_label = 255.0 / (double)n_classes * img_label;
// img_output = 255.0 / (double)n_classes * img_output;

// Change both to be colored like the comma10k
img_label.forEach<cv::Vec3b>([&](cv::Vec3b &px, const int *pos) -> void { px = dataset.map_id2hex[(char)px[0]]; });
img_output.forEach<cv::Vec3b>([&](cv::Vec3b &px, const int *pos) -> void { px = dataset.map_id2hex[(char)px[0]]; });

// Finally stack and display in a window
cv::Mat outimg1, outimg2, outimg3;
cv::hconcat(img_input, img_label, outimg1);
cv::hconcat(img_input, img_output, outimg2);
cv::vconcat(outimg1, outimg2, outimg3);
cv::imshow("prediction", outimg3);

// Next we will visualize our probability distributions [N, classes, H, W]
torch::Tensor cv_probs = output_probs[0].clone().cpu();
cv_probs = cv_probs.to(torch::kFloat32);
cv::Mat outimg4 = cv::Mat(cv::Size(n_classes * (int)cv_input.size(1), (int)cv_input.size(0)), CV_8UC3, cv::Scalar(0, 0, 0));
assert((size_t)output_probs.size(0) == 1);
assert((size_t)cv_probs.size(0) == n_classes);
for (int n = 0; n < (int)n_classes; n++) {
cv::Mat imgtmp(cv::Size((int)cv_probs.size(2), (int)cv_probs.size(1)), CV_32FC1, cv_probs[n].data_ptr<float>());
imgtmp = 255 * imgtmp;
imgtmp.convertTo(imgtmp, CV_8UC1);
cv::Mat imgtmp_color;
cv::applyColorMap(imgtmp, imgtmp_color, cv::COLORMAP_JET);
imgtmp_color.copyTo(outimg4(cv::Rect(n * (int)cv_input.size(1), 0, imgtmp.cols, imgtmp.rows)));
}
cv::imshow("uncertainties", outimg4);
cv::waitKey(100);

// Save to file for readme
// cv::imwrite("/home/patrick/github/segnet/docs/example_pred.png", outimg3);
// cv::imwrite("/home/patrick/github/segnet/docs/example_probs.png", outimg4);
// std::exit(EXIT_FAILURE);
}
cv::imshow("uncertainties", outimg4);
cv::waitKey(100);

// Save to file for readme
// cv::imwrite("/home/patrick/github/segnet/docs/example_pred.png", outimg3);
// cv::imwrite("/home/patrick/github/segnet/docs/example_probs.png", outimg4);
// std::exit(EXIT_FAILURE);

batch_idx++;
}
}
6 changes: 4 additions & 2 deletions src/net_seg_train.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ int main() {

// Finally convert it to a unique pointer dataloader
auto dataset_mapped = dataset.map(torch::data::transforms::Stack<>());
auto data_loader = torch::data::make_data_loader(std::move(dataset_mapped), torch::data::DataLoaderOptions().batch_size(5).workers(30));
auto sampler = torch::data::samplers::RandomSampler(dataset.size().value());
auto options = torch::data::DataLoaderOptions().enforce_ordering(false).batch_size(5).workers(30);
auto data_loader = torch::data::make_data_loader(std::move(dataset_mapped), sampler, options);

// Create the optimizer
// torch::optim::SGD optimizer(model->parameters(), torch::optim::SGDOptions(0.01).momentum(0.5));
Expand Down Expand Up @@ -111,7 +113,7 @@ int main() {
optimizer.step();

// Print out the loss every once in a while
if (batch_idx % 10 == 0) {
if (batch_idx % 100 == 0) {

// Debug printout
size_t items_curr = batch_idx * batch.data.size(0);
Expand Down
8 changes: 6 additions & 2 deletions src/network/blocks/UNetBlocks.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ struct UNetDownwardsImpl : torch::nn::Module {
}

// Forward propagation
torch::Tensor forward(torch::Tensor input) { return conv2(conv1(torch::max_pool2d(input, 2))); }
torch::Tensor forward(torch::Tensor input) {
auto output = conv2(conv1(torch::max_pool2d(input, 2)));
return torch::dropout(output, 0.5, this->is_training());
}

// Parts of the network
// NOTE: for submodules, we call the "empty holder" constructor
Expand Down Expand Up @@ -96,7 +99,8 @@ struct UNetUpwardsImpl : torch::nn::Module {
input = torch::cat({input, bridge}, 1);

// Finally do our convolutions and return
return conv2(conv1(input));
auto output = torch::dropout(input, 0.5, this->is_training());
return conv2(conv1(output));
}

// Parts of the network
Expand Down
1 change: 1 addition & 0 deletions src/network/models/UNetModel.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ struct UNetModelImpl : torch::nn::Module {
// First do our starting two convolutions
x1 = inconv1(input);
x1 = inconv2(x1);
x1 = torch::dropout(x1, 0.25, this->is_training());

// Downscale to the bottleneck
x2 = down1(x1);
Expand Down
6 changes: 3 additions & 3 deletions src/utils/augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,9 @@ inline void random_camera_model(cv::Mat &cv_rgb, cv::Mat &cv_label) {
cam.at<float>(2, 1) = 0.0f;
cam.at<float>(2, 2) = 1.0f;
cv::Mat dist(5, 1, cv::DataType<float>::type);
dist.at<float>(0, 0) = 0.1 * unif_pn(rng);
dist.at<float>(1, 0) = 0.05 * unif_pn(rng);
dist.at<float>(2, 0) = 1e-3 * unif_pn(rng);
dist.at<float>(0, 0) = 0.20 * unif_pn(rng);
dist.at<float>(1, 0) = 0.10 * unif_pn(rng);
dist.at<float>(2, 0) = 1e-2 * unif_pn(rng);
dist.at<float>(3, 0) = 1e-4 * unif_pn(rng);
dist.at<float>(4, 0) = 1e-5 * unif_pn(rng);

Expand Down

0 comments on commit abf7b1b

Please sign in to comment.