// opencv_realtime_webcam_human_segmentation.cpp
/*
Example shows real-time segmentation of the human ('person') class from PASCAL VOC.
The network outputs the probability of each pixel belonging to the person class.
These probabilities are then used as a transparency mask for the input image.
The final fused image is displayed in the application window.
*/
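
// Prerequisites implied by the code below: the ATen library and the pytorch.cpp wrapper
// (included below), OpenCV with camera and GUI support, a CUDA-capable GPU, a webcam
// available as device 0, and the converted weight file resnet34_fcn_pascal.h5 located
// one directory above the working directory.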
#include "ATen/ATen.h"
#include "ATen/Type.h"
#include <map>
#include <opencv2/opencv.hpp>
#include <pytorch.cpp>
using namespace at;
using std::map;
using std::string;
using namespace cv;
int main()
{
    // Structure the project in a better way
    // Add correct linking to OpenCV on the local machine
    // Get the build running on the laptop for demo
    // Upload all the transferred models
    // -----
    // * Should we convert the resnet 50 and 101?
    // * we don't have any segmentation models trained using them
    // * maybe only to make the framework more complete? (check)
    // * Make the classification demo?
    // * need to put a softmax on top -- should be very easy
    // * need a dict with number --> class name mapping (check)
    // * Structure the whole project (check)
    // * write docs on how to build it
    // * write missing parts -- good for future contributions
    // * Write the dataloaders for the new surgical datasets
    // * start the training
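
    // Build the fully convolutional ResNet-34 model pretrained for PASCAL VOC
    // segmentation (21 classes), load the converted weights from disk and move it to the GPU.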
    auto net = torch::resnet34_8s_pascal_voc();
    net->load_weights("../resnet34_fcn_pascal.h5");
    net->cuda();

    VideoCapture cap(0); // open the default camera

    if(!cap.isOpened()) // check if we succeeded
        return -1;

    Mat frame;
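
    // Main processing loop: grab a webcam frame, segment it and display the fused result.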
    for(;;)
    {
        cap >> frame;

        // BGR to RGB, which is what our network was trained on
        cvtColor(frame, frame, COLOR_BGR2RGB);

        // Downscale by half in each dimension (aspect ratio is preserved);
        // comment this out to run on the whole input image.
        resize(frame, frame, Size(0, 0), 0.5, 0.5, INTER_LINEAR);

        // Convert OpenCV's Mat (0-255 values) to a height x width x 3 float tensor
        // in the 0-1 range
        auto image_tensor = torch::convert_opencv_mat_image_to_tensor(frame).toType(CPU(kFloat)) / 255;

        auto output_height = image_tensor.size(0);
        auto output_width = image_tensor.size(1);

        // Reshape the image into a 1 x 3 x height x width batch
        auto image_batch_tensor = torch::convert_image_to_batch(image_tensor);

        // Subtract the mean and divide by the standard deviation
        auto image_batch_normalized_tensor = torch::preprocess_batch(image_batch_tensor);

        auto input_tensor_gpu = image_batch_normalized_tensor.toBackend(Backend::CUDA);
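
        // Forward pass: produces per-pixel scores (logits) for the 21 PASCAL VOC classes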
        auto full_prediction = net->forward(input_tensor_gpu);

        // This is necessary to correctly apply softmax:
        // the last dimension should represent the logits
        auto full_prediction_flattened = full_prediction.squeeze(0)
                                                        .view({21, -1})
                                                        .transpose(0, 1);

        // Convert logits to probabilities
        auto softmaxed = torch::softmax(full_prediction_flattened).transpose(0, 1);

        // Class 15 is 'person' in PASCAL VOC
        auto layer = softmaxed[15].contiguous().view({output_height, output_width, 1}).toBackend(Backend::CPU);
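
        // 'layer' is a single-channel per-pixel probability of 'person'; expand() below
        // broadcasts it across the 3 color channels so it can act as an alpha mask.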
        // Fuse the prediction probabilities and the actual image to form a masked image
        auto masked_image = ( image_tensor * layer.expand({output_height, output_width, 3}) ) * 255;

        // Convert the tensor to 8-bit values and wrap it in an OpenCV Mat (no copy)
        auto layer_cpu = masked_image.toType(CPU(kByte));
        auto converted = Mat(output_height, output_width, CV_8UC3, layer_cpu.data_ptr());

        // OpenCV expects BGR rather than RGB
        cvtColor(converted, converted, COLOR_RGB2BGR);

        imshow("Masked image", converted);

        // Display the frame for up to 30 ms; exit when any key is pressed
        if(waitKey(30) >= 0) break;
    }

    return 0;
}