This repository has been archived by the owner on Jun 25, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 552
/
ssd_layers.py
181 lines (162 loc) · 6.56 KB
/
ssd_layers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
"""Some special purpose layers for SSD."""
import keras.backend as K
from keras.engine.topology import InputSpec
from keras.engine.topology import Layer
import numpy as np
import tensorflow as tf
class Normalize(Layer):
    """L2 normalization over the channel axis with a trainable scale.

    Follows the normalization layer described in the ParseNet paper.

    # Arguments
        scale: Initial value used for every entry of the scale vector.

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if dim_ordering='tf'.

    # Output shape
        Same as input

    # References
        http://cs.unc.edu/~wliu/papers/parsenet.pdf

    #TODO
        Add possibility to have one scale for all features.
    """

    def __init__(self, scale, **kwargs):
        # The channel axis is last for 'tf' ordering and first (after the
        # batch axis) otherwise.
        self.axis = 3 if K.image_dim_ordering() == 'tf' else 1
        self.scale = scale
        super(Normalize, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the scale vector, one entry per channel of the input."""
        self.input_spec = [InputSpec(shape=input_shape)]
        num_channels = input_shape[self.axis]
        gamma_init = self.scale * np.ones((num_channels,))
        gamma_name = '{}_gamma'.format(self.name)
        self.gamma = K.variable(gamma_init, name=gamma_name)
        # Register gamma so that it is updated during training.
        self.trainable_weights = [self.gamma]

    def call(self, x, mask=None):
        """Return `x` L2-normalized along the channel axis, times gamma."""
        return K.l2_normalize(x, self.axis) * self.gamma
class PriorBox(Layer):
    """Generate the prior boxes of designated sizes and aspect ratios.

    # Arguments
        img_size: Size of the input image as tuple (w, h).
        min_size: Minimum box size in pixels.
        max_size: Maximum box size in pixels.
        aspect_ratios: List of aspect ratios of boxes.
        flip: Whether to consider reverse aspect ratios.
        variances: List of variances for x, y, w, h; either a single
            value shared by all four or exactly four values.
        clip: Whether to clip the prior's coordinates
            such that they are within [0, 1].

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if dim_ordering='tf'.

    # Output shape
        3D tensor with shape:
        (samples, num_boxes, 8)
        The last axis holds xmin, ymin, xmax, ymax (normalized by the
        image size) followed by the four variances.

    # References
        https://arxiv.org/abs/1512.02325

    #TODO
        Add possibility not to have variances.
        Add Theano support
    """

    # NOTE: the list default for `variances` is kept for interface
    # stability; it is never mutated because np.array() copies it.
    def __init__(self, img_size, min_size, max_size=None, aspect_ratios=None,
                 flip=True, variances=[0.1], clip=True, **kwargs):
        # Positions of the width and height axes in the 4D input shape.
        if K.image_dim_ordering() == 'tf':
            self.waxis = 2
            self.haxis = 1
        else:
            self.waxis = 3
            self.haxis = 2
        self.img_size = img_size
        if min_size <= 0:
            raise Exception('min_size must be positive.')
        self.min_size = min_size
        self.max_size = max_size
        # The first 1.0 is the min_size square. A second 1.0 is added only
        # when max_size is given and stands for the
        # sqrt(min_size * max_size) square built in call().
        self.aspect_ratios = [1.0]
        if max_size:
            if max_size < min_size:
                raise Exception('max_size must be greater than min_size.')
            self.aspect_ratios.append(1.0)
        if aspect_ratios:
            for ar in aspect_ratios:
                # Skip ratios that are already present (including 1.0).
                if ar in self.aspect_ratios:
                    continue
                self.aspect_ratios.append(ar)
                if flip:
                    self.aspect_ratios.append(1.0 / ar)
        self.variances = np.array(variances)
        # Store the caller's choice so that clip=False disables clipping.
        self.clip = clip
        super(PriorBox, self).__init__(**kwargs)

    def get_output_shape_for(self, input_shape):
        """Return (samples, num_boxes, 8) for the given input shape."""
        num_priors_ = len(self.aspect_ratios)
        layer_width = input_shape[self.waxis]
        layer_height = input_shape[self.haxis]
        num_boxes = num_priors_ * layer_width * layer_height
        return (input_shape[0], num_boxes, 8)

    def call(self, x, mask=None):
        """Build the prior box tensor for the feature map `x`.

        Only the static spatial shape and the batch size of `x` are
        used; its values are ignored.
        """
        if hasattr(x, '_keras_shape'):
            input_shape = x._keras_shape
        elif hasattr(K, 'int_shape'):
            input_shape = K.int_shape(x)
        else:
            # Fail with a clear message instead of an unbound local.
            raise Exception('Cannot determine the static shape of the input.')
        layer_width = input_shape[self.waxis]
        layer_height = input_shape[self.haxis]
        img_width = self.img_size[0]
        img_height = self.img_size[1]

        # define prior boxes shapes
        box_widths = []
        box_heights = []
        for ar in self.aspect_ratios:
            if ar == 1 and not box_widths:
                # First square box: side equals min_size.
                box_widths.append(self.min_size)
                box_heights.append(self.min_size)
            elif ar == 1:
                # Second square box (present only when max_size is set).
                side = np.sqrt(self.min_size * self.max_size)
                box_widths.append(side)
                box_heights.append(side)
            else:
                box_widths.append(self.min_size * np.sqrt(ar))
                box_heights.append(self.min_size / np.sqrt(ar))
        # Keep half extents: they are subtracted from / added to centers.
        box_widths = 0.5 * np.array(box_widths)
        box_heights = 0.5 * np.array(box_heights)

        # define centers of prior boxes
        # Cast to float so that integer sizes are not truncated by
        # Python 2 integer division.
        step_x = float(img_width) / layer_width
        step_y = float(img_height) / layer_height
        linx = np.linspace(0.5 * step_x, img_width - 0.5 * step_x,
                           layer_width)
        liny = np.linspace(0.5 * step_y, img_height - 0.5 * step_y,
                           layer_height)
        centers_x, centers_y = np.meshgrid(linx, liny)
        centers_x = centers_x.reshape(-1, 1)
        centers_y = centers_y.reshape(-1, 1)

        # define xmin, ymin, xmax, ymax of prior boxes
        num_priors_ = len(self.aspect_ratios)
        prior_boxes = np.concatenate((centers_x, centers_y), axis=1)
        # Each row becomes (cx, cy, cx, cy) repeated once per prior.
        prior_boxes = np.tile(prior_boxes, (1, 2 * num_priors_))
        prior_boxes[:, ::4] -= box_widths
        prior_boxes[:, 1::4] -= box_heights
        prior_boxes[:, 2::4] += box_widths
        prior_boxes[:, 3::4] += box_heights
        # Normalize x coordinates by image width, y by image height.
        prior_boxes[:, ::2] /= img_width
        prior_boxes[:, 1::2] /= img_height
        prior_boxes = prior_boxes.reshape(-1, 4)
        if self.clip:
            prior_boxes = np.minimum(np.maximum(prior_boxes, 0.0), 1.0)

        # define variances
        num_boxes = len(prior_boxes)
        if len(self.variances) == 1:
            variances = np.ones((num_boxes, 4)) * self.variances[0]
        elif len(self.variances) == 4:
            variances = np.tile(self.variances, (num_boxes, 1))
        else:
            raise Exception('Must provide one or four variances.')
        prior_boxes = np.concatenate((prior_boxes, variances), axis=1)

        prior_boxes_tensor = K.expand_dims(K.variable(prior_boxes), 0)
        if K.backend() == 'tensorflow':
            # Repeat the priors along the batch axis to match the input.
            pattern = [tf.shape(x)[0], 1, 1]
            prior_boxes_tensor = tf.tile(prior_boxes_tensor, pattern)
        elif K.backend() == 'theano':
            #TODO
            pass
        return prior_boxes_tensor