@@ -25,8 +25,15 @@ def __init__(self, network_name="mapillary", image_size=512, nb_channels=3,
2525 shape = [None , self ._nb_labels ])
2626 if netsize == "small" :
2727 self .add_layers_3_1 ()
28- else :
28+ elif netsize == "medium" :
2929 self .add_layers_6_2 ()
30+ elif netsize == "vgg" :
31+ self .add_vgg_layers ()
32+ elif netsize == "inception" :
33+ self .add_inception_layers ()
34+ else :
35+ utils .logger .error ("Unsupported network." )
36+ sys .exit (1 )
3037 self .compute_loss ()
3138 self .optimize ()
3239 self ._cm = self .compute_dashboard (self ._Y , self ._Y_pred )
@@ -112,6 +119,121 @@ def add_layers_6_2(self):
112119 layer = self .fullyconnected_layer (2 , self ._is_training , layer , 1024 , 512 , self ._dropout )
113120 return self .output_layer (layer , 512 )
114121
def add_vgg_layers(self):
    """Stack a VGG-like convolutional network on top of the input images `self._X`.

    The architecture is inspired by VGG-net (Simonyan & Zisserman, Very Deep
    Convolutional Networks for Large-Scale Image Recognition, arXiv technical report,
    2014); it is not necessarily the *same* structure, as the input shape may differ.

    Five stages are chained; each stage is one or two 3*3 convolutions followed by a
    max-pooling layer called with arguments (2, 2). A 1024-unit fully-connected layer and
    the output layer come last.

    Returns
    -------
    tensor
        Value returned by `self.output_layer`, *i.e.* the model predictions
    """
    # One entry per stage: (input depth, output depth) of each convolution of the stage
    stages = (
        ((self._nb_channels, 64),),
        ((64, 128),),
        ((128, 256), (256, 256)),
        ((256, 512), (512, 512)),
        ((512, 512), (512, 512)),
    )
    tensor = self._X
    conv_id = 0
    for pool_id, stage in enumerate(stages, start=1):
        for depth_in, depth_out in stage:
            # Convolution IDs run continuously (1 to 8) across the stages
            conv_id += 1
            tensor = self.convolutional_layer(conv_id, self._is_training, tensor,
                                              depth_in, 3, depth_out)
        tensor = self.maxpooling_layer(pool_id, tensor, 2, 2)
    # 32 presumably stands for the cumulated pooling stride (2**5) and 512 for the depth
    # of the last convolution -- to be confirmed against `get_last_conv_layer_dim`
    flattened_dim = self.get_last_conv_layer_dim(32, 512)
    tensor = self.fullyconnected_layer(1, self._is_training, tensor, flattened_dim, 1024,
                                       self._dropout)
    return self.output_layer(tensor, 1024)
151+
def inception_block(self, counter, input_layer, input_depth, depth_1,
                    depth_3_reduce, depth_3, depth_5_reduce, depth_5, depth_pool):
    """Apply an Inception block to `input_layer` (see Szegedy et al, 2014)

    Four branches are computed from the same input and concatenated along the fourth
    axis (`axis=3`):
    - a 1*1 convolution
    - a 1*1 convolution followed by a 3*3 convolution
    - a 1*1 convolution followed by a 5*5 convolution
    - a max-pooling (called with arguments (3, 1)) followed by a 1*1 convolution

    Parameters
    ----------
    counter : integer or string
        Inception block ID; it is converted with `str()` and prefixed with "i" to build
        the IDs passed to the underlying layers
    input_layer : tensor
        Input layer that has to be transformed in the Inception block
    input_depth : integer
        Input layer depth
    depth_1 : integer
        Depth of the 1*1 convolution branch output
    depth_3_reduce : integer
        Depth of the intermediate 1*1 convolution that precedes the 3*3 convolution
    depth_3 : integer
        Depth of the 3*3 convolution branch output
    depth_5_reduce : integer
        Depth of the intermediate 1*1 convolution that precedes the 5*5 convolution
    depth_5 : integer
        Depth of the 5*5 convolution branch output
    depth_pool : integer
        Depth of the max-pooling branch output (after its 1*1 convolution)

    Returns
    -------
    tensor
        Concatenation of the four branch outputs
    """
    block_id = "i" + str(counter)
    training = self._is_training

    # Branch 1: plain 1*1 convolution
    branch_1x1 = self.convolutional_layer(block_id + "1", training, input_layer,
                                          input_depth, 1, depth_1)

    # Branch 2: 1*1 depth reduction, then 3*3 convolution
    reduced_3x3 = self.convolutional_layer(block_id + "3a", training, input_layer,
                                           input_depth, 1, depth_3_reduce)
    branch_3x3 = self.convolutional_layer(block_id + "3b", training, reduced_3x3,
                                          depth_3_reduce, 3, depth_3)

    # Branch 3: 1*1 depth reduction, then 5*5 convolution
    reduced_5x5 = self.convolutional_layer(block_id + "5a", training, input_layer,
                                           input_depth, 1, depth_5_reduce)
    branch_5x5 = self.convolutional_layer(block_id + "5b", training, reduced_5x5,
                                          depth_5_reduce, 5, depth_5)

    # Branch 4: max-pooling, then 1*1 convolution (the pooling keeps the input depth)
    pooled = self.maxpooling_layer(block_id, input_layer, 3, 1)
    branch_pool = self.convolutional_layer(block_id + "p", training, pooled,
                                           input_depth, 1, depth_pool)

    return tf.concat([branch_1x1, branch_3x3, branch_5x5, branch_pool], axis=3)
202+
def add_inception_layers(self):
    """Stack an Inception-like convolutional network on top of the input images `self._X`.

    The architecture is inspired by Inception networks (Szegedy et al, Going Deeper with
    Convolutions, arXiv technical report, 2014); it is not necessarily the *same*
    structure, as the input shape may differ.

    The network chains two convolution/max-pooling stages, nine Inception blocks (3a-3b,
    4a-4e, 5a-5b) separated by max-pooling layers, a global average pooling, a dropout and
    the output layer.

    Returns
    -------
    tensor
        Value returned by `self.output_layer`, *i.e.* the model predictions
    """
    # Stem: 7*7 convolution (the trailing 2 is presumably the stride -- to be confirmed
    # against `convolutional_layer`), then 3*3 convolution, each followed by max-pooling
    layer = self.convolutional_layer(1, self._is_training, self._X, self._nb_channels, 7, 64,
                                     2)
    layer = self.maxpooling_layer(1, layer, 3, 2)
    layer = self.convolutional_layer(2, self._is_training, layer, 64, 3, 192)
    layer = self.maxpooling_layer(2, layer, 3, 2)
    # Each block input depth equals the sum of the four branch depths of the previous
    # block (e.g. 64 + 128 + 32 + 32 = 256 after block 3a)
    layer = self.inception_block('3a', layer, 192, 64, 96, 128, 16, 32, 32)
    layer = self.inception_block('3b', layer, 256, 128, 128, 192, 32, 96, 64)
    layer = self.maxpooling_layer(3, layer, 3, 2)
    layer = self.inception_block('4a', layer, 480, 192, 96, 208, 16, 48, 64)
    layer = self.inception_block('4b', layer, 512, 160, 112, 224, 24, 64, 64)
    layer = self.inception_block('4c', layer, 512, 128, 128, 256, 24, 64, 64)
    layer = self.inception_block('4d', layer, 512, 112, 144, 288, 32, 64, 64)
    layer = self.inception_block('4e', layer, 528, 256, 160, 320, 32, 128, 128)
    layer = self.maxpooling_layer(4, layer, 3, 2)
    layer = self.inception_block('5a', layer, 832, 256, 160, 320, 32, 128, 128)
    layer = self.inception_block('5b', layer, 832, 384, 192, 384, 48, 128, 128)
    # Global average pooling over the two spatial axes. A fixed 7*7 "VALID" average
    # pooling is only equivalent when the last feature map is exactly 7*7; for any other
    # image size the following reshape would silently fold the remaining spatial
    # positions into the batch axis. Averaging over axes 1 and 2 gives the same result
    # on a 7*7 map and always yields one 1024-vector per image.
    layer = tf.reduce_mean(layer, axis=[1, 2], name="avg_pool")
    layer = tf.reshape(layer, [-1, 1024])
    layer = tf.nn.dropout(layer, self._dropout, name="final_dropout")
    return self.output_layer(layer, 1024)
236+
115237 def compute_loss (self ):
116238 """Define the loss tensor as well as the optimizer; it uses a decaying
117239 learning rate following the equation
0 commit comments