@@ -64,7 +64,6 @@ def Cluster(X, labels)
     return zip(labels, cluster_labels)
 """
 
-
 def ClusterConv(model_file, train_file, valid_file, ftype, nsamples, outdir):
 
     f = open(model_file + ".pre")
@@ -85,7 +84,7 @@ def ClusterConv(model_file, train_file, valid_file, ftype, nsamples, outdir):
     maxlen = window_size
 
     embedding_dims = 20
-    nb_filters = 50
+    nb_filters = 250
     filter_length = 3
     hidden_dims = 250
 
@@ -96,51 +95,10 @@ def ClusterConv(model_file, train_file, valid_file, ftype, nsamples, outdir):
 
     #y = train_programs
     X_train, y_train, labels = preprocessor.preprocess_traces(train_features, y_data=train_classes, labels=train_programs)
-
-    from keras.preprocessing import sequence
-    from keras.optimizers import RMSprop
-    from keras.models import Sequential
-    from keras.layers.core import Dense, Dropout, Activation, Flatten
-    from keras.layers.embeddings import Embedding
-    from keras.layers.convolutional import Convolution1D, MaxPooling1D
-
-    print('Build model...')
-    new_model = Sequential()
-
-    # we start off with an efficient embedding layer which maps
-    # our vocab indices into embedding_dims dimensions
-    new_model.add(Embedding(max_features, embedding_dims, weights=layers[0]))
-    new_model.add(Dropout(0.25))
-
-    # we add a Convolution1D, which will learn nb_filters
-    # word group filters of size filter_length:
-    new_model.add(Convolution1D(input_dim=embedding_dims,
-                                nb_filter=nb_filters,
-                                filter_length=filter_length,
-                                border_mode="valid",
-                                activation="relu",
-                                subsample_length=1,
-                                weights=layers[2]))
-
-    # we use standard max pooling (halving the output of the previous layer):
-    new_model.add(MaxPooling1D(pool_length=2))
-
-    # We flatten the output of the conv layer, so that we can add a vanilla dense layer:
-    new_model.add(Flatten())
-
-    # Computing the output shape of a conv layer can be tricky;
-    # for a good tutorial, see: http://cs231n.github.io/convolutional-networks/
-    output_size = nb_filters * (((maxlen - filter_length) / 1) + 1) / 2
-
-    # We add a vanilla hidden layer:
-    new_model.add(Dense(output_size, hidden_dims, weights=layers[5]))
-    #new_model.add(Dropout(0.25))
-    #new_model.add(Activation('relu'))
-
-    new_model.compile(loss='mean_squared_error', optimizer='rmsprop')
+    new_model = mk_cnn("test", max_features, maxlen, embedding_dims, nb_filters, filter_length, hidden_dims, None, weights=layers)
 
     train_dict = dict()
-    train_dict[ftype] = new_model._predict(X_train)
+    train_dict[ftype] = new_model.predict(X_train)
 
     model = make_cluster_pipeline_subtraces(ftype)
     X_red_comp = model.fit_transform(train_dict)
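
Note: the mk_cnn helper that replaces the inline model-building code above is not shown anywhere in this diff. The sketch below is a plausible reconstruction inferred from its two call sites ("test" here, "train" further down) and the removed Keras 0.x code, so its exact name handling, signature, and body are assumptions rather than the committed implementation.

    # Plausible reconstruction of mk_cnn (not part of this diff): inferred from
    # the "test" and "train" call sites plus the removed inline Keras 0.x code.
    from keras.models import Sequential
    from keras.layers.core import Dense, Dropout, Activation, Flatten
    from keras.layers.embeddings import Embedding
    from keras.layers.convolutional import Convolution1D, MaxPooling1D

    def mk_cnn(mode, max_features, maxlen, embedding_dims, nb_filters,
               filter_length, hidden_dims, nb_classes, weights=None):
        model = Sequential()
        # embedding layer maps vocab indices into embedding_dims dimensions
        model.add(Embedding(max_features, embedding_dims,
                            weights=weights[0] if weights else None))
        model.add(Dropout(0.25))
        # 1D convolution learns nb_filters word-group filters of width filter_length
        model.add(Convolution1D(input_dim=embedding_dims, nb_filter=nb_filters,
                                filter_length=filter_length, border_mode="valid",
                                activation="relu", subsample_length=1,
                                weights=weights[2] if weights else None))
        model.add(MaxPooling1D(pool_length=2))  # halves the conv output length
        model.add(Flatten())
        # flattened size of the pooled conv output (valid conv, stride 1, pool 2)
        output_size = nb_filters * (((maxlen - filter_length) / 1) + 1) / 2
        model.add(Dense(output_size, hidden_dims,
                        weights=weights[5] if weights else None))
        if mode == "train":
            model.add(Dropout(0.25))
            model.add(Activation('relu'))
            # project onto the class labels and squash with a softmax
            model.add(Dense(hidden_dims, nb_classes))
            model.add(Activation('softmax'))
            model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
                          class_mode="categorical")
        else:
            # "test" mode reuses trained weights and only needs activations
            model.compile(loss='mean_squared_error', optimizer='rmsprop')
        return model
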
@@ -155,6 +113,7 @@ def ClusterConv(model_file, train_file, valid_file, ftype, nsamples, outdir):
     ncolors = len(colors)
 
     for prog,[x,y] in zip(labels, X_red):
+<<<<<<< HEAD
         #x = gauss(0,0.1) + x
         #y = gauss(0,0.1) + y
         color = 'r' #colors[progs.index(prog)]
@@ -181,7 +140,6 @@ def ClusterConv(model_file, train_file, valid_file, ftype, nsamples, outdir):
 
     plt.show()
     #plt.savefig("plot.png")
-    #return None
 
     from sklearn.cluster import MeanShift, estimate_bandwidth
 
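
The MeanShift import above suggests the cluster assignment that follows (outside these context lines) is presumably done with scikit-learn's mean shift. A minimal usage sketch over a 2-D embedding like X_red; the data and the quantile value are assumptions, not taken from this commit:

    # Minimal mean-shift sketch; X is a stand-in for the reduced embedding X_red.
    import numpy as np
    from sklearn.cluster import MeanShift, estimate_bandwidth

    X = np.random.rand(50, 2)                        # assumed example data
    bandwidth = estimate_bandwidth(X, quantile=0.2)  # quantile is an assumption
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)
    print len(np.unique(ms.labels_)), "clusters"     # Python 2, as in this file
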
@@ -274,7 +232,7 @@ def ClusterConv(model_file, train_file, valid_file, ftype, nsamples, outdir):
     #print "Clusters dumped!"
 
 
-def TrainDeepRepr(model_file, train_file, valid_file, ftype, nsamples):
+def TrainCnn(model_file, train_file, valid_file, ftype, nsamples):
 
     csvreader = open_csv(train_file)
 
@@ -290,7 +248,7 @@ def TrainDeepRepr(model_file, train_file, valid_file, ftype, nsamples):
     nb_filters = 250
     filter_length = 3
     hidden_dims = 250
-    nb_epoch = 1
+    nb_epoch = 100
 
     train_programs, train_features, train_classes = read_traces(train_file, nsamples, cut=None)
     train_size = len(train_features)
@@ -303,70 +261,22 @@ def TrainDeepRepr(model_file, train_file, valid_file, ftype, nsamples):
     max_features = len(tokenizer.word_counts)
 
     preprocessor = DeepReprPreprocessor(tokenizer, window_size, batch_size)
-    X_train,y_train = preprocessor.preprocess(train_features, 3000)
+    X_train,y_train = preprocessor.preprocess(train_features, 50000)
     nb_classes = len(preprocessor.classes)
     print preprocessor.classes
-    #print X_train[0], len(X_train[0])
-    #print X_train[1], len(X_train[1])
-
-    #print set(y_train)
-    #assert(0)
-
-    from keras.preprocessing import sequence
-    from keras.optimizers import RMSprop
-    from keras.models import Sequential
-    from keras.layers.core import Dense, Dropout, Activation, Flatten
-    from keras.layers.embeddings import Embedding
-    from keras.layers.convolutional import Convolution1D, MaxPooling1D
-
-    print('Build model...')
-    model = Sequential()
-
-    # we start off with an efficient embedding layer which maps
-    # our vocab indices into embedding_dims dimensions
-    model.add(Embedding(max_features, embedding_dims))
-    model.add(Dropout(0.25))
-
-    # we add a Convolution1D, which will learn nb_filters
-    # word group filters of size filter_length:
-    model.add(Convolution1D(input_dim=embedding_dims,
-                            nb_filter=nb_filters,
-                            filter_length=filter_length,
-                            border_mode="valid",
-                            activation="relu",
-                            subsample_length=1))
-
-    # we use standard max pooling (halving the output of the previous layer):
-    model.add(MaxPooling1D(pool_length=2))
-
-    # We flatten the output of the conv layer, so that we can add a vanilla dense layer:
-    model.add(Flatten())
-
-    # Computing the output shape of a conv layer can be tricky;
-    # for a good tutorial, see: http://cs231n.github.io/convolutional-networks/
-    output_size = nb_filters * (((maxlen - filter_length) / 1) + 1) / 2
-
-    # We add a vanilla hidden layer:
-    model.add(Dense(output_size, hidden_dims))
-    model.add(Dropout(0.25))
-    model.add(Activation('relu'))
-
-    # We project onto a single unit output layer, and squash it with a sigmoid:
-    model.add(Dense(hidden_dims, nb_classes))
-    model.add(Activation('softmax'))
-
-    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', class_mode="categorical")
+
+    model = mk_cnn("train", max_features, maxlen, embedding_dims, nb_filters, filter_length, hidden_dims, nb_classes)
 
     model.fit(X_train, y_train, validation_split=0.1, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True)
 
     model.mypreprocessor = preprocessor
-    model_file = "cluster-weights.hdf5"
+    # model_file = model_file + ".wei"
     #modelfile = open_model(model_file)
-    print "Saving model to",model_file
-    model.save_weights(model_file)
+    print "Saving model to",model_file + ".wei"
+    model.save_weights(model_file + ".wei")
 
-    model_file = "cluster-preprocessor.pklz"
-    modelfile = open_model(model_file)
-    print "Saving preprocessor to",model_file
+    # model_file = model_file + ".pre"
+    modelfile = open_model(model_file + ".pre")
+    print "Saving preprocessor to",model_file + ".pre"
     #model.save_weights(model_file)
     modelfile.write(pickle.dumps(preprocessor, protocol=2))
 
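
For reference, the output_size arithmetic in the code removed above (now presumably computed inside mk_cnn) works out as below; the maxlen value here is assumed purely for illustration:

    # Worked instance of: nb_filters * (((maxlen - filter_length) / 1) + 1) / 2
    nb_filters, filter_length, maxlen = 250, 3, 100   # maxlen is assumed
    conv_len = ((maxlen - filter_length) / 1) + 1     # valid conv, stride 1 -> 98
    output_size = nb_filters * conv_len / 2           # pool_length=2 halves it -> 12250
    assert output_size == 12250
    # note: under Python 3 these divisions would need // to stay integral
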
@@ -404,11 +314,11 @@ def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples):
     plt.text(x, y + 0.02, prog.split("/")[-1])
 
 
-    if valid_file is not None:
+    if valid_file is not None:
         valid_programs, valid_features, valid_classes = read_traces(valid_file, None)
         valid_dict = dict()
         valid_dict[ftype] = valid_features
-
+
         X_red = model.transform(valid_dict)
         for prog,[x,y],cl in zip(valid_programs, X_red, valid_classes):
             x = gauss(0,0.1) + x
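
The gauss(0,0.1) calls above jitter each projected point so overlapping program labels stay readable in the scatter plot. A self-contained illustration of the same idiom, with assumed example data:

    # Jittering identical 2-D points before labeling them, as the loop above does.
    from random import gauss
    import matplotlib.pyplot as plt

    points = [("prog_a", 0.5, 0.5), ("prog_b", 0.5, 0.5)]  # assumed example data
    for name, x, y in points:
        x, y = x + gauss(0, 0.1), y + gauss(0, 0.1)  # small Gaussian jitter
        plt.scatter(x, y, c='r')
        plt.text(x, y + 0.02, name)
    plt.show()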