@@ -57,7 +57,7 @@ def ClusterConv(model_file, train_file, valid_file, ftype, nsamples, outdir):
57
57
58
58
#csvreader = load_csv(train_file)
59
59
print "Reading and sampling data to train.."
60
- train_programs , train_features , train_classes = read_traces (train_file , nsamples , cut = None )
60
+ train_programs , train_features , train_classes = read_traces (train_file , nsamples , cut = 10 , maxsize = window_size )
61
61
train_size = len (train_features )
62
62
63
63
#y = train_programs
@@ -111,6 +111,34 @@ def ClusterConv(model_file, train_file, valid_file, ftype, nsamples, outdir):
111
111
model = make_cluster_pipeline_subtraces (ftype )
112
112
X_red = model .fit_transform (train_dict )
113
113
114
+ colors = 'rbgcmykbgrcmykbgrcmykbgrcmyk'
115
+ ncolors = len (colors )
116
+
117
+ for prog ,[x ,y ] in zip (labels , X_red ):
118
+ x = gauss (0 ,0.1 ) + x
119
+ y = gauss (0 ,0.1 ) + y
120
+ plt .scatter (x , y , c = 'r' )
121
+ #plt.text(x, y+0.02, prog.split("/")[-1])
122
+
123
+
124
+ if valid_file is not None :
125
+ valid_programs , valid_features , valid_classes = read_traces (valid_file , None , cut = 10 , maxsize = window_size ) #None)
126
+ valid_dict = dict ()
127
+
128
+ X_valid , _ , valid_labels = preprocessor .preprocess_traces (valid_features , y_data = None , labels = valid_programs )
129
+ valid_dict [ftype ] = new_model ._predict (X_valid )
130
+ X_red = model .transform (valid_dict )
131
+
132
+ for prog ,[x ,y ] in zip (valid_labels , X_red ):
133
+ x = gauss (0 ,0.1 ) + x
134
+ y = gauss (0 ,0.1 ) + y
135
+ plt .scatter (x , y , c = 'b' )
136
+ plt .text (x , y + 0.02 , prog .split ("/" )[- 1 ])
137
+
138
+ plt .savefig ("plot.png" )
139
+ return None
140
+
141
+
114
142
from sklearn .cluster import MeanShift , estimate_bandwidth
115
143
116
144
bandwidth = estimate_bandwidth (X_red , quantile = 0.2 )
@@ -124,8 +152,6 @@ def ClusterConv(model_file, train_file, valid_file, ftype, nsamples, outdir):
124
152
125
153
plt .figure ()
126
154
print len (X_red ), len (labels )
127
- colors = 'rbgcmykbgrcmykbgrcmykbgrcmyk'
128
- ncolors = len (colors )
129
155
130
156
for ([x ,y ],label , cluster_label ) in zip (X_red ,labels , cluster_labels ):
131
157
x = gauss (0 ,0.1 ) + x
@@ -139,8 +165,8 @@ def ClusterConv(model_file, train_file, valid_file, ftype, nsamples, outdir):
139
165
140
166
plt .title ('Estimated number of clusters: %d' % n_clusters )
141
167
142
- #plb .savefig(outdir+"/ plot.png")
143
- plt .show ()
168
+ plt .savefig (" plot.png" )
169
+ # plt.show()
144
170
145
171
return zip (labels , cluster_labels )
146
172
#csvwriter = open_csv(train_file+".clusters")
@@ -279,6 +305,19 @@ def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples):
279
305
plt .scatter (x , y , c = colors [cl ])
280
306
plt .text (x , y + 0.02 , prog .split ("/" )[- 1 ])
281
307
308
+
309
+ if valid_file is not None :
310
+ valid_programs , valid_features , valid_classes = read_traces (valid_file , None )
311
+ valid_dict = dict ()
312
+ valid_dict [ftype ] = valid_features
313
+
314
+ X_red = model .transform (valid_dict )
315
+ for prog ,[x ,y ],cl in zip (valid_programs , X_red , valid_classes ):
316
+ x = gauss (0 ,0.1 ) + x
317
+ y = gauss (0 ,0.1 ) + y
318
+ plt .scatter (x , y , c = colors [cl + 1 ])
319
+ plt .text (x , y + 0.02 , prog .split ("/" )[- 1 ])
320
+
282
321
plt .show ()
283
322
#af = MeanShift().fit(X_red)
284
323
0 commit comments