Skip to content

Commit 109b7c0

Browse files
author
gaa-cifasis
committed
fixes for HITB workshop
1 parent 0553b5b commit 109b7c0

File tree

3 files changed

+31
-23
lines changed

3 files changed

+31
-23
lines changed

fextractor

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,8 @@ if __name__ == "__main__":
148148
original_inputs = RandomInputMutator(args + files, NullMutator)
149149
#expanded_input_generator = RandomInputMutator(args + files, RandomExpanderMutator)
150150
mutated_input_generator = RandomInputMutator(args + files, RandomByteMutator)
151+
if included_mods == []:
152+
included_mods = [program]
151153

152154
app = Process(program, envs, timeout, included_mods, ignored_mods, no_stdout = not show_stdout )
153155
prt = TypePrinter(csvfile, testcase, mclass)

vd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ if __name__ == "__main__":
7070
traces_path = outfile#outdir+"/traces.raw"
7171

7272
if os.path.exists(traces_path):
73-
traces = traces_path
73+
print traces_path, "exists. I will not overwritte it. Aborting"
7474
else:
7575

7676
modules_to_trace = [main_module]

vdiscover/Cluster.py

Lines changed: 28 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
import matplotlib as mpl
2828

2929
# hack from https://stackoverflow.com/questions/2801882/generating-a-png-with-matplotlib-when-display-is-undefined to avoid using X
30-
mpl.use('Agg')
30+
#mpl.use('Agg')
3131
import matplotlib.pyplot as plt
3232

3333
from Utils import *
@@ -284,10 +284,6 @@ def TrainCnn(model_file, train_file, valid_file, ftype, nsamples):
284284

285285
def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples):
286286

287-
#import matplotlib.pyplot as plt
288-
#import matplotlib as mpl
289-
290-
#csvreader = open_csv(train_file)
291287
train_programs, train_features, train_classes = read_traces(train_file, nsamples)
292288
train_size = len(train_programs)
293289

@@ -298,15 +294,14 @@ def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples):
298294
#batch_size = 16
299295
#window_size = 20
300296

301-
#from sklearn.cluster import MeanShift
302-
303297
print "Transforming data and fitting model.."
304298
model = make_cluster_pipeline_bow(ftype)
305299
X_red = model.fit_transform(train_dict)
306300

307301
#mpl.rcParams.update({'font.size': 10})
308302
plt.figure()
309303
colors = 'brgcmykbgrcmykbgrcmykbgrcmyk'
304+
ncolors = len(colors)
310305

311306
for prog,[x,y],cl in zip(train_programs, X_red, train_classes):
312307
x = gauss(0,0.1) + x
@@ -332,26 +327,37 @@ def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples):
332327
plt.text(x, y+0.02, prog.split("/")[-1])
333328

334329
plt.show()
335-
#af = MeanShift().fit(X_red)
330+
from sklearn.cluster import MeanShift, estimate_bandwidth
331+
332+
bandwidth = estimate_bandwidth(X_red, quantile=0.2)
333+
print "Clustering with bandwidth:", bandwidth
334+
335+
af = MeanShift(bandwidth=bandwidth/5).fit(X_red)
336+
337+
cluster_centers = af.cluster_centers_
338+
labels = af.labels_
339+
n_clusters_ = len(cluster_centers)
336340

337-
#cluster_centers = af.cluster_centers_
338-
#labels = af.labels_
339-
#n_clusters_ = len(cluster_centers)
341+
plt.close('all')
342+
plt.figure(1)
343+
plt.clf()
340344

341-
#plt.close('all')
342-
#plt.figure(1)
343-
#plt.clf()
345+
for ([x,y],label, cluster_label) in zip(X_red,train_programs, labels):
346+
x = gauss(0,0.1) + x
347+
y = gauss(0,0.1) + y
348+
plt.scatter(x, y, c = colors[cluster_label % ncolors])
344349

345-
#for k, col in zip(range(n_clusters_), colors):
346-
# my_members = labels == k
347-
# cluster_center = cluster_centers[k]
348-
# plt.plot(X_red[my_members, 0], X_red[my_members, 1], col + '.')
349-
# plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
350-
# markeredgecolor='k', markersize=14)
350+
for i,[x,y] in enumerate(cluster_centers):
351+
plt.plot(x, y, 'o', markerfacecolor=colors[i % ncolors],
352+
markeredgecolor='k', markersize=7)
351353

354+
plt.title('Estimated number of clusters: %d' % n_clusters_)
355+
plt.show()
352356

353-
#plt.title('Estimated number of clusters: %d' % n_clusters_)
354-
#plt.show()
357+
clustered_traces = zip(train_programs, labels)
358+
writer = write_csv(train_file.replace(".gz","")+".clusters")
359+
for label, cluster in clustered_traces:
360+
writer.writerow([label, cluster])
355361

356362
def Cluster(train_file, valid_file, ftype, nsamples):
357363

0 commit comments

Comments
 (0)