Skip to content

Commit

Permalink
fixes for HITB workshop
Browse files (browse the repository at this point in the history)
  • Loading branch information
gaa-cifasis committed May 20, 2016
1 parent 0553b5b commit 109b7c0
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 23 deletions.
2 changes: 2 additions & 0 deletions fextractor
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ if __name__ == "__main__":
original_inputs = RandomInputMutator(args + files, NullMutator)
#expanded_input_generator = RandomInputMutator(args + files, RandomExpanderMutator)
mutated_input_generator = RandomInputMutator(args + files, RandomByteMutator)
if included_mods == []:
included_mods = [program]

app = Process(program, envs, timeout, included_mods, ignored_mods, no_stdout = not show_stdout )
prt = TypePrinter(csvfile, testcase, mclass)
Expand Down
2 changes: 1 addition & 1 deletion vd
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ if __name__ == "__main__":
traces_path = outfile#outdir+"/traces.raw"

if os.path.exists(traces_path):
traces = traces_path
print traces_path, "exists. I will not overwritte it. Aborting"
else:

modules_to_trace = [main_module]
Expand Down
50 changes: 28 additions & 22 deletions vdiscover/Cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import matplotlib as mpl

# hack from https://stackoverflow.com/questions/2801882/generating-a-png-with-matplotlib-when-display-is-undefined to avoid requiring an X display server
mpl.use('Agg')
#mpl.use('Agg')
import matplotlib.pyplot as plt

from Utils import *
Expand Down Expand Up @@ -284,10 +284,6 @@ def TrainCnn(model_file, train_file, valid_file, ftype, nsamples):

def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples):

#import matplotlib.pyplot as plt
#import matplotlib as mpl

#csvreader = open_csv(train_file)
train_programs, train_features, train_classes = read_traces(train_file, nsamples)
train_size = len(train_programs)

Expand All @@ -298,15 +294,14 @@ def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples):
#batch_size = 16
#window_size = 20

#from sklearn.cluster import MeanShift

print "Transforming data and fitting model.."
model = make_cluster_pipeline_bow(ftype)
X_red = model.fit_transform(train_dict)

#mpl.rcParams.update({'font.size': 10})
plt.figure()
colors = 'brgcmykbgrcmykbgrcmykbgrcmyk'
ncolors = len(colors)

for prog,[x,y],cl in zip(train_programs, X_red, train_classes):
x = gauss(0,0.1) + x
Expand All @@ -332,26 +327,37 @@ def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples):
plt.text(x, y+0.02, prog.split("/")[-1])

plt.show()
#af = MeanShift().fit(X_red)
from sklearn.cluster import MeanShift, estimate_bandwidth

bandwidth = estimate_bandwidth(X_red, quantile=0.2)
print "Clustering with bandwidth:", bandwidth

af = MeanShift(bandwidth=bandwidth/5).fit(X_red)

cluster_centers = af.cluster_centers_
labels = af.labels_
n_clusters_ = len(cluster_centers)

#cluster_centers = af.cluster_centers_
#labels = af.labels_
#n_clusters_ = len(cluster_centers)
plt.close('all')
plt.figure(1)
plt.clf()

#plt.close('all')
#plt.figure(1)
#plt.clf()
for ([x,y],label, cluster_label) in zip(X_red,train_programs, labels):
x = gauss(0,0.1) + x
y = gauss(0,0.1) + y
plt.scatter(x, y, c = colors[cluster_label % ncolors])

#for k, col in zip(range(n_clusters_), colors):
# my_members = labels == k
# cluster_center = cluster_centers[k]
# plt.plot(X_red[my_members, 0], X_red[my_members, 1], col + '.')
# plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
# markeredgecolor='k', markersize=14)
for i,[x,y] in enumerate(cluster_centers):
plt.plot(x, y, 'o', markerfacecolor=colors[i % ncolors],
markeredgecolor='k', markersize=7)

plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()

#plt.title('Estimated number of clusters: %d' % n_clusters_)
#plt.show()
clustered_traces = zip(train_programs, labels)
writer = write_csv(train_file.replace(".gz","")+".clusters")
for label, cluster in clustered_traces:
writer.writerow([label, cluster])

def Cluster(train_file, valid_file, ftype, nsamples):

Expand Down

0 comments on commit 109b7c0

Please sign in to comment.