Skip to content

Commit

Permalink
first steps to have seed sampler
Browse files Browse the repository at this point in the history
  • Loading branch information
gaa-cifasis committed Oct 29, 2015
1 parent 49cf8b4 commit 0650fdb
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 21 deletions.
24 changes: 14 additions & 10 deletions vd
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@ sys.setrecursionlimit(1024*1024*1024)
#from vdiscover.Detection import WriteTestcase
from vdiscover.Process import Process
from vdiscover.Printer import TypePrinter
from vdiscover.Cluster import PlotDeepRepr
from vdiscover.Cluster import ClusterScikit, ClusterConv
from vdiscover.Utils import update_progress
from vdiscover.Sampling import cluster_sampler

if __name__ == "__main__":


Expand All @@ -46,7 +48,7 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(description='')
parser.add_argument("-i", help="", type=str, default=None, required=True, dest="seeds")
parser.add_argument("-o", help="", type=str, default=None, required=True, dest="out")
#parser.add_argument("-v", help="", type=str, default=None, required=True, dest="vectorizer")
parser.add_argument("-v", help="", type=str, default=None, required=False, dest="vectorizer")
#parser.add_argument("-m", help="", type=str, default="afl", dest="fuzzer")
parser.add_argument("cmd", help="", type=str, default=None)

Expand All @@ -55,7 +57,7 @@ if __name__ == "__main__":
outfile = options.out
#fuzzer = options.fuzzer
cmd = options.cmd
#vectorizer = options.vectorizer
vectorizer = options.vectorizer
program = cmd.split(" ")[0]
programf = program.replace("/","__")
timeout = 5
Expand All @@ -66,7 +68,7 @@ if __name__ == "__main__":
traces = traces_path
else:

app = Process(program, envs, timeout, [], [], True)
app = Process(program, envs, timeout, ["libcairo"], [], True)
prt = TypePrinter(traces_path, program, 0)
traces = []
all_files = []
Expand All @@ -93,10 +95,12 @@ if __name__ == "__main__":
traces.append(prt.print_events(testcase,events))
#print prepared_cmd
#print traces[-1]

#clustered_traces = PlotDeepRepr(vectorizer, traces, None, "dynamic", None, outdir)
#clusters = dict()
#for label, cluster in clustered_traces:
# clusters[cluster] = clusters.get(cluster, []) + [label]

#print clusters

if vectorizer is None:
clustered_traces = ClusterScikit(vectorizer, traces, None, "dynamic", None)
else:
clustered_traces = ClusterConv(vectorizer, traces, None, "dynamic", None, None)

cluster_sampler(clustered_traces,1)
#print clusters
4 changes: 2 additions & 2 deletions vdiscover/Cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from Utils import *
from Pipeline import *

def PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples, outdir):
def ClusterConv(model_file, train_file, valid_file, ftype, nsamples, outdir):

f = open(model_file+".pre")
preprocessor = pickle.load(f)
Expand Down Expand Up @@ -139,7 +139,7 @@ def PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples, outdir):

plt.title('Estimated number of clusters: %d' % n_clusters)

plb.savefig(outdir+"/plot.png")
#plb.savefig(outdir+"/plot.png")
plt.show()

return zip(labels, cluster_labels)
Expand Down
32 changes: 32 additions & 0 deletions vdiscover/Sampling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""
This file is part of VDISCOVER.
VDISCOVER is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
VDISCOVER is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with VDISCOVER. If not, see <http://www.gnu.org/licenses/>.
Copyright 2014 by G.Grieco
"""

import random
import copy

def cluster_sampler(clustered_traces, n_per_cluster):
    """Print a random selection of seeds for every cluster.

    clustered_traces -- iterable of (label, cluster) pairs. Each label is a
        string; only the text after its last ":" is kept as the seed name.
    n_per_cluster -- maximum number of seeds to print for each cluster.
        Clusters holding fewer seeds than this print all of their seeds
        (in random order) instead of raising ValueError.

    Output is a header line with the number of clusters, followed by one
    comma-separated line of sampled seeds per cluster. Nothing is returned.
    """
    clusters = dict()
    for label, cluster in clustered_traces:
        # setdefault + append grows each group in place instead of
        # rebuilding the whole list on every insertion.
        clusters.setdefault(cluster, []).append(label.split(":")[-1])

    # A single parenthesised argument prints the same text whether print is
    # the Python 2 statement or the Python 3 function.
    print("Selecting %d seeds" % len(clusters))
    for seeds in clusters.values():
        # random.sample raises ValueError when asked for more items than the
        # population holds, so clamp the request to the cluster size.
        sample_size = min(n_per_cluster, len(seeds))
        print(",".join(random.sample(seeds, sample_size)))
33 changes: 24 additions & 9 deletions vpredictor
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ sys.setrecursionlimit(1024*1024*1024)
from vdiscover.Pipeline import *
from vdiscover.Recall import Recall
from vdiscover.Train import Train
from vdiscover.Cluster import Cluster
from vdiscover.Cluster import ClusterScikit, ClusterConv

if __name__ == "__main__":

Expand Down Expand Up @@ -67,10 +67,13 @@ if __name__ == "__main__":
help="Valid a model using infile",
action="store", default=None)

parser.add_argument("--cluster",
help="Cluster input traces",
parser.add_argument("--cluster-bow",
help="Cluster input traces using BOW",
action="store_true", default=False)

parser.add_argument("--cluster-conv",
help="Cluster input traces using a convolutional model",
action="store_true", default=False)

parser.add_argument("--train-rf",
help="Train a Random Forest using infile",
Expand All @@ -97,8 +100,10 @@ if __name__ == "__main__":

training_mode_rf = options.train_rf
training_mode_lstm = options.train_lstm
training_mode_cluster = options.cluster
training_mode = training_mode_rf or training_mode_lstm or training_mode_cluster
training_mode_cluster_bow = options.cluster_bow
training_mode_cluster_conv = options.cluster_conv

training_mode = training_mode_rf or training_mode_lstm or training_mode_cluster_bow or training_mode_cluster_conv

probability_mode = options.prob
nsamples = options.n_samples
Expand All @@ -120,10 +125,20 @@ if __name__ == "__main__":
if training_mode:
if training_mode_rf:
Train(out_file, in_file, valid_file, "rf", ftype, nsamples)
elif training_mode_lstm:
Train(out_file, in_file, valid_file, "lstm", ftype, nsamples)
elif training_mode_cluster:
Cluster(in_file, valid_file, ftype, nsamples)
#elif training_mode_:
# Train(out_file, in_file, valid_file, "lstm", ftype, nsamples)
elif training_mode_cluster_bow:
#Cluster(in_file, valid_file, ftype, nsamples)
ClusterScikit(None, in_file, valid_file, ftype, nsamples)
elif training_mode_cluster_conv:
#Cluster(in_file, valid_file, ftype, nsamples)
if (model_file is None):
print "Clustering using a convolutional model requires a pre-trained model"
exit(-1)

ClusterConv(model_file, in_file, valid_file, ftype, nsamples, None)


else:
if model_file is None:
print "VDiscover requires a pre-trained model to predict"
Expand Down

0 comments on commit 0650fdb

Please sign in to comment.