diff --git a/fextractor b/fextractor index 87e404e..59131ad 100755 --- a/fextractor +++ b/fextractor @@ -155,7 +155,7 @@ if __name__ == "__main__": print "Execution of",program,"failed!" exit(-1) - prt.print_events(original_events) + prt.print_events(program,original_events) for (i, (d, mutated)) in enumerate(mutated_input_generator): @@ -163,4 +163,4 @@ if __name__ == "__main__": break events = app.getData(prepare_inputs(mutated)) - prt.print_events(events) + prt.print_events(program,events) diff --git a/vd b/vd index 172532a..df0b633 100755 --- a/vd +++ b/vd @@ -20,48 +20,77 @@ Copyright 2014 by G.Grieco """ import os +import os.path import argparse import sys import csv +import random + +csv.field_size_limit(sys.maxsize) +sys.setrecursionlimit(1024*1024*1024) #from vdiscover.Detection import WriteTestcase from vdiscover.Process import Process from vdiscover.Printer import TypePrinter -from vdiscover.Cluster import Cluster - +from vdiscover.Cluster import PlotDeepRepr +from vdiscover.Utils import update_progress if __name__ == "__main__": # Arguments parser = argparse.ArgumentParser(description='') - parser.add_argument("seeds", help="", type=str, default=None) - parser.add_argument("vectorizer", help="", type=str, default=None) + parser.add_argument("-i", help="", type=str, default=None, required=True, dest="seeds") + parser.add_argument("-o", help="", type=str, default=None, required=True, dest="out") + #parser.add_argument("-v", help="", type=str, default=None, required=True, dest="vectorizer") + #parser.add_argument("-m", help="", type=str, default="afl", dest="fuzzer") parser.add_argument("cmd", help="", type=str, default=None) options = parser.parse_args() seeds = options.seeds + outfile = options.out + fuzzer = options.fuzzer cmd = options.cmd vectorizer = options.vectorizer - #outdir = "outdir/"++ program = cmd.split(" ")[0] + programf = program.replace("/","__") timeout = 5 envs = dict() + traces_path = outfile#outdir+"/traces.raw" + + if os.path.exists(traces_path): + traces = traces_path + else: - app = Process(program, envs, timeout, [], [], True) - prt = TypePrinter("/dev/null", program, 0) - traces = [] + app = Process(program, envs, timeout, [], [], True) + prt = TypePrinter(traces_path, program, 0) + traces = [] + all_files = [] - print "Extracting traces.." - for x,y,files in os.walk(seeds): - for f in files: + print "Extracting traces.." + for x,y,files in os.walk(seeds): + nfiles = len(files) + #print "Processing directory ","./"++("/".join(y)), "with", nfiles, "seeds" + for f in files: + all_files.append(x+"/".join(y)+"/"+f) + + random.shuffle(all_files) + nfiles = len(all_files) + + for progress,testcase in enumerate(all_files): + #print testcase + progress = round(float(progress)/nfiles, 2) + update_progress(progress) prepared_cmd = cmd.replace(program,"") prepared_cmd = prepared_cmd.split("@@") - prepared_cmd = prepared_cmd[0].split(" ") + [x+"/".join(y)+"/"+f] + prepared_cmd[1].split(" ") + prepared_cmd = prepared_cmd[0].split(" ") + [testcase] + prepared_cmd[1].split(" ") prepared_cmd = filter(lambda x: x<>'', prepared_cmd) events = app.getData(prepared_cmd) - traces.append(prt.print_events(events)) + traces.append(prt.print_events(testcase,events)) #print prepared_cmd #print traces[-1] - Cluster(vectorizer, None, traces, None, "cluster", "dynamic", None) + #clustered_traces = PlotDeepRepr(vectorizer, traces, None, "dynamic", None, outdir) + #clusters = dict() + #for label, cluster in clustered_traces: + # clusters[cluster] = clusters.get(cluster, []) + [label] - + #print clusters diff --git a/vdiscover/Cluster.py b/vdiscover/Cluster.py index a885960..54a98f7 100644 --- a/vdiscover/Cluster.py +++ b/vdiscover/Cluster.py @@ -26,13 +26,14 @@ import numpy as np import matplotlib.pyplot as plt import matplotlib as mpl +import pylab as plb from Utils import * from Pipeline import * -def PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples): +def PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples, outdir): - f = gzip.open(model_file+".pre") + f = open(model_file+".pre") preprocessor = pickle.load(f) import h5py @@ -43,12 +44,6 @@ def PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples): g = f['layer_{}'.format(k)] layers.append([g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]) - #assert(0) - - #preprocessor = old_model.mypreprocessor - - #print preprocessor.tokenizer - #print preprocessor.tokenizer.word_counts max_features = len(preprocessor.tokenizer.word_counts) batch_size = 100 @@ -56,7 +51,7 @@ def PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples): maxlen = window_size embedding_dims = 20 - nb_filters = 250 + nb_filters = 50 filter_length = 3 hidden_dims = 250 @@ -136,7 +131,7 @@ def PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples): x = gauss(0,0.1) + x y = gauss(0,0.1) + y plt.scatter(x, y, c = colors[cluster_label % ncolors]) - #plt.text(x-0.05, y+0.01, label.split("-")[-1].split(".")[0]) + plt.text(x-0.05, y+0.01, label.split("/")[-1]) for i,[x,y] in enumerate(cluster_centers): plt.plot(x, y, 'o', markerfacecolor=colors[i % ncolors], @@ -144,13 +139,15 @@ def PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples): plt.title('Estimated number of clusters: %d' % n_clusters) + plb.savefig(outdir+"/plot.png") plt.show() + + return zip(labels, cluster_labels) + #csvwriter = open_csv(train_file+".clusters") + #for (label, cluster_label) in zip(labels, cluster_labels): + # csvwriter.writerow([label, cluster_label]) - csvwriter = open_csv(train_file+".clusters") - for (label, cluster_label) in zip(labels, cluster_labels): - csvwriter.writerow([label, cluster_label]) - - print "Clusters dumped!" + #print "Clusters dumped!" def TrainDeepRepr(model_file, train_file, valid_file, ftype, nsamples): @@ -304,7 +301,7 @@ def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples): plt.show() """ -def Cluster(model_file, out_file, train_file, valid_file, ttype, ftype, nsamples): +def Cluster(model_file, out_file, train_file, valid_file, ttype, ftype, nsamples, outfile): if ttype == "cluster": @@ -319,4 +316,4 @@ def Cluster(model_file, out_file, train_file, valid_file, ttype, ftype, nsamples if model_file is None: TrainDeepRepr(out_file, train_file, valid_file, ftype, nsamples) else: - PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples) + PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples, outfile) diff --git a/vdiscover/Printer.py b/vdiscover/Printer.py index c9ec73d..fd25718 100644 --- a/vdiscover/Printer.py +++ b/vdiscover/Printer.py @@ -72,7 +72,7 @@ def preprocess(self, event): return r - def print_events(self, events): + def print_events(self, label, events): r = list() @@ -93,7 +93,7 @@ def print_events(self, events): for x,y in events: trace = trace+x+"="+y+" " - row = [self.pname,trace] + row = [self.pname+":"+label,trace] if self.mclass is not None: row.append(self.mclass) diff --git a/vdiscover/data/prototypes.conf b/vdiscover/data/prototypes.conf index ed4fc5a..29606d2 100644 --- a/vdiscover/data/prototypes.conf +++ b/vdiscover/data/prototypes.conf @@ -159,8 +159,8 @@ string bindtextdomain(string, string); string textdomain(string); ; libio.h -char _IO_getc(file); -int _IO_putc(char,file); +;char _IO_getc(file); +;int _IO_putc(char,file); ; locale.h string setlocale(int, string);