Skip to content

Commit

Permalink
improvements
Browse files (browse the repository at this point in the history)
  • Loading branch information
gaa-cifasis committed Oct 19, 2015
1 parent 5c3fb35 commit 9f4b1c1
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 38 deletions.
4 changes: 2 additions & 2 deletions fextractor
Original file line number Diff line number Diff line change
Expand Up @@ -155,12 +155,12 @@ if __name__ == "__main__":
print "Execution of",program,"failed!"
exit(-1)

prt.print_events(original_events)
prt.print_events(program,original_events)

for (i, (d, mutated)) in enumerate(mutated_input_generator):

if i >= max_mut:
break

events = app.getData(prepare_inputs(mutated))
prt.print_events(events)
prt.print_events(program,events)
59 changes: 44 additions & 15 deletions vd
Original file line number Diff line number Diff line change
Expand Up @@ -20,48 +20,77 @@ Copyright 2014 by G.Grieco
"""

import os
import os.path
import argparse
import sys
import csv
import random

csv.field_size_limit(sys.maxsize)
sys.setrecursionlimit(1024*1024*1024)

#from vdiscover.Detection import WriteTestcase
from vdiscover.Process import Process
from vdiscover.Printer import TypePrinter
from vdiscover.Cluster import Cluster

from vdiscover.Cluster import PlotDeepRepr
from vdiscover.Utils import update_progress
if __name__ == "__main__":

# Arguments
parser = argparse.ArgumentParser(description='')
parser.add_argument("seeds", help="", type=str, default=None)
parser.add_argument("vectorizer", help="", type=str, default=None)
parser.add_argument("-i", help="", type=str, default=None, required=True, dest="seeds")
parser.add_argument("-o", help="", type=str, default=None, required=True, dest="out")
#parser.add_argument("-v", help="", type=str, default=None, required=True, dest="vectorizer")
#parser.add_argument("-m", help="", type=str, default="afl", dest="fuzzer")
parser.add_argument("cmd", help="", type=str, default=None)

options = parser.parse_args()
seeds = options.seeds
outfile = options.out
fuzzer = options.fuzzer
cmd = options.cmd
vectorizer = options.vectorizer
#outdir = "outdir/"++
program = cmd.split(" ")[0]
programf = program.replace("/","__")
timeout = 5
envs = dict()
traces_path = outfile#outdir+"/traces.raw"

if os.path.exists(traces_path):
traces = traces_path
else:

app = Process(program, envs, timeout, [], [], True)
prt = TypePrinter("/dev/null", program, 0)
traces = []
app = Process(program, envs, timeout, [], [], True)
prt = TypePrinter(traces_path, program, 0)
traces = []
all_files = []

print "Extracting traces.."
for x,y,files in os.walk(seeds):
for f in files:
print "Extracting traces.."
for x,y,files in os.walk(seeds):
nfiles = len(files)
#print "Processing directory ","./"++("/".join(y)), "with", nfiles, "seeds"
for f in files:
all_files.append(x+"/".join(y)+"/"+f)

random.shuffle(all_files)
nfiles = len(all_files)

for progress,testcase in enumerate(all_files):
#print testcase
progress = round(float(progress)/nfiles, 2)
update_progress(progress)
prepared_cmd = cmd.replace(program,"")
prepared_cmd = prepared_cmd.split("@@")
prepared_cmd = prepared_cmd[0].split(" ") + [x+"/".join(y)+"/"+f] + prepared_cmd[1].split(" ")
prepared_cmd = prepared_cmd[0].split(" ") + [testcase] + prepared_cmd[1].split(" ")
prepared_cmd = filter(lambda x: x<>'', prepared_cmd)
events = app.getData(prepared_cmd)
traces.append(prt.print_events(events))
traces.append(prt.print_events(testcase,events))
#print prepared_cmd
#print traces[-1]

Cluster(vectorizer, None, traces, None, "cluster", "dynamic", None)
#clustered_traces = PlotDeepRepr(vectorizer, traces, None, "dynamic", None, outdir)
#clusters = dict()
#for label, cluster in clustered_traces:
# clusters[cluster] = clusters.get(cluster, []) + [label]


#print clusters
31 changes: 14 additions & 17 deletions vdiscover/Cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,14 @@
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pylab as plb

from Utils import *
from Pipeline import *

def PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples):
def PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples, outdir):

f = gzip.open(model_file+".pre")
f = open(model_file+".pre")
preprocessor = pickle.load(f)

import h5py
Expand All @@ -43,20 +44,14 @@ def PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples):
g = f['layer_{}'.format(k)]
layers.append([g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])])

#assert(0)

#preprocessor = old_model.mypreprocessor

#print preprocessor.tokenizer
#print preprocessor.tokenizer.word_counts
max_features = len(preprocessor.tokenizer.word_counts)

batch_size = 100
window_size = 300
maxlen = window_size

embedding_dims = 20
nb_filters = 250
nb_filters = 50
filter_length = 3
hidden_dims = 250

Expand Down Expand Up @@ -136,21 +131,23 @@ def PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples):
x = gauss(0,0.1) + x
y = gauss(0,0.1) + y
plt.scatter(x, y, c = colors[cluster_label % ncolors])
#plt.text(x-0.05, y+0.01, label.split("-")[-1].split(".")[0])
plt.text(x-0.05, y+0.01, label.split("/")[-1])

for i,[x,y] in enumerate(cluster_centers):
plt.plot(x, y, 'o', markerfacecolor=colors[i % ncolors],
markeredgecolor='k', markersize=7)

plt.title('Estimated number of clusters: %d' % n_clusters)

plb.savefig(outdir+"/plot.png")
plt.show()

return zip(labels, cluster_labels)
#csvwriter = open_csv(train_file+".clusters")
#for (label, cluster_label) in zip(labels, cluster_labels):
# csvwriter.writerow([label, cluster_label])

csvwriter = open_csv(train_file+".clusters")
for (label, cluster_label) in zip(labels, cluster_labels):
csvwriter.writerow([label, cluster_label])

print "Clusters dumped!"
#print "Clusters dumped!"


def TrainDeepRepr(model_file, train_file, valid_file, ftype, nsamples):
Expand Down Expand Up @@ -304,7 +301,7 @@ def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples):
plt.show()
"""

def Cluster(model_file, out_file, train_file, valid_file, ttype, ftype, nsamples):
def Cluster(model_file, out_file, train_file, valid_file, ttype, ftype, nsamples, outfile):

if ttype == "cluster":

Expand All @@ -319,4 +316,4 @@ def Cluster(model_file, out_file, train_file, valid_file, ttype, ftype, nsamples
if model_file is None:
TrainDeepRepr(out_file, train_file, valid_file, ftype, nsamples)
else:
PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples)
PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples, outfile)
4 changes: 2 additions & 2 deletions vdiscover/Printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def preprocess(self, event):

return r

def print_events(self, events):
def print_events(self, label, events):

r = list()

Expand All @@ -93,7 +93,7 @@ def print_events(self, events):
for x,y in events:
trace = trace+x+"="+y+" "

row = [self.pname,trace]
row = [self.pname+":"+label,trace]

if self.mclass is not None:
row.append(self.mclass)
Expand Down
4 changes: 2 additions & 2 deletions vdiscover/data/prototypes.conf
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,8 @@ string bindtextdomain(string, string);
string textdomain(string);

; libio.h
char _IO_getc(file);
int _IO_putc(char,file);
;char _IO_getc(file);
;int _IO_putc(char,file);

; locale.h
string setlocale(int, string);
Expand Down

0 comments on commit 9f4b1c1

Please sign in to comment.