improvements

gaa-cifasis · gaa-cifasis · commit 9f4b1c102c06 · 2015-10-19T11:37:02.000Z
diff --git a/fextractor b/fextractor
@@ -155,12 +155,12 @@ if __name__ == "__main__":
         print "Execution of",program,"failed!"
         exit(-1)
 
-      prt.print_events(original_events) 
+      prt.print_events(program,original_events) 
 
       for (i, (d, mutated)) in enumerate(mutated_input_generator):
 
         if i >= max_mut:
           break
 
         events = app.getData(prepare_inputs(mutated))
-        prt.print_events(events)
+        prt.print_events(program,events)
diff --git a/vd b/vd
@@ -20,48 +20,77 @@ Copyright 2014 by G.Grieco
 """
 
 import os
+import os.path
 import argparse
 import sys
 import csv
+import random 
+
+csv.field_size_limit(sys.maxsize)  # raise the csv module's per-field size cap to sys.maxsize (presumably so very long trace rows can be read back - confirm)
+sys.setrecursionlimit(1024*1024*1024)  # NOTE(review): a limit of 2**30 effectively removes the recursion guard, so runaway recursion may crash the interpreter instead of raising - confirm this is really needed
 
 #from vdiscover.Detection import WriteTestcase
 from vdiscover.Process    import Process
 from vdiscover.Printer    import TypePrinter
-from vdiscover.Cluster  import Cluster
-
+from vdiscover.Cluster  import PlotDeepRepr
+from vdiscover.Utils import update_progress
 if __name__ == "__main__":
 
     # Arguments
     parser = argparse.ArgumentParser(description='')
-    parser.add_argument("seeds", help="", type=str, default=None)
-    parser.add_argument("vectorizer", help="", type=str, default=None)
+    parser.add_argument("-i", help="", type=str, default=None, required=True, dest="seeds")  # directory that is walked for seed files
+    parser.add_argument("-o", help="", type=str, default=None, required=True, dest="out")  # path handed to TypePrinter as the traces output
+    #parser.add_argument("-v", help="", type=str, default=None, required=True, dest="vectorizer")
+    #parser.add_argument("-m", help="", type=str, default="afl", dest="fuzzer")
     parser.add_argument("cmd", help="", type=str, default=None)
 
     options = parser.parse_args()
     seeds = options.seeds
+    outfile = options.out
+    fuzzer = options.fuzzer  # NOTE(review): no active add_argument defines "fuzzer" (the "-m" option above is commented out), so this raises AttributeError; options.vectorizer two lines below has the same problem
     cmd = options.cmd
     vectorizer = options.vectorizer
-    #outdir = "outdir/"++
     program = cmd.split(" ")[0]
+    programf = program.replace("/","__")  # not read anywhere else in the visible script
     timeout = 5
     envs = dict()
+    traces_path = outfile#outdir+"/traces.raw"
+
+    if os.path.exists(traces_path):  # an already existing output file is reused and trace extraction is skipped
+      traces = traces_path  # here traces is a path string, whereas the else branch builds a list
+    else:
 
-    app = Process(program, envs, timeout, [], [], True)
-    prt = TypePrinter("/dev/null", program, 0)
-    traces = []
+      app = Process(program, envs, timeout, [], [], True)  
+      prt = TypePrinter(traces_path, program, 0)
+      traces = []
+      all_files = []
 
-    print "Extracting traces.."
-    for x,y,files in os.walk(seeds):
-      for f in files:
+      print "Extracting traces.."
+      for x,y,files in os.walk(seeds):  # x = directory path, y = its sub-directory names, files = its file names
+        nfiles = len(files)  # only referenced by the commented-out print below; reassigned after the walk
+        #print "Processing directory ","./"++("/".join(y)), "with", nfiles, "seeds"
+        for f in files:
+          all_files.append(x+"/".join(y)+"/"+f)  # NOTE(review): y lists the sub-directories of x, so this path is only right when x has none; os.path.join(x, f) looks like the intent
+
+      random.shuffle(all_files)  # seeds are processed in random order
+      nfiles = len(all_files)
+
+      for progress,testcase in enumerate(all_files):
+        #print testcase
+        progress = round(float(progress)/nfiles, 2)  # rebinds the loop index to the fraction of seeds already processed
+        update_progress(progress)
         prepared_cmd = cmd.replace(program,"")
         prepared_cmd = prepared_cmd.split("@@")
-        prepared_cmd = prepared_cmd[0].split(" ") + [x+"/".join(y)+"/"+f] + prepared_cmd[1].split(" ")
+        prepared_cmd = prepared_cmd[0].split(" ") + [testcase] + prepared_cmd[1].split(" ")  # the test case path replaces "@@"; NOTE(review): IndexError when cmd contains no "@@"
         prepared_cmd = filter(lambda x: x<>'', prepared_cmd)
         events = app.getData(prepared_cmd)
-        traces.append(prt.print_events(events))
+        traces.append(prt.print_events(testcase,events))  # the test case path is passed as the label argument of print_events
         #print prepared_cmd
         #print traces[-1]
     
-    Cluster(vectorizer, None, traces, None, "cluster", "dynamic", None)
+    #clustered_traces = PlotDeepRepr(vectorizer, traces, None, "dynamic", None, outdir)
+    #clusters = dict()
+    #for label, cluster in clustered_traces:
+    #  clusters[cluster] = clusters.get(cluster, []) + [label]
 
-    
+    #print clusters 
diff --git a/vdiscover/Cluster.py b/vdiscover/Cluster.py
@@ -26,13 +26,14 @@
 import numpy as np
 import matplotlib.pyplot as plt
 import matplotlib as mpl
+import pylab as plb
 
 from Utils import *
 from Pipeline import *
 
-def PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples):
+def PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples, outdir):
 
-  f = gzip.open(model_file+".pre")
+  f = open(model_file+".pre")
   preprocessor = pickle.load(f)
 
   import h5py
@@ -43,20 +44,14 @@ def PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples):
             g = f['layer_{}'.format(k)]
             layers.append([g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])])
 
-  #assert(0)
-
-  #preprocessor = old_model.mypreprocessor
-
-  #print preprocessor.tokenizer
-  #print preprocessor.tokenizer.word_counts
   max_features = len(preprocessor.tokenizer.word_counts)
 
   batch_size = 100
   window_size = 300
   maxlen = window_size
 
   embedding_dims = 20
-  nb_filters = 250
+  nb_filters = 50
   filter_length = 3
   hidden_dims = 250
 
@@ -136,21 +131,23 @@ def PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples):
     x = gauss(0,0.1) + x
     y = gauss(0,0.1) + y
     plt.scatter(x, y, c = colors[cluster_label % ncolors])
-    #plt.text(x-0.05, y+0.01, label.split("-")[-1].split(".")[0])
+    plt.text(x-0.05, y+0.01, label.split("/")[-1])
 
   for i,[x,y] in enumerate(cluster_centers):
     plt.plot(x, y, 'o', markerfacecolor=colors[i % ncolors],
              markeredgecolor='k', markersize=7)
 
   plt.title('Estimated number of clusters: %d' % n_clusters)
 
+  plb.savefig(outdir+"/plot.png")
   plt.show()
+  
+  return zip(labels, cluster_labels)
+  #csvwriter = open_csv(train_file+".clusters")
+  #for (label, cluster_label) in zip(labels, cluster_labels):
+  #  csvwriter.writerow([label, cluster_label])
 
-  csvwriter = open_csv(train_file+".clusters")
-  for (label, cluster_label) in zip(labels, cluster_labels):
-    csvwriter.writerow([label, cluster_label])
-
-  print "Clusters dumped!"
+  #print "Clusters dumped!"
 
 
 def TrainDeepRepr(model_file, train_file, valid_file, ftype, nsamples):
@@ -304,7 +301,7 @@ def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples):
   plt.show()
 """
 
-def Cluster(model_file, out_file, train_file, valid_file, ttype, ftype, nsamples):
+def Cluster(model_file, out_file, train_file, valid_file, ttype, ftype, nsamples, outfile):
 
   if ttype == "cluster":
 
@@ -319,4 +316,4 @@ def Cluster(model_file, out_file, train_file, valid_file, ttype, ftype, nsamples
     if model_file is None:
       TrainDeepRepr(out_file, train_file, valid_file, ftype, nsamples)
     else:
-      PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples)
+      PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples, outfile)
diff --git a/vdiscover/Printer.py b/vdiscover/Printer.py
@@ -72,7 +72,7 @@ def preprocess(self, event):
 
     return r
 
-  def print_events(self, events):
+  def print_events(self, label, events):
 
     r = list()
 
@@ -93,7 +93,7 @@ def print_events(self, events):
     for x,y in events:
       trace = trace+x+"="+y+" "
 
-    row = [self.pname,trace]
+    row = [self.pname+":"+label,trace]
 
     if self.mclass is not None:
       row.append(self.mclass)
diff --git a/vdiscover/data/prototypes.conf b/vdiscover/data/prototypes.conf
@@ -159,8 +159,8 @@ string bindtextdomain(string, string);
 string textdomain(string);
 
 ; libio.h
-char _IO_getc(file);
-int _IO_putc(char,file);
+;char _IO_getc(file);
+;int _IO_putc(char,file);
 
 ; locale.h
 string setlocale(int, string);