improvements

CIFASIS · Oct 19, 2015 · fd5353c
1 parent 48d4120
commit fd5353c
Show file tree

Hide file tree

Showing 5 changed files with 49 additions and 36 deletions.
diff --git a/fextractor b/fextractor
@@ -50,6 +50,11 @@ if __name__ == "__main__":
     # To help argparse to detect the number of columns correctly
     #os.environ['COLUMNS'] = str(os.popen('stty size', 'r').read().split()[1]) #str(shutil.get_terminal_size().columns)
 
+    if open("/proc/sys/kernel/randomize_va_space").read().strip() != "0":
+        print("Address space layout randomization (ASLR) is enabled, disable it before continue to use the cache")
+        print("Hint: # echo 0 > /proc/sys/kernel/randomize_va_space")
+        sys.exit(-1)
+
     # Arguments
     parser = argparse.ArgumentParser(description='Feature extraction of VDiscover')
     parser.add_argument("testcase", help="Testcase to analyze", type=str, default=None)

diff --git a/vd b/vd
@@ -36,6 +36,12 @@ from vdiscover.Cluster  import PlotDeepRepr
 from vdiscover.Utils import update_progress
 if __name__ == "__main__":
 
+
+    if open("/proc/sys/kernel/randomize_va_space").read().strip() != "0":
+        print("Address space layout randomization (ASLR) is enabled, disable it before continue to use the cache")
+        print("Hint: # echo 0 > /proc/sys/kernel/randomize_va_space")
+        sys.exit(-1)
+
     # Arguments
     parser = argparse.ArgumentParser(description='')
     parser.add_argument("-i", help="", type=str, default=None, required=True, dest="seeds")
@@ -47,9 +53,9 @@ if __name__ == "__main__":
     options = parser.parse_args()
     seeds = options.seeds
     outfile = options.out
-    fuzzer = options.fuzzer
+    #fuzzer = options.fuzzer
     cmd = options.cmd
-    vectorizer = options.vectorizer
+    #vectorizer = options.vectorizer
     program = cmd.split(" ")[0]
     programf = program.replace("/","__")
     timeout = 5

diff --git a/vdiscover/Cluster.py b/vdiscover/Cluster.py
@@ -246,14 +246,14 @@ def TrainDeepRepr(model_file, train_file, valid_file, ftype, nsamples):
   #model.save_weights(model_file)
   modelfile.write(pickle.dumps(preprocessor, protocol=2))
 
-"""
+
 def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples):
 
   #import matplotlib.pyplot as plt
   #import matplotlib as mpl
 
-  csvreader = open_csv(train_file)
-  train_programs, train_features, train_classes = read_traces(csvreader, train_file, nsamples)
+  #csvreader = open_csv(train_file)
+  train_programs, train_features, train_classes = read_traces(train_file, nsamples)
   train_size = len(train_programs)
 
   print "using", train_size,"examples to train."
@@ -263,7 +263,7 @@ def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples):
   #batch_size = 16
   #window_size = 20
 
-  from sklearn.cluster import MeanShift
+  #from sklearn.cluster import MeanShift
 
   print "Transforming data and fitting model.."
   model = make_cluster_pipeline_bow(ftype)
@@ -277,43 +277,44 @@ def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples):
     x = gauss(0,0.1) + x
     y = gauss(0,0.1) + y
     plt.scatter(x, y, c=colors[cl])
-    plt.text(x, y+0.2, prog.split("-")[-1])
+    plt.text(x, y+0.02, prog.split("/")[-1])
 
-  af = MeanShift().fit(X_red)
+  plt.show()
+  #af = MeanShift().fit(X_red)
 
-  cluster_centers = af.cluster_centers_
-  labels = af.labels_
-  n_clusters_ = len(cluster_centers)
+  #cluster_centers = af.cluster_centers_
+  #labels = af.labels_
+  #n_clusters_ = len(cluster_centers)
 
-  plt.close('all')
-  plt.figure(1)
-  plt.clf()
+  #plt.close('all')
+  #plt.figure(1)
+  #plt.clf()
 
-  for k, col in zip(range(n_clusters_), colors):
-    my_members = labels == k
-    cluster_center = cluster_centers[k]
-    plt.plot(X_red[my_members, 0], X_red[my_members, 1], col + '.')
-    plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
-             markeredgecolor='k', markersize=14)
+  #for k, col in zip(range(n_clusters_), colors):
+  #  my_members = labels == k
+  #  cluster_center = cluster_centers[k]
+  #  plt.plot(X_red[my_members, 0], X_red[my_members, 1], col + '.')
+  #  plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
+  #           markeredgecolor='k', markersize=14)
 
 
-  plt.title('Estimated number of clusters: %d' % n_clusters_)
-  plt.show()
-"""
+  #plt.title('Estimated number of clusters: %d' % n_clusters_)
+  #plt.show()
 
-def Cluster(model_file, out_file, train_file, valid_file, ttype, ftype, nsamples, outfile):
+def Cluster(train_file, valid_file, ftype, nsamples):
 
-  if ttype == "cluster":
+  ClusterScikit(None, train_file, valid_file, ftype, nsamples)
 
+  #if ttype == "cluster":
     #ClusterScikit(out_file, train_file, valid_file, ftype, nsamples)
 
-    try:
-      import keras
-    except:
-      print "Failed to import keras modules to perform LSTM training"
-      return
+    #try:
+    #  import keras
+    #except:
+    #  print "Failed to import keras modules to perform LSTM training"
+    #  return
 
-    if model_file is None:
-      TrainDeepRepr(out_file, train_file, valid_file, ftype, nsamples)
-    else:
-      PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples, outfile)
+    #if model_file is None:
+    #  TrainDeepRepr(out_file, train_file, valid_file, ftype, nsamples)
+    #else:
+    #  PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples, outfile)
diff --git a/vdiscover/Pipeline.py b/vdiscover/Pipeline.py
@@ -71,10 +71,11 @@ def __init__(self, maxv):
         self.maxv = maxv
 
     def fit(self, x, y=None):
-        self.pos = x > self.maxv
+        #self.pos = x > self.maxv
         return self
 
     def transform(self, X, y=None, **fit_params):
+        self.pos = X > self.maxv
         X[self.pos] = self.maxv
         return X
 

diff --git a/vpredictor b/vpredictor
@@ -123,7 +123,7 @@ if __name__ == "__main__":
       elif training_mode_lstm:
         Train(out_file, in_file, valid_file, "lstm", ftype, nsamples)
       elif training_mode_cluster:
-        Cluster(model_file, out_file, in_file, valid_file, "cluster", ftype, nsamples) 
+        Cluster(in_file, valid_file, ftype, nsamples)
     else:
       if model_file is None:
         print "VDiscover requires a pre-trained model to predict"