diff --git a/fextractor b/fextractor index 59131ad..42e4b2e 100755 --- a/fextractor +++ b/fextractor @@ -50,6 +50,11 @@ if __name__ == "__main__": # To help argparse to detect the number of columns correctly #os.environ['COLUMNS'] = str(os.popen('stty size', 'r').read().split()[1]) #str(shutil.get_terminal_size().columns) + if open("/proc/sys/kernel/randomize_va_space").read().strip() != "0": + print("Address space layout randomization (ASLR) is enabled, disable it before continue to use the cache") + print("Hint: # echo 0 > /proc/sys/kernel/randomize_va_space") + sys.exit(-1) + # Arguments parser = argparse.ArgumentParser(description='Feature extraction of VDiscover') parser.add_argument("testcase", help="Testcase to analyze", type=str, default=None) diff --git a/vd b/vd index df0b633..50a5f24 100755 --- a/vd +++ b/vd @@ -36,6 +36,12 @@ from vdiscover.Cluster import PlotDeepRepr from vdiscover.Utils import update_progress if __name__ == "__main__": + + if open("/proc/sys/kernel/randomize_va_space").read().strip() != "0": + print("Address space layout randomization (ASLR) is enabled, disable it before continue to use the cache") + print("Hint: # echo 0 > /proc/sys/kernel/randomize_va_space") + sys.exit(-1) + # Arguments parser = argparse.ArgumentParser(description='') parser.add_argument("-i", help="", type=str, default=None, required=True, dest="seeds") @@ -47,9 +53,9 @@ if __name__ == "__main__": options = parser.parse_args() seeds = options.seeds outfile = options.out - fuzzer = options.fuzzer + #fuzzer = options.fuzzer cmd = options.cmd - vectorizer = options.vectorizer + #vectorizer = options.vectorizer program = cmd.split(" ")[0] programf = program.replace("/","__") timeout = 5 diff --git a/vdiscover/Cluster.py b/vdiscover/Cluster.py index 54a98f7..38905cb 100644 --- a/vdiscover/Cluster.py +++ b/vdiscover/Cluster.py @@ -246,14 +246,14 @@ def TrainDeepRepr(model_file, train_file, valid_file, ftype, nsamples): #model.save_weights(model_file) modelfile.write(pickle.dumps(preprocessor, protocol=2)) -""" + def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples): #import matplotlib.pyplot as plt #import matplotlib as mpl - csvreader = open_csv(train_file) - train_programs, train_features, train_classes = read_traces(csvreader, train_file, nsamples) + #csvreader = open_csv(train_file) + train_programs, train_features, train_classes = read_traces(train_file, nsamples) train_size = len(train_programs) print "using", train_size,"examples to train." @@ -263,7 +263,7 @@ def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples): #batch_size = 16 #window_size = 20 - from sklearn.cluster import MeanShift + #from sklearn.cluster import MeanShift print "Transforming data and fitting model.." model = make_cluster_pipeline_bow(ftype) @@ -277,43 +277,44 @@ def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples): x = gauss(0,0.1) + x y = gauss(0,0.1) + y plt.scatter(x, y, c=colors[cl]) - plt.text(x, y+0.2, prog.split("-")[-1]) + plt.text(x, y+0.02, prog.split("/")[-1]) - af = MeanShift().fit(X_red) + plt.show() + #af = MeanShift().fit(X_red) - cluster_centers = af.cluster_centers_ - labels = af.labels_ - n_clusters_ = len(cluster_centers) + #cluster_centers = af.cluster_centers_ + #labels = af.labels_ + #n_clusters_ = len(cluster_centers) - plt.close('all') - plt.figure(1) - plt.clf() + #plt.close('all') + #plt.figure(1) + #plt.clf() - for k, col in zip(range(n_clusters_), colors): - my_members = labels == k - cluster_center = cluster_centers[k] - plt.plot(X_red[my_members, 0], X_red[my_members, 1], col + '.') - plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col, - markeredgecolor='k', markersize=14) + #for k, col in zip(range(n_clusters_), colors): + # my_members = labels == k + # cluster_center = cluster_centers[k] + # plt.plot(X_red[my_members, 0], X_red[my_members, 1], col + '.') + # plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col, + # markeredgecolor='k', markersize=14) - plt.title('Estimated number of clusters: %d' % n_clusters_) - plt.show() -""" + #plt.title('Estimated number of clusters: %d' % n_clusters_) + #plt.show() -def Cluster(model_file, out_file, train_file, valid_file, ttype, ftype, nsamples, outfile): +def Cluster(train_file, valid_file, ftype, nsamples): - if ttype == "cluster": + ClusterScikit(None, train_file, valid_file, ftype, nsamples) + #if ttype == "cluster": #ClusterScikit(out_file, train_file, valid_file, ftype, nsamples) - try: - import keras - except: - print "Failed to import keras modules to perform LSTM training" - return + #try: + # import keras + #except: + # print "Failed to import keras modules to perform LSTM training" + # return - if model_file is None: - TrainDeepRepr(out_file, train_file, valid_file, ftype, nsamples) - else: - PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples, outfile) + #if model_file is None: + # TrainDeepRepr(out_file, train_file, valid_file, ftype, nsamples) + #else: + # PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples, outfile) diff --git a/vdiscover/Pipeline.py b/vdiscover/Pipeline.py index 6a442c9..4d8e851 100644 --- a/vdiscover/Pipeline.py +++ b/vdiscover/Pipeline.py @@ -71,10 +71,11 @@ def __init__(self, maxv): self.maxv = maxv def fit(self, x, y=None): - self.pos = x > self.maxv + #self.pos = x > self.maxv return self def transform(self, X, y=None, **fit_params): + self.pos = X > self.maxv X[self.pos] = self.maxv return X diff --git a/vpredictor b/vpredictor index 58e49d6..a9674cf 100755 --- a/vpredictor +++ b/vpredictor @@ -123,7 +123,7 @@ if __name__ == "__main__": elif training_mode_lstm: Train(out_file, in_file, valid_file, "lstm", ftype, nsamples) elif training_mode_cluster: - Cluster(model_file, out_file, in_file, valid_file, "cluster", ftype, nsamples) + Cluster(in_file, valid_file, ftype, nsamples) else: if model_file is None: print "VDiscover requires a pre-trained model to predict"