Skip to content

Commit

Permalink
improvements
Browse files (browse the repository at this point in the history)
  • Loading branch information
gaa-cifasis committed Oct 19, 2015
1 parent 48d4120 commit fd5353c
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 36 deletions.
5 changes: 5 additions & 0 deletions fextractor
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ if __name__ == "__main__":
# To help argparse to detect the number of columns correctly
#os.environ['COLUMNS'] = str(os.popen('stty size', 'r').read().split()[1]) #str(shutil.get_terminal_size().columns)

if open("/proc/sys/kernel/randomize_va_space").read().strip() != "0":
print("Address space layout randomization (ASLR) is enabled, disable it before continue to use the cache")
print("Hint: # echo 0 > /proc/sys/kernel/randomize_va_space")
sys.exit(-1)

# Arguments
parser = argparse.ArgumentParser(description='Feature extraction of VDiscover')
parser.add_argument("testcase", help="Testcase to analyze", type=str, default=None)
Expand Down
10 changes: 8 additions & 2 deletions vd
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ from vdiscover.Cluster import PlotDeepRepr
from vdiscover.Utils import update_progress
if __name__ == "__main__":


if open("/proc/sys/kernel/randomize_va_space").read().strip() != "0":
print("Address space layout randomization (ASLR) is enabled, disable it before continue to use the cache")
print("Hint: # echo 0 > /proc/sys/kernel/randomize_va_space")
sys.exit(-1)

# Arguments
parser = argparse.ArgumentParser(description='')
parser.add_argument("-i", help="", type=str, default=None, required=True, dest="seeds")
Expand All @@ -47,9 +53,9 @@ if __name__ == "__main__":
options = parser.parse_args()
seeds = options.seeds
outfile = options.out
fuzzer = options.fuzzer
#fuzzer = options.fuzzer
cmd = options.cmd
vectorizer = options.vectorizer
#vectorizer = options.vectorizer
program = cmd.split(" ")[0]
programf = program.replace("/","__")
timeout = 5
Expand Down
65 changes: 33 additions & 32 deletions vdiscover/Cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,14 +246,14 @@ def TrainDeepRepr(model_file, train_file, valid_file, ftype, nsamples):
#model.save_weights(model_file)
modelfile.write(pickle.dumps(preprocessor, protocol=2))

"""

def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples):

#import matplotlib.pyplot as plt
#import matplotlib as mpl

csvreader = open_csv(train_file)
train_programs, train_features, train_classes = read_traces(csvreader, train_file, nsamples)
#csvreader = open_csv(train_file)
train_programs, train_features, train_classes = read_traces(train_file, nsamples)
train_size = len(train_programs)

print "using", train_size,"examples to train."
Expand All @@ -263,7 +263,7 @@ def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples):
#batch_size = 16
#window_size = 20

from sklearn.cluster import MeanShift
#from sklearn.cluster import MeanShift

print "Transforming data and fitting model.."
model = make_cluster_pipeline_bow(ftype)
Expand All @@ -277,43 +277,44 @@ def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples):
x = gauss(0,0.1) + x
y = gauss(0,0.1) + y
plt.scatter(x, y, c=colors[cl])
plt.text(x, y+0.2, prog.split("-")[-1])
plt.text(x, y+0.02, prog.split("/")[-1])

af = MeanShift().fit(X_red)
plt.show()
#af = MeanShift().fit(X_red)

cluster_centers = af.cluster_centers_
labels = af.labels_
n_clusters_ = len(cluster_centers)
#cluster_centers = af.cluster_centers_
#labels = af.labels_
#n_clusters_ = len(cluster_centers)

plt.close('all')
plt.figure(1)
plt.clf()
#plt.close('all')
#plt.figure(1)
#plt.clf()

for k, col in zip(range(n_clusters_), colors):
my_members = labels == k
cluster_center = cluster_centers[k]
plt.plot(X_red[my_members, 0], X_red[my_members, 1], col + '.')
plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
markeredgecolor='k', markersize=14)
#for k, col in zip(range(n_clusters_), colors):
# my_members = labels == k
# cluster_center = cluster_centers[k]
# plt.plot(X_red[my_members, 0], X_red[my_members, 1], col + '.')
# plt.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
# markeredgecolor='k', markersize=14)


plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()
"""
#plt.title('Estimated number of clusters: %d' % n_clusters_)
#plt.show()

def Cluster(model_file, out_file, train_file, valid_file, ttype, ftype, nsamples, outfile):
def Cluster(train_file, valid_file, ftype, nsamples):

if ttype == "cluster":
ClusterScikit(None, train_file, valid_file, ftype, nsamples)

#if ttype == "cluster":
#ClusterScikit(out_file, train_file, valid_file, ftype, nsamples)

try:
import keras
except:
print "Failed to import keras modules to perform LSTM training"
return
#try:
# import keras
#except:
# print "Failed to import keras modules to perform LSTM training"
# return

if model_file is None:
TrainDeepRepr(out_file, train_file, valid_file, ftype, nsamples)
else:
PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples, outfile)
#if model_file is None:
# TrainDeepRepr(out_file, train_file, valid_file, ftype, nsamples)
#else:
# PlotDeepRepr(model_file, train_file, valid_file, ftype, nsamples, outfile)
3 changes: 2 additions & 1 deletion vdiscover/Pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,11 @@ def __init__(self, maxv):
self.maxv = maxv

def fit(self, x, y=None):
self.pos = x > self.maxv
#self.pos = x > self.maxv
return self

def transform(self, X, y=None, **fit_params):
self.pos = X > self.maxv
X[self.pos] = self.maxv
return X

Expand Down
2 changes: 1 addition & 1 deletion vpredictor
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ if __name__ == "__main__":
elif training_mode_lstm:
Train(out_file, in_file, valid_file, "lstm", ftype, nsamples)
elif training_mode_cluster:
Cluster(model_file, out_file, in_file, valid_file, "cluster", ftype, nsamples)
Cluster(in_file, valid_file, ftype, nsamples)
else:
if model_file is None:
print "VDiscover requires a pre-trained model to predict"
Expand Down

0 comments on commit fd5353c

Please sign in to comment.