From 2f1cfb31ae6120f00976d861e97b3f3e2a0ebde4 Mon Sep 17 00:00:00 2001 From: gustavo Date: Thu, 19 Jan 2017 16:48:32 -0300 Subject: [PATCH] autopep8, starting refactoring --- fextractor | 168 ++++---- setup.py | 11 +- tcreator | 84 ++-- tseeder | 26 +- vd | 112 ++--- vdiscover/Alarm.py | 8 +- vdiscover/Analysis.py | 79 ++-- vdiscover/Backtrace.py | 19 +- vdiscover/Cluster.py | 457 +++++++++++---------- vdiscover/Detection.py | 198 ++++----- vdiscover/ELF.py | 373 +++++++++-------- vdiscover/Event.py | 332 ++++++++------- vdiscover/Input.py | 155 +++---- vdiscover/MemoryMap.py | 180 ++++---- vdiscover/Misc.py | 35 +- vdiscover/Mutation.py | 317 +++++++------- vdiscover/Pipeline.py | 772 ++++++++++++++++++----------------- vdiscover/Printer.py | 110 ++--- vdiscover/Process.py | 354 ++++++++-------- vdiscover/RandomWalk.py | 289 ++++++------- vdiscover/Recall.py | 87 ++-- vdiscover/Run.py | 89 ++-- vdiscover/Sampling.py | 37 +- vdiscover/Spec.py | 8 +- vdiscover/Train.py | 244 +++++------ vdiscover/Types.py | 95 +++-- vdiscover/Utils.py | 250 ++++++------ vdiscover/Vulnerabilities.py | 56 +-- vdp | 94 +++-- vpredictor | 150 ++++--- 30 files changed, 2740 insertions(+), 2449 deletions(-) diff --git a/fextractor b/fextractor index d5198ca..9ad6ca4 100755 --- a/fextractor +++ b/fextractor @@ -25,7 +25,7 @@ import csv import sys import random -from vdiscover.Detection import GetArgs, GetFiles, GetCmd +from vdiscover.Detection import GetArgs, GetFiles, GetCmd # static feature extraction @@ -33,13 +33,11 @@ from vdiscover.RandomWalk import RandomWalkElf # dynamic feature extraction -from vdiscover.Process import Process -from vdiscover.Mutation import NullMutator, RandomByteMutator, RandomExpanderMutator, RandomInputMutator -from vdiscover.Printer import TypePrinter -from vdiscover.Misc import readmodfile -from vdiscover.Input import prepare_inputs - - +from vdiscover.Process import Process +from vdiscover.Mutation import NullMutator, RandomByteMutator, RandomExpanderMutator, RandomInputMutator +from vdiscover.Printer import TypePrinter +from vdiscover.Misc import readmodfile +from vdiscover.Input import prepare_inputs if __name__ == "__main__": @@ -48,7 +46,8 @@ if __name__ == "__main__": random.seed() # To help argparse to detect the number of columns correctly - #os.environ['COLUMNS'] = str(os.popen('stty size', 'r').read().split()[1]) #str(shutil.get_terminal_size().columns) + # os.environ['COLUMNS'] = str(os.popen('stty size', + # 'r').read().split()[1]) #str(shutil.get_terminal_size().columns) if open("/proc/sys/kernel/randomize_va_space").read().strip() != "0": print("Address space layout randomization (ASLR) is enabled, disable it before continue to use the cache") @@ -56,8 +55,10 @@ if __name__ == "__main__": sys.exit(-1) # Arguments - parser = argparse.ArgumentParser(description='Feature extraction of VDiscover') - parser.add_argument("testcase", help="Testcase to analyze", type=str, default=None) + parser = argparse.ArgumentParser( + description='Feature extraction of VDiscover') + parser.add_argument( + "testcase", help="Testcase to analyze", type=str, default=None) parser.add_argument("--static", help="Extract only static features from an executable", @@ -67,40 +68,65 @@ if __name__ == "__main__": help="Extract only dynamic features from a testcase", action="store_true", default=False) - parser.add_argument("--mclass", type=str, - help="Include class column, to use later in training mode", - action="store", default=None) + parser.add_argument( + "--mclass", + type=str, + help="Include 
class column, to use later in training mode", + action="store", + default=None) parser.add_argument("--out-file", help="File to output the extracted features", type=str, default="/dev/stdout") - parser.add_argument("--max-subtraces-collected", type=int, - help="Maximum number of subtraces collected (static features only)", default=100) - - parser.add_argument("--max-subtraces-explored", type=int, - help="Maximum number of subtraces explored (static features only)", default=10000) - - parser.add_argument("--min-subtrace-size", type=int, - help="Minumum number of events in each subtrace collected (static features only)", default=3) - - parser.add_argument("--show-stdout", - help="Don't use /dev/null as stdout/stderr (dynamic features only)", - action="store_true", default=False) - - parser.add_argument("--inc-mods", - help="Only extract features from the libraries matching the strings inside this file (dynamic features only)", - type=str, default=None) - - parser.add_argument("--ign-mods", - help="Ignore extracted features from the libraries matching the string inside this file (dynamic features only)", - type=str, default=None) - - parser.add_argument("--timeout", dest="timeout", type=int, - help="Timeout in seconds (dynamic features only)", default=3) - - parser.add_argument("--max-mutations", type=int, - help="Maximum number of mutations to the original testcase (dynamic features only)", default=0) + parser.add_argument( + "--max-subtraces-collected", + type=int, + help="Maximum number of subtraces collected (static features only)", + default=100) + + parser.add_argument( + "--max-subtraces-explored", + type=int, + help="Maximum number of subtraces explored (static features only)", + default=10000) + + parser.add_argument( + "--min-subtrace-size", + type=int, + help="Minumum number of events in each subtrace collected (static features only)", + default=3) + + parser.add_argument( + "--show-stdout", + help="Don't use /dev/null as stdout/stderr (dynamic features only)", + action="store_true", + default=False) + + parser.add_argument( + "--inc-mods", + help="Only extract features from the libraries matching the strings inside this file (dynamic features only)", + type=str, + default=None) + + parser.add_argument( + "--ign-mods", + help="Ignore extracted features from the libraries matching the string inside this file (dynamic features only)", + type=str, + default=None) + + parser.add_argument( + "--timeout", + dest="timeout", + type=int, + help="Timeout in seconds (dynamic features only)", + default=3) + + parser.add_argument( + "--max-mutations", + type=int, + help="Maximum number of mutations to the original testcase (dynamic features only)", + default=0) options = parser.parse_args() testcase = options.testcase @@ -108,13 +134,14 @@ if __name__ == "__main__": static_only = options.static dynamic_only = options.dynamic - if (not static_only and not dynamic_only) or (static_only and dynamic_only): - print "The feature extraction requires to select either static of dynamic features exclusively" - exit(-1) + if (not static_only and not dynamic_only) or ( + static_only and dynamic_only): + print "The feature extraction requires to select either static of dynamic features exclusively" + exit(-1) max_subtraces_collected = options.max_subtraces_collected - max_subtraces_explored = options.max_subtraces_explored - min_subtrace_size = options.min_subtrace_size + max_subtraces_explored = options.max_subtraces_explored + min_subtrace_size = options.min_subtrace_size incmodfile = options.inc_mods 
ignmodfile = options.ign_mods @@ -135,39 +162,42 @@ if __name__ == "__main__": if static_only: - RandomWalkElf(program, csvfile, mclass, max_subtraces_collected, max_subtraces_explored, min_subtrace_size) + RandomWalkElf(program, csvfile, mclass, max_subtraces_collected, + max_subtraces_explored, min_subtrace_size) elif dynamic_only: - os.chdir("inputs") + os.chdir("inputs") - envs = dict() - args = GetArgs() - files = GetFiles() + envs = dict() + args = GetArgs() + files = GetFiles() - original_inputs = RandomInputMutator(args + files, NullMutator) - #expanded_input_generator = RandomInputMutator(args + files, RandomExpanderMutator) - mutated_input_generator = RandomInputMutator(args + files, RandomByteMutator) - if included_mods == []: - included_mods = [program] + original_inputs = RandomInputMutator(args + files, NullMutator) + #expanded_input_generator = RandomInputMutator(args + files, RandomExpanderMutator) + mutated_input_generator = RandomInputMutator( + args + files, RandomByteMutator) + if included_mods == []: + included_mods = [program] - app = Process(program, envs, timeout, included_mods, ignored_mods, no_stdout = not show_stdout ) - prt = TypePrinter(csvfile, testcase, mclass) + app = Process(program, envs, timeout, included_mods, + ignored_mods, no_stdout=not show_stdout) + prt = TypePrinter(csvfile, testcase, mclass) - # unchanged input - null_mutt, original_input = original_inputs.next() - original_events = app.getData(prepare_inputs(original_input)) + # unchanged input + null_mutt, original_input = original_inputs.next() + original_events = app.getData(prepare_inputs(original_input)) - if original_events is None: - print "Execution of",program,"failed!" - exit(-1) + if original_events is None: + print "Execution of", program, "failed!" 
+ exit(-1) - prt.print_events(program,original_events) + prt.print_events(program, original_events) - for (i, (d, mutated)) in enumerate(mutated_input_generator): + for (i, (d, mutated)) in enumerate(mutated_input_generator): - if i >= max_mut: - break + if i >= max_mut: + break - events = app.getData(prepare_inputs(mutated)) - prt.print_events(program,events) + events = app.getData(prepare_inputs(mutated)) + prt.print_events(program, events) diff --git a/setup.py b/setup.py index 5690720..9f798a2 100755 --- a/setup.py +++ b/setup.py @@ -12,10 +12,13 @@ url='http://vdiscover.org/', author='G.Grieco', author_email='gg@cifasis-conicet.gov.ar', - scripts=['fextractor', 'vpredictor', 'tcreator', 'tseeder', 'vd'], + scripts=[ + 'fextractor', + 'vpredictor', + 'tcreator', + 'tseeder', + 'vd'], install_requires=[ "python-ptrace", - "scikit-learn" - ], + "scikit-learn"], ) - diff --git a/tcreator b/tcreator index f3edd73..c65f05a 100755 --- a/tcreator +++ b/tcreator @@ -25,61 +25,73 @@ import sys import csv from vdiscover.Detection import WriteTestcase -concatenate = lambda *lists: reduce((lambda a,b: a.extend(b) or a),lists,[]) +from functools import reduce +concatenate = lambda *lists: reduce((lambda a, b: a.extend(b) or a), lists, []) if __name__ == "__main__": # Arguments - parser = argparse.ArgumentParser(description='A small utility to create new test cases using a name and a command line') - parser.add_argument("--name", help="The name of the ", type=str, default=None) - parser.add_argument("--cmd", help="Command-line to execute", type=str, default=None) - parser.add_argument("--batch", help="A csv with the command lines", type=str, default=None) - - parser.add_argument("--copy", help="Force the copy of the files in command lines instead of symbolic linking", action='store_true', default=False) - - parser.add_argument("outdir", help="Output directory to write testcases", type=str, default=None) + parser = argparse.ArgumentParser( + description='A small utility to create new test cases using a name and a command line') + parser.add_argument("--name", help="The name of the ", + type=str, default=None) + parser.add_argument( + "--cmd", help="Command-line to execute", type=str, default=None) + parser.add_argument( + "--batch", help="A csv with the command lines", type=str, default=None) + + parser.add_argument( + "--copy", + help="Force the copy of the files in command lines instead of symbolic linking", + action='store_true', + default=False) + + parser.add_argument( + "outdir", + help="Output directory to write testcases", + type=str, + default=None) options = parser.parse_args() name = options.name cmd = options.cmd in_file = options.batch copy = options.copy - out_dir= options.outdir + out_dir = options.outdir if (name is not None and cmd is not None) ^ (in_file is not None): - pass + pass else: - #or (name not is None and cmd is not None) and in_file is None: - print "Either name and command should be used or an input file" - exit(-1) + # or (name not is None and cmd is not None) and in_file is None: + print "Either name and command should be used or an input file" + exit(-1) try: - os.makedirs(out_dir) + os.makedirs(out_dir) except: - pass + pass if in_file is not None: - infile = open(in_file,"r") - csvreader = csv.reader(infile, delimiter='\t') - os.chdir(out_dir) + infile = open(in_file, "r") + csvreader = csv.reader(infile, delimiter='\t') + os.chdir(out_dir) - for i,row in enumerate(csvreader): - args = filter(lambda x: x is not '', row[0].split(" ")) - name = 
args[0].replace("/","_")+":"+str(i) - WriteTestcase(name,args[0],args[1:], copy) + for i, row in enumerate(csvreader): + args = filter(lambda x: x is not '', row[0].split(" ")) + name = args[0].replace("/", "_") + ":" + str(i) + WriteTestcase(name, args[0], args[1:], copy) else: - os.chdir(out_dir) - args = cmd.split("'") - args = map(lambda x: x.split(" "), args) - pargs = [] - - for arg in args: - if arg <> '': - pargs = pargs + arg - #args = concatenate(args) - print "Procesing '" + " ".join(pargs) + "'" - #args = filter(lambda x: x is not '', cmd.split(" ")) - WriteTestcase(name,pargs[0],pargs[1:], copy) - + os.chdir(out_dir) + args = cmd.split("'") + args = map(lambda x: x.split(" "), args) + pargs = [] + + for arg in args: + if arg != '': + pargs = pargs + arg + #args = concatenate(args) + print "Procesing '" + " ".join(pargs) + "'" + #args = filter(lambda x: x is not '', cmd.split(" ")) + WriteTestcase(name, pargs[0], pargs[1:], copy) diff --git a/tseeder b/tseeder index 1f7d8fa..6a13991 100755 --- a/tseeder +++ b/tseeder @@ -33,10 +33,20 @@ csv.field_size_limit(sys.maxsize) if __name__ == "__main__": # Arguments - parser = argparse.ArgumentParser(description='A small utility to perform seed selection for fuzzig') - parser.add_argument("infile", help="A csv with the features to train or predict", type=str, default=None) - parser.add_argument("outdir", help="A directory with the seeds", type=str, default=None) - parser.add_argument("-n", help="Number of seeds to select per cluster", type=int, default=1) + parser = argparse.ArgumentParser( + description='A small utility to perform seed selection for fuzzig') + parser.add_argument( + "infile", + help="A csv with the features to train or predict", + type=str, + default=None) + parser.add_argument( + "outdir", help="A directory with the seeds", type=str, default=None) + parser.add_argument( + "-n", + help="Number of seeds to select per cluster", + type=int, + default=1) #parser.add_argument("--random", help="Sample randomly", action="store_true", default=None) options = parser.parse_args() @@ -47,14 +57,14 @@ if __name__ == "__main__": reader = load_csv(in_file) clusters = [] for [label, cluster] in reader: - clusters.append((label.split(":")[-1], cluster)) + clusters.append((label.split(":")[-1], cluster)) selected = cluster_sampler(clusters, nseeds) if not os.path.exists(outdir): - os.makedirs(outdir) + os.makedirs(outdir) print "Copying seeds.." 
for seed in selected: - print seed - shutil.copy(seed, outdir) + print seed + shutil.copy(seed, outdir) diff --git a/vd b/vd index 26d7cae..b8304b0 100755 --- a/vd +++ b/vd @@ -24,21 +24,20 @@ import os.path import argparse import sys import csv -import random +import random csv.field_size_limit(sys.maxsize) -sys.setrecursionlimit(1024*1024*1024) +sys.setrecursionlimit(1024 * 1024 * 1024) #from vdiscover.Detection import WriteTestcase -from vdiscover.Process import Process -from vdiscover.Printer import TypePrinter +from vdiscover.Process import Process +from vdiscover.Printer import TypePrinter #from vdiscover.Cluster import ClusterScikit, ClusterConv from vdiscover.Utils import update_progress -from vdiscover.Sampling import cluster_sampler +from vdiscover.Sampling import cluster_sampler if __name__ == "__main__": - if open("/proc/sys/kernel/randomize_va_space").read().strip() != "0": print("Address space layout randomization (ASLR) is enabled, disable it before continue to use the cache") print("Hint: # echo 0 > /proc/sys/kernel/randomize_va_space") @@ -46,8 +45,10 @@ if __name__ == "__main__": # Arguments parser = argparse.ArgumentParser(description='') - parser.add_argument("-i", help="", type=str, default=None, required=True, dest="seeds") - parser.add_argument("-o", help="", type=str, default=None, required=True, dest="out") + parser.add_argument("-i", help="", type=str, + default=None, required=True, dest="seeds") + parser.add_argument("-o", help="", type=str, + default=None, required=True, dest="out") parser.add_argument("-m", help="", type=str, nargs='+', dest="mods") #parser.add_argument("-v", help="", type=str, default=None, required=False, dest="vectorizer") @@ -62,62 +63,63 @@ if __name__ == "__main__": cmd = options.cmd #vectorizer = options.vectorizer program = cmd.split(" ")[0] - programf = program.replace("/","__") + programf = program.replace("/", "__") main_module = program.split("/")[-1] timeout = 15 envs = dict() - traces_path = outfile#outdir+"/traces.raw" + traces_path = outfile # outdir+"/traces.raw" if os.path.exists(traces_path): - print traces_path, "exists. I will not overwritte it. Aborting" + print traces_path, "exists. I will not overwritte it. Aborting" else: - modules_to_trace = [main_module] - if mods is not None: - modules_to_trace = modules_to_trace + mods - - if "LD_LIBRARY_PATH" in os.environ: - libs = os.environ["LD_LIBRARY_PATH"] - for _,_,files in os.walk(libs): - for f in files: - modules_to_trace.append(f) - - print "Tracing", modules_to_trace - app = Process(program, envs, timeout, modules_to_trace, [], True) - prt = TypePrinter(traces_path, program, 0) - traces = [] - all_files = [] - - print "Extracting traces.." - for x,y,files in os.walk(seeds): - nfiles = len(files) - #print "Processing directory ","./"++("/".join(y)), "with", nfiles, "seeds" - for f in files: - all_files.append(x+"/".join(y)+f) - - random.shuffle(all_files) - nfiles = len(all_files) - - for progress,testcase in enumerate(all_files): - #print testcase - progress = round(float(progress)/nfiles,4) - update_progress(progress) - prepared_cmd = cmd.replace(program,"") - prepared_cmd = prepared_cmd.split("@@") - prepared_cmd = prepared_cmd[0].split(" ") + [testcase] + prepared_cmd[1].split(" ") - prepared_cmd = filter(lambda x: x<>'', prepared_cmd) - #print "Getting data.." - events = app.getData(prepared_cmd) - #print "Printing data.. 
", len(events) - traces.append(prt.print_events(testcase,events)) - #print prepared_cmd - #print traces[-1] - - - #if vectorizer is None: + modules_to_trace = [main_module] + if mods is not None: + modules_to_trace = modules_to_trace + mods + + if "LD_LIBRARY_PATH" in os.environ: + libs = os.environ["LD_LIBRARY_PATH"] + for _, _, files in os.walk(libs): + for f in files: + modules_to_trace.append(f) + + print "Tracing", modules_to_trace + app = Process(program, envs, timeout, modules_to_trace, [], True) + prt = TypePrinter(traces_path, program, 0) + traces = [] + all_files = [] + + print "Extracting traces.." + for x, y, files in os.walk(seeds): + nfiles = len(files) + # print "Processing directory ","./"++("/".join(y)), "with", + # nfiles, "seeds" + for f in files: + all_files.append(x + "/".join(y) + f) + + random.shuffle(all_files) + nfiles = len(all_files) + + for progress, testcase in enumerate(all_files): + # print testcase + progress = round(float(progress) / nfiles, 4) + update_progress(progress) + prepared_cmd = cmd.replace(program, "") + prepared_cmd = prepared_cmd.split("@@") + prepared_cmd = prepared_cmd[0].split( + " ") + [testcase] + prepared_cmd[1].split(" ") + prepared_cmd = filter(lambda x: x != '', prepared_cmd) + # print "Getting data.." + events = app.getData(prepared_cmd) + # print "Printing data.. ", len(events) + traces.append(prt.print_events(testcase, events)) + # print prepared_cmd + # print traces[-1] + + # if vectorizer is None: # clustered_traces = ClusterScikit(vectorizer, traces, None, "dynamic", None) - #else: + # else: # clustered_traces = ClusterConv(vectorizer, traces, None, "dynamic", None, None) # cluster_sampler(clustered_traces,1) # #print clusters diff --git a/vdiscover/Alarm.py b/vdiscover/Alarm.py index 2cf7373..6390cf6 100644 --- a/vdiscover/Alarm.py +++ b/vdiscover/Alarm.py @@ -19,10 +19,10 @@ import signal -class TimeoutEx(Exception): - pass -def alarm_handler(signum, frame): - raise TimeoutEx +class TimeoutEx(Exception): + pass +def alarm_handler(signum, frame): + raise TimeoutEx diff --git a/vdiscover/Analysis.py b/vdiscover/Analysis.py index b835b5a..c4e05ee 100644 --- a/vdiscover/Analysis.py +++ b/vdiscover/Analysis.py @@ -22,47 +22,48 @@ from Types import Type from ptrace.error import PtraceError + def FindModule(value, mm): - return mm.findModule(value) + return mm.findModule(value) def RefinePType(ptype, value, process, mm): - if value is None: - return (Type("Top32",4), value) - - if str(ptype) == "Ptr32": - ptr = value - if ptr == 0x0: - return (Type("NPtr32",4), ptr) - else: - - try: - _ = process.readBytes(ptr, 4) - except PtraceError: - return (Type("DPtr32",4), ptr) - - mm.checkPtr(ptr) - if mm.isStackPtr(ptr): - return (Type("SPtr32",4), ptr) - elif mm.isHeapPtr(ptr): - return (Type("HPtr32",4), ptr) - elif mm.isCodePtr(ptr): - return (Type("GxPtr32",4), ptr) - elif mm.isFilePtr(ptr): - return (Type("FPtr32",4), ptr) - elif mm.isGlobalPtr(ptr): - return (Type("GPtr32",4), ptr) - else: - return (Type("Ptr32",4), ptr) - - elif str(ptype) == "Num32": - num = value - if num == 0x0: - return (Type("Num32B0",4), num) - else: - binlen = len(bin(num))-2 - binlen = int(ceil(binlen / 8.0))*8 - return (Type("Num32B"+str(binlen),4), num) - - return (Type("Top32",4), value) + if value is None: + return (Type("Top32", 4), value) + + if str(ptype) == "Ptr32": + ptr = value + if ptr == 0x0: + return (Type("NPtr32", 4), ptr) + else: + + try: + _ = process.readBytes(ptr, 4) + except PtraceError: + return (Type("DPtr32", 4), ptr) + + mm.checkPtr(ptr) 
+ if mm.isStackPtr(ptr): + return (Type("SPtr32", 4), ptr) + elif mm.isHeapPtr(ptr): + return (Type("HPtr32", 4), ptr) + elif mm.isCodePtr(ptr): + return (Type("GxPtr32", 4), ptr) + elif mm.isFilePtr(ptr): + return (Type("FPtr32", 4), ptr) + elif mm.isGlobalPtr(ptr): + return (Type("GPtr32", 4), ptr) + else: + return (Type("Ptr32", 4), ptr) + + elif str(ptype) == "Num32": + num = value + if num == 0x0: + return (Type("Num32B0", 4), num) + else: + binlen = len(bin(num)) - 2 + binlen = int(ceil(binlen / 8.0)) * 8 + return (Type("Num32B" + str(binlen), 4), num) + + return (Type("Top32", 4), value) diff --git a/vdiscover/Backtrace.py b/vdiscover/Backtrace.py index edbe8f0..3dc0566 100644 --- a/vdiscover/Backtrace.py +++ b/vdiscover/Backtrace.py @@ -4,6 +4,7 @@ from ptrace import PtraceError #from ptrace.six.moves import xrange + class BacktraceFrame(object): """ Backtrace frame. @@ -13,6 +14,7 @@ class BacktraceFrame(object): - name: name of the function - arguments: value of the arguments """ + def __init__(self, ip): self.ip = ip self.name = u"???" @@ -20,12 +22,15 @@ def __init__(self, ip): def __str__(self): arguments = (formatWordHex(arg) for arg in self.arguments) - return u"IP=%s: %s (%s)" % (formatAddress(self.ip), self.name, ", ".join(arguments)) + return u"IP=%s: %s (%s)" % (formatAddress(self.ip), + self.name, ", ".join(arguments)) + class Backtrace(object): """ Backtrace: all process frames since the start function. """ + def __init__(self): self.frames = [] self.truncated = False @@ -39,6 +44,7 @@ def __iter__(self): def __len__(self): return len(self.frames) + def getBacktrace(process, max_args=6, max_depth=20): """ Get the current backtrace of the specified process: @@ -74,9 +80,9 @@ def getBacktrace(process, max_args=6, max_depth=20): # Create frame frame = getBacktraceFrame(process, ip, fp, nargs) - - #print frame - #print hex(fp),hex(nextfp), hex(nargs) + + # print frame + # print hex(fp),hex(nextfp), hex(nargs) backtrace.append(frame) # End of the stack? 
@@ -84,7 +90,7 @@ def getBacktrace(process, max_args=6, max_depth=20): break # Move to next instruction/frame pointer - ip = process.readWord(fp+CPU_WORD_SIZE) + ip = process.readWord(fp + CPU_WORD_SIZE) if ip == CPU_MAX_UINT: # Linux hack to detect end of the stack break @@ -92,6 +98,7 @@ def getBacktrace(process, max_args=6, max_depth=20): depth += 1 return backtrace + def getBacktraceFrame(process, ip, fp, nargs): """ Get a backtrace frame: @@ -112,5 +119,3 @@ def getBacktraceFrame(process, ip, fp, nargs): # Ignore argument read error pass return frame - - diff --git a/vdiscover/Cluster.py b/vdiscover/Cluster.py index a1013b0..9993a3d 100644 --- a/vdiscover/Cluster.py +++ b/vdiscover/Cluster.py @@ -27,14 +27,14 @@ import matplotlib as mpl # hack from https://stackoverflow.com/questions/2801882/generating-a-png-with-matplotlib-when-display-is-undefined to avoid using X -#mpl.use('Agg') +# mpl.use('Agg') import matplotlib.pyplot as plt from Utils import * from Pipeline import * -#def Cluster(X, labels) +# def Cluster(X, labels) """ assert(len(X_red) == len(labels)) @@ -63,8 +63,7 @@ plt.title('Estimated number of clusters: %d' % n_clusters) """ -#return zip(labels, cluster_labels) - +# return zip(labels, cluster_labels) batch_size = 25 @@ -77,54 +76,67 @@ hidden_dims = 50 nb_epoch = 3 -def ClusterCnn(model_file, train_file, valid_file, ftype, nsamples, outdir): - - f = open(model_file+".pre") - preprocessor = pickle.load(f) - - import h5py - f = h5py.File(model_file+".wei") - - layers = [] - for k in range(f.attrs['nb_layers']): - g = f['layer_{}'.format(k)] - layers.append([g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]) - max_features = len(preprocessor.tokenizer.word_counts) +def ClusterCnn(model_file, train_file, valid_file, ftype, nsamples, outdir): - print "Reading and sampling data to train.." - train_programs, train_features, train_classes = read_traces(train_file, nsamples, cut=None) - train_size = len(train_features) + f = open(model_file + ".pre") + preprocessor = pickle.load(f) + + import h5py + f = h5py.File(model_file + ".wei") + + layers = [] + for k in range(f.attrs['nb_layers']): + g = f['layer_{}'.format(k)] + layers.append([g['param_{}'.format(p)] + for p in range(g.attrs['nb_params'])]) + + max_features = len(preprocessor.tokenizer.word_counts) + + print "Reading and sampling data to train.." 
+ train_programs, train_features, train_classes = read_traces( + train_file, nsamples, cut=None) + train_size = len(train_features) + + #y = train_programs + X_train, y_train, labels = preprocessor.preprocess_traces( + train_features, y_data=train_classes, labels=train_programs) + new_model = make_cluster_cnn( + "test", + max_features, + maxlen, + embedding_dims, + nb_filters, + filter_length, + hidden_dims, + None, + weights=layers) - #y = train_programs - X_train, y_train, labels = preprocessor.preprocess_traces(train_features, y_data=train_classes, labels=train_programs) - new_model = make_cluster_cnn("test", max_features, maxlen, embedding_dims, nb_filters, filter_length, hidden_dims, None, weights=layers) + train_dict = dict() + train_dict[ftype] = new_model.predict(X_train) - train_dict = dict() - train_dict[ftype] = new_model.predict(X_train) + model = make_cluster_pipeline_subtraces(ftype) + X_red_comp = model.fit_transform(train_dict) + explained_var = np.var(X_red_comp, axis=0) + print explained_var - model = make_cluster_pipeline_subtraces(ftype) - X_red_comp = model.fit_transform(train_dict) - explained_var = np.var(X_red_comp, axis=0) - print explained_var + X_red = X_red_comp[:, 0:2] + X_red_next = X_red_comp[:, 2:4] - X_red = X_red_comp[:,0:2] - X_red_next = X_red_comp[:,2:4] + colors = mpl.colors.cnames.keys() + progs = list(set(labels)) + ncolors = len(colors) + size = len(labels) + print "Plotting.." - colors = mpl.colors.cnames.keys() - progs = list(set(labels)) - ncolors = len(colors) - size = len(labels) - print "Plotting.." - - for prog,[x,y] in zip(labels, X_red): - #for prog,[x,y] in sample(zip(labels, X_red), min(size, 1000)): - x = gauss(0,0.05) + x - y = gauss(0,0.05) + y - color = 'r' - plt.scatter(x, y, c=color ) + for prog, [x, y] in zip(labels, X_red): + # for prog,[x,y] in sample(zip(labels, X_red), min(size, 1000)): + x = gauss(0, 0.05) + x + y = gauss(0, 0.05) + y + color = 'r' + plt.scatter(x, y, c=color) - """ + """ if valid_file is not None: valid_programs, valid_features, valid_classes = read_traces(valid_file, None, cut=None, maxsize=window_size) #None) valid_dict = dict() @@ -141,46 +153,46 @@ def ClusterCnn(model_file, train_file, valid_file, ftype, nsamples, outdir): y = gauss(0,0.05) + y plt.scatter(x, y, c='b') plt.text(x, y+0.02, prog.split("/")[-1]) - + plt.show() """ - plt.savefig(train_file.replace(".gz","")+".png") - print "Bandwidth estimation.." - from sklearn.cluster import MeanShift, estimate_bandwidth + plt.savefig(train_file.replace(".gz", "") + ".png") + print "Bandwidth estimation.." 
+ from sklearn.cluster import MeanShift, estimate_bandwidth + X_red_sample = X_red[:min(size, 1000)] + bandwidth = estimate_bandwidth(X_red_sample, quantile=0.2) + print "Clustering with bandwidth:", bandwidth - X_red_sample = X_red[:min(size, 1000)] - bandwidth = estimate_bandwidth(X_red_sample, quantile=0.2) - print "Clustering with bandwidth:", bandwidth - - #X_red = np.vstack((X_red,X_red_valid)) - #X_red_next = np.vstack((X_red_next,X_red_valid_next)) - #labels = labels + valid_labels + #X_red = np.vstack((X_red,X_red_valid)) + #X_red_next = np.vstack((X_red_next,X_red_valid_next)) + #labels = labels + valid_labels - print X_red.shape, len(X_red), len(labels) - #print valid_labels - - af = MeanShift(bandwidth=bandwidth/1).fit(X_red) + print X_red.shape, len(X_red), len(labels) + # print valid_labels - cluster_centers = af.cluster_centers_ - cluster_labels = af.labels_ - n_clusters = len(cluster_centers) - - plt.figure() - for ([x,y],label, cluster_label) in zip(X_red,labels, cluster_labels): - #for ([x,y],label, cluster_label) in sample(zip(X_red,labels, cluster_labels), min(size, 1000)): - x = gauss(0,0.1) + x - y = gauss(0,0.1) + y - plt.scatter(x, y, c = colors[cluster_label % ncolors]) - #print label - #if label in valid_labels: - # plt.text(x-0.05, y+0.01, label.split("/")[-1]) + af = MeanShift(bandwidth=bandwidth / 1).fit(X_red) - for i,[x,y] in enumerate(cluster_centers): - plt.plot(x, y, 'o', markerfacecolor=colors[i % ncolors], - markeredgecolor='k', markersize=7) + cluster_centers = af.cluster_centers_ + cluster_labels = af.labels_ + n_clusters = len(cluster_centers) - """ + plt.figure() + for ([x, y], label, cluster_label) in zip(X_red, labels, cluster_labels): + # for ([x,y],label, cluster_label) in sample(zip(X_red,labels, + # cluster_labels), min(size, 1000)): + x = gauss(0, 0.1) + x + y = gauss(0, 0.1) + y + plt.scatter(x, y, c=colors[cluster_label % ncolors]) + # print label + # if label in valid_labels: + # plt.text(x-0.05, y+0.01, label.split("/")[-1]) + + for i, [x, y] in enumerate(cluster_centers): + plt.plot(x, y, 'o', markerfacecolor=colors[i % ncolors], + markeredgecolor='k', markersize=7) + + """ #for prog,[x,y] in zip(valid_labels, X_red_valid): #x = gauss(0,0.1) + x #y = gauss(0,0.1) + y @@ -193,14 +205,14 @@ def ClusterCnn(model_file, train_file, valid_file, ftype, nsamples, outdir): #plt.savefig("clusters.png") plt.show() """ - plt.savefig(train_file.replace(".gz","")+".clusters.png") + plt.savefig(train_file.replace(".gz", "") + ".clusters.png") - clustered_traces = zip(labels, cluster_labels) - writer = open_csv(train_file.replace(".gz","")+".clusters") - for label, cluster in clustered_traces: - writer.writerow([label, cluster]) + clustered_traces = zip(labels, cluster_labels) + writer = open_csv(train_file.replace(".gz", "") + ".clusters") + for label, cluster in clustered_traces: + writer.writerow([label, cluster]) - """ + """ clusters = dict() for label, cluster in clustered_traces: @@ -240,46 +252,56 @@ def ClusterCnn(model_file, train_file, valid_file, ftype, nsamples, outdir): #plt.savefig('cluster-%d.png' % cluster) """ - #return clustered_traces + # return clustered_traces def TrainCnn(model_file, train_file, valid_file, ftype, nsamples): - csvreader = open_csv(train_file) - - train_features = [] - train_programs = [] - train_classes = [] - - train_programs, train_features, train_classes = read_traces(train_file, nsamples, cut=None) - train_size = len(train_features) - - from keras.preprocessing.text import Tokenizer - - tokenizer = 
Tokenizer(nb_words=None, filters="", lower=False, split=" ") - #print type(train_features[0]) - tokenizer.fit_on_texts(train_features) - max_features = len(tokenizer.word_counts) - - preprocessor = DeepReprPreprocessor(tokenizer, window_size, batch_size) - X_train,y_train = preprocessor.preprocess(train_features, 10000) - nb_classes = len(preprocessor.classes) - print preprocessor.classes - - model = make_cluster_cnn("train", max_features, maxlen, embedding_dims, nb_filters, filter_length, hidden_dims, nb_classes) - model.fit(X_train, y_train, validation_split=0.1, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True) - - model.mypreprocessor = preprocessor - #model_file = model_file + ".wei" - #modelfile = open_model(model_file) - print "Saving model to",model_file + ".wei" - model.save_weights(model_file + ".wei") - - #model_file = model_file + ".pre" - modelfile = open_model(model_file + ".pre") - print "Saving preprocessor to",model_file + ".pre" - #model.save_weights(model_file) - modelfile.write(pickle.dumps(preprocessor, protocol=2)) + csvreader = open_csv(train_file) + + train_features = [] + train_programs = [] + train_classes = [] + + train_programs, train_features, train_classes = read_traces( + train_file, nsamples, cut=None) + train_size = len(train_features) + + from keras.preprocessing.text import Tokenizer + + tokenizer = Tokenizer(nb_words=None, filters="", lower=False, split=" ") + # print type(train_features[0]) + tokenizer.fit_on_texts(train_features) + max_features = len(tokenizer.word_counts) + + preprocessor = DeepReprPreprocessor(tokenizer, window_size, batch_size) + X_train, y_train = preprocessor.preprocess(train_features, 10000) + nb_classes = len(preprocessor.classes) + print preprocessor.classes + + model = make_cluster_cnn( + "train", + max_features, + maxlen, + embedding_dims, + nb_filters, + filter_length, + hidden_dims, + nb_classes) + model.fit(X_train, y_train, validation_split=0.1, + batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True) + + model.mypreprocessor = preprocessor + #model_file = model_file + ".wei" + #modelfile = open_model(model_file) + print "Saving model to", model_file + ".wei" + model.save_weights(model_file + ".wei") + + #model_file = model_file + ".pre" + modelfile = open_model(model_file + ".pre") + print "Saving preprocessor to", model_file + ".pre" + # model.save_weights(model_file) + modelfile.write(pickle.dumps(preprocessor, protocol=2)) """ def ClusterDoc2Vec(model_file, train_file, valid_file, ftype, nsamples, param): @@ -294,7 +316,7 @@ def ClusterDoc2Vec(model_file, train_file, valid_file, ftype, nsamples, param): print "Vectorizing traces.." sentences = [] - + for (prog,trace) in zip(train_programs,train_features): sentences.append(TaggedDocument(trace.split(" "), [prog])) @@ -333,7 +355,7 @@ def ClusterDoc2Vec(model_file, train_file, valid_file, ftype, nsamples, param): except ValueError: plt.text(x, y+0.02, cl) - #plt.show() + #plt.show() plt.savefig(train_file.replace(".gz","")+".png") from sklearn.cluster import MeanShift, estimate_bandwidth @@ -372,126 +394,135 @@ def ClusterDoc2Vec(model_file, train_file, valid_file, ftype, nsamples, param): """ -def ClusterScikit(model_file, train_file, valid_file, ftype, nsamples, vectorizer, reducer, param): - train_programs, train_features, train_classes = read_traces(train_file, nsamples) - train_size = len(train_programs) - print "using", train_size,"examples to train." 
+def ClusterScikit( + model_file, + train_file, + valid_file, + ftype, + nsamples, + vectorizer, + reducer, + param): - if vectorizer == "bow": - - train_dict = dict() - train_dict[ftype] = train_features - #batch_size = 16 - #window_size = 20 + train_programs, train_features, train_classes = read_traces( + train_file, nsamples) + train_size = len(train_programs) + print "using", train_size, "examples to train." - print "Transforming data and fitting model.." - model = make_cluster_pipeline_bow(ftype, reducer) - X_red = model.fit_transform(train_dict) + if vectorizer == "bow": - elif vectorizer == "doc2vec": + train_dict = dict() + train_dict[ftype] = train_features + #batch_size = 16 + #window_size = 20 - from gensim.models.doc2vec import TaggedDocument - from gensim.models import Doc2Vec + print "Transforming data and fitting model.." + model = make_cluster_pipeline_bow(ftype, reducer) + X_red = model.fit_transform(train_dict) - print "Vectorizing traces.." - sentences = [] - - for (prog,trace) in zip(train_programs,train_features): - sentences.append(TaggedDocument(trace.split(" "), [prog])) + elif vectorizer == "doc2vec": - model = Doc2Vec(dm=2, min_count=1, window=5, size=100, sample=1e-4, negative=5, workers=8, iter=1) - model.build_vocab(sentences) + from gensim.models.doc2vec import TaggedDocument + from gensim.models import Doc2Vec - for epoch in range(20): - #print model - model.train(sentences) - shuffle(sentences) + print "Vectorizing traces.." + sentences = [] - train_dict = dict() + for (prog, trace) in zip(train_programs, train_features): + sentences.append(TaggedDocument(trace.split(" "), [prog])) - vec_train_features = [] - for prog in train_programs: - #print prog, model.docvecs[prog] - vec_train_features.append(model.docvecs[prog]) + model = Doc2Vec(dm=2, min_count=1, window=5, size=100, + sample=1e-4, negative=5, workers=8, iter=1) + model.build_vocab(sentences) - train_dict[ftype] = vec_train_features + for epoch in range(20): + # print model + model.train(sentences) + shuffle(sentences) - print "Transforming data and fitting model.." - model = make_cluster_pipeline_doc2vec(ftype, reducer) - X_red = model.fit_transform(train_dict) + train_dict = dict() + vec_train_features = [] + for prog in train_programs: + # print prog, model.docvecs[prog] + vec_train_features.append(model.docvecs[prog]) - #pl.rcParams.update({'font.size': 10}) - if type(X_red) == list: - X_red = np.vstack(X_red) - print X_red.shape + train_dict[ftype] = vec_train_features - if X_red.shape[1] == 2: + print "Transforming data and fitting model.." 
+ model = make_cluster_pipeline_doc2vec(ftype, reducer) + X_red = model.fit_transform(train_dict) - plt.figure() - colors = 'brgcmykbgrcmykbgrcmykbgrcmyk' - ncolors = len(colors) + #pl.rcParams.update({'font.size': 10}) + if isinstance(X_red, list): + X_red = np.vstack(X_red) + print X_red.shape - for prog,[x,y],cl in zip(train_programs, X_red, train_classes): - x = gauss(0,0.1) + x - y = gauss(0,0.1) + y - try: - plt.scatter(x, y, c=colors[int(cl)]) - plt.text(x, y+0.02, prog.split("/")[-1]) - except ValueError: - plt.text(x, y+0.02, cl) - - - - if valid_file is not None: - valid_programs, valid_features, valid_classes = read_traces(valid_file, None) - valid_dict = dict() - valid_dict[ftype] = valid_features - - X_red = model.transform(valid_dict) - for prog,[x,y],cl in zip(valid_programs, X_red, valid_classes): - x = gauss(0,0.1) + x - y = gauss(0,0.1) + y - plt.scatter(x, y, c=colors[cl+1]) - plt.text(x, y+0.02, prog.split("/")[-1]) + if X_red.shape[1] == 2: - #plt.show() - plt.savefig(train_file.replace(".gz","")+".png") + plt.figure() + colors = 'brgcmykbgrcmykbgrcmykbgrcmyk' + ncolors = len(colors) + for prog, [x, y], cl in zip(train_programs, X_red, train_classes): + x = gauss(0, 0.1) + x + y = gauss(0, 0.1) + y + try: + plt.scatter(x, y, c=colors[int(cl)]) + plt.text(x, y + 0.02, prog.split("/")[-1]) + except ValueError: + plt.text(x, y + 0.02, cl) - from sklearn.cluster import MeanShift, estimate_bandwidth + if valid_file is not None: + valid_programs, valid_features, valid_classes = read_traces( + valid_file, None) + valid_dict = dict() + valid_dict[ftype] = valid_features - bandwidth = estimate_bandwidth(X_red, quantile=0.2) - print "Clustering with bandwidth:", bandwidth + X_red = model.transform(valid_dict) + for prog, [x, y], cl in zip(valid_programs, X_red, valid_classes): + x = gauss(0, 0.1) + x + y = gauss(0, 0.1) + y + plt.scatter(x, y, c=colors[cl + 1]) + plt.text(x, y + 0.02, prog.split("/")[-1]) - af = MeanShift(bandwidth=bandwidth*param).fit(X_red) + # plt.show() + plt.savefig(train_file.replace(".gz", "") + ".png") - cluster_centers = af.cluster_centers_ - labels = af.labels_ - n_clusters_ = len(cluster_centers) + from sklearn.cluster import MeanShift, estimate_bandwidth - if X_red.shape[1] == 2: + bandwidth = estimate_bandwidth(X_red, quantile=0.2) + print "Clustering with bandwidth:", bandwidth - plt.close('all') - plt.figure(1) - plt.clf() + af = MeanShift(bandwidth=bandwidth * param).fit(X_red) - for ([x,y],label, cluster_label) in zip(X_red,train_programs, labels): - x = gauss(0,0.1) + x - y = gauss(0,0.1) + y - plt.scatter(x, y, c = colors[cluster_label % ncolors]) + cluster_centers = af.cluster_centers_ + labels = af.labels_ + n_clusters_ = len(cluster_centers) - for i,[x,y] in enumerate(cluster_centers): - plt.plot(x, y, 'o', markerfacecolor=colors[i % ncolors], - markeredgecolor='k', markersize=7) + if X_red.shape[1] == 2: - plt.title('Estimated number of clusters: %d' % n_clusters_) - plt.savefig(train_file.replace(".gz","")+".clusters.png") + plt.close('all') + plt.figure(1) + plt.clf() - #plt.show() + for ([x, y], label, cluster_label) in zip( + X_red, train_programs, labels): + x = gauss(0, 0.1) + x + y = gauss(0, 0.1) + y + plt.scatter(x, y, c=colors[cluster_label % ncolors]) - clustered_traces = zip(train_programs, labels) - writer = write_csv(train_file.replace(".gz","")+".clusters") - for label, cluster in clustered_traces: - writer.writerow([label.split("/")[-1], cluster]) + for i, [x, y] in enumerate(cluster_centers): + plt.plot(x, y, 'o', 
markerfacecolor=colors[i % ncolors], + markeredgecolor='k', markersize=7) + + plt.title('Estimated number of clusters: %d' % n_clusters_) + plt.savefig(train_file.replace(".gz", "") + ".clusters.png") + + # plt.show() + + clustered_traces = zip(train_programs, labels) + writer = write_csv(train_file.replace(".gz", "") + ".clusters") + for label, cluster in clustered_traces: + writer.writerow([label.split("/")[-1], cluster]) diff --git a/vdiscover/Detection.py b/vdiscover/Detection.py index 218f2c7..e6f7a5a 100644 --- a/vdiscover/Detection.py +++ b/vdiscover/Detection.py @@ -23,122 +23,126 @@ from Input import Arg, File + def GetCmd(s): - if os.path.exists("path.txt"): - f = open("path.txt") - x = f.readline() - return x.replace("\n","").strip(" ") - else: - return s + if os.path.exists("path.txt"): + f = open("path.txt") + x = f.readline() + return x.replace("\n", "").strip(" ") + else: + return s + def GetArg(n, conc): - if conc: - filename = "cargv_"+str(n)+".symb" - data = open(filename).read() - x = Arg(n, data) - x.SetConcrete() - else: - filename = "argv_"+str(n)+".symb" - data = open(filename).read() - x = Arg(n, data) - x.SetSymbolic() + if conc: + filename = "cargv_" + str(n) + ".symb" + data = open(filename).read() + x = Arg(n, data) + x.SetConcrete() + else: + filename = "argv_" + str(n) + ".symb" + data = open(filename).read() + x = Arg(n, data) + x.SetSymbolic() + + return x - return x def WriteTestcase(name, program, args, copy=False): - try: - os.mkdir(name) - except: - pass - - os.chdir(name) - filename = "path.txt" - open(filename,"w").write(program) - - try: - os.mkdir("inputs") - except: - pass - - os.chdir("inputs") - for i,arg in enumerate(args): - if "file:" in arg: - #print arg - arg = arg.replace("file:","") - assert(arg[0] == '/') - filename = os.path.split(arg)[-1] - #print filename - if copy: - shutil.copyfile(os.path.realpath(arg), "file_"+filename) - else: - os.symlink(os.path.realpath(arg), "file_"+filename) - arg = filename - - filename = "argv_"+str(i+1)+".symb" - open(filename,"w").write(arg) - - os.chdir("../..") - + try: + os.mkdir(name) + except: + pass + + os.chdir(name) + filename = "path.txt" + open(filename, "w").write(program) + + try: + os.mkdir("inputs") + except: + pass + + os.chdir("inputs") + for i, arg in enumerate(args): + if "file:" in arg: + # print arg + arg = arg.replace("file:", "") + assert(arg[0] == '/') + filename = os.path.split(arg)[-1] + # print filename + if copy: + shutil.copyfile(os.path.realpath(arg), "file_" + filename) + else: + os.symlink(os.path.realpath(arg), "file_" + filename) + arg = filename + + filename = "argv_" + str(i + 1) + ".symb" + open(filename, "w").write(arg) + + os.chdir("../..") + def GetArgs(): - #i = 1 - r = [] + #i = 1 + r = [] - for _,_,files in os.walk('.'): - for f in files: - #print f - for i in range(10): - #print str(i), f + for _, _, files in os.walk('.'): + for f in files: + # print f + for i in range(10): + # print str(i), f - if ("cargv_"+str(i)) in f: - x = GetArg(i, True) - if x.IsValid(): - r.append(x) + if ("cargv_" + str(i)) in f: + x = GetArg(i, True) + if x.IsValid(): + r.append(x) - break + break - elif ("argv_"+str(i)) in f: - x = GetArg(i, False) - if x.IsValid(): - r.append(x) + elif ("argv_" + str(i)) in f: + x = GetArg(i, False) + if x.IsValid(): + r.append(x) - break + break - r.sort() - #print r - for i in range(len(r)): - if r[i].i <> i+1: - r = r[0:i] - break + r.sort() + # print r + for i in range(len(r)): + if r[i].i != i + 1: + r = r[0:i] + break + + # print r + return r - 
#print r - return r def GetFile(filename, source): - #size = int(os.path.getsize(source)) - data = open(source).read() - return File(filename, data) + #size = int(os.path.getsize(source)) + data = open(source).read() + return File(filename, data) -def GetFiles(): - r = [] - stdinf = "file___dev__stdin.symb" - - for dir,_,files in os.walk('.'): - if dir == '.': - for f in files: - if (stdinf == f): - r.append(GetFile("/dev/stdin",stdinf)) - elif ("file_" in f): - filename = f.split(".symb")[0] - #filename = f.replace(".symb","") - filename = filename.split("file_")[1] - filename = filename.replace(".__", "") - x = GetFile(filename,f) - if x.IsValid(): - r.append(x) - - return r +def GetFiles(): + r = [] + stdinf = "file___dev__stdin.symb" + + for dir, _, files in os.walk('.'): + if dir == '.': + for f in files: + if (stdinf == f): + r.append(GetFile("/dev/stdin", stdinf)) + elif ("file_" in f): + filename = f.split(".symb")[0] + #filename = f.replace(".symb","") + filename = filename.split("file_")[1] + filename = filename.replace(".__", "") + x = GetFile(filename, f) + if x.IsValid(): + r.append(x) + + return r diff --git a/vdiscover/ELF.py b/vdiscover/ELF.py index 8530eaa..8833e21 100644 --- a/vdiscover/ELF.py +++ b/vdiscover/ELF.py @@ -19,7 +19,8 @@ import re import csv -import os, os.path +import os +import os.path import subprocess from Misc import parse_ldd_output, sh_string @@ -28,14 +29,16 @@ _FILE = '/usr/bin/file' _OBJDUMP = '/usr/bin/objdump' + def die(s): - print s - exit(-1) + print s + exit(-1) + def check(f): - import os - if not (os.access(f, os.X_OK) and os.path.isfile(f)): - die('Executable %s needed for readelf.py, please install binutils' % f) + import os + if not (os.access(f, os.X_OK) and os.path.isfile(f)): + die('Executable %s needed for readelf.py, please install binutils' % f) check(_READELF) check(_OBJDUMP) @@ -43,190 +46,199 @@ def check(f): realpath = os.path.dirname(os.path.realpath(__file__)) datadir = "../cache/" + def _save_cached_data(path, plt, got, base): - filename = realpath+"/"+datadir+"/"+str(path.replace("/","_")) - csvfile = open(filename+".plt", 'wb') - writer = csv.writer(csvfile, delimiter='\t') + filename = realpath + "/" + datadir + "/" + str(path.replace("/", "_")) + csvfile = open(filename + ".plt", 'wb') + writer = csv.writer(csvfile, delimiter='\t') - for (name,addr) in plt.items(): - if addr is not None: - writer.writerow((name,addr-base)) + for (name, addr) in plt.items(): + if addr is not None: + writer.writerow((name, addr - base)) - csvfile = open(filename+".got", 'wb') - writer = csv.writer(csvfile, delimiter='\t') + csvfile = open(filename + ".got", 'wb') + writer = csv.writer(csvfile, delimiter='\t') + + for (name, addr) in got.items(): + # print "got",name, addr + if addr is None: + addr = 0x0 + writer.writerow((name, addr)) - for (name,addr) in got.items(): - #print "got",name, addr - if addr is None: - addr = 0x0 - writer.writerow((name,addr)) def _load_cached_data(path, plt, got, base): - - cachedir = os.path.dirname(realpath+"/"+datadir) - if not os.path.exists(cachedir): - os.makedirs(cachedir) - - - filename = realpath+"/"+datadir+"/"+str(path.replace("/","_")) - - #print filename - try: - csvfile = open(filename+".plt", 'rb') - except IOError: - return False - #print "cached file:",filename+".plt" - - reader = csv.reader(csvfile, delimiter='\t') - - for (name,addr) in reader: - #print name, int(addr)+base - plt[name] = int(addr)+base - - try: - csvfile = open(filename+".got", 'rb') - except IOError: - return False - - reader 
= csv.reader(csvfile, delimiter='\t') - - for (name,addr) in reader: - addr = int(addr) - if addr == 0x0: - addr = None - got[name] = addr - - return True + + cachedir = os.path.dirname(realpath + "/" + datadir) + if not os.path.exists(cachedir): + os.makedirs(cachedir) + + filename = realpath + "/" + datadir + "/" + str(path.replace("/", "_")) + + # print filename + try: + csvfile = open(filename + ".plt", 'rb') + except IOError: + return False + # print "cached file:",filename+".plt" + + reader = csv.reader(csvfile, delimiter='\t') + + for (name, addr) in reader: + # print name, int(addr)+base + plt[name] = int(addr) + base + + try: + csvfile = open(filename + ".got", 'rb') + except IOError: + return False + + reader = csv.reader(csvfile, delimiter='\t') + + for (name, addr) in reader: + addr = int(addr) + if addr == 0x0: + addr = None + got[name] = addr + + return True + def plt_got(path, base): - plt, got = dict(), dict() + plt, got = dict(), dict() - if _load_cached_data(path, plt, got, base): - #print "plt",plt - #print "got",got - return plt, got + if _load_cached_data(path, plt, got, base): + # print "plt",plt + # print "got",got + return plt, got - cmd = ["env", "-i", _OBJDUMP, '-d', path] - out = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0] - got32 = '[^j]*jmp\s+\*0x(\S+)' - got64 = '[^#]*#\s+(\S+)' - lines = re.findall('([a-fA-F0-9]+)\s+<([^@<]+)@plt>:(%s|%s)' % (got32, got64), out) + cmd = ["env", "-i", _OBJDUMP, '-d', path] + out = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0] + got32 = '[^j]*jmp\s+\*0x(\S+)' + got64 = '[^#]*#\s+(\S+)' + lines = re.findall( + '([a-fA-F0-9]+)\s+<([^@<]+)@plt>:(%s|%s)' % (got32, got64), out) - for addr, name, _, gotaddr32, gotaddr64 in lines: - addr = int(addr, 16) + for addr, name, _, gotaddr32, gotaddr64 in lines: + addr = int(addr, 16) - try: - gotaddr = int(gotaddr32 or gotaddr64, 16) - except ValueError: - gotaddr = None + try: + gotaddr = int(gotaddr32 or gotaddr64, 16) + except ValueError: + gotaddr = None - plt[name] = base + addr - got[name] = gotaddr + plt[name] = base + addr + got[name] = gotaddr - #print "plt",plt - #print "got",got + # print "plt",plt + # print "got",got + + _save_cached_data(path, plt, got, base) + return plt, got - _save_cached_data(path, plt, got, base) - return plt, got def load_raw_inss(path): - cmd = ["env", "-i", _OBJDUMP, '-d', '-j', ".text", path] - raw_instructions = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0] - #lines = re.findall('([a-fA-F0-9]+)\s+((<([^@<]+)@plt>)|%s)' % "|".join(inss), raw_instructions) - #lines = re.findall('$', raw_instructions) - return raw_instructions + cmd = ["env", "-i", _OBJDUMP, '-d', '-j', ".text", path] + raw_instructions = subprocess.Popen( + cmd, stdout=subprocess.PIPE).communicate()[0] + #lines = re.findall('([a-fA-F0-9]+)\s+((<([^@<]+)@plt>)|%s)' % "|".join(inss), raw_instructions) + #lines = re.findall('$', raw_instructions) + return raw_instructions + def entrypoint(path): cmd = ["env", "-i", _READELF, '-hWS', path] out = subprocess.check_output(cmd) #elfclass = re.findall('Class:\s*(.*$)', out, re.MULTILINE)[0] - entrypoint = int(re.findall('Entry point address:\s*(.*$)', out, re.MULTILINE)[0], 16) - #print out - #print hex(entrypoint) + entrypoint = int(re.findall( + 'Entry point address:\s*(.*$)', out, re.MULTILINE)[0], 16) + # print out + # print hex(entrypoint) if "DYN (Shared object file)" in out: - entrypoint = entrypoint + 0x80000000 + entrypoint = entrypoint + 0x80000000 return entrypoint + def 
no_frame_pointer(path): cmd = ["env", "-i", _READELF, '-hWS', path] out = subprocess.check_output(cmd) - #print out + # print out #elfclass = re.findall('Class:\s*(.*$)', out, re.MULTILINE)[0] out = out.split('.eh_frame PROGBITS ')[1] out = out.split(" ")[2] - return (int(out,16) > 4) + return (int(out, 16) > 4) -def file_type(path): - cmd = [_FILE, os.path.realpath(path)] - try: - out = subprocess.check_output(cmd) - except subprocess.CalledProcessError: - return "" +def file_type(path): + cmd = [_FILE, os.path.realpath(path)] - if "ELF 32-bit" in out: - return "ELF 32-bit" - elif "ELF 64-bit" in out: - return "ELF 64-bit" - else: - return None + try: + out = subprocess.check_output(cmd) + except subprocess.CalledProcessError: + return "" -class ELF: - '''A parsed ELF file''' - cachedir = "cache" - - def __init__(self, path, plt = True, base = 0x0): - #print path, plt - self.path = str(path) - self.base = base - self.sections = dict() - self.filetype = file_type(path) - - if self.filetype == "": - print "The executable at",path,"cannot be found" - exit(-1) - - elif self.filetype is None: - print "The executable at",path,"is not a valid ELF file" - exit(-1) - - self.entrypoint = entrypoint(path) - #print hex(self.entrypoint) - self.no_frame_pointer = no_frame_pointer(path) - #self._load_sections() - - if plt: - self.plt, self.got = plt_got(self.path, self.base) + if "ELF 32-bit" in out: + return "ELF 32-bit" + elif "ELF 64-bit" in out: + return "ELF 64-bit" else: - self.plt, self.got = dict(), dict() - self.name2addr = self.plt - self.addr2name = dict() - - for (name, addr) in self.name2addr.items(): - self.addr2name[addr] = name - - self.name2func = self.got - self.func2name = dict() - - for (name, addr) in self.name2func.items(): - self.func2name[addr] = name + return None - def _populate_libraries_ldd(self): - """ - from pwntools +class ELF: + '''A parsed ELF file''' + cachedir = "cache" + + def __init__(self, path, plt=True, base=0x0): + # print path, plt + self.path = str(path) + self.base = base + self.sections = dict() + self.filetype = file_type(path) + + if self.filetype == "": + print "The executable at", path, "cannot be found" + exit(-1) + + elif self.filetype is None: + print "The executable at", path, "is not a valid ELF file" + exit(-1) + + self.entrypoint = entrypoint(path) + # print hex(self.entrypoint) + self.no_frame_pointer = no_frame_pointer(path) + # self._load_sections() + + if plt: + self.plt, self.got = plt_got(self.path, self.base) + else: + self.plt, self.got = dict(), dict() + self.name2addr = self.plt + self.addr2name = dict() + + for (name, addr) in self.name2addr.items(): + self.addr2name[addr] = name + + self.name2func = self.got + self.func2name = dict() + + for (name, addr) in self.name2func.items(): + self.func2name[addr] = name + + def _populate_libraries_ldd(self): + """ + from pwntools + """ + try: + cmd = '(ulimit -s unlimited; ldd %s > /dev/null && (LD_TRACE_LOADED_OBJECTS=1 %s || ldd %s)) 2>/dev/null' + arg = sh_string(self.path) + data = subprocess.check_output(cmd % (arg, arg, arg), shell=True) + self._libs = parse_ldd_output(data) + except subprocess.CalledProcessError: + self._libs = {} """ - try: - cmd = '(ulimit -s unlimited; ldd %s > /dev/null && (LD_TRACE_LOADED_OBJECTS=1 %s || ldd %s)) 2>/dev/null' - arg = sh_string(self.path) - data = subprocess.check_output(cmd % (arg, arg, arg), shell = True) - self._libs = parse_ldd_output(data) - except subprocess.CalledProcessError: - self._libs = {} - """ def _load_sections(self): # -W : Wide output 
# -S : Section headers @@ -247,45 +259,46 @@ def _load_sections(self): 'flags' : flgs, } """ - def GetEntrypoint(self): - return self.entrypoint - def GetFunctions(self): - return self.name2func.keys() + def GetEntrypoint(self): + return self.entrypoint - def GetModname(self): - return str(self.path) + def GetFunctions(self): + return self.name2func.keys() - def FindFuncInPlt(self, name): + def GetModname(self): + return str(self.path) - if name in self.name2addr: - return self.name2addr[name] - else: - return None + def FindFuncInPlt(self, name): - def FindAddrInPlt(self, addr): - #print addr - if addr in self.addr2name: - return self.addr2name[addr] - else: - return None + if name in self.name2addr: + return self.name2addr[name] + else: + return None - def FindFuncInGot(self, name): + def FindAddrInPlt(self, addr): + # print addr + if addr in self.addr2name: + return self.addr2name[addr] + else: + return None - if name in self.name2addr: - return self.name2func[name] - else: - return None + def FindFuncInGot(self, name): - def FindAddrInGot(self, addr): - #print addr - if addr in self.addr2name: - return self.func2name[addr] - else: - return None + if name in self.name2addr: + return self.name2func[name] + else: + return None + + def FindAddrInGot(self, addr): + # print addr + if addr in self.addr2name: + return self.func2name[addr] + else: + return None - def GetType(self): - return str(self.filetype) + def GetType(self): + return str(self.filetype) - def GetRawInss(self): - return load_raw_inss(self.path) + def GetRawInss(self): + return load_raw_inss(self.path) diff --git a/vdiscover/Event.py b/vdiscover/Event.py index dd33ea2..38a51e5 100644 --- a/vdiscover/Event.py +++ b/vdiscover/Event.py @@ -27,233 +27,255 @@ #from distorm import Decode, Decode32Bits + class Event: - module = None - def __init__(self): - pass + module = None -class Call(Event): + def __init__(self): + pass - def __init__(self, name, module): - assert(name in specs) - spec = specs[name] - self.ret = str(spec[0]) - #fixme: void functions and non-returned values should be different! - self.retvalue = (Type("Top32",4),None) - self.module = module - self.name = str(name) - self.param_types = list(spec[1:]) - self.param_ptypes = [] - self.param_values = [] +class Call(Event): - def __str__(self): - return str(self.name) + def __init__(self, name, module): - #def _detect_return_address(self): - # addr = self.process.getreg("esp") - # bytes = self.process.readBytes(addr, 4) - # return RefinePType(Type("Ptr32",4),bytes2word(bytes), self.process, self.mm) - # #return bytes2word(bytes) + assert(name in specs) + spec = specs[name] + self.ret = str(spec[0]) + # fixme: void functions and non-returned values should be different! 
+ self.retvalue = (Type("Top32", 4), None) + self.module = module + self.name = str(name) + self.param_types = list(spec[1:]) + self.param_ptypes = [] + self.param_values = [] - def _detect_parameter_x86_64(self, ptype, index): + def __str__(self): + return str(self.name) - if index > 4: - return None + # def _detect_return_address(self): + # addr = self.process.getreg("esp") + # bytes = self.process.readBytes(addr, 4) + # return RefinePType(Type("Ptr32",4),bytes2word(bytes), self.process, self.mm) + # #return bytes2word(bytes) - reg = ["rdi","rsi","rdx","rcx","r8"][index] - val = self.process.getreg(reg) + def _detect_parameter_x86_64(self, ptype, index): - #print "bs value", repr(bs), hex(bytes2word(bs)) + if index > 4: + return None - return RefinePType(GetPtype(ptype),val, self.process, self.mm) + reg = ["rdi", "rsi", "rdx", "rcx", "r8"][index] + val = self.process.getreg(reg) + # print "bs value", repr(bs), hex(bytes2word(bs)) + return RefinePType(GetPtype(ptype), val, self.process, self.mm) + def _detect_parameter_x86(self, ptype, offset): + addr = self.process.getStackPointer() + offset + bs = self.process.readBytes(addr, 4) - def _detect_parameter_x86(self, ptype, offset): - addr = self.process.getStackPointer()+offset - bs = self.process.readBytes(addr, 4) + # if CPU_X86_64: + # bs = bs + (4*'\00') - #if CPU_X86_64: - # bs = bs + (4*'\00') + # print "bs value", repr(bs), hex(bytes2word(bs)) - #print "bs value", repr(bs), hex(bytes2word(bs)) + return RefinePType( + GetPtype(ptype), + bytes2word(bs), + self.process, + self.mm) - return RefinePType(GetPtype(ptype),bytes2word(bs), self.process, self.mm) + def get_return_address(self): + return self.retaddr[1] - def get_return_address(self): - return self.retaddr[1] + def detect_parameters(self, process, mm): + self.process = process + self.mm = mm + self.retaddr = None + # print "ret_addr:", str(self.retaddr[0]), hex(self.retaddr[1]) - def detect_parameters(self, process, mm): - self.process = process - self.mm = mm - self.retaddr = None - #print "ret_addr:", str(self.retaddr[0]), hex(self.retaddr[1]) + offset = 4 + # print self.mm + # print self.name + for index, ctype in enumerate(self.param_types): - offset = 4 - #print self.mm - #print self.name - for index,ctype in enumerate(self.param_types): + if CPU_X86_64: + (ptype, value) = self._detect_parameter_x86_64(ctype, index) + else: + (ptype, value) = self._detect_parameter_x86(ctype, offset) - if CPU_X86_64: - (ptype, value) = self._detect_parameter_x86_64(ctype, index) - else: - (ptype, value) = self._detect_parameter_x86(ctype, offset) + self.param_values.append(value) + self.param_ptypes.append(ptype) + offset += ptype.getSize() + #print (str(ptype), hex(value)) - self.param_values.append(value) - self.param_ptypes.append(ptype) - offset += ptype.getSize() - #print (str(ptype), hex(value)) + # def DetectReturnValue(self, process): + # self.process = process + # self.retvalue = RefinePType(GetPtype(self.ret),process.getreg("eax"), self.process, self.mm) - #def DetectReturnValue(self, process): - # self.process = process - # self.retvalue = RefinePType(GetPtype(self.ret),process.getreg("eax"), self.process, self.mm) + def GetTypedName(self): + return (str(self), list(self.param_ptypes)) - def GetTypedName(self): - return (str(self), list(self.param_ptypes)) class Signal(Event): - def __init__(self, name, process, mm): - self.fields = dict() - _sifields = process.getsiginfo()._sifields + def __init__(self, name, process, mm): + + self.fields = dict() + _sifields = 
process.getsiginfo()._sifields - self.name = name + self.name = name - if hasattr(_sifields, "_sigfault") and self.name == "SIGSEGV": - self.fields["addr"] = RefinePType(Type("Ptr32",4), _sifields._sigfault._addr, process, mm) - #print "sigfault @", _sifields._sigfault._addr + if hasattr(_sifields, "_sigfault") and self.name == "SIGSEGV": + self.fields["addr"] = RefinePType( + Type("Ptr32", 4), _sifields._sigfault._addr, process, mm) + # print "sigfault @", _sifields._sigfault._addr - def __str__(self): - return str(self.name) + def __str__(self): + return str(self.name) - def GetTypedName(self): + def GetTypedName(self): - if len(self.fields) > 0: - ptypes = map(lambda (x,_): x, self.fields.values()) - return (str(self.name), ptypes) - else: - return (str(self.name), ["()"]) + if len(self.fields) > 0: + ptypes = map(lambda x__: x__[0], self.fields.values()) + return (str(self.name), ptypes) + else: + return (str(self.name), ["()"]) class Syscall(Event): - def __init__(self, name): - self.name = name - def __str__(self): - return str(self.name) + def __init__(self, name): + self.name = name + + def __str__(self): + return str(self.name) + + def GetTypedName(self): + return ("Syscall", [str(self.name)]) - def GetTypedName(self): - return ("Syscall", [str(self.name)]) class Exit(Event): - def __init__(self, code): - self.code = code - self.name = "Exit with "+str(code) - def __str__(self): - return str(self.name) + def __init__(self, code): + self.code = code + self.name = "Exit with " + str(code) + + def __str__(self): + return str(self.name) + + def GetTypedName(self): + return ("exited", str(self.code)) - def GetTypedName(self): - return ("exited", str(self.code)) class Abort(Event): - def __init__(self, process, mm): - self.name = "Abort" - ip = process.getInstrPointer() - self.bt = process.getBacktrace(max_args=0, max_depth=20) - self.module = FindModule(ip,mm) - #print self.bt, type(self.bt) - frames = [] + def __init__(self, process, mm): + self.name = "Abort" + ip = process.getInstrPointer() - if CPU_X86_64: - pass # detection of stack frame disabled, python-ptrace does not support ... - if CPU_I386: + self.bt = process.getBacktrace(max_args=0, max_depth=20) + self.module = FindModule(ip, mm) + # print self.bt, type(self.bt) + frames = [] - for i,frame in enumerate(self.bt.frames): - r_type = RefinePType(Type("Ptr32",4), frame.ip, process, mm) - frames.append(r_type) + if CPU_X86_64: + # detection of stack frame disabled, python-ptrace does not support + # ... 
+ pass + if CPU_I386: - if str(r_type[0]) == "DPtr32": - break + for i, frame in enumerate(self.bt.frames): + r_type = RefinePType(Type("Ptr32", 4), frame.ip, process, mm) + frames.append(r_type) - self.bt.frames = frames - #print "frames",frames - #print "self.bt.frames", self.bt.frames + if str(r_type[0]) == "DPtr32": + break - self.eip = RefinePType(Type("Ptr32",4), ip, process, mm) + self.bt.frames = frames + # print "frames",frames + # print "self.bt.frames", self.bt.frames - def __str__(self): - return str(self.name) + self.eip = RefinePType(Type("Ptr32", 4), ip, process, mm) + + def __str__(self): + return str(self.name) + + def GetTypedName(self): + return ("abort", [self.eip[0]]) - def GetTypedName(self): - return ("abort", [self.eip[0]]) class Timeout(Event): - def __init__(self, secs): - self.secs = secs - self.name = "Timeout "+str(secs)+" secs" - def __str__(self): - return str(self.name) + def __init__(self, secs): + self.secs = secs + self.name = "Timeout " + str(secs) + " secs" + + def __str__(self): + return str(self.name) + + def GetTypedName(self): + return ("timeouted", ["()"]) - def GetTypedName(self): - return ("timeouted", ["()"]) class Crash(Event): - def __init__(self, process, mm): - ip = process.getInstrPointer() - fp = process.getFramePointer() + def __init__(self, process, mm): + ip = process.getInstrPointer() + fp = process.getFramePointer() - self.module = FindModule(ip,mm) + self.module = FindModule(ip, mm) - self.fp_type = RefinePType(Type("Ptr32",4), fp, process, mm) - #print "fp:",hex(fp_type[1]), str(fp_type[0]) - if not process.no_frame_pointer: #str(self.fp_type[0]) == "SPtr32": - self.bt = getBacktrace(process,max_args=0, max_depth=20) - else: - self.bt = Backtrace() - frames = [] + self.fp_type = RefinePType(Type("Ptr32", 4), fp, process, mm) + # print "fp:",hex(fp_type[1]), str(fp_type[0]) + if not process.no_frame_pointer: # str(self.fp_type[0]) == "SPtr32": + self.bt = getBacktrace(process, max_args=0, max_depth=20) + else: + self.bt = Backtrace() + frames = [] - if CPU_X86_64: - pass # detection of stack frame disabled, python-ptrace does not support ... - if CPU_I386: + if CPU_X86_64: + # detection of stack frame disabled, python-ptrace does not support + # ... 
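A detail that is easy to miss in this hunk: Crash only walks saved stack frames when process.no_frame_pointer is false, presumably the flag computed by the .eh_frame heuristic in ELF.py, which seems intended to spot binaries built without frame pointers; otherwise it settles for an empty Backtrace(), since chasing saved frame pointers in such a build would only yield bogus frames. The i386 branch below then refines each frame address with RefinePType and stops at the first one no longer classified as GxPtr32.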
+ pass + if CPU_I386: - for i,frame in enumerate(self.bt.frames): - print "frame",frame, hex(frame.ip) - r_type = RefinePType(Type("Ptr32",4), frame.ip, process, mm) - frames.append(r_type) - #print "ip:", str(r_type[0]) - if not (str(r_type[0]) == "GxPtr32"): - break + for i, frame in enumerate(self.bt.frames): + print "frame", frame, hex(frame.ip) + r_type = RefinePType(Type("Ptr32", 4), frame.ip, process, mm) + frames.append(r_type) + # print "ip:", str(r_type[0]) + if not (str(r_type[0]) == "GxPtr32"): + break - self.bt.frames = frames - self.eip_type = RefinePType(Type("Ptr32",4), process.getInstrPointer(), process, mm) + self.bt.frames = frames + self.eip_type = RefinePType( + Type("Ptr32", 4), process.getInstrPointer(), process, mm) - def __str__(self): - return "Crash@"+hex(self.eip_type[1])+":"+str(self.eip_type[0]) + def __str__(self): + return "Crash@" + hex(self.eip_type[1]) + ":" + str(self.eip_type[0]) - def GetTypedName(self): - return ("crashed", [self.eip_type[0]]) + def GetTypedName(self): + return ("crashed", [self.eip_type[0]]) class Vulnerability(Event): - def __init__(self, vtype): - self.type = str(vtype) - self.name = "Vulnerability "+str(vtype)+" detected" - def __str__(self): - return str(self.name) + def __init__(self, vtype): + self.type = str(vtype) + self.name = "Vulnerability " + str(vtype) + " detected" + + def __str__(self): + return str(self.name) + + def GetTypedName(self): + return ("Vulnerability", [str(self.type)]) - def GetTypedName(self): - return ("Vulnerability",[str(self.type)]) def hash_events(events): - return hash(tuple(map(str, events))) + return hash(tuple(map(str, events))) -def IsTimeout(event): - return isinstance(event, Timeout) +def IsTimeout(event): + return isinstance(event, Timeout) diff --git a/vdiscover/Input.py b/vdiscover/Input.py index 5a62b73..80e5bd7 100644 --- a/vdiscover/Input.py +++ b/vdiscover/Input.py @@ -19,79 +19,83 @@ import copy + def prepare_inputs(inputs): - r = [] - for input in inputs: - arg = input.PrepareData() - if not (arg is None): - r.append(arg) + r = [] + for input in inputs: + arg = input.PrepareData() + if not (arg is None): + r.append(arg) + + return r - return r class Input: - data = None - concrete = False + data = None + concrete = False + + def __init__(self): + pass - def __init__(self): - pass + def __len__(self): + return len(self.data) - def __len__(self): - return len(self.data) + def copy(self): + # print "data:",self.data + return copy.copy(self) - def copy(self): - #print "data:",self.data - return copy.copy(self) + def isSymbolic(self): + return not self.concrete - def isSymbolic(self): - return not self.concrete + def isConcrete(self): + return self.concrete - def isConcrete(self): - return self.concrete + def SetSymbolic(self): + self.concrete = False - def SetSymbolic(self): - self.concrete = False + def SetConcrete(self): + self.concrete = True - def SetConcrete(self): - self.concrete = True class Arg(Input): - def __init__(self, i, data): - self.i = i - self.data = str(data) - if ("\0" in data): - self.data = self.data.split("\0")[0] + def __init__(self, i, data): + self.i = i - self.size = len(self.data) + self.data = str(data) + if ("\0" in data): + self.data = self.data.split("\0")[0] - def __str__(self): - return "Arg("+str(self.i)+") = "+repr(self.data) + self.size = len(self.data) - def GetData(self): - return str(self.data) + def __str__(self): + return "Arg(" + str(self.i) + ") = " + repr(self.data) - def GetSize(self): - return len(self.data) + def GetData(self): + return 
str(self.data) - def PrepareData(self): + def GetSize(self): + return len(self.data) - return self.GetData() + def PrepareData(self): - def IsValid(self): - return self.size > 0 + return self.GetData() - def __cmp__(self, arg): - return cmp(self.i, arg.i) + def IsValid(self): + return self.size > 0 - def GetName(self): - if self.concrete: - return "cargv_"+str(self.i) - else: - return "argv_"+str(self.i) + def __cmp__(self, arg): + return cmp(self.i, arg.i) - def GetType(self): - return "arg" + def GetName(self): + if self.concrete: + return "cargv_" + str(self.i) + else: + return "argv_" + str(self.i) + + def GetType(self): + return "arg" # class Env(Input): @@ -130,43 +134,44 @@ def GetType(self): # return "env" class File(Input): - def __init__(self, filename, data): - self.filename = str(filename) - self.data = str(data) - self.size = len(data) - def __str__(self): - return "file("+str(self.filename)+") = "+repr(self.data) + def __init__(self, filename, data): + self.filename = str(filename) + self.data = str(data) + self.size = len(data) + + def __str__(self): + return "file(" + str(self.filename) + ") = " + repr(self.data) - def GetData(self): - return str(self.data) + def GetData(self): + return str(self.data) - def GetSize(self): - return len(self.data) + def GetSize(self): + return len(self.data) - def PrepareData(self): - if self.filename == "/dev/stdin": - with open("Stdin", 'w') as f: - f.write(self.data) + def PrepareData(self): + if self.filename == "/dev/stdin": + with open("Stdin", 'w') as f: + f.write(self.data) - return "< Stdin" - else: - with open(self.filename, 'w') as f: - f.write(self.data) + return "< Stdin" + else: + with open(self.filename, 'w') as f: + f.write(self.data) - return None + return None - def IsValid(self): - return True + def IsValid(self): + return True # def copy(self): # return File(self.filename, self.data) - def GetName(self): - return "file_"+self.filename.replace("/", "__") + def GetName(self): + return "file_" + self.filename.replace("/", "__") - def GetFilename(self): - return str(self.filename) + def GetFilename(self): + return str(self.filename) - def GetType(self): - return "file" + def GetType(self): + return "file" diff --git a/vdiscover/MemoryMap.py b/vdiscover/MemoryMap.py index 4625e94..fa1d4bc 100644 --- a/vdiscover/MemoryMap.py +++ b/vdiscover/MemoryMap.py @@ -17,94 +17,96 @@ Copyright 2014 by G.Grieco """ -class MemoryMaps: - def __init__(self, path, pid): - self.path = str(path) - self.pid = pid - self.update() - - def update(self): - - self.mm = dict() - self.atts = dict() - - for line in open('/proc/'+str(self.pid)+'/maps'): - line = line.replace("\n", "") - #print line - x = line.split(" ") - - mrange = x[0].split("-") - mrange = map(lambda s: int(s, 16), mrange) - #print tuple(mrange) - - self.mm[tuple(mrange)] = x[-1] - self.atts[tuple(mrange)] = x[1] - - def isStackPtr(self, ptr): - for (mrange,zone) in self.mm.items(): - if ptr >= mrange[0] and ptr < mrange[1]: - return zone == "[stack]" - return False - - def isHeapPtr(self, ptr): - for (mrange,zone) in self.mm.items(): - if ptr >= mrange[0] and ptr < mrange[1]: - return zone == "[heap]" - return False - - def isCodePtr(self, ptr): - for (mrange,zone) in self.mm.items(): - if ptr >= mrange[0] and ptr < mrange[1] and 'x' in self.atts[mrange]: - return True - return False - - def isLibPtr(self, ptr): - for (mrange,zone) in self.mm.items(): - if ptr >= mrange[0] and ptr < mrange[1]: - return "/lib/" in zone - return False - - def isGlobalPtr(self, ptr): - for (mrange,zone) 
in self.mm.items(): - if ptr >= mrange[0] and ptr < mrange[1]: - return zone == self.path - return False - - def isFilePtr(self, ptr): - for (mrange,zone) in self.mm.items(): - if ptr >= mrange[0] and ptr < mrange[1]: - return zone == "" - return False - - def checkPtr(self, ptr, update=True): - for (mrange,zone) in self.mm.items(): - if ptr >= mrange[0] and ptr < mrange[1]: - return True - - if update: - self.update() - else: - return False - - return self.checkPtr(ptr, update=False) - - def findModule(self, ptr): - for (mrange,zone) in self.mm.items(): - if ptr >= mrange[0] and ptr < mrange[1]: - return str(zone) - return None - - def __str__(self): - r = "" - for (mrange,zone) in self.mm.items(): - r = r + hex(mrange[0])+" - "+hex(mrange[1])+" -> "+zone+"\n" - return r - - def items(self): - r = [] - for (x,y) in self.mm.items(): - r.append((x,y,self.atts[x])) - - return r +class MemoryMaps: + def __init__(self, path, pid): + self.path = str(path) + self.pid = pid + self.update() + + def update(self): + + self.mm = dict() + self.atts = dict() + + for line in open('/proc/' + str(self.pid) + '/maps'): + line = line.replace("\n", "") + # print line + x = line.split(" ") + + mrange = x[0].split("-") + mrange = map(lambda s: int(s, 16), mrange) + # print tuple(mrange) + + self.mm[tuple(mrange)] = x[-1] + self.atts[tuple(mrange)] = x[1] + + def isStackPtr(self, ptr): + for (mrange, zone) in self.mm.items(): + if ptr >= mrange[0] and ptr < mrange[1]: + return zone == "[stack]" + return False + + def isHeapPtr(self, ptr): + for (mrange, zone) in self.mm.items(): + if ptr >= mrange[0] and ptr < mrange[1]: + return zone == "[heap]" + return False + + def isCodePtr(self, ptr): + for (mrange, zone) in self.mm.items(): + if ptr >= mrange[0] and ptr < mrange[ + 1] and 'x' in self.atts[mrange]: + return True + return False + + def isLibPtr(self, ptr): + for (mrange, zone) in self.mm.items(): + if ptr >= mrange[0] and ptr < mrange[1]: + return "/lib/" in zone + return False + + def isGlobalPtr(self, ptr): + for (mrange, zone) in self.mm.items(): + if ptr >= mrange[0] and ptr < mrange[1]: + return zone == self.path + return False + + def isFilePtr(self, ptr): + for (mrange, zone) in self.mm.items(): + if ptr >= mrange[0] and ptr < mrange[1]: + return zone == "" + return False + + def checkPtr(self, ptr, update=True): + for (mrange, zone) in self.mm.items(): + if ptr >= mrange[0] and ptr < mrange[1]: + return True + + if update: + self.update() + else: + return False + + return self.checkPtr(ptr, update=False) + + def findModule(self, ptr): + for (mrange, zone) in self.mm.items(): + if ptr >= mrange[0] and ptr < mrange[1]: + return str(zone) + return None + + def __str__(self): + r = "" + for (mrange, zone) in self.mm.items(): + r = r + hex(mrange[0]) + " - " + \ + hex(mrange[1]) + " -> " + zone + "\n" + return r + + def items(self): + r = [] + for (x, y) in self.mm.items(): + r.append((x, y, self.atts[x])) + + return r diff --git a/vdiscover/Misc.py b/vdiscover/Misc.py index e2de3ce..0784701 100644 --- a/vdiscover/Misc.py +++ b/vdiscover/Misc.py @@ -17,19 +17,27 @@ Copyright 2014 by G.Grieco """ -import socket, re, os, stat, errno, string, base64 +import socket +import re +import os +import stat +import errno +import string +import base64 + def readmodfile(modfile): - hooked_mods = [] - if modfile is not None: - hooked_mods = open(modfile).read().split("\n") - hooked_mods = filter(lambda x: x <> '', hooked_mods) - return hooked_mods + hooked_mods = [] + if modfile is not None: + hooked_mods = 
open(modfile).read().split("\n") + hooked_mods = filter(lambda x: x != '', hooked_mods) + return hooked_mods """ from pwntools """ + def parse_ldd_output(output): """Parses the output from a run of 'ldd' on a binary. Returns a dictionary of {path: address} for @@ -48,8 +56,9 @@ def parse_ldd_output(output): ... ''').keys()) ['/lib/x86_64-linux-gnu/libc.so.6', '/lib/x86_64-linux-gnu/libdl.so.2', '/lib/x86_64-linux-gnu/libtinfo.so.5', '/lib64/ld-linux-x86-64.so.2'] """ - expr_linux = re.compile(r'\s(?P\S?/\S+)\s+\((?P0x.+)\)') - expr_openbsd = re.compile(r'^\s+(?P[0-9a-f]+)\s+[0-9a-f]+\s+\S+\s+[01]\s+[0-9]+\s+[0-9]+\s+(?P\S+)$') + expr_linux = re.compile(r'\s(?P\S?/\S+)\s+\((?P0x.+)\)') + expr_openbsd = re.compile( + r'^\s+(?P[0-9a-f]+)\s+[0-9a-f]+\s+\S+\s+[01]\s+[0-9]+\s+[0-9]+\s+(?P\S+)$') libs = {} for s in output.split('\n'): @@ -88,8 +97,8 @@ def sh_string(s): """ very_good = set(string.ascii_letters + string.digits) - good = (very_good | set(string.punctuation + ' ')) - set("'") - alt_good = (very_good | set(string.punctuation + ' ')) - set('!') + good = (very_good | set(string.punctuation + ' ')) - set("'") + alt_good = (very_good | set(string.punctuation + ' ')) - set('!') if '\x00' in s: log.error("sh_string(): Cannot create a null-byte") @@ -115,7 +124,5 @@ def sh_string(s): fixed += c else: fixed += '\\x%02x' % ord(c) - return '"$( (echo %s|(base64 -d||openssl enc -d -base64)||echo -en \'%s\') 2>/dev/null)"' % (base64.b64encode(s), fixed) - - - + return '"$( (echo %s|(base64 -d||openssl enc -d -base64)||echo -en \'%s\') 2>/dev/null)"' % ( + base64.b64encode(s), fixed) diff --git a/vdiscover/Mutation.py b/vdiscover/Mutation.py index e32c0e1..6cb352b 100644 --- a/vdiscover/Mutation.py +++ b/vdiscover/Mutation.py @@ -24,217 +24,238 @@ import Input -def opened_files(program, args, files, timeout=5): - # check if the testcase is opened - output = Popen(["timeout","-k","1",str(timeout), "strace","-e","open",program]+args, stdout=PIPE, stderr=PIPE, stdin=PIPE, env=dict()).communicate() +def opened_files(program, args, files, timeout=5): - for mfile in files: - filename = mfile.filename - #print "checking",filename - if 'open("'+filename in output[1]: - return True + # check if the testcase is opened + output = Popen(["timeout", + "-k", + "1", + str(timeout), + "strace", + "-e", + "open", + program] + args, + stdout=PIPE, + stderr=PIPE, + stdin=PIPE, + env=dict()).communicate() + + for mfile in files: + filename = mfile.filename + # print "checking",filename + if 'open("' + filename in output[1]: + return True + + return False + # print output - return False - #print output def fuzz_cmd(prepared_inputs, fuzzer_cmd, seed): - p = Popen(fuzzer_cmd.split(" ")+[str(seed)], stdout=PIPE, stdin=PIPE, stderr=PIPE) - mutated_input = p.communicate(input=prepared_inputs)[0] - return mutated_input.replace("\0","")[:32767] + p = Popen(fuzzer_cmd.split(" ") + [str(seed)], + stdout=PIPE, stdin=PIPE, stderr=PIPE) + mutated_input = p.communicate(input=prepared_inputs)[0] + return mutated_input.replace("\0", "")[:32767] class DeltaMutation(object): - def __init__(self, inp, atts): - self.inp_type = str(inp.GetType()) - #self.mut_type = str(typ) - self.atts = copy.copy(atts) - def __str__(self): + def __init__(self, inp, atts): + self.inp_type = str(inp.GetType()) + #self.mut_type = str(typ) + self.atts = copy.copy(atts) + + def __str__(self): - r = ["input="+self.inp_type, "type="+self.mut_type] - r = r + map(lambda (a,b): a+"="+str(b),self.atts.items()) - return " ".join(r) + r = ["input=" + self.inp_type, 
"type=" + self.mut_type] + r = r + map(lambda a_b: a_b[0] + "=" + str(a_b[1]), self.atts.items()) + return " ".join(r) class NullDeltaMutation(DeltaMutation): - def __init__(self): - #pass - #DeltaMutation.__init__(inp, atts) - #super(self.__class__, self).__init__(inp, atts) - self.mut_type = "null" + def __init__(self): + # pass + #DeltaMutation.__init__(inp, atts) + #super(self.__class__, self).__init__(inp, atts) + self.mut_type = "null" - def __str__(self): - r = ["type="+self.mut_type] - return " ".join(r) + def __str__(self): + r = ["type=" + self.mut_type] + return " ".join(r) - def inv(self): - pass + def inv(self): + pass class OneByteDeltaMutation(DeltaMutation): - def __init__(self, inp, atts): - #DeltaMutation.__init__(inp, atts) - super(self.__class__, self).__init__(inp, atts) - self.mut_type = "mod" + def __init__(self, inp, atts): + #DeltaMutation.__init__(inp, atts) + super(self.__class__, self).__init__(inp, atts) + self.mut_type = "mod" - def inv(self): - t = self.atts["new"] - self.atts["new"] = self.atts["old"] - self.atts["old"] = t + def inv(self): + t = self.atts["new"] + self.atts["new"] = self.atts["old"] + self.atts["old"] = t class ByteExtensionDeltaMutation(DeltaMutation): - def __init__(self, inp, atts): - super(self.__class__, self).__init__(inp, atts) - self.mut_type = "ext" + def __init__(self, inp, atts): + super(self.__class__, self).__init__(inp, atts) + self.mut_type = "ext" - def inv(self): - self.mut_type = "con" - t = self.atts["new"] - self.atts["new"] = self.atts["old"] - self.atts["old"] = t + def inv(self): + self.mut_type = "con" + t = self.atts["new"] + self.atts["new"] = self.atts["old"] + self.atts["old"] = t class Mutator: - def __init__(self, input): - self.i = 0 - self.input = input.copy() - self.input_len = len(input) - if isinstance(input, Input.Arg): - self.array = map(chr, range(1, 256)) - elif isinstance(input, Input.File): - self.array = map(chr, range(0, 256)) + def __init__(self, input): + self.i = 0 + self.input = input.copy() + self.input_len = len(input) + + if isinstance(input, Input.Arg): + self.array = map(chr, range(1, 256)) + elif isinstance(input, Input.File): + self.array = map(chr, range(0, 256)) + + self.array_len = len(self.array) - self.array_len = len(self.array) + # def GetDelta(self): - #def GetDelta(self): + def Mutate(self): + assert(0) - def Mutate(self): - assert(0) - def GetData(self): - return None - def GetDelta(self): - assert(0) + def GetData(self): + return None + + def GetDelta(self): + assert(0) class RandomExpanderMutator(Mutator): - max_expansion = 10000 + max_expansion = 10000 + + def __iter__(self): + return self + + def next(self): + + assert(self.input_len > 0) - def __iter__(self): - return self + input = self.input.copy() + delta = str(self.input.GetType()) + " " - def next(self): + # expansion mutation + i = random.randrange(self.input_len) + j = random.randrange(self.max_expansion) + m = self.array[random.randrange(self.array_len)] - assert(self.input_len > 0) + # print self.array[rand] + input.data = input.data[:i] + m * j + input.data[i + 1:] - input = self.input.copy() - delta = str(self.input.GetType())+" " - - # expansion mutation - i = random.randrange(self.input_len) - j = random.randrange(self.max_expansion) - m = self.array[random.randrange(self.array_len)] + rpos = int(i / (float(self.input_len)) * 100.0) + rsize = j / 100 * 100 + self.delta = ByteExtensionDeltaMutation(input, dict( + pos=rpos, size=rsize, old=ord(self.input.data[i]), new=ord(m))) - #print self.array[rand] - 
input.data = input.data[:i] + m*j + input.data[i+1:] + return input - - rpos = int(i/(float(self.input_len))*100.0) - rsize = j/100*100 - self.delta = ByteExtensionDeltaMutation(input, dict(pos = rpos, size = rsize, old = ord(self.input.data[i]), new = ord(m) )) - - return input + def GetInput(self): + return self.input.copy() - def GetInput(self): - return self.input.copy() + def GetDelta(self): + return self.delta - def GetDelta(self): - return self.delta class RandomByteMutator(Mutator): - def __iter__(self): - return self + def __iter__(self): + return self - def next(self): + def next(self): - assert(self.input_len > 0) + assert(self.input_len > 0) - input = self.input.copy() - delta = str(self.input.GetType())+" " - - # single byte mutation - i = random.randrange(self.input_len) - #m = self.array[random.randrange(self.array_len)] - m = ord(input.data[i]) ^ (1 << random.randrange(7)) - input.data = input.data[:i] + chr(m) + input.data[i+1:] - - rpos = int(i/(float(self.input_len))*100.0) - self.delta = None#OneByteDeltaMutation(input, dict(pos = rpos, old = ord(self.input.data[i]), new=ord(m))) - return input + input = self.input.copy() + delta = str(self.input.GetType()) + " " - def GetInput(self): - return self.input.copy() + # single byte mutation + i = random.randrange(self.input_len) + #m = self.array[random.randrange(self.array_len)] + m = ord(input.data[i]) ^ (1 << random.randrange(7)) + input.data = input.data[:i] + chr(m) + input.data[i + 1:] - def GetDelta(self): - return self.delta + rpos = int(i / (float(self.input_len)) * 100.0) + # OneByteDeltaMutation(input, dict(pos = rpos, old = ord(self.input.data[i]), new=ord(m))) + self.delta = None + return input + + def GetInput(self): + return self.input.copy() + + def GetDelta(self): + return self.delta class NullMutator(Mutator): - def __iter__(self): - return self + def __iter__(self): + return self - def next(self): + def next(self): - input = self.input.copy() - return input + input = self.input.copy() + return input - def GetInput(self): - return self.input.copy() + def GetInput(self): + return self.input.copy() - #def GetData(self): + # def GetData(self): - def GetDelta(self): - return NullDeltaMutation() + def GetDelta(self): + return NullDeltaMutation() class RandomInputMutator: - def __init__(self, inputs, mutator): - assert(inputs <> []) - self.i = 0 - self.inputs = map(mutator, inputs) - self.inputs_len = len(self.inputs) - - def __iter__(self): - return self - - def next(self, mutate = True): - r = [] - delta = None - symb_inputs = filter(lambda (_,x): x.input.isSymbolic() and x.input.GetType() == "file", enumerate(self.inputs)) - symb_inputs_len = len(symb_inputs) - - self.i = symb_inputs[random.randrange(symb_inputs_len)][0] - - for j, m in enumerate(self.inputs): - if self.i == j: - r.append(m.next()) - #data = input.PrepareData() - delta = m.GetDelta() - - else: - r.append(m.GetInput()) - #data = input.PrepareData() - - #if data: - # r.append(data) - - return delta, r + def __init__(self, inputs, mutator): + assert(inputs != []) + self.i = 0 + self.inputs = map(mutator, inputs) + self.inputs_len = len(self.inputs) + + def __iter__(self): + return self + + def next(self, mutate=True): + r = [] + delta = None + symb_inputs = filter(lambda __x: __x[1].input.isSymbolic( + ) and __x[1].input.GetType() == "file", enumerate(self.inputs)) + symb_inputs_len = len(symb_inputs) + + self.i = symb_inputs[random.randrange(symb_inputs_len)][0] + + for j, m in enumerate(self.inputs): + if self.i == j: + r.append(m.next()) + 
#data = input.PrepareData() + delta = m.GetDelta() + + else: + r.append(m.GetInput()) + #data = input.PrepareData() + + # if data: + # r.append(data) + + return delta, r diff --git a/vdiscover/Pipeline.py b/vdiscover/Pipeline.py index cc7fc96..92e3780 100644 --- a/vdiscover/Pipeline.py +++ b/vdiscover/Pipeline.py @@ -18,10 +18,10 @@ """ import os -from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.base import BaseEstimator, TransformerMixin from sklearn.pipeline import Pipeline from sklearn.ensemble import RandomForestClassifier -from sklearn.naive_bayes import GaussianNB, MultinomialNB +from sklearn.naive_bayes import GaussianNB, MultinomialNB from sklearn.linear_model import LogisticRegression from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer from sklearn.decomposition import PCA, TruncatedSVD @@ -29,11 +29,14 @@ from random import random, randint, sample, gauss, shuffle -def static_tokenizer(s): - return filter(lambda x: x<>'', s.split(" ")) -def dynamic_tokenizer(s): - return filter(lambda x: x<>'', s.split(" ")) +def staticTokenizer(s): + return filter(lambda x: x != '', s.split(" ")) + + +def dynamicTokenizer(s): + return filter(lambda x: x != '', s.split(" ")) + class DenseTransformer(TransformerMixin): @@ -65,6 +68,7 @@ def transform(self, data_dict): def get_params(self, deep=True): return [] + class CutoffMax(BaseEstimator, TransformerMixin): def __init__(self, maxv): @@ -83,468 +87,482 @@ def get_params(self, deep=True): return [] +def makeTrainPipelineBOW(ftype): -def make_train_pipeline(ftype): - - if ftype is "dynamic": - - realpath = os.path.dirname(os.path.realpath(__file__)) - f = open(realpath+"/data/dyn_events.dic") - - event_dict = [] - - for line in f.readlines(): - event_dict.append(line.replace("\n","")) - - return Pipeline(steps=[ - ('selector', ItemSelector(key='dynamic')), - ('dvectorizer', CountVectorizer(tokenizer=dynamic_tokenizer, ngram_range=(1,3), lowercase=False, vocabulary=event_dict)), - ('todense', DenseTransformer()), - ('cutfoff', CutoffMax(16)), - ('classifier', RandomForestClassifier(n_estimators=1000, max_features=None, max_depth=100)) - #('classifier', GaussianNB()) - - ]) - elif ftype is "static": - return Pipeline(steps=[ - ('selector', ItemSelector(key='static')), - ('dvectorizer', CountVectorizer(tokenizer=static_tokenizer, ngram_range=(1,1), lowercase=False)), - ('todense', DenseTransformer()), - ('classifier', LogisticRegression(penalty="l2", C=1e-07, tol=1e-06)) - ]) - else: - assert(0) - -def make_cluster_pipeline_bow(ftype, rdim): - if ftype is "dynamic" and rdim == "pca": - - return Pipeline(steps=[ - ('selector', ItemSelector(key='dynamic')), - ('dvectorizer', TfidfVectorizer(tokenizer=dynamic_tokenizer, use_idf=False, norm=None, ngram_range=(1,1), lowercase=False)), - ('todense', DenseTransformer()), - #('cutfoff', CutoffMax(16)), - ('reducer', PCA(n_components=2)), - - ]) - - elif ftype is "dynamic" and rdim == "svd": - - return Pipeline(steps=[ - ('selector', ItemSelector(key='dynamic')), - ('dvectorizer', TfidfVectorizer(tokenizer=dynamic_tokenizer, use_idf=False, norm=None, ngram_range=(1,1), lowercase=False)), - ('todense', DenseTransformer()), - #('cutfoff', CutoffMax(16)), - ('reducer', TruncatedSVD(n_components=2)), - - ]) - - elif ftype is "dynamic" and rdim == "none": - - return Pipeline(steps=[ - ('selector', ItemSelector(key='dynamic')), - ('dvectorizer', TfidfVectorizer(tokenizer=dynamic_tokenizer, use_idf=False, norm=None, ngram_range=(1,1), lowercase=False)), - ('todense', 
DenseTransformer()), - #('cutfoff', CutoffMax(16)), - ]) - - elif ftype is "static": - return Pipeline(steps=[ - ('selector', ItemSelector(key='static')), - ('dvectorizer', TfidfVectorizer(tokenizer=dynamic_tokenizer, use_idf=False, norm=None, ngram_range=(1,1), lowercase=False)), - ('todense', DenseTransformer()), - ('cutfoff', CutoffMax(16)), - ('reducer', PCA(n_components=2)), - ]) - else: - assert(0) - - -def make_cluster_pipeline_doc2vec(ftype, rdim): - if ftype is "dynamic" and rdim == "pca": - return Pipeline(steps=[ - ('selector', ItemSelector(key='dynamic')), - ('reducer', PCA(n_components=2)), - ]) - elif ftype is "dynamic" and rdim == "svd": - return Pipeline(steps=[ - ('selector', ItemSelector(key='dynamic')), - ('reducer', TruncatedSVD(n_components=2)), - ]) - elif ftype is "dynamic" and rdim == "none": - return Pipeline(steps=[ - ('selector', ItemSelector(key='dynamic')) - ]) - elif ftype is "static": - raise NotImplemented - else: - assert(0) - - - -def make_cluster_pipeline_subtraces(ftype): - if ftype is "dynamic": - return Pipeline(steps=[ - ('selector', ItemSelector(key='dynamic')), - ('reducer', PCA(n_components=12)), - ]) - elif ftype is "static": - raise NotImplemented - else: - assert(0) - -def make_cluster_cnn(mode, max_features, maxlen, embedding_dims, nb_filters, filter_length, hidden_dims, nb_classes, weights=None): - - #print mode, max_features, maxlen, embedding_dims, nb_filters, filter_length, hidden_dims, nb_classes - from keras.preprocessing import sequence - from keras.optimizers import RMSprop - from keras.models import Sequential - from keras.layers.core import Dense, Dropout, Activation, Flatten - from keras.layers.embeddings import Embedding - from keras.layers.convolutional import Convolution1D, MaxPooling1D - - print('Build model...') - model = Sequential() - - # we start off with an efficient embedding layer which maps - # our vocab indices into embedding_dims dimensions - if mode == "train": - model.add(Embedding(max_features, embedding_dims, input_length=maxlen)) - elif mode == "test": - model.add(Embedding(max_features, embedding_dims, input_length=maxlen, weights=weights[0])) - - model.add(Dropout(0.25)) - - # we add a Convolution1D, which will learn nb_filters - # word group filters of size filter_length: - if mode == "train": - model.add(Convolution1D(nb_filter=nb_filters, - filter_length=filter_length, - border_mode='valid', - activation='relu', - subsample_length=1)) - - elif mode == "test": - model.add(Convolution1D(nb_filter=nb_filters, - filter_length=filter_length, - border_mode='valid', - activation='relu', - subsample_length=1, - weights=weights[2])) - - - # we use standard max pooling (halving the output of the previous layer): - model.add(MaxPooling1D(pool_length=2)) - - # We flatten the output of the conv layer, so that we can add a vanilla dense layer: - model.add(Flatten()) - - # Computing the output shape of a conv layer can be tricky; - # for a good tutorial, see: http://cs231n.github.io/convolutional-networks/ - output_size = nb_filters * (((maxlen - filter_length) / 1) + 1) / 2 - #print output_size, hidden_dims - - # We add a vanilla hidden layer: - if mode == "train": - model.add(Dense(hidden_dims)) - if mode == "test": - model.add(Dense(hidden_dims, weights=weights[5])) - - if mode == "train": + if ftype is "dynamic": - model.add(Dropout(0.25)) - model.add(Activation('relu')) + realpath = os.path.dirname(os.path.realpath(__file__)) + f = open(realpath + "/data/dyn_events.dic") - # We project onto a single unit output layer, 
and squash it with a sigmoid: - model.add(Dense(nb_classes)) + event_dict = [] - model.add(Activation('softmax')) - model.compile(loss='categorical_crossentropy', optimizer='rmsprop', class_mode="categorical") + for line in f.readlines(): + event_dict.append(line.replace("\n", "")) - elif mode == "test": - model.compile(loss='mean_squared_error', optimizer='rmsprop') + return Pipeline(steps=[ + ('selector', ItemSelector(key='dynamic')), + ('dvectorizer', CountVectorizer(tokenizer=dynamicTokenizer, + ngram_range=(1, 3), lowercase=False, vocabulary=event_dict)), + ('todense', DenseTransformer()), + ('cutfoff', CutoffMax(16)), + ('classifier', RandomForestClassifier( + n_estimators=1000, max_features=None, max_depth=100)) + #('classifier', GaussianNB()) + ]) + elif ftype is "static": + return Pipeline(steps=[ + ('selector', ItemSelector(key='static')), + ('dvectorizer', CountVectorizer( + tokenizer=static_tokenizer, ngram_range=(1, 1), lowercase=False)), + ('todense', DenseTransformer()), + ('classifier', LogisticRegression(penalty="l2", C=1e-07, tol=1e-06)) + ]) + else: + assert(0) + + +def makeClusterPipelineBOW(ftype, rdim): + if ftype is "dynamic" and rdim == "pca": + + return Pipeline(steps=[ + ('selector', ItemSelector(key='dynamic')), + ('dvectorizer', TfidfVectorizer(tokenizer=dynamicTokenizer, + use_idf=False, norm=None, ngram_range=(1, 1), lowercase=False)), + ('todense', DenseTransformer()), + #('cutfoff', CutoffMax(16)), + ('reducer', PCA(n_components=2)), + + ]) + + elif ftype is "dynamic" and rdim == "svd": + + return Pipeline(steps=[ + ('selector', ItemSelector(key='dynamic')), + ('dvectorizer', TfidfVectorizer(tokenizer=dynamicTokenizer, + use_idf=False, norm=None, ngram_range=(1, 1), lowercase=False)), + ('todense', DenseTransformer()), + #('cutfoff', CutoffMax(16)), + ('reducer', TruncatedSVD(n_components=2)), + + ]) + + elif ftype is "dynamic" and rdim == "none": + + return Pipeline(steps=[ + ('selector', ItemSelector(key='dynamic')), + ('dvectorizer', TfidfVectorizer(tokenizer=dynamic_tokenizer, + use_idf=False, norm=None, ngram_range=(1, 1), lowercase=False)), + ('todense', DenseTransformer()), + #('cutfoff', CutoffMax(16)), + ]) + + elif ftype is "static": + return Pipeline(steps=[ + ('selector', ItemSelector(key='static')), + ('dvectorizer', TfidfVectorizer(tokenizer=dynamic_tokenizer, + use_idf=False, norm=None, ngram_range=(1, 1), lowercase=False)), + ('todense', DenseTransformer()), + ('cutfoff', CutoffMax(16)), + ('reducer', PCA(n_components=2)), + ]) + else: + assert(0) + + +def makeClusterPipelineDoc2vec(ftype, rdim): + if ftype is "dynamic" and rdim == "pca": + return Pipeline(steps=[ + ('selector', ItemSelector(key='dynamic')), + ('reducer', PCA(n_components=2)), + ]) + elif ftype is "dynamic" and rdim == "svd": + return Pipeline(steps=[ + ('selector', ItemSelector(key='dynamic')), + ('reducer', TruncatedSVD(n_components=2)), + ]) + elif ftype is "dynamic" and rdim == "none": + return Pipeline(steps=[ + ('selector', ItemSelector(key='dynamic')) + ]) + elif ftype is "static": + raise NotImplemented + else: + assert(0) - return model +def makeClusterPipelineSubtraces(ftype): + if ftype is "dynamic": + return Pipeline(steps=[ + ('selector', ItemSelector(key='dynamic')), + ('reducer', PCA(n_components=12)), + ]) + elif ftype is "static": + raise NotImplemented + else: + assert(0) +""" +def make_cluster_cnn( + mode, + max_features, + maxlen, + embedding_dims, + nb_filters, + filter_length, + hidden_dims, + nb_classes, + weights=None): + + # print mode, max_features, 
maxlen, embedding_dims, nb_filters, + # filter_length, hidden_dims, nb_classes + from keras.preprocessing import sequence + from keras.optimizers import RMSprop + from keras.models import Sequential + from keras.layers.core import Dense, Dropout, Activation, Flatten + from keras.layers.embeddings import Embedding + from keras.layers.convolutional import Convolution1D, MaxPooling1D + + print('Build model...') + model = Sequential() + + # we start off with an efficient embedding layer which maps + # our vocab indices into embedding_dims dimensions + if mode == "train": + model.add(Embedding(max_features, embedding_dims, input_length=maxlen)) + elif mode == "test": + model.add(Embedding(max_features, embedding_dims, + input_length=maxlen, weights=weights[0])) -try: - from keras.preprocessing import sequence -except: - pass + model.add(Dropout(0.25)) + # we add a Convolution1D, which will learn nb_filters + # word group filters of size filter_length: + if mode == "train": + model.add(Convolution1D(nb_filter=nb_filters, + filter_length=filter_length, + border_mode='valid', + activation='relu', + subsample_length=1)) -class DeepReprPreprocessor: + elif mode == "test": + model.add(Convolution1D(nb_filter=nb_filters, + filter_length=filter_length, + border_mode='valid', + activation='relu', + subsample_length=1, + weights=weights[2])) - def __init__(self, tokenizer, max_len, batch_size): - self.tokenizer = tokenizer - self.max_len = max_len - self.batch_size = batch_size + # we use standard max pooling (halving the output of the previous layer): + model.add(MaxPooling1D(pool_length=2)) - def preprocess_traces(self, X_data, y_data=None, labels=None): + # We flatten the output of the conv layer, so that we can add a vanilla + # dense layer: + model.add(Flatten()) - cut_X_data = [] - cut_label_data = [] - cut_y_data = [] - #rep = 5 + # Computing the output shape of a conv layer can be tricky; + # for a good tutorial, see: http://cs231n.github.io/convolutional-networks/ + output_size = nb_filters * (((maxlen - filter_length) / 1) + 1) / 2 + # print output_size, hidden_dims - X_size = len(X_data) + # We add a vanilla hidden layer: + if mode == "train": + model.add(Dense(hidden_dims)) + if mode == "test": + model.add(Dense(hidden_dims, weights=weights[5])) - for i,x in enumerate(X_data): + if mode == "train": - #i = randint(0, X_size-1) + model.add(Dropout(0.25)) + model.add(Activation('relu')) - raw_trace = x[:-1] - trace = raw_trace.split(" ") + # We project onto a single unit output layer, and squash it with a + # sigmoid: + model.add(Dense(nb_classes)) - size = len(trace) - rep = 1 + int(float(size) / float(self.max_len)) - rep = min(rep, 10) + model.add(Activation('softmax')) + model.compile(loss='categorical_crossentropy', + optimizer='rmsprop', class_mode="categorical") - for _ in range(rep): + elif mode == "test": + model.compile(loss='mean_squared_error', optimizer='rmsprop') - start = size - (self.max_len) - start = randint(0, max(start,0)) + return model - new_trace = " ".join(trace[start:(start+self.max_len)]) - #print "sizes:", size, len(trace[start:(start+self.max_len)]) +try: + from keras.preprocessing import sequence +except: + pass - cut_X_data.append(new_trace) +class DeepReprPreprocessor: - if labels is not None: - cut_label_data.append(labels[i]) - else: - cut_label_data.append("+"+str(size)) + def __init__(self, tokenizer, max_len, batch_size): + self.tokenizer = tokenizer + self.max_len = max_len + self.batch_size = batch_size - if y_data is not None: - cut_y_data.append(y_data[i]) 
- else: - cut_y_data.append(0) + def preprocess_traces(self, X_data, y_data=None, labels=None): - X_train = self.tokenizer.texts_to_sequences(cut_X_data) - labels = cut_label_data - y_train = cut_y_data - X_train,y_train,labels = zip(*filter(lambda (x,y,z): not (x == []), zip(X_train,y_train,labels))) + cut_X_data = [] + cut_label_data = [] + cut_y_data = [] + #rep = 5 + X_size = len(X_data) - X_size = len(X_train) - X_train = sequence.pad_sequences(X_train, maxlen=self.max_len) - return X_train, y_train, labels + for i, x in enumerate(X_data): - def preprocess(self, X_data, cut_size=1): + #i = randint(0, X_size-1) - cut_X_data = [] - cut_y_data = [] - self.classes = [] - X_size = len(X_data) - stats = dict() + raw_trace = x[:-1] + trace = raw_trace.split(" ") - for _ in xrange(1000): + size = len(trace) + rep = 1 + int(float(size) / float(self.max_len)) + rep = min(rep, 10) - i = randint(0, X_size-1) + for _ in range(rep): - raw_trace = X_data[i][:-1] - trace = raw_trace.split(" ") + start = size - (self.max_len) + start = randint(0, max(start, 0)) - size = len(trace) + new_trace = " ".join(trace[start:(start + self.max_len)]) + # print "sizes:", size, len(trace[start:(start+self.max_len)]) - if size <= (self.max_len + 1): - start = 0 - end = size - 2 - new_trace = " ".join(trace[start:(end+1)]) - last_event = trace[(end+1)].split(":") - cut_y_data.append(last_event[0]) - else: - #print size - start = size - (self.max_len) - 2 - start = randint(0, start) - end = start + self.max_len - #print len(trace[start:end]) - #new_trace = " ".join(trace[start:end]) + cut_X_data.append(new_trace) - #start = randint(0, size-2) - #end = randint(start, size-2) + if labels is not None: + cut_label_data.append(labels[i]) + else: + cut_label_data.append("+" + str(size)) - new_trace = " ".join(trace[start:(end+1)]) - last_event = trace[end+1].split(":") - cut_y_data.append(last_event[0]) + if y_data is not None: + cut_y_data.append(y_data[i]) + else: + cut_y_data.append(0) + X_train = self.tokenizer.texts_to_sequences(cut_X_data) + labels = cut_label_data + y_train = cut_y_data + X_train, y_train, labels = zip( + *filter(lambda x_y_z: not (x_y_z[0] == []), zip(X_train, y_train, labels))) - for y in set(cut_y_data): - stats[y] = float(cut_y_data.count(y)) / len(cut_y_data) + X_size = len(X_train) + X_train = sequence.pad_sequences(X_train, maxlen=self.max_len) + return X_train, y_train, labels - print stats, sum(stats.values()) - #assert(0) - cut_y_data = [] - for _ in xrange(cut_size): + def preprocess(self, X_data, cut_size=1): - i = randint(0, X_size-1) + cut_X_data = [] + cut_y_data = [] + self.classes = [] + X_size = len(X_data) + stats = dict() - raw_trace = X_data[i][:-1] - trace = raw_trace.split(" ") - size = len(trace) + for _ in xrange(1000): + i = randint(0, X_size - 1) - if size <= (self.max_len + 1): - start = 0 - end = size - 2 - new_trace = " ".join(trace[start:(end+1)]) - last_event = trace[(end+1)].split(":") - else: - #print size - start = size - (self.max_len) - 2 - start = randint(0, start) - end = start + self.max_len - #print len(trace[start:end]) - #new_trace = " ".join(trace[start:end]) + raw_trace = X_data[i][:-1] + trace = raw_trace.split(" ") - #start = randint(0, size-2) - #end = randint(start, size-2) + size = len(trace) - new_trace = " ".join(trace[start:(end+1)]) - last_event = trace[end+1].split(":") + if size <= (self.max_len + 1): + start = 0 + end = size - 2 + new_trace = " ".join(trace[start:(end + 1)]) + last_event = trace[(end + 1)].split(":") + 
cut_y_data.append(last_event[0]) + else: + # print size + start = size - (self.max_len) - 2 + start = randint(0, start) + end = start + self.max_len + # print len(trace[start:end]) + #new_trace = " ".join(trace[start:end]) - cl = last_event[0] + #start = randint(0, size-2) + #end = randint(start, size-2) - if cl not in self.classes: - self.classes.append(cl) - stats[cl] = 0.0 - else: - if random() <= stats[cl]: - continue + new_trace = " ".join(trace[start:(end + 1)]) + last_event = trace[end + 1].split(":") + cut_y_data.append(last_event[0]) - cut_X_data.append(new_trace) - cut_y_data.append(self.classes.index(cl)) + for y in set(cut_y_data): + stats[y] = float(cut_y_data.count(y)) / len(cut_y_data) - X_train = self.tokenizer.texts_to_sequences(cut_X_data) + print stats, sum(stats.values()) + # assert(0) + cut_y_data = [] + for _ in xrange(cut_size): - y_train = [] + i = randint(0, X_size - 1) - for y in cut_y_data: - v = [0]*len(self.classes) - v[y] = 1 - y_train.append(v) + raw_trace = X_data[i][:-1] + trace = raw_trace.split(" ") + size = len(trace) - X_train = filter(lambda x: not (x == []), X_train) + if size <= (self.max_len + 1): + start = 0 + end = size - 2 + new_trace = " ".join(trace[start:(end + 1)]) + last_event = trace[(end + 1)].split(":") + else: + # print size + start = size - (self.max_len) - 2 + start = randint(0, start) + end = start + self.max_len + # print len(trace[start:end]) + #new_trace = " ".join(trace[start:end]) - X_size = len(X_train) - X_train = X_train[:(X_size-(X_size % self.batch_size))] - X_train = sequence.pad_sequences(X_train, maxlen=self.max_len) + #start = randint(0, size-2) + #end = randint(start, size-2) - if y_train is not None: - y_train = y_train[:(X_size-(X_size % self.batch_size))] - return X_train,y_train - else: - return X_train + new_trace = " ".join(trace[start:(end + 1)]) + last_event = trace[end + 1].split(":") + cl = last_event[0] + + if cl not in self.classes: + self.classes.append(cl) + stats[cl] = 0.0 + else: + if random() <= stats[cl]: + continue + + cut_X_data.append(new_trace) + cut_y_data.append(self.classes.index(cl)) + + X_train = self.tokenizer.texts_to_sequences(cut_X_data) + + y_train = [] + + for y in cut_y_data: + v = [0] * len(self.classes) + v[y] = 1 + y_train.append(v) + + X_train = filter(lambda x: not (x == []), X_train) + + X_size = len(X_train) + X_train = X_train[:(X_size - (X_size % self.batch_size))] + X_train = sequence.pad_sequences(X_train, maxlen=self.max_len) + + if y_train is not None: + y_train = y_train[:(X_size - (X_size % self.batch_size))] + return X_train, y_train + else: + return X_train class KerasPreprocessor: - def __init__(self, tokenizer, max_len, batch_size): - self.tokenizer = tokenizer - self.max_len = max_len - self.batch_size = batch_size - - def preprocess(self, X_data, y_data=None, cut_size=1): + def __init__(self, tokenizer, max_len, batch_size): + self.tokenizer = tokenizer + self.max_len = max_len + self.batch_size = batch_size - cut_X_data = [] - cut_y_data = [] - X_size = len(X_data) + def preprocess(self, X_data, y_data=None, cut_size=1): - for _ in xrange(cut_size): + cut_X_data = [] + cut_y_data = [] + X_size = len(X_data) - i = randint(0, X_size-1) + for _ in xrange(cut_size): - raw_trace = X_data[i] - trace = raw_trace.split(" ") + i = randint(0, X_size - 1) - size = len(trace) + raw_trace = X_data[i] + trace = raw_trace.split(" ") - start = randint(0, size-1) - end = start + randint(0, self.max_len) + size = len(trace) - new_trace = " ".join(trace[start:(end+1)]) - 
cut_X_data.append(new_trace) + start = randint(0, size - 1) + end = start + randint(0, self.max_len) - if y_data is not None: - y = y_data[i] - cut_y_data.append(y) + new_trace = " ".join(trace[start:(end + 1)]) + cut_X_data.append(new_trace) - X_train = self.tokenizer.texts_to_sequences(cut_X_data) - y_train = cut_y_data + if y_data is not None: + y = y_data[i] + cut_y_data.append(y) - if y_train is not None: - X_train,y_train = zip(*filter(lambda (x,y): not (x == []), zip(X_train,y_train))) - else: - X_train = filter(lambda x: not (x == []), X_train) + X_train = self.tokenizer.texts_to_sequences(cut_X_data) + y_train = cut_y_data + if y_train is not None: + X_train, y_train = zip( + *filter(lambda x_y: not (x_y[0] == []), zip(X_train, y_train))) + else: + X_train = filter(lambda x: not (x == []), X_train) - X_size = len(X_train) - X_train = X_train[:(X_size-(X_size % self.batch_size))] - X_train = sequence.pad_sequences(X_train, maxlen=self.max_len) + X_size = len(X_train) + X_train = X_train[:(X_size - (X_size % self.batch_size))] + X_train = sequence.pad_sequences(X_train, maxlen=self.max_len) - if y_train is not None: - y_train = y_train[:(X_size-(X_size % self.batch_size))] - return X_train,y_train - else: - return X_train + if y_train is not None: + y_train = y_train[:(X_size - (X_size % self.batch_size))] + return X_train, y_train + else: + return X_train + def preprocess_one(self, raw_trace, sample_size=100): - def preprocess_one(self, raw_trace, sample_size=100): + trace = raw_trace.split(" ") + size = len(trace) + cut_X_data = [] + # print trace - trace = raw_trace.split(" ") - size = len(trace) - cut_X_data = [] - #print trace + for _ in xrange(sample_size): - for _ in xrange(sample_size): + start = randint(0, size - 1) + end = start + randint(0, self.max_len) - start = randint(0, size-1) - end = start + randint(0, self.max_len) + new_trace = " ".join(trace[start:(end + 1)]) + cut_X_data.append(new_trace) - new_trace = " ".join(trace[start:(end+1)]) - cut_X_data.append(new_trace) + X_train = self.tokenizer.texts_to_sequences(cut_X_data) + X_train = filter(lambda x: not (x == []), X_train) - X_train = self.tokenizer.texts_to_sequences(cut_X_data) - X_train = filter(lambda x: not (x == []), X_train) + X_size = len(X_train) + X_train = X_train[:(X_size - (X_size % self.batch_size))] + # print "X_size", X_size-(X_size % self.batch_size) - X_size = len(X_train) - X_train = X_train[:(X_size-(X_size % self.batch_size))] - #print "X_size", X_size-(X_size % self.batch_size) + X_train = sequence.pad_sequences(X_train, maxlen=self.max_len) + return X_train - X_train = sequence.pad_sequences(X_train, maxlen=self.max_len) - return X_train class KerasPredictor: - def __init__(self,preprocessor, model, ftype): - self.preprocessor = preprocessor - self.batch_size = preprocessor.batch_size - self.ftype = ftype - self.model = model + def __init__(self, preprocessor, model, ftype): + self.preprocessor = preprocessor + self.batch_size = preprocessor.batch_size + self.ftype = ftype + self.model = model def predict(self, X_data): - X_size = len(X_data) - X_data = X_data[self.ftype] - X_predictions = [] + X_size = len(X_data) + X_data = X_data[self.ftype] + X_predictions = [] - for raw_trace in X_data: + for raw_trace in X_data: - trace_data = self.preprocessor.preprocess_one(raw_trace) + trace_data = self.preprocessor.preprocess_one(raw_trace) - if len(trace_data) > 0: - predictions = self.model.predict(trace_data, verbose=0, batch_size=self.batch_size) - else: # imposible to predict - predictions = 
[0] + if len(trace_data) > 0: + predictions = self.model.predict( + trace_data, verbose=0, batch_size=self.batch_size) + else: # imposible to predict + predictions = [0] - avg_predictions = sum(predictions)/100.0 - #print predictions, avg_predictions - if avg_predictions > 0.5: - X_predictions.append(1) - else: - X_predictions.append(0) - - return X_predictions + avg_predictions = sum(predictions) / 100.0 + # print predictions, avg_predictions + if avg_predictions > 0.5: + X_predictions.append(1) + else: + X_predictions.append(0) + return X_predictions +""" diff --git a/vdiscover/Printer.py b/vdiscover/Printer.py index 83b32da..7d1af84 100644 --- a/vdiscover/Printer.py +++ b/vdiscover/Printer.py @@ -21,83 +21,85 @@ import csv import copy -from Event import Call, Crash, Abort, Exit, Timeout, Signal, Vulnerability, specs -from Types import ptypes, isPtr, isNum, ptr32_ptypes, num32_ptypes, generic_ptypes +from Event import Call, Crash, Abort, Exit, Timeout, Signal, Vulnerability, specs +from Types import ptypes, isPtr, isNum, ptr32_ptypes, num32_ptypes, generic_ptypes + class TypePrinter: - def __init__(self, filename, pname, mclass): - self.tests = set() - self.outfile = open(filename, "a+") - self.pname = pname - self.mclass = mclass - self.csvwriter = csv.writer(self.outfile, delimiter='\t') - def preprocess(self, event): + def __init__(self, filename, pname, mclass): + self.tests = set() + self.outfile = open(filename, "a+") + self.pname = pname + self.mclass = mclass + self.csvwriter = csv.writer(self.outfile, delimiter='\t') + + def preprocess(self, event): - r = list() + r = list() - if isinstance(event, Call): - (name, args) = event.GetTypedName() + if isinstance(event, Call): + (name, args) = event.GetTypedName() - for (index, arg) in enumerate(args[:]): - r.append((name+":"+str(index),str(arg))) + for (index, arg) in enumerate(args[:]): + r.append((name + ":" + str(index), str(arg))) - elif isinstance(event, Abort): - (name, fields) = event.GetTypedName() - r.append((name+":eip",str(fields[0]))) + elif isinstance(event, Abort): + (name, fields) = event.GetTypedName() + r.append((name + ":eip", str(fields[0]))) - elif isinstance(event, Exit): - (name, fields) = event.GetTypedName() - r.append((name,str(()))) + elif isinstance(event, Exit): + (name, fields) = event.GetTypedName() + r.append((name, str(()))) - elif isinstance(event, Crash): - (name, fields) = event.GetTypedName() - r.append((name+":eip",str(fields[0]))) + elif isinstance(event, Crash): + (name, fields) = event.GetTypedName() + r.append((name + ":eip", str(fields[0]))) - elif isinstance(event, Vulnerability): - (name, fields) = event.GetTypedName() - r.append((name,str(fields[0]))) + elif isinstance(event, Vulnerability): + (name, fields) = event.GetTypedName() + r.append((name, str(fields[0]))) - elif isinstance(event, Timeout): - (name, fields) = event.GetTypedName() - r.append((name,str(()))) + elif isinstance(event, Timeout): + (name, fields) = event.GetTypedName() + r.append((name, str(()))) - elif isinstance(event, Signal): - (name, fields) = event.GetTypedName() + elif isinstance(event, Signal): + (name, fields) = event.GetTypedName() - if name == "SIGSEGV": - r.append((name+":addr",str(fields[0]))) - else: - r.append((name,str(fields[0]))) + if name == "SIGSEGV": + r.append((name + ":addr", str(fields[0]))) + else: + r.append((name, str(fields[0]))) - return r + return r - def print_events(self, label, events): + def print_events(self, label, events): - r = list() + r = list() - for event in events: - r = r + 
list(self.preprocess(event)) + for event in events: + r = r + list(self.preprocess(event)) - events = r + events = r - #x = hash(tuple(events)) + #x = hash(tuple(events)) - #if (x in self.tests): - # return + # if (x in self.tests): + # return - #self.tests.add(x) + # self.tests.add(x) - trace = "" + trace = "" - for x,y in events: - trace = trace + ("%s=%s " % (x,y)) + for x, y in events: + trace = trace + ("%s=%s " % (x, y)) - row = [self.pname+":"+label,trace] + row = [self.pname + ":" + label, trace] - if self.mclass is not None: - row.append(self.mclass) + if self.mclass is not None: + row.append(self.mclass) - self.csvwriter.writerow(row) - self.outfile.flush() - return row + self.csvwriter.writerow(row) + self.outfile.flush() + return row diff --git a/vdiscover/Process.py b/vdiscover/Process.py index 3fd580a..66fe41e 100644 --- a/vdiscover/Process.py +++ b/vdiscover/Process.py @@ -19,14 +19,18 @@ from ptrace import PtraceError from ptrace.debugger import (PtraceDebugger, Application, - ProcessExit, NewProcessEvent, ProcessSignal, - ProcessExecution, ProcessError) + ProcessExit, NewProcessEvent, ProcessSignal, + ProcessExecution, ProcessError) from logging import getLogger, info, warning, error from ptrace.error import PTRACE_ERRORS, PtraceError, writeError from ptrace.disasm import HAS_DISASSEMBLER -from ptrace.ctypes_tools import (truncateWord, - formatWordHex, formatAddress, formatAddressRange, word2bytes) +from ptrace.ctypes_tools import ( + truncateWord, + formatWordHex, + formatAddress, + formatAddressRange, + word2bytes) from ptrace.signames import signalName, SIGNAMES from signal import SIGTRAP, SIGALRM, SIGABRT, SIGSEGV, SIGILL, SIGCHLD, SIGWINCH, SIGFPE, SIGBUS, SIGTERM, SIGPIPE, signal, alarm @@ -44,8 +48,19 @@ from MemoryMap import MemoryMaps from Alarm import alarm_handler, TimeoutEx + class Process(Application): - def __init__(self, program, envs, timeout, included_mods = [], ignored_mods = [], no_stdout = True, max_events = 320, min_events = -10*320): + + def __init__( + self, + program, + envs, + timeout, + included_mods=[], + ignored_mods=[], + no_stdout=True, + max_events=320, + min_events=-10 * 320): Application.__init__(self) # no effect @@ -67,9 +82,9 @@ def __init__(self, program, envs, timeout, included_mods = [], ignored_mods = [] self.min_events = min_events # Parse ELF - self.elf = ELF(self.program, plt = False) + self.elf = ELF(self.program, plt=False) - #if self.elf.GetType() <> "ELF 32-bit": + # if self.elf.GetType() <> "ELF 32-bit": # print "Only ELF 32-bit are supported to be executed." 
# exit(-1) @@ -84,150 +99,161 @@ def __init__(self, program, envs, timeout, included_mods = [], ignored_mods = [] self.binfo = dict() def setBreakpoints(self, elf): - #print elf.GetFunctions() - for func_name in elf.GetFunctions(): - #print "func_name", elf.GetModname(), hex(elf.FindFuncInPlt(func_name)) - - if func_name in specs: - #print "func_name in spec",elf.GetModname(), func_name, hex(elf.FindFuncInPlt(func_name)) - addr = elf.FindFuncInPlt(func_name) - self.binfo[addr] = elf.GetModname(),func_name - self.breakpoint(addr) + # print elf.GetFunctions() + for func_name in elf.GetFunctions(): + # print "func_name", elf.GetModname(), + # hex(elf.FindFuncInPlt(func_name)) + + if func_name in specs: + # print "func_name in spec",elf.GetModname(), func_name, + # hex(elf.FindFuncInPlt(func_name)) + addr = elf.FindFuncInPlt(func_name) + self.binfo[addr] = elf.GetModname(), func_name + self.breakpoint(addr) def findBreakpointInfo(self, addr): - if addr in self.binfo: - return self.binfo[addr] - else: - return None, None + if addr in self.binfo: + return self.binfo[addr] + else: + return None, None def createEvents(self, signal): - # Hit breakpoint? + # Hit breakpoint? if signal.signum == SIGTRAP: ip = self.process.getInstrPointer() if not CPU_POWERPC: # Go before "INT 3" instruction ip -= 1 breakpoint = self.process.findBreakpoint(ip) - #print "breakpoint @",hex(ip) + # print "breakpoint @",hex(ip) if breakpoint: module, name = self.findBreakpointInfo(breakpoint.address) - #print module, name, hex(ip) + # print module, name, hex(ip) if ip == self.elf.GetEntrypoint(): - breakpoint.desinstall(set_ip=True) + breakpoint.desinstall(set_ip=True) - #if self.mm is None: - self.mm = MemoryMaps(self.program, self.pid) - #self.setBreakpoints(self.elf) + # if self.mm is None: + self.mm = MemoryMaps(self.program, self.pid) + # self.setBreakpoints(self.elf) - #print self.mm + # print self.mm - for (range, mod, atts) in self.mm.items(): - if '/' in mod and 'x' in atts and not ("libc-" in mod): + for (range, mod, atts) in self.mm.items(): + if '/' in mod and 'x' in atts and not ("libc-" in mod): - # FIXME: self.elf.path should be absolute - if mod == self.elf.path: - base = 0 - else: - base = range[0] + # FIXME: self.elf.path should be absolute + if mod == self.elf.path: + base = 0 + else: + base = range[0] - if self.included_mods == [] or any(map(lambda l: l in mod, self.included_mods)): - if self.ignored_mods == [] or not (any(map(lambda l: l in mod, self.ignored_mods))): + if self.included_mods == [] or any( + map(lambda l: l in mod, self.included_mods)): + if self.ignored_mods == [] or not ( + any(map(lambda l: l in mod, self.ignored_mods))): - if not (mod in self.modules): - self.modules[mod] = ELF(mod, base = base) - #print "hooking", mod, hex(base) + if not (mod in self.modules): + self.modules[mod] = ELF(mod, base=base) + # print "hooking", mod, hex(base) - self.setBreakpoints(self.modules[mod]) + self.setBreakpoints(self.modules[mod]) - - return [] + return [] elif name is None: - assert(0) + assert(0) else: - call = Call(name, module) - #self.mm.update() - #print "updated mm" - call.detect_parameters(self.process, self.mm) - breakpoint.desinstall(set_ip=True) - - call_ip = ip - self.process.singleStep() - self.debugger.waitProcessEvent() - - n = self.nevents.get((ip,name), 0) - self.nevents[(ip, name)] = n + 2 - - for ((ip_,name_),n) in self.nevents.items(): - - if n > self.min_events + 1: - self.nevents[(ip_, name_)] = n - 1 - elif n == self.min_events + 1: - self.nevents[(ip_, name_)] = 
self.min_events - #print "restoring!", (ip, name) - self.breakpoint(call_ip) - - if n < self.max_events: - self.breakpoint(call_ip) - #else: - #print "disabled!", (ip, name) - - #print "call detected!" - return [call] + call = Call(name, module) + # self.mm.update() + # print "updated mm" + call.detect_parameters(self.process, self.mm) + breakpoint.desinstall(set_ip=True) + + call_ip = ip + self.process.singleStep() + self.debugger.waitProcessEvent() + + n = self.nevents.get((ip, name), 0) + self.nevents[(ip, name)] = n + 2 + + for ((ip_, name_), n) in self.nevents.items(): + + if n > self.min_events + 1: + self.nevents[(ip_, name_)] = n - 1 + elif n == self.min_events + 1: + self.nevents[(ip_, name_)] = self.min_events + # print "restoring!", (ip, name) + self.breakpoint(call_ip) + + if n < self.max_events: + self.breakpoint(call_ip) + # else: + # print "disabled!", (ip, name) + + # print "call detected!" + return [call] elif signal.signum == SIGABRT: - self.crashed = True - return [Signal("SIGABRT",self.process, self.mm), Abort(self.process, self.mm)] + self.crashed = True + return [ + Signal( + "SIGABRT", self.process, self.mm), Abort( + self.process, self.mm)] elif signal.signum == SIGSEGV: - self.crashed = True - self.mm = MemoryMaps(self.program, self.pid) - return [Signal("SIGSEGV", self.process, self.mm), Crash(self.process, self.mm)] + self.crashed = True + self.mm = MemoryMaps(self.program, self.pid) + return [ + Signal( + "SIGSEGV", self.process, self.mm), Crash( + self.process, self.mm)] elif signal.signum == SIGILL: - #self.crashed = True - self.mm = MemoryMaps(self.program, self.pid) - return [Signal("SIGILL", self.process, self.mm)] + #self.crashed = True + self.mm = MemoryMaps(self.program, self.pid) + return [Signal("SIGILL", self.process, self.mm)] elif signal.signum == SIGFPE: - self.crashed = True - self.mm = MemoryMaps(self.program, self.pid) - return [Signal("SIGFPE", self.process, self.mm), Crash(self.process, self.mm)] + self.crashed = True + self.mm = MemoryMaps(self.program, self.pid) + return [ + Signal( + "SIGFPE", self.process, self.mm), Crash( + self.process, self.mm)] elif signal.signum == SIGBUS: - #self.crashed = True - self.mm = MemoryMaps(self.program, self.pid) - return [Signal("SIGBUS", self.process, self.mm)] + #self.crashed = True + self.mm = MemoryMaps(self.program, self.pid) + return [Signal("SIGBUS", self.process, self.mm)] elif signal.signum == SIGCHLD: - #self.crashed = True - self.mm = MemoryMaps(self.program, self.pid) - return [Signal("SIGCHLD", self.process, self.pid)] + #self.crashed = True + self.mm = MemoryMaps(self.program, self.pid) + return [Signal("SIGCHLD", self.process, self.pid)] - elif signal.signum == SIGTERM: # killed by the kernel? - self.crashed = True - return [] + elif signal.signum == SIGTERM: # killed by the kernel? + self.crashed = True + return [] # Harmless signals elif signal.signum == SIGPIPE: - return [] # User generated, ignore. + return [] # User generated, ignore. # Harmless signals elif signal.signum == SIGWINCH: - return [] # User generated, ignore. + return [] # User generated, ignore. 
else: - print "I don't know what to do with this signal:", str(signal) - assert(False) + print "I don't know what to do with this signal:", str(signal) + assert(False) return [] def DetectVulnerabilities(self, preevents, events): - return detect_vulnerabilities(preevents, events, self.process, self.mm) - + return detect_vulnerabilities(preevents, events, self.process, self.mm) def createProcess(self, cmd, envs, no_stdout): @@ -236,15 +262,17 @@ def createProcess(self, cmd, envs, no_stdout): is_attached = True try: - #print "initial processes:" - #for p in self.debugger: + # print "initial processes:" + # for p in self.debugger: # print "p:", p - #print "end processes" + # print "end processes" return self.debugger.addProcess(self.pid, is_attached=is_attached) - except (ProcessExit, PtraceError), err: + except (ProcessExit, PtraceError) as err: if isinstance(err, PtraceError) \ - and err.errno == EPERM: - error("ERROR: You are not allowed to trace process %s (permission denied or process already traced)" % self.pid) + and err.errno == EPERM: + error( + "ERROR: You are not allowed to trace process %s (permission denied or process already traced)" % + self.pid) else: error("ERROR: Process can no be attached! %s" % err) return None @@ -279,13 +307,12 @@ def cont(self, signum=None): signal = self.debugger.waitSignals() process = signal.process events = self.createEvents(signal) - + #vulns = self.DetectVulnerabilities(self.events, events) - #print "vulns detected" - self.events = self.events + events #+ vulns + # print "vulns detected" + self.events = self.events + events # + vulns #self.nevents = self.nevents + len(events) - def readInstrSize(self, address, default_size=None): if not HAS_DISASSEMBLER: return default_size @@ -293,7 +320,7 @@ def readInstrSize(self, address, default_size=None): # Get address and size of instruction at specified address instr = self.process.disassembleOne(address) return instr.size - except PtraceError, err: + except PtraceError as err: warning("Warning: Unable to read instruction size at %s: %s" % ( formatAddress(address), err)) return default_size @@ -304,7 +331,7 @@ def breakpoint(self, address): size = self.readInstrSize(address) try: bp = self.process.createBreakpoint(address, size) - except PtraceError, err: + except PtraceError as err: return "Unable to set breakpoint at %s: %s" % ( formatAddress(address), err) #error("New breakpoint: %s" % bp) @@ -312,14 +339,14 @@ def breakpoint(self, address): def runProcess(self, cmd): - #print "Running", cmd + # print "Running", cmd signal(SIGALRM, alarm_handler) - #if self.pid is None: + # if self.pid is None: # timeout = 20*self.timeout - #else: - timeout = 10*self.timeout + # else: + timeout = 10 * self.timeout alarm(timeout) @@ -328,18 +355,18 @@ def runProcess(self, cmd): self.process = self.createProcess(cmd, self.envs, self.no_stdout) self.process.no_frame_pointer = self.elf.no_frame_pointer #self.mm = MemoryMaps(self.program, self.pid) - #print self.mm + # print self.mm self.crashed = False - except ChildError, err: + except ChildError as err: print "a" writeError(getLogger(), err, "Unable to create child process") return - except OSError, err: + except OSError as err: print "b" writeError(getLogger(), err, "Unable to create child process") return - except IOError, err: + except IOError as err: print "c" writeError(getLogger(), err, "Unable to create child process") return @@ -347,75 +374,72 @@ def runProcess(self, cmd): if not self.process: return - # Set the breakpoints 
self.breakpoint(self.elf.GetEntrypoint()) - #print hex(self.elf.GetEntrypoint()) + # print hex(self.elf.GetEntrypoint()) try: - while True: - - #self.cont() - #if self.nevents > self.max_events: - # - # self.events.append(Timeout(timeout)) - # alarm(0) - # return - if not self.debugger or self.crashed: - # There is no more process: quit - alarm(0) - return - else: - self.cont() - - #alarm(0) - #except PtraceError: - #print "deb:",self.debugger, "crash:", self.crashed - #print "PtraceError" - #alarm(0) - #return - - except ProcessExit, event: - alarm(0) - self.events.append(Exit(event.exitcode)) - return + while True: + + # self.cont() + # if self.nevents > self.max_events: + # + # self.events.append(Timeout(timeout)) + # alarm(0) + # return + if not self.debugger or self.crashed: + # There is no more process: quit + alarm(0) + return + else: + self.cont() + + # alarm(0) + # except PtraceError: + # print "deb:",self.debugger, "crash:", self.crashed + # print "PtraceError" + # alarm(0) + # return + + except ProcessExit as event: + alarm(0) + self.events.append(Exit(event.exitcode)) + return except OSError: - alarm(0) - self.events.append(Timeout(timeout)) - self.timeouts += 1 - return + alarm(0) + self.events.append(Timeout(timeout)) + self.timeouts += 1 + return except IOError: - alarm(0) - self.events.append(Timeout(timeout)) - self.timeouts += 1 - return + alarm(0) + self.events.append(Timeout(timeout)) + self.timeouts += 1 + return except TimeoutEx: - self.events.append(Timeout(timeout)) - return - - + self.events.append(Timeout(timeout)) + return def getData(self, inputs): self.events = [] self.nevents = dict() self.debugger = PtraceDebugger() - self.runProcess([self.program]+inputs) - #print self.pid + self.runProcess([self.program] + inputs) + # print self.pid - #if self.crashed: + # if self.crashed: # print "we should terminate.." 
- #sleep(3) + # sleep(3) if self.process is None: - return None + return None self.process.terminate() self.process.detach() - #print self.nevents + # print self.nevents self.process = None return self.events diff --git a/vdiscover/RandomWalk.py b/vdiscover/RandomWalk.py index 7f6fc11..a4881cd 100644 --- a/vdiscover/RandomWalk.py +++ b/vdiscover/RandomWalk.py @@ -23,202 +23,207 @@ import csv import re -from ELF import ELF +from ELF import ELF from Spec import specs from Misc import readmodfile -def RandomWalkElf(program, outfile, mclass, max_subtraces, max_explored_subtraces, min_size): +def RandomWalkElf( + program, + outfile, + mclass, + max_subtraces, + max_explored_subtraces, + min_size): - csvwriter = csv.writer(open(outfile, "a+"), delimiter='\t') - elf = ELF(program) + csvwriter = csv.writer(open(outfile, "a+"), delimiter='\t') + elf = ELF(program) - # plt is inverted - inv_plt = dict() + # plt is inverted + inv_plt = dict() - for func, addr in elf.plt.items(): - if func in specs: # external functions are discarded - inv_plt[addr] = func + for func, addr in elf.plt.items(): + if func in specs: # external functions are discarded + inv_plt[addr] = func - elf.plt = inv_plt + elf.plt = inv_plt - cond_control_flow_ins = ["jo", "jno", "js", "jns", "je", - "jz","jnz", "jb", "jnae", "jc", - "jnb", "jae", "jnc", "jbe", "jna", - "ja", "jnbe", "jl", "jnge", "jge", - "jnl", "jle", "jng", "jg", "jnle", - "jp", "jpe", "jnp", "jpo", "jcxz", "jecxz"] + cond_control_flow_ins = ["jo", "jno", "js", "jns", "je", + "jz", "jnz", "jb", "jnae", "jc", + "jnb", "jae", "jnc", "jbe", "jna", + "ja", "jnbe", "jl", "jnge", "jge", + "jnl", "jle", "jng", "jg", "jnle", + "jp", "jpe", "jnp", "jpo", "jcxz", "jecxz"] - ncond_control_flow_ins = ["ret","jmp","call", "retq","jmp","callq"] + ncond_control_flow_ins = ["ret", "jmp", "call", "retq", "jmp", "callq"] - control_flow_ins = cond_control_flow_ins + ncond_control_flow_ins + control_flow_ins = cond_control_flow_ins + ncond_control_flow_ins - raw_inss = elf.GetRawInss() - useful_inss_list = [] - useful_inss_dict = dict() - libc_calls = [] - labels = dict() + raw_inss = elf.GetRawInss() + useful_inss_list = [] + useful_inss_dict = dict() + libc_calls = [] + labels = dict() - #print sys.argv[1]+"\t", - #rclass = str(1) + # print sys.argv[1]+"\t", + #rclass = str(1) - for i,ins in enumerate(raw_inss.split("\n")): + for i, ins in enumerate(raw_inss.split("\n")): - # prefix removal - ins = ins.replace("repz ","") - ins = ins.replace("rep ","") + # prefix removal + ins = ins.replace("repz ", "") + ins = ins.replace("rep ", "") - pins = ins.split("\t") - #print pins - ins_addr = pins[0].replace(":","").replace(" ","") - #print pins,ins_addr + pins = ins.split("\t") + # print pins + ins_addr = pins[0].replace(":", "").replace(" ", "") + # print pins,ins_addr - if len(pins) == 1 and ">" in ins: #label - #print ins - #assert(0) - x = pins[0].split(" ") + if len(pins) == 1 and ">" in ins: # label + # print ins + # assert(0) + x = pins[0].split(" ") - ins_addr = x[0] + ins_addr = x[0] - y = [i,ins_addr, None, None] - useful_inss_dict[ins_addr] = y - useful_inss_list.append(y) + y = [i, ins_addr, None, None] + useful_inss_dict[ins_addr] = y + useful_inss_list.append(y) - #print "label:",y + # print "label:",y - elif any(map( lambda x: x in ins, control_flow_ins)) and len(pins) == 3: # control flow instruction - #print pins - x = pins[2].split(" ") + elif any(map(lambda x: x in ins, control_flow_ins)) and len(pins) == 3: # control flow instruction + # print pins + x = 
pins[2].split(" ") - ins_nme = x[0] - ins_jaddr = x[-2] + ins_nme = x[0] + ins_jaddr = x[-2] - #if ("" == ins_jaddr): - # print pins - #print x - #print ins_nme, ins_jaddr - y = [i, ins_addr, ins_nme, ins_jaddr] + # if ("" == ins_jaddr): + # print pins + # print x + # print ins_nme, ins_jaddr + y = [i, ins_addr, ins_nme, ins_jaddr] - useful_inss_dict[ins_addr] = y - useful_inss_list.append(y) + useful_inss_dict[ins_addr] = y + useful_inss_list.append(y) - if "call" in pins[2]: - if ins_jaddr <> '': - func_addr = int(ins_jaddr,16) - if func_addr in elf.plt: - libc_calls.append(i) + if "call" in pins[2]: + if ins_jaddr != '': + func_addr = int(ins_jaddr, 16) + if func_addr in elf.plt: + libc_calls.append(i) - else: # all other instructions - y = [i, ins_addr, None, None] + else: # all other instructions + y = [i, ins_addr, None, None] - useful_inss_dict[ins_addr] = y - useful_inss_list.append(y) + useful_inss_dict[ins_addr] = y + useful_inss_list.append(y) - #print useful_inss_list - max_inss = len(useful_inss_list) - traces = set() - collected_traces = "" + # print useful_inss_list + max_inss = len(useful_inss_list) + traces = set() + collected_traces = "" - # exploration time! - for _ in range(max_explored_subtraces): + # exploration time! + for _ in range(max_explored_subtraces): - # resuling (sub)trace - r = "" - # starting point - i = random.choice(libc_calls) - j = 0 + # resuling (sub)trace + r = "" + # starting point + i = random.choice(libc_calls) + j = 0 - #r = elf.path+"\t" - r = "" + #r = elf.path+"\t" + r = "" - while True: + while True: - # last instruction case - if (i+j) == max_inss: - break + # last instruction case + if (i + j) == max_inss: + break - _,ins_addr,ins_nme,ins_jaddr = useful_inss_list[i+j] + _, ins_addr, ins_nme, ins_jaddr = useful_inss_list[i + j] - #print i+j,ins_nme, ins_jaddr + # print i+j,ins_nme, ins_jaddr - if ins_nme in ['call', 'callq']: # ordinary call - #"addr", ins_jaddr + if ins_nme in ['call', 'callq']: # ordinary call + #"addr", ins_jaddr - if ins_jaddr == '': - break # parametric jmp, similar to ret for us + if ins_jaddr == '': + break # parametric jmp, similar to ret for us - ins_jaddr = int(ins_jaddr,16) - if ins_jaddr in elf.plt: - r = r + " " + elf.plt[ins_jaddr] - if elf.plt[ins_jaddr] == "exit": - break - else: + ins_jaddr = int(ins_jaddr, 16) + if ins_jaddr in elf.plt: + r = r + " " + elf.plt[ins_jaddr] + if elf.plt[ins_jaddr] == "exit": + break + else: - if ins_jaddr in useful_inss_dict: - #assert(0) - #r = r + " " + hex(ins_jaddr) - i,_,_,_ = useful_inss_dict[ins_jaddr] - j = 0 - continue + if ins_jaddr in useful_inss_dict: + # assert(0) + #r = r + " " + hex(ins_jaddr) + i, _, _, _ = useful_inss_dict[ins_jaddr] + j = 0 + continue - else: - pass # ignored call + else: + pass # ignored call - elif ins_nme in ['ret','retq']: - break - else: - pass - #print i+j,ins_nme, ins_jaddr + elif ins_nme in ['ret', 'retq']: + break + else: + pass + # print i+j,ins_nme, ins_jaddr - #print j - if ins_nme == 'jmp' : + # print j + if ins_nme == 'jmp': - if ins_jaddr in elf.plt: # call equivalent using jmp - r = r + " " + elf.plt[jaddr] + if ins_jaddr in elf.plt: # call equivalent using jmp + r = r + " " + elf.plt[jaddr] - else: + else: - if ins_jaddr == '': - break # parametric jmp, similar to ret for us + if ins_jaddr == '': + break # parametric jmp, similar to ret for us - ins_jaddr = int(ins_jaddr,16) - if ins_jaddr in useful_inss_dict: - #r = r + " " + hex(ins_jaddr) - i,_,_,_ = useful_inss_dict[ins_jaddr] - j = 0 - continue + ins_jaddr = 
int(ins_jaddr, 16) + if ins_jaddr in useful_inss_dict: + #r = r + " " + hex(ins_jaddr) + i, _, _, _ = useful_inss_dict[ins_jaddr] + j = 0 + continue - else: - pass # ignored call + else: + pass # ignored call + if ins_nme in cond_control_flow_ins: - if ins_nme in cond_control_flow_ins: + assert(ins_jaddr is not None) - assert(ins_jaddr <> None) + cond = random.randint(0, 1) - cond = random.randint(0,1) + if cond == 1: - if cond == 1: + i, _, _, _ = useful_inss_dict[ins_jaddr] + j = 0 + continue - i,_,_,_ = useful_inss_dict[ins_jaddr] - j = 0 - continue + j = j + 1 - j = j + 1 + #r = r + "\t"+rclass + x = hash(r) + size = len(r.split(" ")) - 1 - #r = r + "\t"+rclass - x = hash(r) - size = len(r.split(" "))-1 + # if x not in traces and size >= min_size: + # print r+" .", + collected_traces = collected_traces + r + " ." + # traces.add(x) + # if len(traces) >= max_subtraces: + # break - #if x not in traces and size >= min_size: - #print r+" .", - collected_traces = collected_traces + r + " ." - #traces.add(x) - #if len(traces) >= max_subtraces: - # break + row = [elf.path, collected_traces] + if mclass is not None: + row.append(mclass) - row = [elf.path, collected_traces] - if mclass is not None: - row.append(mclass) - - csvwriter.writerow(row) + csvwriter.writerow(row) diff --git a/vdiscover/Recall.py b/vdiscover/Recall.py index fe0396b..aabdf96 100644 --- a/vdiscover/Recall.py +++ b/vdiscover/Recall.py @@ -8,63 +8,58 @@ from Utils import * -def Recall(model_file, in_file, in_type, out_file, test_mode, probability=False): - model = load_model(model_file) - csvwriter = write_csv(out_file) +def Recall( + model_file, + in_file, + in_type, + out_file, + test_mode, + probability=False): - x = dict() + model = loadModel(model_file) + csvwriter = writeCSV(out_file) - testcases, features, test_classes = read_traces(in_file, None, cut=None) - x[in_type] = features + x = dict() - if probability: - predicted_classes = map(lambda x: x[1], model.predict_proba(x)) # probability of the second class - else: - predicted_classes = model.predict(x) + testcases, features, test_classes = readTraces(in_file, None, cut=None) + x[in_type] = features - for testcase,y in zip(testcases,predicted_classes): - csvwriter.writerow([testcase,y]) - - if test_mode == "simple": - nclasses = len(set(test_classes)) - one_class = int(test_classes[0]) - - if nclasses == 1: - err = [None, None] - err[one_class] = recall_score(test_classes, predicted_classes, average=None)[one_class] - err[1 - one_class] = err[one_class] + if probability: + # probability of the second class + predicted_classes = map(lambda x: x[1], model.predict_proba(x)) else: - err = recall_score(test_classes, predicted_classes, average=None) - - print classification_report(test_classes, predicted_classes) - print "Accuracy per class:", round(err[0],2), round(err[1],2) - print "Average accuracy:", round(sum(err)/2.0,2) - - elif test_mode == "aggregated": - - - #print len(testcases), len(predicted_classes), len(test_classes) - prog_pred = dict() + predicted_classes = map(str, model.predict(x)) + #predicted_classes = model.predict(x) - for (program, predicted, real) in zip(testcases, predicted_classes, test_classes): - prog_pred[program] = prog_pred.get(program,[]) + [abs(predicted-real)] + for testcase, y in zip(testcases, predicted_classes): + csvwriter.writerow([testcase, y]) - print round(numpy.mean(map(numpy.mean, prog_pred.values())),2) + if test_mode == "simple": + nclasses = len(set(test_classes)) + one_class = int(test_classes[0]) - # BROKEN! 
- #prog_classes = dict() - #for prog,cl in zip(testcases, test_classes): - # prog_classes[prog] = cl + if nclasses == 1: + err = [None, None] + err[one_class] = recall_score( + test_classes, predicted_classes, average=None)[one_class] + err[1 - one_class] = err[one_class] + else: + err = recall_score(test_classes, predicted_classes, average=None) - #prog_pred = dict(zip(prog_classes.keys(), [[]]*len(prog_classes))) - #for prog, pred in zip(testcases,predicted_classes): - # prog_pred[prog].append(abs(pred - prog_classes[prog])) + print classification_report(test_classes, predicted_classes) + print "Accuracy per class:", round(err[0], 2), round(err[1], 2) + print "Average accuracy:", round(sum(err) / 2.0, 2) - #errors = [] - #for prog, preds in prog_pred.items(): - # errors.append(sum(preds)/float(len(preds))) + elif test_mode == "aggregated": - #print sum(errors) / float(len(errors)) + # print len(testcases), len(predicted_classes), len(test_classes) + prog_pred = dict() + for (program, predicted, real) in zip( + testcases, predicted_classes, test_classes): + predicted,real = int(predicted), int(real) + prog_pred[program] = prog_pred.get( + program, []) + [abs(predicted - real)] + print round(numpy.mean(map(numpy.mean, prog_pred.values())), 2) diff --git a/vdiscover/Run.py b/vdiscover/Run.py index 95ba6ba..cda4293 100644 --- a/vdiscover/Run.py +++ b/vdiscover/Run.py @@ -19,7 +19,6 @@ """ - #from ptrace.debugger.child import createChild from os import system, dup2, close, open as fopen, O_RDONLY from sys import stdin @@ -38,6 +37,7 @@ class ChildError(RuntimeError): pass + def _execChild(arguments, no_stdout, env): if no_stdout: try: @@ -45,7 +45,7 @@ def _execChild(arguments, no_stdout, env): dup2(null.fileno(), 1) dup2(1, 2) null.close() - except IOError, err: + except IOError as err: close(2) close(1) try: @@ -53,9 +53,10 @@ def _execChild(arguments, no_stdout, env): execve(arguments[0], arguments, env) else: execv(arguments[0], arguments) - except Exception, err: + except Exception as err: raise ChildError(str(err)) + def createChild(arguments, no_stdout, env=None): """ Create a child process: @@ -73,66 +74,66 @@ def createChild(arguments, no_stdout, env=None): if pid: return pid else: - #print "limit",getrlimit(RLIMIT_DATA) - setrlimit(RLIMIT_AS, (1024*1024*1024, -1)) - #print "limit",getrlimit(RLIMIT_DATA) + # print "limit",getrlimit(RLIMIT_DATA) + setrlimit(RLIMIT_AS, (1024 * 1024 * 1024, -1)) + # print "limit",getrlimit(RLIMIT_DATA) try: - ptrace_traceme() - except PtraceError, err: - raise ChildError(str(err)) + ptrace_traceme() + except PtraceError as err: + raise ChildError(str(err)) _execChild(arguments, no_stdout, env) exit(255) def Launch(cmd, no_stdout, env): - global fds - global c - c = c + 1 - #cmd = ["/usr/bin/timeout", "-k", "1", "3"]+cmd - #print cmd - if cmd[-1][0:2] == "< ": - filename = cmd[-1].replace("< ", "") + global fds + global c + c = c + 1 + #cmd = ["/usr/bin/timeout", "-k", "1", "3"]+cmd + # print cmd + if cmd[-1][0:2] == "< ": + filename = cmd[-1].replace("< ", "") - #try: - # close(3) - #except OSError: - # print "OsError!" - # pass + # try: + # close(3) + # except OSError: + # print "OsError!" + # pass - for fd in fds: - #print fd, - try: - close(fd) - #print "closed!" - except OSError: - #print "failed close!" - pass + for fd in fds: + # print fd, + try: + close(fd) + # print "closed!" + except OSError: + # print "failed close!" 
+ pass - fds = [] + fds = [] - desc = fopen(filename,O_RDONLY) - fds.append(desc) - dup2(desc, stdin.fileno()) - fds.append(desc) - #close(desc) + desc = fopen(filename, O_RDONLY) + fds.append(desc) + dup2(desc, stdin.fileno()) + fds.append(desc) + # close(desc) - cmd = cmd[:-1] + cmd = cmd[:-1] - #print "c:", c - #print "self pid", getpid() + # print "c:", c + # print "self pid", getpid() - r = createChild(cmd, no_stdout, env) + r = createChild(cmd, no_stdout, env) - #print "new pid", r - #print "self pid", getpid() - #print "Done!" + # print "new pid", r + # print "self pid", getpid() + # print "Done!" - return r + return r -#class Runner: +# class Runner: # def __init__(self, cmd, timeout): # #threading.Thread.__init__(self) # diff --git a/vdiscover/Sampling.py b/vdiscover/Sampling.py index 23d5ac4..c8ef251 100644 --- a/vdiscover/Sampling.py +++ b/vdiscover/Sampling.py @@ -20,22 +20,23 @@ import random import copy + def cluster_sampler(clustered_traces, n_per_cluster): - #cc = copy.copy(clusters) - #n_per_cluster = 1#n / len(cc) - clusters = dict() - for label, cluster in clustered_traces: - clusters[cluster] = clusters.get(cluster, []) + [label.split(":")[-1]] - - selected = set() - tmp = set() - - for (cluster, seeds) in clusters.items(): - n_sample = min(len(seeds), n_per_cluster) - tmp = set(seeds).intersection(selected) - if len(tmp) >= n_sample: - selected.update(set(random.sample(tmp, n_sample))) - else: - selected.update(set(random.sample(seeds, n_sample))) - - return selected + #cc = copy.copy(clusters) + # n_per_cluster = 1#n / len(cc) + clusters = dict() + for label, cluster in clustered_traces: + clusters[cluster] = clusters.get(cluster, []) + [label.split(":")[-1]] + + selected = set() + tmp = set() + + for (cluster, seeds) in clusters.items(): + n_sample = min(len(seeds), n_per_cluster) + tmp = set(seeds).intersection(selected) + if len(tmp) >= n_sample: + selected.update(set(random.sample(tmp, n_sample))) + else: + selected.update(set(random.sample(seeds, n_sample))) + + return selected diff --git a/vdiscover/Spec.py b/vdiscover/Spec.py index c80455d..054a147 100644 --- a/vdiscover/Spec.py +++ b/vdiscover/Spec.py @@ -21,7 +21,7 @@ realpath = os.path.dirname(os.path.realpath(__file__)) datadir = "data/" -f = open(realpath+"/"+datadir+"prototypes.conf") +f = open(realpath + "/" + datadir + "prototypes.conf") specs = dict() for raw_spec in f.readlines(): @@ -30,12 +30,12 @@ raw_spec = raw_spec.replace(" (", "(") raw_spec = raw_spec.replace(" ", " ") raw_spec = raw_spec.replace(" ", " ") - if raw_spec <> "" and raw_spec[0] <> ";" and (not "SYS_" in raw_spec): + if raw_spec != "" and raw_spec[0] != ";" and (not "SYS_" in raw_spec): x = raw_spec.split(" ") ret = x[0] x = x[1].split("(") name = x[0] - param_types = x[1].replace(");", "").split(",") + param_types = x[1].replace(");", "").split(",") specs[name] = [ret] + param_types -#print specs +# print specs diff --git a/vdiscover/Train.py b/vdiscover/Train.py index 3bc9a13..d177844 100644 --- a/vdiscover/Train.py +++ b/vdiscover/Train.py @@ -23,133 +23,145 @@ from Pipeline import * from sklearn.metrics import confusion_matrix -def TrainScikitLearn(model_file, train_file, valid_file, ftype, nsamples): - #csvreader = open_csv(train_file) - modelfile = open_model(model_file) - train_programs, train_features, train_classes = read_traces(train_file, nsamples, cut=None) - print "using", len(train_features),"examples to train." 
+def TrainScikitLearn(model_file, train_file, valid_file, vtype, ftype, nsamples): - train_dict = dict() - train_dict[ftype] = train_features + modelfile = openModel(model_file) + train_programs, train_features, train_classes = readTraces( + train_file, nsamples, cut=None) + print "using", len(train_features), "examples to train." - print "Transforming data and fitting model.." - model = make_train_pipeline(ftype) - model.fit(train_dict,train_classes) + train_dict = dict() + train_dict[ftype] = train_features - print "Done!" - #print model - #print confusion_matrix(train_classes, model.predict(train_dict)) + print "Transforming data and fitting model.." - print "Saving model to",model_file - modelfile.write(pickle.dumps(model)) + if vtype == "bow": + model = makeTrainPipelineBOW(ftype) -def TrainKeras(model_file, train_file, valid_file, ftype, nsamples): - - csvreader = open_csv(train_file) - modelfile = open_model(model_file) - - train_features = [] - train_programs = [] - train_classes = [] - - print "Reading and sampling data to train..", - if nsamples is None: - for i,(program, features, cl) in enumerate(csvreader): - train_programs.append(program) - train_features.append(features) - train_classes.append(int(cl)) - else: - - train_size = file_len(in_file) - skip_until = random.randint(0,train_size - nsamples) - - for i,(program, features, cl) in enumerate(csvreader): - - if i < skip_until: - continue - elif i - skip_until == nsamples: - break - - train_programs.append(program) - train_features.append(features) - train_classes.append(int(cl)) - train_size = len(train_features) - - assert(train_size == len(train_classes)) - - print "using", train_size,"examples to train." - - train_dict = dict() - train_dict[ftype] = train_features - batch_size = 16 - window_size = 25 - - from keras.preprocessing.text import Tokenizer + model.fit(train_dict, train_classes) - tokenizer = Tokenizer(nb_words=None, filters="", lower=False, split=" ") - #print type(train_features[0]) - tokenizer.fit_on_texts(train_features) - max_features = len(tokenizer.word_counts) + print "Done!" + # print model + # print confusion_matrix(train_classes, model.predict(train_dict)) - preprocessor = KerasPreprocessor(tokenizer, window_size, batch_size) + print "Saving model to", model_file + modelfile.write(pickle.dumps(model)) - if valid_file is not None: - csvreader = open_csv(valid_file) - valid_features = [] - valid_programs = [] - valid_classes = [] - - print "Reading data to valid..", - for i,(program, features, cl) in enumerate(csvreader): - valid_programs.append(program) - valid_features.append(features) - valid_classes.append(int(cl)) - - print "using", len(train_features),"examples to valid." - #X_valid,y_valid = preprocessor.preprocess(valid_features, valid_classes) - else: - valid_features,train_features = train_features[0:int(0.1*train_size)], train_features[int(0.1*train_size):] - valid_classes,train_classes = train_classes[0:int(0.1*train_size)], train_classes[int(0.1*train_size):] - - X_valid,y_valid = preprocessor.preprocess(valid_features, valid_classes, 500) - X_train,y_train = preprocessor.preprocess(train_features, train_classes, 10000) - - from keras.models import Sequential - from keras.layers.core import Dense, Dropout, Activation - from keras.layers.embeddings import Embedding - from keras.layers.recurrent import LSTM, GRU - from keras.optimizers import Adam - - print "Creating and compiling a LSTM.." 
- model = Sequential() - model.add(Embedding(max_features, 10)) - model.add(LSTM(10, 32)) - model.add(Dropout(0.50)) - model.add(Dense(32, 1)) - model.add(Activation('sigmoid')) - - # try using different optimizers and different optimizer config - opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8, kappa=1-1e-8) - model.compile(loss='binary_crossentropy', optimizer=opt, class_mode="binary") - #model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=30, validation_data = (X_valid,y_valid), show_accuracy=True) - model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=5, show_accuracy=True) - - print "Saving model to",model_file - - modelfile.write(pickle.dumps(KerasPredictor(preprocessor,model,ftype))) +""" +def TrainKeras(model_file, train_file, valid_file, ftype, nsamples): + csvreader = open_csv(train_file) + modelfile = open_model(model_file) + + train_features = [] + train_programs = [] + train_classes = [] + + print "Reading and sampling data to train..", + if nsamples is None: + for i, (program, features, cl) in enumerate(csvreader): + train_programs.append(program) + train_features.append(features) + train_classes.append(int(cl)) + else: + + train_size = file_len(in_file) + skip_until = random.randint(0, train_size - nsamples) + + for i, (program, features, cl) in enumerate(csvreader): + + if i < skip_until: + continue + elif i - skip_until == nsamples: + break + + train_programs.append(program) + train_features.append(features) + train_classes.append(int(cl)) + train_size = len(train_features) + + assert(train_size == len(train_classes)) + + print "using", train_size, "examples to train." + + train_dict = dict() + train_dict[ftype] = train_features + batch_size = 16 + window_size = 25 + + from keras.preprocessing.text import Tokenizer + + tokenizer = Tokenizer(nb_words=None, filters="", lower=False, split=" ") + # print type(train_features[0]) + tokenizer.fit_on_texts(train_features) + max_features = len(tokenizer.word_counts) + + preprocessor = KerasPreprocessor(tokenizer, window_size, batch_size) + + if valid_file is not None: + csvreader = open_csv(valid_file) + + valid_features = [] + valid_programs = [] + valid_classes = [] + + print "Reading data to valid..", + for i, (program, features, cl) in enumerate(csvreader): + valid_programs.append(program) + valid_features.append(features) + valid_classes.append(int(cl)) + + print "using", len(train_features), "examples to valid." + #X_valid,y_valid = preprocessor.preprocess(valid_features, valid_classes) + else: + valid_features, train_features = train_features[ + 0:int(0.1 * train_size)], train_features[int(0.1 * train_size):] + valid_classes, train_classes = train_classes[ + 0:int(0.1 * train_size)], train_classes[int(0.1 * train_size):] + + X_valid, y_valid = preprocessor.preprocess( + valid_features, valid_classes, 500) + X_train, y_train = preprocessor.preprocess( + train_features, train_classes, 10000) + + from keras.models import Sequential + from keras.layers.core import Dense, Dropout, Activation + from keras.layers.embeddings import Embedding + from keras.layers.recurrent import LSTM, GRU + from keras.optimizers import Adam + + print "Creating and compiling a LSTM.." 
+ model = Sequential() + model.add(Embedding(max_features, 10)) + model.add(LSTM(10, 32)) + model.add(Dropout(0.50)) + model.add(Dense(32, 1)) + model.add(Activation('sigmoid')) + + # try using different optimizers and different optimizer config + opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, + epsilon=1e-8, kappa=1 - 1e-8) + model.compile(loss='binary_crossentropy', + optimizer=opt, class_mode="binary") + #model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=30, validation_data = (X_valid,y_valid), show_accuracy=True) + model.fit(X_train, y_train, batch_size=batch_size, + nb_epoch=5, show_accuracy=True) + + print "Saving model to", model_file + + modelfile.write(pickle.dumps(KerasPredictor(preprocessor, model, ftype))) +""" -def Train(model_file, train_file, valid_file, ttype, ftype, nsamples): - if ttype == "rf": - TrainScikitLearn(model_file, train_file, valid_file, ftype, nsamples) +def Train(model_file, train_file, valid_file, model_type, vector_type, feature_type, nsamples): - elif ttype == "lstm": - try: - import keras - except: - print "Failed to import keras modules to perform LSTM training" - return - TrainKeras(model_file, train_file, valid_file, ftype, nsamples) + TrainScikitLearn(model_file, train_file, valid_file, vector_type, feature_type, nsamples) + #elif ttype == "lstm": + # try: + # import keras + # except: + # print "Failed to import keras modules to perform LSTM training" + # return + # TrainKeras(model_file, train_file, valid_file, ftype, nsamples) diff --git a/vdiscover/Types.py b/vdiscover/Types.py index e7d87ef..f7065b4 100644 --- a/vdiscover/Types.py +++ b/vdiscover/Types.py @@ -19,64 +19,71 @@ import copy + class Type: - def __init__(self, name, size, index = None): - self.name = str(name) - self.size_in_bytes = size - self.index = index - - def __str__(self): - - r = str(self.name) - if (self.index <> None): - r = r +"("+str(self.index)+")" - - return r - - def getSize(self): - return self.size_in_bytes - - #def copy(self): - # return copy.copy(self) - -ptypes = [Type("Num32", 4, None) , - Type("Ptr32", 4, None) , # Generic pointer - Type("SPtr32", 4, None), # Stack pointer - Type("HPtr32", 4, None), # Heap pointer - Type("GxPtr32", 4, None), # Global eXecutable pointer - Type("FPtr32", 4, None), # File pointer - Type("NPtr32", 4, None), # NULL pointer - Type("DPtr32", 4, None), # Dangling pointer - Type("GPtr32", 4, None), # Global pointer + + def __init__(self, name, size, index=None): + self.name = str(name) + self.size_in_bytes = size + self.index = index + + def __str__(self): + + r = str(self.name) + if (self.index is not None): + r = r + "(" + str(self.index) + ")" + + return r + + def getSize(self): + return self.size_in_bytes + + # def copy(self): + # return copy.copy(self) + +ptypes = [Type("Num32", 4, None), + Type("Ptr32", 4, None), # Generic pointer + Type("SPtr32", 4, None), # Stack pointer + Type("HPtr32", 4, None), # Heap pointer + Type("GxPtr32", 4, None), # Global eXecutable pointer + Type("FPtr32", 4, None), # File pointer + Type("NPtr32", 4, None), # NULL pointer + Type("DPtr32", 4, None), # Dangling pointer + Type("GPtr32", 4, None), # Global pointer Type("Top32", 4, None) ] -for i in range(0,33,8): - ptypes.append(Type("Num32B"+str(i), 4, None)) +for i in range(0, 33, 8): + ptypes.append(Type("Num32B" + str(i), 4, None)) -num32_ptypes = filter(lambda t: "Num32" in str(t), ptypes) -ptr32_ptypes = ptypes[1:9] +num32_ptypes = filter(lambda t: "Num32" in str(t), ptypes) +ptr32_ptypes = ptypes[1:9] generic_ptypes = [Type("Top32", 4, 
None)] + def isNum(ptype): - return ptype in ["int", "ulong", "long", "char"] + return ptype in ["int", "ulong", "long", "char"] + def isPtr(ptype): - return "addr" in ptype or "*" in ptype or "string" in ptype or "format" in ptype or "file" in ptype + return "addr" in ptype or "*" in ptype or "string" in ptype or "format" in ptype or "file" in ptype + def isVoid(ptype): - return ptype == "void" + return ptype == "void" + def isNull(val): - return val == "0x0" or val == "0" + return val == "0x0" or val == "0" + def GetPtype(ptype): - if isPtr(ptype): - return Type("Ptr32", 4) - elif isNum(ptype): - return Type("Num32", 4) - elif isVoid(ptype): - return Type("Top32", 4) - else: - return Type("Top32", 4) + if isPtr(ptype): + return Type("Ptr32", 4) + elif isNum(ptype): + return Type("Num32", 4) + elif isVoid(ptype): + return Type("Top32", 4) + else: + return Type("Top32", 4) diff --git a/vdiscover/Utils.py b/vdiscover/Utils.py index 91ecf48..cfa6780 100644 --- a/vdiscover/Utils.py +++ b/vdiscover/Utils.py @@ -26,7 +26,7 @@ def update_progress(progress): - barLength = 30 # Modify this to change the length of the progress bar + barLength = 30 # Modify this to change the length of the progress bar status = "" if isinstance(progress, int): progress = float(progress) @@ -39,167 +39,177 @@ def update_progress(progress): if progress >= 1: progress = 1 status = "Done...\r\n" - block = int(round(barLength*progress)) - text = "\rPercent: [{0}] {1}% {2}".format( "#"*block + "-"*(barLength-block), progress*100, status) + block = int(round(barLength * progress)) + text = "\rPercent: [{0}] {1}% {2}".format( + "#" * block + "-" * (barLength - block), progress * 100, status) sys.stdout.write(text) sys.stdout.flush() -def file_len(fname): - if ".gz" in fname: - cat = "zcat" - else: - cat = "cat" +def getFileLength(fname): - p = subprocess.Popen(cat + " " + fname + " | wc -l", shell=True, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - result, err = p.communicate() - if p.returncode != 0: - raise IOError(err) - return int(result.strip().split()[0]) + if ".gz" in fname: + cat = "zcat" + else: + cat = "cat" -def load_csv(in_file): + p = subprocess.Popen( + cat + " " + fname + " | wc -l", + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + result, err = p.communicate() + if p.returncode != 0: + raise IOError(err) + return int(result.strip().split()[0]) - if ".gz" in in_file: - infile = gzip.open(in_file, "r") - else: - infile = open(in_file, "r") - return csv.reader(infile, delimiter='\t') +def loadCSV(in_file): -def write_csv(in_file): + if ".gz" in in_file: + infile = gzip.open(in_file, "r") + else: + infile = open(in_file, "r") - if ".gz" in in_file: - infile = gzip.open(in_file, "w") - else: - infile = open(in_file, "w") + return csv.reader(infile, delimiter='\t') - return csv.writer(infile, delimiter='\t') -def open_csv(in_file): +def writeCSV(in_file): - if ".gz" in in_file: - infile = gzip.open(in_file, "a+") - else: - infile = open(in_file, "a+") + if ".gz" in in_file: + infile = gzip.open(in_file, "w") + else: + infile = open(in_file, "w") - return csv.writer(infile, delimiter='\t') + return csv.writer(infile, delimiter='\t') -def load_model(model_file): - if ".pklz" in model_file: - modelfile = gzip.open(model_file,"r") - else: - modelfile = open(model_file,"r") +def openCSV(in_file): - model = pickle.load(gzip.open(model_file)) - return model + if ".gz" in in_file: + infile = gzip.open(in_file, "a+") + else: + infile = open(in_file, "a+") -def open_model(model_file): + return 
csv.writer(infile, delimiter='\t') - if ".pklz" in model_file: - modelfile = gzip.open(model_file,"w+") - else: - modelfile = open(model_file,"w+") - return modelfile +def loadModel(model_file): -def read_traces(train_file, nsamples, cut=None, maxsize=50): + if ".pklz" in model_file: + modelfile = gzip.open(model_file, "r") + else: + modelfile = open(model_file, "r") - if type(train_file) == str: - csvreader = load_csv(train_file) - elif type(train_file) == list: - csvreader = train_file - else: - assert(0) + model = pickle.load(gzip.open(model_file)) + return model - train_features = [] - train_programs = [] - train_classes = [] - #print "Reading and sampling data to train..", - if nsamples is None: - for i,col in enumerate(csvreader): +def openModel(model_file): - if len(col) < 2 or len(col) > 3: - print "Ignoring line", i, ":", "\t".join(col) - continue + if ".pklz" in model_file: + modelfile = gzip.open(model_file, "w+") + else: + modelfile = open(model_file, "w+") - program = col[0] - features = col[1] - if len(col) == 3: - cl = str(col[2]) #int(col[2]) - else: - cl = -1 + return modelfile - raw_trace = features[:-1] - trace = raw_trace.split(" ") - size = len(trace) - if cut is None or size < maxsize: +def readTraces(train_file, nsamples, cut=None, maxsize=50): - train_programs.append(program) - train_features.append(features) - train_classes.append(cl) - else: - for _ in range(cut): + if isinstance(train_file, str): + csvreader = loadCSV(train_file) + elif isinstance(train_file, list): + csvreader = train_file + else: + assert(0) - #start = random.randint(0,size/2) - #end = random.randint(size/2+1, size) - start = random.randint(0,size) - end = start + maxsize + train_features = [] + train_programs = [] + train_classes = [] - features = " ".join(trace[start:end+1]) + # print "Reading and sampling data to train..", + if nsamples is None: + for i, col in enumerate(csvreader): - train_programs.append(program) - train_features.append(features) - train_classes.append(cl) - else: + if len(col) < 2 or len(col) > 3: + print "Ignoring line", i, ":", "\t".join(col) + continue - if type(train_file) == str: - train_size = file_len(train_file) - elif type(train_file) == list: - train_size = len(csvreader) + program = col[0] + features = col[1] + if len(col) == 3: + cl = str(col[2]) # int(col[2]) + else: + cl = -1 - #train_size = file_len(train_file) - skip_until = random.randint(0,train_size - nsamples) + raw_trace = features[:-1] + trace = raw_trace.split(" ") + size = len(trace) - for i,col in enumerate(csvreader): + if cut is None or size < maxsize: - if i < skip_until: - continue - elif i - skip_until == nsamples: - break + train_programs.append(program) + train_features.append(features) + train_classes.append(cl) + else: + for _ in range(cut): - program = col[0] - features = col[1] - if len(col) > 2: - cl = int(col[2]) - else: - cl = -1 + #start = random.randint(0,size/2) + #end = random.randint(size/2+1, size) + start = random.randint(0, size) + end = start + maxsize - raw_trace = features[:-1] - trace = raw_trace.split(" ") - size = len(trace) + features = " ".join(trace[start:end + 1]) - if cut is None or size < maxsize: + train_programs.append(program) + train_features.append(features) + train_classes.append(cl) + else: - train_programs.append(program) - train_features.append(features) - train_classes.append(cl) - else: - for _ in range(cut): + if isinstance(train_file, str): + train_size = getFileLength(train_file) + elif isinstance(train_file, list): + train_size = len(csvreader) - 
#start = random.randint(0,size/2) - #end = random.randint(size/2+1, size) - start = random.randint(0,size-2) - end = start + random.randint(1,size-1) + #train_size = file_len(train_file) + skip_until = random.randint(0, train_size - nsamples) - features = " ".join(trace[start:end+1]) + for i, col in enumerate(csvreader): - train_programs.append(program) - train_features.append(features) - train_classes.append(cl) + if i < skip_until: + continue + elif i - skip_until == nsamples: + break + program = col[0] + features = col[1] + if len(col) > 2: + cl = int(col[2]) + else: + cl = -1 - return train_programs, train_features, train_classes + raw_trace = features[:-1] + trace = raw_trace.split(" ") + size = len(trace) + + if cut is None or size < maxsize: + + train_programs.append(program) + train_features.append(features) + train_classes.append(cl) + else: + for _ in range(cut): + + #start = random.randint(0,size/2) + #end = random.randint(size/2+1, size) + start = random.randint(0, size - 2) + end = start + random.randint(1, size - 1) + + features = " ".join(trace[start:end + 1]) + + train_programs.append(program) + train_features.append(features) + train_classes.append(cl) + + return train_programs, train_features, train_classes diff --git a/vdiscover/Vulnerabilities.py b/vdiscover/Vulnerabilities.py index 0835d11..7481878 100644 --- a/vdiscover/Vulnerabilities.py +++ b/vdiscover/Vulnerabilities.py @@ -17,53 +17,57 @@ Copyright 2014 by G.Grieco """ -from Event import Call, Crash, Abort, Exit, Signal, Vulnerability +from Event import Call, Crash, Abort, Exit, Signal, Vulnerability from Analysis import FindModule + def detect_vulnerabilities(preevents, events, process, mm): - r = [] + r = [] + + for (i, event) in enumerate(events): + r.append(detect_vulnerability(preevents, event, process, mm)) - for (i, event) in enumerate(events): - r.append(detect_vulnerability(preevents, event, process, mm)) + return filter(lambda e: e is not None, r) - return filter(lambda e: e is not None, r) def detect_vulnerability(preevents, event, process, mm): if isinstance(event, Call): - (name, args) = event.GetTypedName() - if name == "system" or name == "popen": - pass + (name, args) = event.GetTypedName() + if name == "system" or name == "popen": + pass elif isinstance(event, Abort): - if len(event.bt) > 0 and len(preevents) > 0: + if len(event.bt) > 0 and len(preevents) > 0: - if not (str(preevents[-1]) in ["free", "malloc", "realloc"]): - return None + if not (str(preevents[-1]) in ["free", "malloc", "realloc"]): + return None - for (typ, val) in event.bt: - module = FindModule(val, mm) - if module == "[vdso]": - pass - elif "libc-" in module: - assert(0) - return Vulnerability("MemoryCorruption") - else: - return None + for (typ, val) in event.bt: + module = FindModule(val, mm) + if module == "[vdso]": + pass + elif "libc-" in module: + assert(0) + return Vulnerability("MemoryCorruption") + else: + return None elif isinstance(event, Crash): - if str(event.fp_type[0]) == "DPtr32" and str(event.eip_type[0]) == "DPtr32": - return Vulnerability("StackCorruption") + if str( + event.fp_type[0]) == "DPtr32" and str( + event.eip_type[0]) == "DPtr32": + return Vulnerability("StackCorruption") - for (typ,val) in event.bt: - if str(typ) == "DPtr32": - return Vulnerability("StackCorruption") + for (typ, val) in event.bt: + if str(typ) == "DPtr32": + return Vulnerability("StackCorruption") elif isinstance(event, Signal): - pass + pass return None diff --git a/vdp b/vdp index e2a1be5..c5ee9f6 100755 --- a/vdp +++ b/vdp @@ 
-27,26 +27,36 @@ import random from subprocess import Popen, PIPE, STDOUT -from vdiscover.Detection import GetArgs, GetFiles, GetCmd -from vdiscover.Mutation import NullMutator, RandomByteMutator, RandomExpanderMutator, RandomInputMutator, opened_files -from vdiscover.Input import prepare_inputs +from vdiscover.Detection import GetArgs, GetFiles, GetCmd +from vdiscover.Mutation import NullMutator, RandomByteMutator, RandomExpanderMutator, RandomInputMutator, opened_files +from vdiscover.Input import prepare_inputs if __name__ == "__main__": # To help argparse to detect the number of columns correctly - #os.environ['COLUMNS'] = str(os.popen('stty size', 'r').read().split()[1]) #str(shutil.get_terminal_size().columns) + # os.environ['COLUMNS'] = str(os.popen('stty size', + # 'r').read().split()[1]) #str(shutil.get_terminal_size().columns) # Arguments - parser = argparse.ArgumentParser(description='Vulnerability Detection Procedure') - parser.add_argument("testcase", help="Testcase to analyze", type=str, default=None) - parser.add_argument("cmd", help="Testcase to analyze", type=str, default=None) - - parser.add_argument("--io-mode", - help="Input-Output mode", action="store_true", default=False) - - parser.add_argument("--seed-range", type=str, - help="A seed range to feed the cmd (io mode only)", default="0:0") + parser = argparse.ArgumentParser( + description='Vulnerability Detection Procedure') + parser.add_argument( + "testcase", help="Testcase to analyze", type=str, default=None) + parser.add_argument("cmd", help="Testcase to analyze", + type=str, default=None) + + parser.add_argument( + "--io-mode", + help="Input-Output mode", + action="store_true", + default=False) + + parser.add_argument( + "--seed-range", + type=str, + help="A seed range to feed the cmd (io mode only)", + default="0:0") parser.add_argument("--show-stdout", help="Don't use /dev/null as stdout/stderr", @@ -59,10 +69,9 @@ if __name__ == "__main__": parser.add_argument("--timeout", dest="timeout", type=int, help="Timeout in seconds (io mode only)", default=3) - options = parser.parse_args() - start_seed,stop_seed = tuple(options.seed_range.split(":")) + start_seed, stop_seed = tuple(options.seed_range.split(":")) testcase = options.testcase vdp_cmd = options.cmd show_stdout = options.show_stdout @@ -84,33 +93,42 @@ if __name__ == "__main__": prepared_inputs = prepare_inputs(original_input) if show_cmd: - print vdp_cmd,program," ".join(prepared_inputs) - exit(0) + print vdp_cmd, program, " ".join(prepared_inputs) + exit(0) if io_mode: - DEVNULL = open(os.devnull, 'wb') - in_filename = files[0].GetName() - out_filename = files[0].GetFilename() - vdp_cmd = vdp_cmd.replace("", in_filename) - vdp_cmd = vdp_cmd.replace("", out_filename) - - for seed in xrange(int(start_seed),int(stop_seed)): - cmd = vdp_cmd.replace("",str(seed)) - p = Popen(cmd.split(" "), stdin=PIPE, stdout=DEVNULL, stderr=DEVNULL, env=dict()) - p.communicate() - p = Popen(["timeout","-k","1",str(timeout),program]+prepared_inputs, stdin=PIPE, stdout=DEVNULL, stderr=DEVNULL, env=dict()) - p.communicate() - - if p.returncode < 0: - print >> sys.stderr, testcase, p.returncode, seed - exit(1) - - exit(0) + DEVNULL = open(os.devnull, 'wb') + in_filename = files[0].GetName() + out_filename = files[0].GetFilename() + vdp_cmd = vdp_cmd.replace("", in_filename) + vdp_cmd = vdp_cmd.replace("", out_filename) + + for seed in xrange(int(start_seed), int(stop_seed)): + cmd = vdp_cmd.replace("", str(seed)) + p = Popen(cmd.split(" "), stdin=PIPE, + stdout=DEVNULL, 
stderr=DEVNULL, env=dict()) + p.communicate() + p = Popen(["timeout", + "-k", + "1", + str(timeout), + program] + prepared_inputs, + stdin=PIPE, + stdout=DEVNULL, + stderr=DEVNULL, + env=dict()) + p.communicate() + + if p.returncode < 0: + print >> sys.stderr, testcase, p.returncode, seed + exit(1) + + exit(0) else: - p = Popen(vdp_cmd.split(" ")+[program]+prepared_inputs, stdin=PIPE, env=dict()) - p.communicate() + p = Popen(vdp_cmd.split(" ") + [program] + + prepared_inputs, stdin=PIPE, env=dict()) + p.communicate() exit(p.returncode) - diff --git a/vpredictor b/vpredictor index c3dd938..674fd19 100755 --- a/vpredictor +++ b/vpredictor @@ -25,25 +25,32 @@ import sys import csv csv.field_size_limit(sys.maxsize) -sys.setrecursionlimit(1024*1024*1024) +sys.setrecursionlimit(1024 * 1024 * 1024) from vdiscover.Pipeline import * -from vdiscover.Recall import Recall -from vdiscover.Train import Train +from vdiscover.Recall import Recall +from vdiscover.Train import Train if __name__ == "__main__": # Arguments - parser = argparse.ArgumentParser(description='A trainer and predictor of vulnerabilities') - parser.add_argument("infile", help="A csv with the features to train or predict", type=str, default=None) + parser = argparse.ArgumentParser( + description='A trainer and predictor of vulnerabilities') + parser.add_argument( + "infile", + help="A csv with the features to train or predict", + type=str, + default=None) parser.add_argument("--model", type=str, help="Use a pretrained model (recall only)", action="store", default=None) - parser.add_argument("--prob", - help="Output the probability of each prediction (recall only)", - action="store_true", default=False) + parser.add_argument( + "--prob", + help="Output the probability of each prediction (recall only)", + action="store_true", + default=False) parser.add_argument("--test", help="Test a model using infile (recall only)", @@ -53,7 +60,6 @@ if __name__ == "__main__": help="Test a model using infile (recall only)", action="store_true", default=False) - parser.add_argument("--static", help="Use static features", action="store_true", default=False) @@ -62,45 +68,59 @@ if __name__ == "__main__": help="Use dynamic features", action="store_true", default=False) - parser.add_argument("--valid", - help="Valid a model using infile", - action="store", default=None) + #parser.add_argument("--valid", + # help="Valid a model using infile", + # action="store", default=None) - parser.add_argument("--cluster-with-repr", - help="Cluster input traces using some representation (bow, doc2vec)", - action="store", default=None) + #parser.add_argument( + # "--cluster-with-repr", + # help="Cluster input traces using some representation (bow, doc2vec)", + # action="store", + # default=None) - parser.add_argument("--cluster-with-rdim", - help="Cluster input traces reducing dimensionality (pca, svd, none)", - action="store", default="pca") + #parser.add_argument( + # "--cluster-with-rdim", + # help="Cluster input traces reducing dimensionality (pca, svd, none)", + # action="store", + # default="pca") - #parser.add_argument("--cluster-doc2vec", + # parser.add_argument("--cluster-doc2vec", # help="Cluster input traces using doc2vec", # action="store_true", default=False) - parser.add_argument("--cluster-param", type=float, - help="Cluster parameter", - action="store", default=0.1) + #parser.add_argument("--cluster-param", type=float, + # help="Cluster parameter", + # action="store", default=0.1) - parser.add_argument("--cluster-cnn", - help="Cluster input traces using 
a convolutional model", - action="store_true", default=False) + #parser.add_argument( + # "--cluster-cnn", + # help="Cluster input traces using a convolutional model", + # action="store_true", + # default=False) - parser.add_argument("--train-rf", - help="Train a Random Forest using infile", + parser.add_argument("--train", + help="Train a model using a random forest", action="store_true", default=False) - #parser.add_argument("--train-lstm", + parser.add_argument("--vect", type=str, + help="Which technique use to vectorize traces", + action="store", default="bow") + + + # parser.add_argument("--train-lstm", # help="Train a LSTM using infile (warning: very experimental and slow)", # action="store_true", default=False) - #parser.add_argument("--train-cnn", + # parser.add_argument("--train-cnn", # help="Train a CNN using infile", # action="store_true", default=False) - parser.add_argument("--n-samples", type=int, - help="Select a number of samples from infile (train only)", - action="store", default=None) + parser.add_argument( + "--n-samples", + type=int, + help="Select a number of samples from infile (train only)", + action="store", + default=None) parser.add_argument("--out-file", help="File to output the results/model", @@ -108,21 +128,24 @@ if __name__ == "__main__": options = parser.parse_args() in_file = options.infile - valid_file = options.valid + vector_type = options.vect + + #valid_file = options.valid test_simple = options.test test_aggr = options.test_aggr - training_mode_rf = options.train_rf + #training_mode_rf = options.train #training_mode_lstm = options.train_lstm #training_mode_cnn = options.train_cnn - training_mode_cluster_repr = options.cluster_with_repr + #training_mode_cluster_repr = options.cluster_with_repr - cluster_rdim = options.cluster_with_rdim - cluster_param = options.cluster_param + #cluster_rdim = options.cluster_with_rdim + #cluster_param = options.cluster_param - training_mode = training_mode_rf or training_mode_cluster_repr #training_mode_cluster_bow or training_mode_cluster_cnn or training_mode_cluster_doc2vec + # training_mode_cluster_bow or training_mode_cluster_cnn or training_mode_cluster_doc2vec + training_mode = options.train #training_mode_rf or training_mode_cluster_repr probability_mode = options.prob nsamples = options.n_samples @@ -133,25 +156,27 @@ if __name__ == "__main__": out_file = options.out_file model_file = options.model - if (not static_only and not dynamic_only) or (static_only and dynamic_only): - print "VDiscover requires to select either static of dynamic features exclusively" - exit(-1) + if (not static_only and not dynamic_only) or ( + static_only and dynamic_only): + print "VDiscover requires to select either static of dynamic features exclusively" + exit(-1) elif static_only: - ftype = "static" + features_type = "static" elif dynamic_only: - ftype = "dynamic" + features_type = "dynamic" if training_mode: - if training_mode_rf: - Train(out_file, in_file, valid_file, "rf", ftype, nsamples) + model_type = "rf" + Train(out_file, in_file, None, model_type, vector_type, features_type, nsamples) - elif training_mode_cluster_repr: - cluster_repr = training_mode_cluster_repr - from vdiscover.Cluster import ClusterScikit + #elif training_mode_cluster_repr: + # cluster_repr = training_mode_cluster_repr + # from vdiscover.Cluster import ClusterScikit + # + # ClusterScikit(None, in_file, valid_file, ftype, nsamples, + # cluster_repr, cluster_rdim, cluster_param) - ClusterScikit(None, in_file, valid_file, ftype, nsamples, cluster_repr, 
cluster_rdim, cluster_param) - - """ + """ elif training_mode_cluster_cnn: if (model_file is None): @@ -165,14 +190,15 @@ if __name__ == "__main__": """ else: - if model_file is None: - print "VDiscover requires a pre-trained model to predict" - exit(-1) - - test_mode = None - if test_simple: - test_mode = "simple" - elif test_aggr: - test_mode = "aggregated" - - Recall(model_file, in_file, ftype, out_file, test_mode, probability=probability_mode) + if model_file is None: + print "VDiscover requires a pre-trained model to predict" + exit(-1) + + test_mode = None + if test_simple: + test_mode = "simple" + elif test_aggr: + test_mode = "aggregated" + + Recall(model_file, in_file, features_type, out_file, + test_mode, probability=probability_mode)
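
Editor's note on the refactored interface: after this patch, vpredictor collapses --train-rf and the clustering flags into a single --train switch plus a --vect selector, and everything funnels through the two calls wired up above, Train(out_file, in_file, None, model_type, vector_type, features_type, nsamples) and Recall(model_file, in_file, features_type, out_file, test_mode, probability=probability_mode). The sketch below mirrors that wiring as a quick sanity check of the new interface; it is not part of the patch, and the file names (train.csv, test.csv, model.pklz, predictions.csv) are placeholders rather than paths from the repository.

    # Minimal sketch, assuming a csv of dynamic features produced by fextractor
    # and the new defaults (--vect bow, no --n-samples).
    from vdiscover.Train import Train
    from vdiscover.Recall import Recall

    # Roughly what "./vpredictor train.csv --dynamic --train --out-file model.pklz" now does:
    # out_file, in_file, valid_file (dropped, so None), model, vectorizer, feature type, n samples.
    Train("model.pklz", "train.csv", None, "rf", "bow", "dynamic", None)

    # Roughly what "./vpredictor test.csv --dynamic --model model.pklz --prob --out-file predictions.csv" now does:
    # test_mode stays None for plain prediction; "simple"/"aggregated" correspond to --test/--test-aggr.
    Recall("model.pklz", "test.csv", "dynamic", "predictions.csv", None, probability=True)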