From ab9224dab0033c674f9dcdd87a7d58469e61cf84 Mon Sep 17 00:00:00 2001 From: gaa-cifasis Date: Sat, 17 Oct 2015 22:03:53 +0000 Subject: [PATCH 1/2] fixes for eko --- tcreator | 8 ++++---- vdiscover/Pipeline.py | 9 +++++++++ vdiscover/Recall.py | 8 +++----- vdiscover/Train.py | 6 +++--- vdiscover/Utils.py | 8 ++++++++ 5 files changed, 27 insertions(+), 12 deletions(-) diff --git a/tcreator b/tcreator index 35cea91..f3edd73 100755 --- a/tcreator +++ b/tcreator @@ -30,12 +30,12 @@ concatenate = lambda *lists: reduce((lambda a,b: a.extend(b) or a),lists,[]) if __name__ == "__main__": # Arguments - parser = argparse.ArgumentParser(description='A script to create new test cases using a name and a command line') - parser.add_argument("--name", help="A csv with the features to train or predict", type=str, default=None) + parser = argparse.ArgumentParser(description='A small utility to create new test cases using a name and a command line') + parser.add_argument("--name", help="The name of the ", type=str, default=None) parser.add_argument("--cmd", help="Command-line to execute", type=str, default=None) parser.add_argument("--batch", help="A csv with the command lines", type=str, default=None) - parser.add_argument("--copy", help="A csv with the features to train or predict", action='store_true', default=False) + parser.add_argument("--copy", help="Force the copy of the files in command lines instead of symbolic linking", action='store_true', default=False) parser.add_argument("outdir", help="Output directory to write testcases", type=str, default=None) @@ -79,7 +79,7 @@ if __name__ == "__main__": if arg <> '': pargs = pargs + arg #args = concatenate(args) - print pargs + print "Procesing '" + " ".join(pargs) + "'" #args = filter(lambda x: x is not '', cmd.split(" ")) WriteTestcase(name,pargs[0],pargs[1:], copy) diff --git a/vdiscover/Pipeline.py b/vdiscover/Pipeline.py index 9120636..a428c9d 100644 --- a/vdiscover/Pipeline.py +++ b/vdiscover/Pipeline.py @@ -47,6 +47,9 @@ def fit_transform(self, X, y=None, **fit_params): def fit(self, X, y=None, **fit_params): return self + def get_params(self, deep=True): + return [] + class ItemSelector(BaseEstimator, TransformerMixin): @@ -59,6 +62,9 @@ def fit(self, x, y=None): def transform(self, data_dict): return data_dict[self.key] + def get_params(self, deep=True): + return [] + class CutoffMax(BaseEstimator, TransformerMixin): def __init__(self, maxv): @@ -72,6 +78,9 @@ def transform(self, X, y=None, **fit_params): X[self.pos] = self.maxv return X + def get_params(self, deep=True): + return [] + def make_train_pipeline(ftype): diff --git a/vdiscover/Recall.py b/vdiscover/Recall.py index 0618391..b5d63a1 100644 --- a/vdiscover/Recall.py +++ b/vdiscover/Recall.py @@ -11,10 +11,7 @@ def Recall(model_file, in_file, in_type, out_file, test_mode, probability=False): model = load_model(model_file) - #csvreader = open_csv(in_file) - - outfile = open_csv(out_file) - csvwriter = csv.writer(outfile, delimiter='\t') + csvwriter = write_csv(out_file) x = dict() @@ -39,8 +36,9 @@ def Recall(model_file, in_file, in_type, out_file, test_mode, probability=False) else: err = recall_score(test_classes, predicted_classes, average=None) - print err[0], err[1], sum(err)/2.0 print classification_report(test_classes, predicted_classes) + print "Errors per class:", err[0], err[1] + print "Average error:", sum(err)/2.0 elif test_mode == "aggregated": diff --git a/vdiscover/Train.py b/vdiscover/Train.py index e9a8d04..3bc9a13 100644 --- a/vdiscover/Train.py +++ b/vdiscover/Train.py @@ -37,9 +37,9 @@ def TrainScikitLearn(model_file, train_file, valid_file, ftype, nsamples): model = make_train_pipeline(ftype) model.fit(train_dict,train_classes) - print "Resulting model:" - print model - print confusion_matrix(train_classes, model.predict(train_dict)) + print "Done!" + #print model + #print confusion_matrix(train_classes, model.predict(train_dict)) print "Saving model to",model_file modelfile.write(pickle.dumps(model)) diff --git a/vdiscover/Utils.py b/vdiscover/Utils.py index da2b427..1eff7b6 100644 --- a/vdiscover/Utils.py +++ b/vdiscover/Utils.py @@ -46,6 +46,14 @@ def load_csv(in_file): return csv.reader(infile, delimiter='\t') +def write_csv(in_file): + + if ".gz" in in_file: + infile = gzip.open(in_file, "w") + else: + infile = open(in_file, "w") + + return csv.writer(infile, delimiter='\t') def open_csv(in_file): From c7e46f683e83c20cf6571e4ea18e2cad78cd427e Mon Sep 17 00:00:00 2001 From: gaa-cifasis Date: Sun, 18 Oct 2015 20:42:14 +0000 Subject: [PATCH 2/2] typo in error printing --- vdiscover/Recall.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vdiscover/Recall.py b/vdiscover/Recall.py index b5d63a1..0a51a14 100644 --- a/vdiscover/Recall.py +++ b/vdiscover/Recall.py @@ -37,8 +37,8 @@ def Recall(model_file, in_file, in_type, out_file, test_mode, probability=False) err = recall_score(test_classes, predicted_classes, average=None) print classification_report(test_classes, predicted_classes) - print "Errors per class:", err[0], err[1] - print "Average error:", sum(err)/2.0 + print "Accuracy per class:", round(err[0],2), round(err[1],2) + print "Average accuracy:", round(sum(err)/2.0,2) elif test_mode == "aggregated":