propti_analyse.py
#!/usr/bin/env python3
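"""Analysis front end for PROPTI inverse modelling runs.

Reads the run information from the pickle file ('propti.pickle.finished' or
'propti.pickle.init') and the SPOTPY data base (e.g. 'propti_db.csv') located
in the given root directory, then performs the analysis tasks selected via
the command line flags defined below.
"""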
import os
import pandas as pd
import pickle
import logging
import argparse
import sys
import copy
import shutil
import propti.basic_functions as pbf
# import matplotlib.pyplot as plt
import propti as pr
import propti.propti_monitor as pm
import propti.propti_post_processing as ppm
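# Command line interface: each flag below selects one analysis task; several
# flags can be combined in a single call.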
parser = argparse.ArgumentParser()
parser.add_argument("root_dir", type=str,
help="optimisation root directory (location of the 'propti_db.csv', e.g. '.')",
default='.')
parser.add_argument("--inspect_init",
help="provide overview over the data stored in the "
"'pickle.init' file",
action="store_true")
parser.add_argument("--create_best_input",
help="Creates simulation input file with "
"best parameter set",
action="store_true")
parser.add_argument("--run_best",
help="run simulation(s) with best parameter set",
action="store_true")
parser.add_argument("--plot_fitness_development",
help="Scatter plot of fitness values", action="store_true")
parser.add_argument("--plot_para_values",
help="plot like and values", action="store_true")
parser.add_argument("--dump_plots",
help="plot like and values", action="store_true")
parser.add_argument("--calc_stat",
help="calculate statistics", action="store_true")
parser.add_argument("--plot_best_sim_exp",
help="plot results of the simulation of the best parameter "
"set and the experimental data to be compared with",
action="store_true")
parser.add_argument("--plot_best_para_gen",
help="Plots the value of the best parameter set for each"
"parameter, by generation.",
action="store_true")
parser.add_argument("--plot_fit_semilogx",
help="Plots fitness values with semi-log x scale.",
action="store_true")
parser.add_argument("--extract_data",
help="Extracts parameter data, based on fitness values.",
action="store_true")
parser.add_argument("--extractor_sim_input",
help="Creates input files, based on the resulting file "
"from the data extractor.",
action="store_true")
parser.add_argument("--create_case_input",
help="Creates input files for user cases, based on the "
"resulting file from the data extractor.",
action="store_true")
parser.add_argument("--clean_db",
help="Removes restart markers from the database file.",
action="store_true")
parser.add_argument("--func_test",
help="Executes test function for testing purpose",
action="store_true")
parser.add_argument("--plot_para_vs_fitness",
help="Plots each parameter against the fitness values, "
"colour coded by repetition.",
action="store_true")
# Prototyping of new analysis script.
parser.add_argument("--create_new_database",
help="Creates a new database file from the spotpy data "
"base CSV (propti_db.csv)",
action="store_true")
cmdl_args = parser.parse_args()
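# Objects stored in the PROPTI pickle file; they are filled when the pickle
# file is read below.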
ver = None # type: pr.Version
setups = None # type: pr.SimulationSetupSet
ops = None # type: pr.ParameterSet
optimiser = None # type: pr.OptimiserProperties
def check_directory(dir_list):
    """
    Take a list of directory names (strings) and attach them to the root
    path. Check if this path exists, if not create it.

    :param dir_list: List containing the directory names, as string.
    :return: New file path, based on files root and user input.
    """

    # Set up new path.
    new_dir = os.path.join(cmdl_args.root_dir)
    for directory in dir_list:
        new_dir = os.path.join(new_dir, directory)

    # Check if the new path exists, otherwise create it.
    if not os.path.exists(new_dir):
        os.makedirs(new_dir)

    # Return new path for further usage.
    return new_dir
# Names of sub-directories that are used to contain the results of the
# analysis.
p1, p2, p3 = 'Analysis', 'Plots', 'Extractor'
print("")
print("* Loading information of the optimisation process.")
print("----------------------")
# Check if `propti.pickle.finished` exists, else use `propti.pickle.init`.
if os.path.isfile(os.path.join(cmdl_args.root_dir, 'propti.pickle.finished')):
    pickle_file = os.path.join(cmdl_args.root_dir, 'propti.pickle.finished')
elif os.path.isfile(os.path.join(cmdl_args.root_dir, 'propti.pickle.init')):
    pickle_file = os.path.join(cmdl_args.root_dir, 'propti.pickle.init')
else:
    sys.exit("Neither 'propti.pickle.finished' nor 'propti.pickle.init' "
             "detected. Script execution stopped.")
in_file = open(pickle_file, 'rb')
#######################################################
# TODO: Enable better backwards compatibility than the following:
pickle_items = []
for item in pickle.load(in_file):
    pickle_items.append(item)
in_file.close()
p_length = len(pickle_items)
print('Pickle length: {}'.format(p_length))
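# Older PROPTI runs pickled three objects (setups, parameters, optimiser);
# newer runs additionally store version information as the first item.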
if p_length == 3:
    setups, ops, optimiser = pickle_items
elif p_length == 4:
    ver, setups, ops, optimiser = pickle_items
else:
    print('The init-file is incompatible '
          'with this version of propti_analyse.')
#
#######################################################
print("Loading complete.")
# Check if all components are there, otherwise write message to the log file.
if ver is None:
    logging.critical("* Version(s) not defined. Legacy '*.pickle.init' file?")
if setups is None:
    logging.critical("* Simulation setups are not defined.")
if ops is None:
    logging.critical("* Optimisation parameters are not defined.")
if optimiser is None:
    logging.critical("* Optimiser parameters are not defined.")
# TODO: define spotpy db file name in optimiser properties
# TODO: use placeholder as name? or other way round?
##########################
# Inspect PROPTI Init File
if cmdl_args.inspect_init:
    """
    Calls the various print methods of the respective PROPTI objects and
    prints their content in human-readable form.
    Used to check how the IMP is set up (content of the 'propti.pickle.init').
    """

    db_file_name = os.path.join(cmdl_args.root_dir,
                                '{}.{}'.format(optimiser.db_name,
                                               optimiser.db_type))

    print("")
    print("* Inspection of the 'pickle.init' content")
    print("----------------------")
    print("* Version(s):")
    print(ver)
    print("* Simulation Setups:")
    print(setups)
    print("* Optimisation Parameters:")
    print(ops)
    print("* Optimiser Settings:")
    print(optimiser)
    print("")
    print("")
######################################
# Run Simulation of Best Parameter Set
if cmdl_args.run_best:
    """
    Extracts the best parameter set from the data base and writes it into a
    copy of the simulation input template. Afterwards, the simulation is
    executed.
    """

    db_file_name = os.path.join(cmdl_args.root_dir,
                                '{}.{}'.format(optimiser.db_name,
                                               optimiser.db_type))

    # Check if a directory for the result files exists. If not, create it.
    results_dir = check_directory([p1, 'RunBestPara'])

    print("")
    print("* Run simulation(s) of best parameter set")
    print("----------------------")

    pr.run_best_para(setups, ops, optimiser, pickle_file)

    print("")
    print("")
###################
# Create Best Input
if cmdl_args.create_best_input:
    """
    Takes the (up to now) best parameter set from the optimiser data base and
    reads the corresponding parameter values. The parameter values are written
    into the simulation input file and saved as `*_bestpara.file-type`.
    This functionality is focused on the usage of SPOTPY.
    """

    print("")
    print("* Create input file with best parameter set")
    print("----------------------")
    print("Reading data base file, please wait...")
    print("")

    # Read data base file name from the pickle file.
    db_file_name = os.path.join(cmdl_args.root_dir,
                                '{}.{}'.format(optimiser.db_name,
                                               optimiser.db_type))

    # Determine the best fitness value and its location in the data base.
    print("* Locate best parameter set:")
    print("---")
    fitness_values = pd.read_csv(db_file_name, usecols=['like1'])
    best_fitness_index = fitness_values.idxmin().iloc[0]
    best_fitness_value = fitness_values.min().iloc[0]
    print("Best fitness index: line {}".format(best_fitness_index))
    print("Best fitness value: {}".format(best_fitness_value))
    print("---")
    print("")

    # Check if a directory for the result files exists. If not, create it.
    results_dir = check_directory([p1, p3, 'CurrentBestParameter',
                                   'Repetition_{}'.format(best_fitness_index)])

    # Collect simulation setup names.
    print("* Collect simulation setup names:")
    print("---")
    sim_setup_names = []
    for ssn in range(len(setups)):
        ssn_value = setups[ssn].name
        sim_setup_names.append(ssn_value)
        print("Setup {}: {}".format(ssn, ssn_value))
    print("---")
    print("")

    # Collect the optimisation parameter names. Change format to match column
    # headers in propti_db, based on SPOTPY definition. Store headers in a
    # list.
    cols = []
    for p in ops:
        cols.append("par{}".format(p.place_holder))

    # Collect parameter names.
    print("* Collect parameter names and place holders:")
    print("---")
    para_names = []
    para_simsetup_complete = []
    para_name_list = []
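    # For each simulation setup collect pairs of [place_holder, value]; the
    # fixed (meta) parameters are gathered here, the optimised parameters are
    # appended further below.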
    for s_i in range(len(setups)):
        # Place holder list.
        para_ph_list = []

        # Collect meta parameters, those which describe the simulation setup.
        # First, find all parameters and place holders.
        para_meta_simsetup = []
        for s_j in range(len(setups[s_i].model_parameter.parameters)):
            paras = setups[s_i].model_parameter.parameters

            # Parameter names.
            para_name = paras[s_j].name
            para_name_list.append(para_name)

            # Place holders.
            para_ph = paras[s_j].place_holder
            para_ph_list.append(para_ph)

            # Compare the place holders with the optimisation parameters, to
            # determine if they are meta parameters.
            p_i = 'par{}'.format(para_ph)
            if p_i not in cols:
                # Store meta parameters (place holder and value) in list.
                para_meta_simsetup.append([para_ph, paras[s_j].value])

            print('Name: {}'.format(para_name))
            print('Place holder: {}'.format(para_ph))
            print("---")

        # Put meta lists into list which mirrors the simulation setups.
        para_simsetup_complete.append(para_meta_simsetup)

    print("")
    print("* Extract best parameter set")
    print("---")
    print("Read data base file, please wait...")
    print("")

    # Read PROPTI data base.
    parameter_values = pd.read_csv(db_file_name, usecols=cols)

    print("Best parameter values:")
    print("---")

    # Extract the parameter values of the best set. Store place holder and
    # parameter values in lists.
    opti_para = []
    for i in range(len(cols)):
        new_para_value = parameter_values.at[best_fitness_index, cols[i]]
        print("{}: {}".format(para_name_list[i], new_para_value))
        opti_para.append([cols[i][3:], new_para_value])

    # Append optimisation parameter place holders and values to the parameter
    # lists, sorted by simulation setups.
    for pssc in para_simsetup_complete:
        for para in opti_para:
            pssc.append(para)
    # print("para complete: {}".format(para_simsetup_complete))
    print("")

    # Load templates from each simulation setup, fill in the values and write
    # the new input files in the appropriate directories.
    print("* Fill templates")
    print("--------------")

    # Counter of simulation setups.
    css = 0
    for simsetup in sim_setup_names:
        # Create new directories, based on simulation setup names.
        check_directory([results_dir, simsetup])

        # Load template.
        template_file_path = setups[css].model_template
        temp_raw = pbf.read_template(template_file_path)

        # Create new input files with best parameters,
        # based on simulation setups.
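        # Placeholders in the template are written as '#place_holder#' and
        # are replaced by the corresponding parameter value.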
        for bestpara in para_simsetup_complete[css]:
            print("Best para: {}".format(bestpara))
            new_para_value = bestpara[1]

            # Account for scientific notation of floats; isinstance() also
            # covers the numpy float values read from the data base.
            if isinstance(new_para_value, float):
                temp_raw = temp_raw.replace("#" + bestpara[0] + "#",
                                            "{:E}".format(new_para_value))
            else:
                temp_raw = temp_raw.replace("#" + bestpara[0] + "#",
                                            str(new_para_value))

        # Write new input file with best parameters.
        bip = os.path.join(results_dir, simsetup,
                           simsetup + '_rep{}.fds'.format(best_fitness_index))
        pbf.write_input_file(temp_raw, bip)
        print("---")

        # Advance counter.
        css += 1

    print("")
    print("Simulation input file, based on best parameter set, was written.")
    print("* Task finished.")
    print("")
    print("")
##########################
# Plot Fitness Development
if cmdl_args.plot_fitness_development:
    """
    Scatter plot of fitness value (RMSE) development. It reads the PROPTI data
    base file, based on information stored in the pickle file.
    This functionality is focused on the usage of SPOTPY.
    """

    print("")
    print("* Plot fitness values.")
    print("----------------------")

    db_file_name = os.path.join(cmdl_args.root_dir,
                                '{}.{}'.format(optimiser.db_name,
                                               optimiser.db_type))

    # Check if a directory for the result files exists. If not create it.
    results_dir = check_directory(['Analysis', 'Plots', 'Scatter'])
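    # 'like1' is the column in which SPOTPY stores the fitness (objective
    # function) values.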
    # Extract data to be plotted.
    cols = ['like1']
    data = pd.read_csv(db_file_name, usecols=cols)

    # Scatter plots of parameter development over the whole run.
    pr.plot_scatter(cols[0], data,
                    'Fitness value development', 'FitnessDevelopment',
                    results_dir, 'Root Mean Square Error (RMSE)')

    print("Plot(s) have been created.")
    print("")
    print("")
####################################
# Plot Development of All Parameters
if cmdl_args.plot_para_values:
    """
    This functionality is deprecated!
    Creates scatter plots of the development of each parameter over the
    optimisation process. It reads the propti data
    base file, based on information stored in the pickle file.
    This functionality is focused on the usage of SPOTPY.
    """

    # TODO: Check for optimisation algorithm
    # TODO: Adjust output depending on optimisation algorithm

    print("")
    print("* Plot likes and values.")
    print("----------------------")

    db_file_name = os.path.join(cmdl_args.root_dir,
                                '{}.{}'.format(optimiser.db_name,
                                               optimiser.db_type))

    # Check if a directory for the result files exists. If not create it.
    results_dir_scatter = check_directory(['Analysis', 'Plots', 'Scatter'])
    results_dir_boxplot = check_directory(['Analysis', 'Plots', 'Boxplot'])
    results_dir_histogram = check_directory(['Analysis', 'Plots', 'Histogram'])
    results_dir_para_gen = check_directory(['Analysis', 'Plots', 'Para_Gen'])
    results_dir_log = check_directory(['Analysis', 'Plots', 'Log'])

    # Extract data to be plotted.
    cols = ['like1', 'chain']
    for p in ops:
        cols.append("par{}".format(p.place_holder))
    data = pd.read_csv(db_file_name, usecols=cols)

    # Scatter plots of parameter development over the whole run.
    for c in cols[2:]:
        # Scatter plots of parameter development over the whole run.
        pr.plot_scatter(c, data, 'Parameter development: ', c,
                        results_dir_scatter, version=ver.ver_propti)

        # Histogram plots of parameters
        pr.plot_hist(c, data, 'Histogram per generation for: ' + c,
                     c, results_dir_histogram, y_label=None)

    # Scatter plot of fitness values.
    pr.plot_scatter('like1', data, 'Fitness value development',
                    'FitnessDevelopment', results_dir_scatter,
                    'Root Mean Square Error (RMSE)',
                    version=ver.ver_propti)

    # Plot values of best parameter set, by generation.
    pm.plot_best_para_generation(cols, data, len(ops), optimiser.ngs,
                                 results_dir_para_gen)

    # Plot fitness semi-log x.
    pm.plot_semilogx_scatter('like1', data, 'Fitness value development',
                             'FitnessDevelopment', results_dir_log,
                             'Root Mean Square Error (RMSE)',
                             version=ver.ver_propti)

    # Box plot to visualise steps (generations).
    pr.plot_box_rmse(data, 'Fitness values, histogram per step (generation)',
                     len(ops),
                     optimiser.ngs,
                     'FitnessDevelopment', results_dir_boxplot)

    print("Plots have been created.")
    print("")
    print("")
####################################
# Plot Development of All Parameters
if cmdl_args.dump_plots:
    """
    Creates scatter plots of the development of each parameter over the
    optimisation process. It reads the propti data
    base file, based on information stored in the pickle file.
    This functionality is focused on the usage of SPOTPY.
    """

    # TODO: Check for optimisation algorithm
    # TODO: Adjust output depending on optimisation algorithm

    print("")
    print("* Plot 'Likes' and Values.")
    print("----------------------")

    db_file_name = os.path.join(cmdl_args.root_dir,
                                '{}.{}'.format(optimiser.db_name,
                                               optimiser.db_type))

    # Check if a directory for the result files exists. If not create it.
    results_dir_scatter = check_directory([p1, p2, 'Scatter'])
    results_dir_boxplot = check_directory([p1, p2, 'Boxplot'])
    results_dir_para_gen = check_directory([p1, p2, 'Para_Gen'])
    results_dir_para_vs_fit = check_directory([p1, p2, 'Para_vs_Fitness'])

    # Extract data to be plotted.
    cols = ['like1', 'chain']
    pars = list()
    for parameter in ops:
        par_label = "par{}".format(parameter.place_holder)
        cols.append(par_label)
        pars.append(par_label)

    # Read data for the plots.
    data = pd.read_csv(db_file_name, usecols=cols)

    # Start plotting.
    # ---------------

    # Scatter plots of parameter development over the whole run.
    for c in cols[2:]:
        # Scatter plots of parameter development over the whole run.
        pr.plot_scatter(c, data, 'Parameter development: ', c,
                        results_dir_scatter, version=ver.ver_propti)

    # Scatter plot of fitness values.
    pr.plot_scatter('like1', data, 'Fitness value development',
                    'FitnessDevelopment', results_dir_scatter,
                    'Root Mean Square Error (RMSE)',
                    version=ver.ver_propti)

    # Plot values of best parameter set, by generation.
    pm.plot_best_para_generation(cols, data, len(ops), optimiser.ngs,
                                 results_dir_para_gen)

    # Box plot to visualise steps (generations).
    pr.plot_box_rmse(data, 'Fitness values, histogram per step (generation)',
                     len(ops),
                     optimiser.ngs,
                     'FitnessDevelopment', results_dir_boxplot)

    # Plot the parameter values against the fitness, colour coded by
    # repetition.
    pr.plot_para_vs_fitness(data_frame=data,
                            fitness_label=cols[0],
                            parameter_labels=pars,
                            file_path=results_dir_para_vs_fit,
                            version=ver.ver_propti)

    print("Plots have been created.")
    print("")
    print("")
if cmdl_args.calc_stat:
    """
    This functionality is very much work in progress.
    """

    # TODO: write statistics data to file

    print("")
    print("- calculate statistics")
    print("----------------------")

    db_file_name = os.path.join(cmdl_args.root_dir,
                                '{}.{}'.format(optimiser.db_name,
                                               optimiser.db_type))

    for s in setups:
        cols = []
        lab = ['like1']
        for p in ops:
            cols.append("par{}".format(p.place_holder))
            lab.append("par{}".format(p.place_holder))
        data_raw = pd.read_csv(db_file_name, usecols=cols)

        data = []
        for i in cols:
            data.append(data_raw[i])

        fname = s.analyser_input_file
        with open(fname) as f:
            content = f.readlines()

        # Initialise the flag to avoid a NameError if the analyser input file
        # does not request the Pearson coefficient.
        pear_coeff = False
        for line in content:
            if 'pearson_coeff' in line:
                pear_coeff = True

        if pear_coeff is True:
            print('Pearson coefficient matrix for the whole run:')
            mat = pr.calc_pearson_coefficient(data)
            print('')

            data_fit = pd.read_csv(db_file_name, usecols=lab)
            # print(data_fit.head())
            # print('')

            data_fit['like1'].tolist()

            t = pr.collect_best_para_multi(db_file_name, lab)
            # print(t)
            print('')

            best_para_sets = []
            for i in cols:
                best_para_sets.append(t[i])

            print('Pearson coefficient matrix for the best parameter collection:')
            mat_best_collection = pr.calc_pearson_coefficient(best_para_sets)
            print('')
if cmdl_args.plot_best_sim_exp:

    # TODO: write statistics data to file

    print("")
    print("- plot best simulation and experimental data")
    print("----------------------")

    db_file_name = os.path.join(cmdl_args.root_dir,
                                '{}.{}'.format(optimiser.db_name,
                                               optimiser.db_type))

    for s in setups:
        pr.plot_best_sim_exp(s, pickle_file)

    print("")
    print("")
##########################################
# Plot Best Parameter Value, by Generation
if cmdl_args.plot_best_para_gen:
    """
    Plot the parameter values, for the best parameter set, of each generation.
    """

    print("")
    print("* Plot best values of a generation.")
    print("----------------------")

    db_file_name = os.path.join(cmdl_args.root_dir,
                                '{}.{}'.format(optimiser.db_name,
                                               optimiser.db_type))

    # Check if a directory for the result files exists. If not create it.
    results_dir = check_directory(['Analysis', 'Plots', 'Para_Gen'])

    # Collect the optimisation parameter names. Change format to match column
    # headers in propti_db, based on SPOTPY definition. Store headers in a
    # list.
    cols = ['like1', 'chain']
    for p in ops:
        cols.append("par{}".format(p.place_holder))

    # Extract data to be plotted.
    data = pd.read_csv(db_file_name, usecols=cols)

    pm.plot_best_para_generation(cols, data, len(ops), optimiser.ngs,
                                 results_dir)

    print("")
    print("Plotting task completed.")
    print("")
    print("")
#########################
# Plot Fitness Semi-log x
if cmdl_args.plot_fit_semilogx:
    """
    Plot the fitness value development with a semi-logarithmic x-axis.
    """

    print("")
    print("* Fitness semi-log x.")
    print("----------------------")

    db_file_name = os.path.join(cmdl_args.root_dir,
                                '{}.{}'.format(optimiser.db_name,
                                               optimiser.db_type))

    # Check if a directory for the result files exists. If not create it.
    results_dir_semilogx_fitness = check_directory(['Analysis', 'Plots', 'Log'])

    # Only the fitness values ('like1') and the 'chain' column are needed
    # here.
    cols = ['like1', 'chain']
    data = pd.read_csv(db_file_name, usecols=cols)

    # Scatter plot of fitness values.
    pm.plot_semilogx_scatter('like1', data, 'Fitness value development',
                             'FitnessDevelopment',
                             results_dir_semilogx_fitness,
                             'Root Mean Square Error (RMSE)')

    print("")
    print("Plot fitness semi-log x completed.")
    print("")
    print("")
################
# Data Extractor
if cmdl_args.extract_data:
    """
    Used to extract parameter sets, based on their fitness value.
    """

    print("")
    print("* Extract data.")
    print("----------------------")

    db_file_name = os.path.join(cmdl_args.root_dir,
                                '{}.{}'.format(optimiser.db_name,
                                               optimiser.db_type))

    # Check if a directory for the result files exists. If not create it.
    results_dir_best_para = check_directory([p1, p3, 'BestParameterGeneration'])

    # Collect the optimisation parameter names. Change format to match column
    # headers in propti_db, based on SPOTPY definition. Store headers in a
    # list.
    cols = ['like1', 'chain']
    for p in ops:
        cols.append("par{}".format(p.place_holder))
    data = pd.read_csv(db_file_name, usecols=cols)

    # Extract the best parameter sets and write them to
    # 'BestParaExtraction.csv', which is used by --extractor_sim_input and
    # --create_case_input.
    pm.data_extractor(cols, data, len(ops), optimiser.ngs,
                      'BestParaExtraction', results_dir_best_para)

    print("")
    print("Extraction completed and file saved.")
    print("")
    print("")
##################################
# Create Input from Data Extractor
if cmdl_args.extractor_sim_input:
    """
    Takes the file that contains the results of the data extractor and builds
    simulation input files from it. Files are stored in appropriate directory.
    """

    print("")
    print("* Creating input files from extracted data.")
    print("----------------------")

    db_file_name = os.path.join(cmdl_args.root_dir,
                                '{}.{}'.format(optimiser.db_name,
                                               optimiser.db_type))

    # Check if a directory for the result files exists. If not create it.
    extractor_dir = check_directory([p1, p3, 'ExtractorSimInput'])

    # Directory that shall contain the results from data_extractor.
    results_dir_best_para = os.path.join(cmdl_args.root_dir, p1, p3,
                                         'BestParameterGeneration')

    # Check if data collection exists.
    extr_file = os.path.join(results_dir_best_para, 'BestParaExtraction.csv')
    if os.path.isfile(extr_file):
        print("Data collection from data_extractor found.")
        print("")
    else:
        print("No data collection from data_extractor found.\n"
              "Please run the data extractor (--extract_data) first.")
        print("")
        exit()
    # Collect simulation setup names.
    print("* Collect simulation setup names:")
    print("---")
    sim_setup_names = []
    for ssn in range(len(setups)):
        ssn_value = setups[ssn].name
        sim_setup_names.append(ssn_value)
        print("Setup {}: {}".format(ssn, ssn_value))
    print("---")
    print("")

    # Collect the optimisation parameter names. Change format to match column
    # headers in propti_db, based on SPOTPY definition. Store headers in a
    # list.
    cols = []
    for p in ops:
        cols.append("par{}".format(p.place_holder))
    print(cols)

    # Collect parameter names
    print("* Collect parameter names and place holders:")
    print("---")
    para_names = []
    para_simsetup_complete = []
    para_name_list = []
    for s_i in range(len(setups)):
        # Place holder list
        para_ph_list = []

        # Collect model parameters, those which describe the simulation setup.
        # First, find all parameters and place holders.
        para_meta_simsetup = []
        for s_j in range(len(setups[s_i].model_parameter.parameters)):
            paras = setups[s_i].model_parameter.parameters
            para_name = paras[s_j].name
            para_name_list.append(para_name)
            para_ph = paras[s_j].place_holder
            para_ph_list.append(para_ph)

            # Compare the place holders with the optimisation parameters, to
            # determine if they are meta parameters.
            p_i = 'par{}'.format(para_ph)
            if p_i not in cols:
                # Store meta parameters (place holder and value) in list.
                para_meta_simsetup.append([para_ph, paras[s_j].value])

            print('Name: {}'.format(para_name))
            print('Place holder: {}'.format(para_ph))
            print("---")

        # Put meta lists into list which mirrors the simulation setups.
        para_simsetup_complete.append(para_meta_simsetup)

    print("")
    print("* Extract data from collection.")
    print("---")
    print("Read data collection file, please wait...")
    print("")

    # Read data collection from data_extractor.
    extr_data = pd.read_csv(extr_file, sep=',')

    print("Number of data sets: {}".format(len(extr_data['repetition'])))

    for i in range(len(extr_data['repetition'])):
        print("* Fill templates")
        print("--------------")

        rep_value = int(extr_data.iloc[i]['repetition'])
        new_dir_rep = 'rep_{:08d}'.format(rep_value)
        check_directory([extractor_dir, new_dir_rep])

        print("Line: {}".format(i))
        print("Repetition value: {}".format(rep_value))
        print("")

        print("Parameters:")
        print("---")

        # Extract the parameter values of the best set. Store place holder and
        # parameter values in lists.
        opti_para = []
        for j in range(len(cols)):
            new_para_value = extr_data.at[i, cols[j]]
            print("{}: {}".format(para_name_list[j], new_para_value))
            opti_para.append([cols[j][3:], new_para_value])

        # Append optimisation parameter place holders and values to the
        # parameter lists, sorted by simulation setups.
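        # Work on a deep copy so that the parameters appended for one
        # repetition do not accumulate over the following repetitions.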
        para_simsetup_complete_work = copy.deepcopy(para_simsetup_complete)
        for pssc in para_simsetup_complete_work:
            for para in opti_para:
                pssc.append(para)

        # Load templates from each simulation setup, fill in the values and
        # write the new input files in the appropriate directories.
        # Counter
        css = 0
        for simsetup in sim_setup_names:
            # Create new directories, based on simulation setup names.
            check_directory([extractor_dir, new_dir_rep, simsetup])

            # Load template.
            template_file_path = setups[css].model_template
            temp_raw = pbf.read_template(template_file_path)

            # Create new input files with best parameters,
            # based on simulation setups.
            for bestpara in para_simsetup_complete_work[css]:
                new_para_value = bestpara[1]

                # Account for scientific notation of floats; isinstance() also
                # covers the numpy float values read from the CSV file.
                if isinstance(new_para_value, float):
                    temp_raw = temp_raw.replace("#" + bestpara[0] + "#",
                                                "{:E}".format(new_para_value))
                else:
                    temp_raw = temp_raw.replace("#" + bestpara[0] + "#",
                                                str(new_para_value))

            # Write new input file with best parameters.
            bip = os.path.join(extractor_dir, new_dir_rep, simsetup,
                               simsetup + '_rep{}.fds'.format(
                                   int(extr_data.iloc[i]['repetition'])))
            pbf.write_input_file(temp_raw, bip)

            # Advance counter.
            css += 1

        para_simsetup_complete_work.clear()
        print("---")
        print("")

    print("Input files created.")
    print("")
    print("")
#############################
# Create Input for User Cases
if cmdl_args.create_case_input:
    """
    Templates of simulation input files are filled with data from the data
    extractor. The templates are free to choose, which provides a means to
    apply results from the IMP to different use cases.
    """

    print("")
    print("* Create input files for user cases.")
    print("----------------------")

    db_file_name = os.path.join(cmdl_args.root_dir,
                                '{}.{}'.format(optimiser.db_name,
                                               optimiser.db_type))

    # Check if a directory for the result files exists. If not create it.
    case_dir = check_directory(['Analysis', 'Cases'])

    # Directory that is supposed to contain the results from data_extractor.
    results_dir_best_para = os.path.join(cmdl_args.root_dir, 'Analysis',
                                         'BestParameter')

    # Check if data collection exists.
    extr_file = os.path.join(results_dir_best_para, 'BestParaExtraction.csv')
    if os.path.isfile(extr_file):
        print("Data collection from data_extractor found.")
        print("")
    else:
        print("No data collection from data_extractor found.\n"
              "Please run the data extractor (--extract_data) first.")
        print("")
        exit()

    # Check if template exists.
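    # Name of the case template (without the '.fds' extension); the value
    # below is specific to one project and will likely need to be adjusted.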
    case_temp_name = 'C219_MT3_LargeDomain'
    template_file_path = '{}.fds'.format(case_temp_name)
    if os.path.isfile(template_file_path):
        print("Template for cases found.")
        print("")
    else:
        print("No template for cases found.\n"
              "Please provide a template.")
        print("")
        exit()

    # Read data collection from data_extractor.
    extr_data = pd.read_csv(extr_file, sep=',')