-
Notifications
You must be signed in to change notification settings - Fork 23
Open
Description
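Suggest consolidating the two copies of assign-colors.py (phylogenetic/scripts/ and nextclade/scripts/); the diff between them is shown below. While we're at it, we should remove the code around forced_colors (originally implemented for ncov), as it has always felt confusing.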
diff --git a/phylogenetic/scripts/assign-colors.py b/nextclade/scripts/assign-colors.py
index e7587f5..72f9dc6 100644
--- a/phylogenetic/scripts/assign-colors.py
+++ b/nextclade/scripts/assign-colors.py
@@ -1,21 +1,24 @@
import argparse
-import pdb
+
import pandas as pd
# Forced colours MUST NOT appear in the ordering TSV
-forced_colors = {
-}
+forced_colors = {}
-if __name__ == '__main__':
+if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Assign colors based on ordering",
- formatter_class=argparse.ArgumentDefaultsHelpFormatter
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
- parser.add_argument('--ordering', type=str, required=True, help="input ordering file")
- parser.add_argument('--color-schemes', type=str, required=True, help="input color schemes file")
- parser.add_argument('--metadata', type=str, help="if provided, restrict colors to only those found in metadata")
- parser.add_argument('--output', type=str, required=True, help="output colors tsv")
+ parser.add_argument("--ordering", type=str, required=True, help="input ordering file")
+ parser.add_argument("--color-schemes", type=str, required=True, help="input color schemes file")
+ parser.add_argument(
+ "--metadata",
+ type=str,
+ help="if provided, restrict colors to only those found in metadata",
+ )
+ parser.add_argument("--output", type=str, required=True, help="output colors tsv")
args = parser.parse_args()
assignment = {}
@@ -34,14 +37,18 @@ if __name__ == '__main__':
# 1. remove assignments that don't exist in metadata
# 2. remove assignments that have 'focal' set to 'False' in metadata
if args.metadata:
- metadata = pd.read_csv(args.metadata, delimiter='\t')
+ metadata = pd.read_csv(args.metadata, delimiter="\t")
for name, trait in assignment.items():
# Items not to exclude if not (yet) present in metadata to solve bootstrapping issue
- if name in metadata and name not in ['clade_membership', 'outbreak', 'lineage']:
+ if name in metadata and name not in [
+ "clade_membership",
+ "outbreak",
+ "lineage",
+ ]:
subset_present = [x for x in assignment[name] if x in metadata[name].unique()]
assignment[name] = subset_present
- if name in metadata and 'focal' in metadata:
- focal_list = metadata.loc[metadata['focal'] == True, name].unique()
+ if name in metadata and "focal" in metadata:
+ focal_list = metadata.loc[metadata["focal"] == True, name].unique()
subset_focal = [x for x in assignment[name] if x in focal_list]
assignment[name] = subset_focal
@@ -53,28 +60,28 @@ if __name__ == '__main__':
array = line.lstrip().rstrip().split("\t")
schemes[counter] = array
- with open(args.output, 'w') as f:
+ with open(args.output, "w") as f:
for trait_name, trait_array in assignment.items():
- if len(trait_array)==0:
+ if len(trait_array) == 0:
print(f"No traits found for {trait_name}")
continue
- if len(schemes)<len(trait_array):
- print(f"WARNING: insufficient colours available for trait {trait_name} - reusing colours!")
- remain = len(trait_array)
- color_array = []
- while(remain>0):
- if (remain>len(schemes)):
- color_array = [*color_array, *schemes[len(schemes)]]
- remain -= len(schemes)
- else:
- color_array = [*color_array, *schemes[remain]]
- remain = 0
+ if len(schemes) < len(trait_array):
+ print(f"WARNING: insufficient colours available for trait {trait_name} - reusing colours!")
+ remain = len(trait_array)
+ color_array = []
+ while remain > 0:
+ if remain > len(schemes):
+ color_array = [*color_array, *schemes[len(schemes)]]
+ remain -= len(schemes)
+ else:
+ color_array = [*color_array, *schemes[remain]]
+ remain = 0
else:
- color_array = schemes[len(trait_array)]
+ color_array = schemes[len(trait_array)]
extra_trait_values = list(forced_colors.get(trait_name, {}).keys())
extra_color_values = list(forced_colors.get(trait_name, {}).values())
- zipped = list(zip(trait_array+extra_trait_values, color_array+extra_color_values))
+ zipped = list(zip(trait_array + extra_trait_values, color_array + extra_color_values))
for trait_value, color in zipped:
f.write(trait_name + "\t" + trait_value + "\t" + color + "\n")
f.write("\n")
victorlin
Metadata
Metadata
Assignees
Labels
No labels