Skip to content

assign-colors scripts are largely identical #286

@jameshadfield

Description

@jameshadfield

I suggest consolidating the two scripts into one. While we're at it, we should remove the code around `forced_colors` (originally implemented for ncov), as it has always felt confusing.

diff --git a/phylogenetic/scripts/assign-colors.py b/nextclade/scripts/assign-colors.py
index e7587f5..72f9dc6 100644
--- a/phylogenetic/scripts/assign-colors.py
+++ b/nextclade/scripts/assign-colors.py
@@ -1,21 +1,24 @@
 import argparse
-import pdb
+
 import pandas as pd
 
 # Forced colours MUST NOT appear in the ordering TSV
-forced_colors = {
-}
+forced_colors = {}
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description="Assign colors based on ordering",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
 
-    parser.add_argument('--ordering', type=str, required=True, help="input ordering file")
-    parser.add_argument('--color-schemes', type=str, required=True, help="input color schemes file")
-    parser.add_argument('--metadata', type=str, help="if provided, restrict colors to only those found in metadata")
-    parser.add_argument('--output', type=str, required=True, help="output colors tsv")
+    parser.add_argument("--ordering", type=str, required=True, help="input ordering file")
+    parser.add_argument("--color-schemes", type=str, required=True, help="input color schemes file")
+    parser.add_argument(
+        "--metadata",
+        type=str,
+        help="if provided, restrict colors to only those found in metadata",
+    )
+    parser.add_argument("--output", type=str, required=True, help="output colors tsv")
     args = parser.parse_args()
 
     assignment = {}
@@ -34,14 +37,18 @@ if __name__ == '__main__':
     # 1. remove assignments that don't exist in metadata
     # 2. remove assignments that have 'focal' set to 'False' in metadata
     if args.metadata:
-        metadata = pd.read_csv(args.metadata, delimiter='\t')
+        metadata = pd.read_csv(args.metadata, delimiter="\t")
         for name, trait in assignment.items():
             # Items not to exclude if not (yet) present in metadata to solve bootstrapping issue
-            if name in metadata and name not in ['clade_membership', 'outbreak', 'lineage']:
+            if name in metadata and name not in [
+                "clade_membership",
+                "outbreak",
+                "lineage",
+            ]:
                 subset_present = [x for x in assignment[name] if x in metadata[name].unique()]
                 assignment[name] = subset_present
-            if name in metadata and 'focal' in metadata:
-                focal_list = metadata.loc[metadata['focal'] == True, name].unique()
+            if name in metadata and "focal" in metadata:
+                focal_list = metadata.loc[metadata["focal"] == True, name].unique()
                 subset_focal = [x for x in assignment[name] if x in focal_list]
                 assignment[name] = subset_focal
 
@@ -53,28 +60,28 @@ if __name__ == '__main__':
             array = line.lstrip().rstrip().split("\t")
             schemes[counter] = array
 
-    with open(args.output, 'w') as f:
+    with open(args.output, "w") as f:
         for trait_name, trait_array in assignment.items():
-            if len(trait_array)==0:
+            if len(trait_array) == 0:
                 print(f"No traits found for {trait_name}")
                 continue
-            if len(schemes)<len(trait_array):
-              print(f"WARNING: insufficient colours available for trait {trait_name} - reusing colours!")
-              remain = len(trait_array)
-              color_array = []
-              while(remain>0):
-                if (remain>len(schemes)):
-                  color_array = [*color_array, *schemes[len(schemes)]]
-                  remain -= len(schemes)
-                else:
-                  color_array = [*color_array, *schemes[remain]]
-                  remain = 0
+            if len(schemes) < len(trait_array):
+                print(f"WARNING: insufficient colours available for trait {trait_name} - reusing colours!")
+                remain = len(trait_array)
+                color_array = []
+                while remain > 0:
+                    if remain > len(schemes):
+                        color_array = [*color_array, *schemes[len(schemes)]]
+                        remain -= len(schemes)
+                    else:
+                        color_array = [*color_array, *schemes[remain]]
+                        remain = 0
             else:
-              color_array = schemes[len(trait_array)]
+                color_array = schemes[len(trait_array)]
             extra_trait_values = list(forced_colors.get(trait_name, {}).keys())
             extra_color_values = list(forced_colors.get(trait_name, {}).values())
 
-            zipped = list(zip(trait_array+extra_trait_values, color_array+extra_color_values))
+            zipped = list(zip(trait_array + extra_trait_values, color_array + extra_color_values))
             for trait_value, color in zipped:
                 f.write(trait_name + "\t" + trait_value + "\t" + color + "\n")
             f.write("\n")

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions