|
1 | 1 | import argparse |
2 | 2 | import json |
| 3 | +import re |
| 4 | +from numpy import linspace |
| 5 | +from math import floor |
| 6 | + |
def adjust_coloring_for_epiweeks(dataset):
    """
    Give the "epiweek" colouring of an auspice JSON a categorical rainbow scale.

    If `dataset['meta']['colorings']` contains an entry whose key is "epiweek"
    (case sensitive), the canonical nextstrain rainbow is spread evenly across
    every week between the earliest and latest epiweek observed on the tree,
    and the entry's `scale` is set to the [epiweek, colour] pairs for the weeks
    which were actually observed. The entry's `type` is forced to "categorical".

    The dataset is modified in place and nothing is returned. Any additional
    colouring entries with the key "epiweek" are dropped so that only the first
    one (the one edited here) remains.

    The function returns early, leaving the colouring's scale untouched, when:
      * no colouring with the key "epiweek" is defined,
      * no node in the tree carries an epiweek value,
      * any epiweek value is malformed (a message is printed in this case).

    NOTE: epiweek values must be strings in CDC format: a four-digit year
    followed by a two-digit week number (e.g. "202001"). This may be relaxed
    to include ISO format in the future.
    """
    EPIKEY = "epiweek"
    colorings = dataset['meta'].get("colorings", [])
    cidx = next((i for i, c in enumerate(colorings) if c['key'] == EPIKEY), None)
    if cidx is None:  # coloring doesn't define an epiweek
        return
    coloring = colorings[cidx]

    # remove any duplicate coloring entries in the JSON to ensure the entry we edit is the one used by Auspice
    # (NOTE: this is augur bug https://github.com/nextstrain/augur/issues/719)
    dataset['meta']['colorings'] = [c for i, c in enumerate(colorings) if c['key'] != EPIKEY or i == cidx]

    # Collect the raw epiweek values with an explicit stack rather than recursion, so that
    # very deep (ladder-like) trees cannot exhaust Python's recursion limit.
    # Children are pushed in reverse so nodes are still visited in pre-order.
    raw_values = []
    stack = [dataset["tree"]]
    while stack:
        node = stack.pop()
        value = node.get("node_attrs", {}).get(EPIKEY, {}).get("value", False)
        if value:
            raw_values.append(value)
        stack.extend(reversed(node.get("children", [])))

    if not raw_values:
        # Nothing to build a scale from; without this guard the range generation below
        # would fail on an empty list.
        print("No epiweek values were found on any node in the tree.")
        print("Skipping color scale creation for epiweek.")
        return

    # delay import to support older setups not using epiweeks package
    from epiweeks import Week

    # we validate using both the epiweeks package and a regex (epiweeks will perform coercion of non-valid data into valid data)
    epiweek_pattern = re.compile(r'(\d{4})(\d{2})')
    observed = set()
    try:
        for value in raw_values:
            # Non-string values are rejected here too, so they follow the same "skip" path
            # as any other malformed value. ("MM" in the message denotes the two-digit week.)
            if not isinstance(value, str) or not epiweek_pattern.fullmatch(value):
                raise ValueError(f"Epiweek value {value} was not in format YYYYMM.")
            observed.add(Week.fromstring(value, system="cdc"))  # raises ValueError if not valid
    except ValueError as e:
        print(str(e))
        print("Skipping color scale creation for epiweek.")
        return
    observed_values = sorted(observed)

    ## generate epiweeks across the entire observed range for color generation
    epiweeks = [observed_values[0]]
    while epiweeks[-1] < observed_values[-1]:
        epiweeks.append(epiweeks[-1] + 1)

    ## generate rainbow colour scale across epiweeks.
    ## Since a "default" augur install does not include matplotlib, rather than interpolating between values in the scale
    ## we reuse them. This only applies when n(epiweeks)>30, where distinguishing between colors is problematic anyway.
    rainbow = ["#511EA8", "#482BB6", "#4039C3", "#3F4ACA", "#3E5CD0", "#416CCE", "#447CCD", "#4989C4", "#4E96BC", "#559FB0", "#5DA8A4", "#66AE96", "#6FB388", "#7AB77C", "#85BA6F", "#91BC64", "#9DBE5A", "#AABD53", "#B6BD4B", "#C2BA46", "#CDB642", "#D6B03F", "#DDA83C", "#E29D39", "#E69036", "#E67F33", "#E56D30", "#E2592C", "#DF4428", "#DC2F24"]
    color_indicies = [floor(x) for x in linspace(0, len(rainbow), endpoint=False, num=len(epiweeks))]

    # Colours are assigned over the full (gap-free) range of weeks, but only the weeks
    # which were actually observed are written to the scale.
    coloring['scale'] = [
        [epiweek.cdcformat(), rainbow[color_indicies[i]]]
        for i, epiweek in enumerate(epiweeks)
        if epiweek in observed
    ]
    ## auspice will order the legend according to the provided color scale, so there is no need to set
    ## `coloring['legend']` unless we want to restrict this for some reason.
    coloring['type'] = 'categorical'  # force the scale type to be categorical
3 | 63 |
|
4 | 64 | if __name__ == '__main__': |
5 | 65 | parser = argparse.ArgumentParser( |
|
23 | 83 |
|
24 | 84 | input_json["meta"]["colorings"] = fixed_colorings |
25 | 85 |
|
| 86 | + adjust_coloring_for_epiweeks(input_json) |
| 87 | + |
26 | 88 | with open(args.output, 'w') as f: |
27 | 89 | json.dump(input_json, f, indent=2) |
0 commit comments