diff --git a/CHANGES.md b/CHANGES.md index f9a6018f2..6aa64d1a3 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,6 +6,7 @@ * Updated default latitudes and longitudes for geography traits that includes location name changes. See the pull request for more details. [#1744][] (@joverlee521) * curate apply-geolocation-rules: Augur's standard geolocation rules are used by default and rules provided via `--geolocation-rules` are considered custom rules that have precedence over the default rules. The `--no-default-rules` flag can be used to ignore the default rules. See the pull request for more details. [#1745][] (@joverlee521) +* `augur export v1` has been removed (deprecated in v22.2.0, July 2023). `augur export v2` was introduced in Augur version 6.0.0. Migrate by following the [official guide](https://docs.nextstrain.org/projects/augur/page/releases/migrating-v5-v6.html). ### Features diff --git a/DEPRECATED.md b/DEPRECATED.md index 667bded3a..86d39c70c 100644 --- a/DEPRECATED.md +++ b/DEPRECATED.md @@ -26,8 +26,7 @@ We recognize the existing usage of this function, so it has been moved to ## `augur export v1` -*Deprecated in version 22.2.0 (July 2023). Planned for [removal](https://github.com/nextstrain/augur/issues/1266) -January 2024 or after.* +*Deprecated in version 22.2.0 (July 2023). Removed in version TKTK (TKTK 2025).* `augur export v2` was introduced in Augur version 6.0.0. Migrate by following the [official guide](https://docs.nextstrain.org/projects/augur/page/releases/migrating-v5-v6.html).
diff --git a/augur/data/schema-auspice-config-v2.json b/augur/data/schema-auspice-config-v2.json index fd3707cd2..0f350a4e0 100644 --- a/augur/data/schema-auspice-config-v2.json +++ b/augur/data/schema-auspice-config-v2.json @@ -3,7 +3,7 @@ "$id": "https://nextstrain.org/schemas/auspice/config/v2", "type": "object", "title": "Auspice config file to be supplied to `augur export v2`", - "$comment": "This schema includes deprecated-but-handled-by-augur-export-v1 properties, but their schema definitions are somewhat incomplete", + "$comment": "This schema includes properties which were used in `augur export v1` as these are still handled by `augur export v2`. They should all be considered deprecated.", "additionalProperties": false, "required": [], "properties" : { diff --git a/augur/data/schema-export-root-sequence.json b/augur/data/schema-export-root-sequence.json index 3bf82cbb0..c5471bde2 100644 --- a/augur/data/schema-export-root-sequence.json +++ b/augur/data/schema-export-root-sequence.json @@ -2,32 +2,20 @@ "$schema": "http://json-schema.org/draft-06/schema#", "$id": "https://nextstrain.org/schemas/dataset/root-sequence", "title": "Nextstrain root-sequence sidecar for datasets", - "description": "Typically produced by Augur and consumed by Auspice. Applicable to the `--root-sequence` output of `augur export v2` as well as the `--output-sequence` option of `augur export v1`.", - "oneOf": [ - { - "$comment": "This is sort of weird, but `augur export v1` can explicitly produce an empty object.", - "description": "An empty object", - "type": "object", - "properties": {}, - "additionalProperties": false - }, - { - "description": "An object containing at least a \"nuc\" key and optionally additional keys for genome annotations (e.g.
genes)", - "type": "object", - "required": ["nuc"], - "properties": { - "nuc": { - "description": "Nucleotide sequence of whole genome (from the output of `augur ancestral`)", - "type": "string" - } - }, - "patternProperties": { - "^[a-zA-Z0-9*_-]+$": { - "$comment": "This pattern is the same pattern used in the corresponding parts of schema-export-v2.json.", - "description": "Amino acid sequence of genome annotation (e.g. gene) identified by this key (from the output of `augur translate`)", - "type": "string" - } - } + "description": "Typically produced by Augur and consumed by Auspice. Applicable to the `--root-sequence` output of `augur export v2`. An object containing at least a \"nuc\" key and optionally additional keys for genome annotations (CDSs / genes)", + "type": "object", + "required": ["nuc"], + "properties": { + "nuc": { + "description": "Nucleotide sequence of whole genome (from the output of `augur ancestral`)", + "type": "string" } - ] + }, + "patternProperties": { + "^[a-zA-Z0-9*_-]+$": { + "$comment": "This pattern is the same pattern used in the corresponding parts of schema-export-v2.json.", + "description": "Amino acid sequence of genome annotation (e.g. gene) identified by this key (from the output of `augur translate`)", + "type": "string" + } + } } diff --git a/augur/data/schema-export-v1-meta.json b/augur/data/schema-export-v1-meta.json deleted file mode 100644 index 33c2f2a22..000000000 --- a/augur/data/schema-export-v1-meta.json +++ /dev/null @@ -1,251 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-06/schema#", - "$id": "https://nextstrain.org/schemas/dataset/v1/meta", - "type": "object", - "title": "Nextstrain minimal metadata JSON schema", - "description": "This is the validation schema for the augur produced metadata JSON, for consumption in Auspice. 
Note that every field is optional, but excluding fields may disable certain features in Auspice.", - "additionalProperties": true, - "properties" : { - "title" : { - "description": "Auspice displays this at the top of the page", - "type" : "string" - }, - "updated" : { - "description": "Auspice displays this in the footer", - "type" : "string" - }, - "virus_count" : { - "description": "Auspice uses this in the header info text and the status page. Should be renamed or deprecated for version 1.0", - "type" : "number" - }, - "author_info": { - "description": "Used to display information about terminal nodes & for filtering by author (if \"authors\" is in \"filters\", see below)", - "$comment": "Each key is typically an author name, e.g. \"Black et al\", and must exist in the attrs object of at least one terminal node of the tree", - "type": "object", - "additionalProperties": false, - "patternProperties": { - "^.+$": { - "type": "object", - "additionalProperties": false, - "$comment": "in the future these properties may be required", - "properties": { - "n": { - "description": "Number of sequences / samples associated with this publication", - "type": "number" - }, - "title": { - "description": "Publication title", - "type": "string" - }, - "journal": { - "description": "Journal title", - "type": "string" - }, - "paper_url": { - "description": "URL link to paper or NCBI genome. Use \"?\" if unknown", - "type": "string", - "oneOf": [ - {"pattern": "^https?://.+$"}, - {"enum": ["?"]} - ] - } - } - } - } - }, - "annotations": { - "description": "Genome annotations (e.g. genes), relative to the reference genome", - "$comment": "Required for the entropy panel", - "type": "object", - "required": ["nuc"], - "additionalProperties": false, - "properties": { - "nuc": { - "type": "object", - "properties": { - "start": { - "description": "Gene start position (zero-based, i.e. 
BED format)", - "type": "number" - }, - "end": { - "description": "Gene end position (zero-based half open, i.e. BED format)", - "type": "number" - }, - "strand": { - "description": "Positive or negative strand", - "type": "number", - "enum": [-1, 1] - } - } - } - }, - "patternProperties": { - "^[a-zA-Z0-9*_-]+$": {"$ref": "#/properties/annotations/properties/nuc"} - } - }, - "maintainer": { - "description": "Auspice displays the maintainer in the footer", - "type": "array", - "items": [ - { - "type": "string", - "description": "Text displayed in the browser" - }, - { - "type": "string", - "description": "href for the text. E.g. mailto:, twitter link, lab website" - } - ] - }, - "filters": { - "description": "These appear as filters in the footer of Auspice (which populates the displayed values based upon the tree)", - "$comment": "These values must be present as keys on the attr of nodes on the tree", - "type": "array", - "uniqueItems": true, - "items": {"type": "string"} - }, - "panels": { - "description": "Which panels should Auspice display?", - "type": "array", - "items": { - "type": "string", - "enum": ["tree", "map", "frequencies", "entropy"] - }, - "uniqueItems": true, - "minItems": 1 - }, - "geo": { - "description": "The available options for the geographic resolution dropdown", - "type": "object", - "patternProperties": { - "^[a-z]*$": { - "description": "The names of the geographic resolutions", - "$comment": "E.g. country or region. These need to be present as keys in the attrs of the tree nodes", - "type": "object", - "patternProperties": { - "^[a-z_]*$": { - "description": "The values (of the demes) for this geographic resolutions", - "$comment": "E.g. Brazil, China etc. 
The tree nodes must contain these as values of the geographic resolution key", - "type": "object", - "additionalProperties": false, - "properties": { - "latitude": { - "type": "number", - "minimum": -180, - "maximum": 180 - }, - "longitude": { - "type": "number", - "minimum": -180, - "maximum": 180 - } - } - } - } - } - } - }, - "color_options": { - "description": "Available colorBys for Auspice", - "$comment": "These properties need to be present as keys in the attrs of the tree nodes", - "type": "object", - "additionalProperties": false, - "patternProperties": { - "^[A-Za-z_]*$": { - "description": "property name is the key used on the tree node attrs", - "type": "object", - "properties": { - "key": { - "description": "???", - "type": "string" - }, - "menuItem": { - "description": "Text to be displayed in the \"color by\" dropdown menu", - "type": "string" - }, - "legendTitle": { - "description": "Text to be displayed in the \"color by\" tree menu", - "type": "string" - }, - "type": { - "description": "Dictates how the color scale should be made", - "type": "string", - "enum": ["continuous", "discrete"] - }, - "color_map": { - "description": "Links values to colors. Enforces a discrete scale, no matter what the \"type\" property is.", - "$comment": "A greyscale ramp will be used for missing values (i.e. defined on the tree but not here)", - "type": "array", - "uniqueItems": true, - "items": { - "type": "array", - "additionalItems": false, - "items": [ - { - "description": "value (tree nodes -> attrs -> propertyName)", - "type": "string", - "pattern": "^[ 0-9A-Za-z_.-]+$" - }, - { - "description": "color hex value", - "type": "string", - "pattern": "^#[0-9A-Fa-f]{6}$" - } - ] - } - } - } - } - } - }, - "defaults": { - "description": "Set the defaults for certain display options in Auspice. 
All are optional.", - "type": "object", - "additionalProperties": false, - "properties": { - "geoResolution": { - "description": "Default geographic resolution", - "$comment": "The value here must be present in the geo object (see above)", - "type": "string" - }, - "colorBy": { - "description": "Default color by", - "$comment": "The value here must be present in the color_options object (see above)", - "type": "string" - }, - "distanceMeasure": { - "description": "Default tree metric", - "type": "string", - "enum": ["div", "num_date"] - }, - "mapTriplicate": { - "description": "Should the map be extended / wrapped around. Useful if transmissions are worldwide.", - "type": "boolean" - } - } - }, - "commit": { - "description": "Augur commit used to prepare these JSONs", - "$comment": "DEPRECATED. Will be replaced with version in 0.1", - "type": "string" - }, - "controls": { - "$comment": "DEPRECATED" - }, - "seq_author_map": { - "$comment": "DEPRECATED" - }, - "vaccine_choices": { - "description": "Vaccine strains displayed in Auspice", - "type": "object", - "patternProperties": { - "^.+$": { - "description": "Strain names", - "$comment": "must exist as a strain name on the tree. 
Currently the value is unused.", - "type": "string", - "pattern": "^[0-9X]{4}-[0-9X]{2}-[0-9X]{2}$" - } - } - } - } -} diff --git a/augur/data/schema-export-v1-tree.json b/augur/data/schema-export-v1-tree.json deleted file mode 100644 index 9c70251df..000000000 --- a/augur/data/schema-export-v1-tree.json +++ /dev/null @@ -1,167 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-06/schema#", - "$id": "https://nextstrain.org/schemas/dataset/v1/tree", - "type": "object", - "title": "Nextstrain tree JSON schema", - "additionalProperties": false, - "required": ["attr", "strain"], - "properties": { - "attr": { - "description": "Attributes / decorations on the nodes", - "type": "object", - "additionalProperties": true, - "required": ["div", "num_date"], - "properties": { - "div": { - "description": "Node (phylogenetic) divergence", - "$comment": "Cumulative (root = 0)", - "type": "number" - }, - "num_date": { - "description": "Node date", - "$comment": "Year in decimal format", - "type": "number" - }, - "num_date_confidence": { - "description": "Confidence of the node date", - "type": "array", - "items": [ - {"type": "number"}, - {"type": "number"} - ] - }, - "date": { - "description": "Node date", - "$comment": "Is this used? Should only use this or num_date", - "type": "string", - "pattern": "^[0-9X]{4}-[0-9X]{2}-[0-9X]{2}$" - }, - "clock_length": { - "$comment": "DEPRECATED. Not used in auspice.", - "type": "number" - }, - "mutation_length": { - "$comment": "DEPRECATED. Not used in auspice.", - "type": "number" - }, - "branch_length": { - "$comment": "DEPRECATED. 
Not used in auspice.", - "type": "number" - }, - "url": { - "description": "URL of the sequence (usually https://www.ncbi.nlm.nih.gov/nuccore/...)", - "type": ["null", "string"] - }, - "accession": { - "description": "Genome accession", - "oneOf": [ - { - "type": "string", - "pattern": "^[0-9A-Za-z-]+$" - }, - { - "type": "null" - } - ] - }, - "authors": { - "description": "Author lookup key for the relevant publication / credit", - "$comment": "Should have a corresponding entry in the meta JSON author_info property", - "type": ["null", "string"] - } - }, - "patternProperties": { - "^.*_confidence$": { - "description": "confidence values for trait", - "$comment": "The trait must exist. This cannot be checked for in the JSON schema AFAIK", - "oneOf": [ - { - "description": "", - "type": "object", - "patternProperties": { - "^.+$": { - "type": "number" - } - } - }, - { - "description": "", - "type": "array", - "items": [ - {"type": "number"}, - {"type": "number"} - ] - } - ] - }, - "^.*_entropy$": { - "description": "", - "type": "number" - } - } - }, - "branch_length": { - "$comment": "DEPRECATED. Not used in auspice.", - "type": "number" - }, - "strain": { - "description": "Strain name. Must be unique", - "type": "string" - }, - "clade": { - "description": "Will be removed in schema 2.0", - "type": ["number"] - }, - "tvalue": { - "$comment": "DEPRECATED. Not used in auspice.", - "type": ["null", "number"] - }, - "yvalue": { - "$comment": "DEPRECATED. Not used in auspice.", - "type": ["null", "number"] - }, - "xvalue": { - "$comment": "DEPRECATED. 
Not used in auspice.", - "type": ["null", "number"] - }, - "serum": { - "description": "to do", - "$comment": "Used in dengue & flu trees", - "type": "boolean" - }, - "muts": { - "description": "Nucleotide mutations occuring between the parent and this node", - "$comment": "same numbering scheme as used by the meta.JSON -> annotations", - "type": "array", - "items": { - "oneOf": [ - {"type": "string", "pattern": "^[ATCGNYRWSKMDVHB-][0-9]+[ATCGNYRWSKMDVHB-]$"}, - {"type": "string", "pattern": "^insertion [0-9]+-[0-9]+$", "$comment": "unused by auspice"}, - {"type": "string", "pattern": "^deletion [0-9]+-[0-9]+$", "$comment": "unused by auspice"} - ] - } - }, - "aa_muts": { - "description": "Amino-acid mutations (for each gene) occuring between the parent and this node", - "$comment": "properties (keys) must exist in the meta.JSON -> annotation object, and cannot be \"nuc\"", - "type": "object", - "additionalProperties": false, - "patternProperties": { - "^[a-zA-Z0-9*_-]+$": { - "description": "Mutations for this gene (or annotated region)", - "type": "array", - "items": { - "pattern": "^[A-Z*][0-9]+[A-Z*]$" - } - } - } - }, - "children": { - "description": "Child nodes. Recursive structure. Terminal nodes do not have this property.", - "$comment": "Polytomies (more than 2 items) allowed.", - "type": "array", - "minItems": 2, - "items": {"$ref": "#"} - } - } -} diff --git a/augur/export.py b/augur/export.py index 38c025b80..a8e547ac4 100644 --- a/augur/export.py +++ b/augur/export.py @@ -2,11 +2,10 @@ Export JSON files suitable for visualization with auspice. """ from .argparse_ import add_command_subparsers -from . import export_v1, export_v2 +from . 
import export_v2 SUBCOMMANDS = [ export_v2, - export_v1, ] diff --git a/augur/export_v1.py b/augur/export_v1.py deleted file mode 100644 index ed396f988..000000000 --- a/augur/export_v1.py +++ /dev/null @@ -1,423 +0,0 @@ -""" -Export version 1 JSON schema (separate meta and tree JSONs) for visualization with Auspice -""" - -import sys -from textwrap import dedent -import time -import numpy as np -from Bio import Phylo -from argparse import SUPPRESS -from collections import defaultdict -from .argparse_ import ExtendOverwriteDefault -from .errors import AugurError -from .io.metadata import DEFAULT_DELIMITERS, InvalidDelimiter, read_metadata -from .io.sequences import read_sequences, read_single_sequence -from .utils import read_node_data, write_json, read_config, read_lat_longs, read_colors - -def convert_tree_to_json_structure(node, metadata, div=0, strains=None): - """ - converts the Biopython tree structure to a dictionary that can - be written to file as a json. This is called recursively. - Creates the strain property & divergence on each node - - input - node -- node for which top level dict is produced. 
- div -- cumulative divergence (root = 0) - - returns - tree in JSON structure - list of strains - """ - node_struct = { - 'attr': {"div": div}, - 'strain': node.name, - 'clade': node.clade - } - for attr in ['branch_length', 'tvalue', 'yvalue', 'xvalue']: - try: - node_struct[attr] = node.__getattribute__(attr) - except AttributeError: - pass - - - if strains is None: - strains = [node_struct["strain"]] - else: - strains.append(node_struct["strain"]) - - if node.clades: - node_struct["children"] = [] - for child in node.clades: - if 'mutation_length' in metadata[child.name]: - cdiv = div + metadata[child.name]['mutation_length'] - elif 'branch_length' in metadata[child.name]: - cdiv = div + metadata[child.name]['branch_length'] - else: - print("ERROR: Cannot find branch length information for %s"%(child.name)) - node_struct["children"].append(convert_tree_to_json_structure(child, metadata, div=cdiv, strains=strains)[0]) - - return (node_struct, strains) - - - -def recursively_decorate_tree_json_v1_schema(node, node_metadata, decorations): - """ - This function is deprecated and is used to produce the v1-compatable JSON format - - For given decorations, add information from node_metadata to - each node in the tree. 
- * decorations must have property "key" which is the key used to insert - into the node and the default key used to access node_metadata - * if decorations has property "lookup_key", this is used to access - node meta_data instead - * if decorations has property "is_attr" (and it's value is True) - then the result is inserted into node["attr"] - - returns Null - """ - try: - metadata = node_metadata[node["strain"]] - metadata["strain"] = node["strain"] - except KeyError: - raise Exception("ERROR: node %s is not found in the node metadata."%node.name) - - for data in decorations: - val = None - insert_key = data["key"] - try: - if "lookup_key" in data: - val = metadata[data["lookup_key"]] - else: - val = metadata[insert_key] - except KeyError: - pass - - if val is not None: - if "is_attr" in data and data["is_attr"]: - node["attr"][insert_key] = val - else: - if insert_key == 'aa_muts': - val = {k:v for k,v in val.items() if len(v) } - node[insert_key] = val - - if "children" in node: - for child in node["children"]: - recursively_decorate_tree_json_v1_schema(child, node_metadata, decorations) - - -def tree_layout(T): - """ - calculate tree layout. 
- This function is deprecated, and only used for the v1 JSON format - """ - yval=T.count_terminals() - clade = 0; - for n in T.find_clades(order='postorder'): - n.clade=clade; clade+=1; - if n.is_terminal(): - n.yvalue=yval - yval-=1 - else: - child_yvalues = [c.yvalue for c in n] - n.yvalue=0.5*(np.min(child_yvalues)+np.max(child_yvalues)) - - -def process_colorings(jsn, color_mapping, nodes=None, node_metadata=None): - if "color_options" not in jsn: - print("WARNING: no color options were defined") - return - data = jsn["color_options"] - - for trait, options in data.items(): - if "legendTitle" not in options: options["legendTitle"] = trait - if "menuItem" not in options: options["menuItem"] = trait - if "key" not in options: options["key"] = trait - - if nodes: - values_in_tree = {node[trait] for node in nodes.values() if trait in node} - else: - values_in_tree = {data["traits"][trait]["value"] for name, data in node_metadata.items() if trait in data['traits']} - - if trait.lower() in color_mapping: - # remember that the color maps (from the TSV) are in lower case, but this is not how they should be exported - case_map = {str(val).lower(): val for val in values_in_tree} - options["color_map"] = [(case_map[m[0]], m[1]) for m in color_mapping[trait.lower()] if m[0] in case_map] - - return data - - -def process_geographic_info(jsn, lat_long_mapping, node_metadata=None, nodes=None): - if "geo" not in jsn: - return {} - geo = defaultdict(dict) - - traits = jsn["geo"] - - for trait in traits: - demes_in_tree = {node[trait] for node in nodes.values() if trait in node} - - for deme in demes_in_tree: - try: - geo[trait][deme] = lat_long_mapping[(trait.lower(),deme.lower())] - except KeyError: - print("Error. 
{}->{} did not have an associated lat/long value (matching performed in lower case)".format(trait, deme)) - return geo - - -def process_annotations(node_data): - # `augur translate` adds "annotations" to node_data - if "annotations" not in node_data: - return None - # starting with augur v6 the node data JSONs use GFF like syntax, i.e. - # [one-origin, inclusive], strand: "+" / "-" - # however v1 JSONs used [zero-origin, half-open), strand: "1" / "-1" - annotations = {} - for name, info in node_data["annotations"].items(): - annotations[name] = { - "start": info["start"]-1, - "end": info["end"], - "strand": -1 if info["strand"] == "-" else 1 - } - return annotations - -def process_panels(user_panels, meta_json): - try: - panels = meta_json["panels"] - except KeyError: - panels = ["tree", "map", "entropy"] - - if user_panels is not None and len(user_panels) != 0: - panels = user_panels - - if "geo" in meta_json: - geoTraits = meta_json["geo"].keys() - else: - geoTraits = [] - - if "annotations" in meta_json: - annotations = meta_json["annotations"].keys() - else: - annotations = [] - - if "entropy" in panels and len(annotations) == 0: - panels.remove("entropy") - if "map" in panels and len(geoTraits) == 0: - panels.remove("map") - - return panels - -def ensure_config_is_v1(config): - """ - Check the provided config file is intended for `augur export v1` (not v2, v3 etc) - Side effects: may print a warning & exit - """ - if config.get("maintainers") or config.get("geo_resolutions") or config.get("display_defaults") or config.get("colorings"): - print("ERROR. 
It appears that your provided config file is using a newer schema than required for `augur export v1`.") - # TODO: print documentation URL when we have one available - sys.exit(2) - -def construct_author_info_v1(metadata, tree, nodes): - """ - author info maps the "authors" property present on tree nodes - to further information about the paper etc - """ - - authorsInTree = set() - for node in tree.find_clades(order='postorder'): - if node.is_terminal and node.name in nodes and "authors" in nodes[node.name]: - authorsInTree.add(nodes[node.name]["authors"]) - - author_info = defaultdict(lambda: {"n": 0}) - no_authors = 0 - for strain, data in metadata.iterrows(): - if "authors" not in data: - no_authors += 1 - continue - if data["authors"] not in authorsInTree: - continue - authors = data["authors"] - author_info[authors]["n"] += 1 - # add in extra attributes if they're present in the meta TSV (for this strain...) - for attr in ["title", "journal", "paper_url"]: - if attr in data: - if attr in author_info[authors] and data[attr].strip() != author_info[authors][attr].strip(): - print("Error - {} had contradictory {}(s): {} vs {}".format(authors, attr, data[attr], author_info[authors][attr])) - author_info[authors][attr] = data[attr].strip() - if no_authors: - print("Warning - {} samples did not have author information.".format(no_authors)) - - return author_info - - -def add_tsv_metadata_to_nodes(nodes, meta_tsv, meta_json, extra_fields=['authors', 'url', 'accession']): - """ - Only used for v1 schema compatability - - Add the relevent fields from meta_tsv to the nodes - (both are dictionaries keyed off of strain names) - * the relevent fields are found by scanning the meta json - together with the extra_fields param - """ - if "color_options" in meta_json and isinstance(meta_json["color_options"], dict): - fields = [x for x in meta_json["color_options"].keys() if x != "gt"] + extra_fields - else: - fields = list(extra_fields) - - if "geo" in meta_json: - fields += 
meta_json["geo"] - - for strain, node in nodes.items(): - if strain not in meta_tsv.index: - continue - for field in fields: - # Allow fields to have value of 0! - but prevent from having value of "" (breaks auspice v1) - if field not in node and field in meta_tsv.columns and (meta_tsv.at[strain, field] or meta_tsv.at[strain, field]==0): - node[field] = meta_tsv.at[strain, field] - - -def get_root_sequence(root_node, ref=None, translations=None): - ''' - create a json structure that contains the sequence of the root, both as - nucleotide and as translations. This allows look-up of the sequence for - all states, including those that are not variable. - - Parameters - ---------- - root_node : dict - data associated with the node - ref : str, optional - filename of the root sequence - translations : str, optional - file name of translations - - Returns - ------- - dict - dict of nucleotide sequence and translations - ''' - root_sequence = {} - if ref and translations: - refseq = read_single_sequence(ref, format='fasta') - root_sequence['nuc']=str(refseq.seq) - for gene in read_sequences(translations, format='fasta'): - root_sequence[gene.id] = str(gene.seq) - else: - root_sequence["nuc"] = root_node["sequence"] - root_sequence.update(root_node["aa_sequences"]) - - return root_sequence - - -def add_core_args(parser): - core = parser.add_argument_group("REQUIRED") - core.add_argument('--tree','-t', required=True, help="tree to perform trait reconstruction on") - core.add_argument('--metadata', required=True, metavar="FILE", help="sequence metadata") - core.add_argument('--metadata-delimiters', default=DEFAULT_DELIMITERS, nargs="+", action=ExtendOverwriteDefault, - help="delimiters to accept when reading a metadata file. 
Only one delimiter will be inferred.") - core.add_argument('--node-data', required=True, nargs='+', action=ExtendOverwriteDefault, help="JSON files with meta data for each node") - core.add_argument('--output-tree', help="JSON file name that is passed on to auspice (e.g., zika_tree.json).") - core.add_argument('--output-meta', help="JSON file name that is passed on to auspice (e.g., zika_meta.json).") - core.add_argument('--auspice-config', help="file with auspice configuration") - return core - - -def add_option_args(parser): - options = parser.add_argument_group("OPTIONS") - options.add_argument('--colors', metavar="FILE", help="Custom color definitions, one per line in the format `TRAIT_TYPE\\tTRAIT_VALUE\\tHEX_CODE`") - options.add_argument('--lat-longs', help="file latitudes and longitudes, overrides built in mappings") - options.add_argument('--tree-name', default=False, help="Tree name (needed for tangle tree functionality)") - options.add_argument('--minify-json', action="store_true", help="export JSONs without indentation or line returns") - options.add_argument('--output-sequence', help="JSON file name that is passed on to auspice (e.g., zika_seq.json).") - options.add_argument('--reference', required=False, help="reference sequence for export to browser, only vcf") - options.add_argument('--reference-translations', required=False, help="reference translations for export to browser, only vcf") - return options - - -def register_parser(parent_subparsers): - parser = parent_subparsers.add_parser("v1", help=__doc__) - add_core_args(parser) - add_option_args(parser) - parser.add_argument("--v1", help=SUPPRESS, default=True) - return parser - - -def run(args): - print(dedent("""\ - DEPRECATION WARNING: augur export v1 is no longer maintained and will be removed in a future release. 
- Read more on how to migrate to export v2: """),file=sys.stderr) - - T = Phylo.read(args.tree, 'newick') - node_data = read_node_data(args.node_data) # args.node_data is an array of multiple files (or a single file) - nodes = node_data["nodes"] # this is the per-node metadata produced by various augur modules - - if args.minify_json: - json_indent = None - else: - json_indent = 2 - - # export reference sequence data including translations. This is either the - # inferred sequence of the root, or the reference sequence with respect to - # which mutations are made on the tree (including possible mutations leading - # to the root of the tree -- typical case for vcf input data). - if args.output_sequence: - if T.root.name in nodes: - root_sequence = get_root_sequence(nodes[T.root.name], ref=args.reference, - translations=args.reference_translations) - else: - root_sequence = {} - - write_json(root_sequence, args.output_sequence, include_version=False) - - meta_json = read_config(args.auspice_config) - ensure_config_is_v1(meta_json) - try: - meta_tsv = read_metadata( - args.metadata, - delimiters=args.metadata_delimiters) - except InvalidDelimiter: - raise AugurError( - f"Could not determine the delimiter of {args.metadata!r}. " - f"Valid delimiters are: {args.metadata_delimiters!r}. " - "This can be changed with --metadata-delimiters." - ) - add_tsv_metadata_to_nodes(nodes, meta_tsv, meta_json) - - tree_layout(T) - tree_json, _ = convert_tree_to_json_structure(T.root, nodes) - - # now the messy bit about what decorations (e.g. "country", "aa_muts") do we want to add to the tree? 
- # see recursively_decorate_tree_json to understand the tree_decorations structure - tree_decorations = [ - {"key": "num_date", "lookup_key": "numdate", "is_attr": True}, - {"key": "muts", "is_attr": False}, - {"key": "aa_muts", "is_attr": False} - ] - traits_via_node_metadata = {k for node in nodes.values() for k in node.keys()} - traits_via_node_metadata -= {'sequence', 'mutation_length', 'branch_length', 'numdate', - 'mutations', 'muts', 'aa_muts', 'aa_sequences'} - for trait in traits_via_node_metadata: - tree_decorations.append({"key": trait, "is_attr": True}) - - recursively_decorate_tree_json_v1_schema(tree_json, nodes, decorations=tree_decorations) - write_json(tree_json, args.output_tree, indent=json_indent, include_version=False) - - # Export the metadata JSON - lat_long_mapping = read_lat_longs(args.lat_longs) - try: - color_mapping = read_colors(args.colors) - except FileNotFoundError as e: - print(f"ERROR: required file could not be read: {e}") - sys.exit(2) - meta_json["updated"] = time.strftime("%d %b %Y") - meta_json["virus_count"] = len(list(T.get_terminals())) - meta_json["author_info"] = construct_author_info_v1(meta_tsv, T, nodes) - meta_json["color_options"] = process_colorings(meta_json, color_mapping, nodes=nodes) - meta_json["geo"] = process_geographic_info(meta_json, lat_long_mapping, nodes=nodes) - annotations = process_annotations(node_data) - if annotations: - meta_json["annotations"] = annotations - meta_json["panels"] = process_panels(None, meta_json) - - write_json(meta_json, args.output_meta, indent=json_indent, include_version=False) - return 0 diff --git a/augur/export_v2.py b/augur/export_v2.py index 6484eca7d..30d616c0d 100644 --- a/augur/export_v2.py +++ b/augur/export_v2.py @@ -1,5 +1,6 @@ """ -Export version 2 JSON schema for visualization with Auspice +Export version 2 JSON schema for visualization with Auspice. 
+The JSON schema is available at https://nextstrain.org/schemas/dataset/v2 """ import os from pathlib import Path diff --git a/augur/validate.py b/augur/validate.py index da758f2b5..127d8d92e 100644 --- a/augur/validate.py +++ b/augur/validate.py @@ -16,7 +16,7 @@ from augur.io.file import open_file from augur.io.print import print_err from augur.io.json import shorten_as_json -from .validate_export import verifyMainJSONIsInternallyConsistent, verifyMetaAndOrTreeJSONsAreInternallyConsistent +from .validate_export import verifyMainJSONIsInternallyConsistent def fatal(message): print("FATAL ERROR: {}".format(message)) @@ -215,28 +215,6 @@ def export_v2(main_json, **kwargs): print("Validation of {!r} succeeded, but there were warnings you may want to resolve.".format(main_json)) -def export_v1(meta_json, tree_json, **kwargs): - meta_schema = load_json_schema("schema-export-v1-meta.json") - tree_schema = load_json_schema("schema-export-v1-tree.json") - - if not meta_json.endswith("_meta.json"): - raise ValidateError("The metadata JSON pathname {} must end with '_meta.json'.".format(meta_json)) - - if not tree_json.endswith("_tree.json"): - raise ValidateError("The metadata JSON pathname {} must end with '_tree.json'.".format(tree_json)) - - meta = load_json(meta_json) - tree = load_json(tree_json) - - validate(meta, meta_schema, meta_json) - validate(tree, tree_schema, tree_json) - - if verifyMetaAndOrTreeJSONsAreInternallyConsistent(meta, tree, ValidateError): - print("Validation of {!r} and {!r} succeeded.".format(meta_json, tree_json)) - else: - print("Validation of {!r} and {!r} succeeded, but there were warnings you may want to resolve.".format(meta_json, tree_json)) - - def get_unique_keys(list_of_dicts): """ Returns a set of unique keys from a list of dicts @@ -416,10 +394,6 @@ def register_parser(parent_subparsers): subparsers.add_parser("export-v2", help="validate JSON intended for auspice v2") \ .add_argument('main_json', metavar='JSON', help="exported (main) v2 auspice JSON") - export_v1 = 
subparsers.add_parser("export-v1", help="validate tree+meta JSONs intended for auspice v1") - export_v1.add_argument('meta_json', metavar='META-JSON', help="exported (v1) meta JSON") - export_v1.add_argument('tree_json', metavar='TREE-JSON', help="exported (v1) tree JSON") - subparsers.add_parser("auspice-config-v2", help="validate auspice config intended for `augur export v2`") \ .add_argument('config_json', metavar='JSON', help="auspice config JSON") diff --git a/augur/validate_export.py b/augur/validate_export.py index 5cd3f4c4c..3590c1532 100644 --- a/augur/validate_export.py +++ b/augur/validate_export.py @@ -220,75 +220,3 @@ def recurse(node): recurse(root) return genes - -def verifyMetaAndOrTreeJSONsAreInternallyConsistent(meta_json, tree_json, ValidateError): - """ - Check all possible sources of conflict internally & between the metadata & tree JSONs - This is only that which cannot be checked by the schemas - """ - warnings = False - def warn(msg): - nonlocal warnings - warnings = True - print("\tWARNING: ", msg, file=sys.stderr) - - - print("Validating that meta + tree JSONs are internally consistent...") - mj = meta_json - - if "panels" in mj and "entropy" in mj["panels"] and "annotations" not in mj: - warn("\tERROR: The entropy panel has been specified but annotations don't exist.") - - tj = tree_json - treeAttrs, num_terminal_nodes = collectTreeAttrsV1(tj) - - if "geo" in mj: - for geoName in mj["geo"]: - if geoName not in treeAttrs: - warn("The geographic resolution \"{}\" does not appear as an attr on any tree nodes.".format(geoName)) - else: - for geoValue in mj["geo"][geoName]: - if geoValue not in treeAttrs[geoName]["values"]: - warn("\"{}\", a value of the geographic resolution \"{}\", does not appear as a value of attr->{} on any tree nodes.".format(geoValue, geoName, geoName)) - for geoValue in treeAttrs[geoName]["values"]: - if geoValue not in mj["geo"][geoName]: - warn("\"{}\", a value of the geographic resolution \"{}\", appears in the tree 
but not in the metadata.".format(geoValue, geoName)) - warn("\tThis will cause transmissions & demes involving this location not to be displayed in Auspice") - - - if "color_options" in mj: - for colorBy in [x for x in mj["color_options"] if x != "gt"]: - if colorBy not in treeAttrs: - warn("The color_option \"{}\" does not appear as an attr on any tree nodes.".format(colorBy)) - elif "color_map" in mj["color_options"][colorBy]: - # if there's a color_map, then check that there are no values which aren't seen on the tree - for (value, hex) in mj["color_options"][colorBy]["color_map"]: - if value not in treeAttrs[colorBy]["values"]: - warn("Color option \"{}\" specifies a hex code for \"{}\" but this isn't ever seen on the tree nodes.".format(colorBy, value)) - # inversely, check for values on the tree not defined in the color_map - for value in treeAttrs[colorBy]["values"]: - color_map_values = [x[0] for x in mj["color_options"][colorBy]["color_map"]] - if value not in color_map_values: - warn("Color option \"{}\", which contains a color_map, is missing \"{}\"".format(colorBy, value)) - - - - if "filters" in mj: - for filter in mj["filters"]: - if filter not in treeAttrs: - warn("The filter \"{}\" does not appear on any tree nodes.".format(filter)) - - - if "virus_count" in mj and mj["virus_count"] != num_terminal_nodes: - raise ValidateError("Meta JSON virus_count ({}) differs from the number of nodes in the tree ({})".format(mj["virus_count"], num_terminal_nodes)) - - genes_with_aa_muts = collectAAMutationGenesV1(tj) - if len(genes_with_aa_muts): - if "annotations" not in mj: - warn("The tree defined AA mutations on genes {}, but annotations aren't defined in the meta JSON.".format(", ".join(genes_with_aa_muts))) - else: - for gene in genes_with_aa_muts: - if gene not in mj["annotations"]: - warn("The tree defined AA mutations on gene {} which doesn't appear in the metadata annotations object.".format(gene)) - - return not warnings diff --git 
a/docs/api/developer/augur.export_v1.rst b/docs/api/developer/augur.export_v1.rst deleted file mode 100644 index 7cde9fb15..000000000 --- a/docs/api/developer/augur.export_v1.rst +++ /dev/null @@ -1,7 +0,0 @@ -augur.export\_v1 module -======================= - -.. automodule:: augur.export_v1 - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/developer/augur.rst b/docs/api/developer/augur.rst index a71e502e6..67eead512 100644 --- a/docs/api/developer/augur.rst +++ b/docs/api/developer/augur.rst @@ -35,7 +35,6 @@ Submodules augur.distance augur.errors augur.export - augur.export_v1 augur.export_v2 augur.frequencies augur.frequency_estimators diff --git a/docs/faq/colors.md b/docs/faq/colors.md index 28487ffea..9619d7ab0 100644 --- a/docs/faq/colors.md +++ b/docs/faq/colors.md @@ -2,7 +2,7 @@ Auspice uses a number of default color schemes to color the tree using meta data or values that the augur pipeline computed. In some cases these defaults are not suitable for particular type of data, and you'd like to use your own color schemes. -To specify a mapping between discrete trait values and colors, you can pass a tab-delimited file to `augur export`/`augur export v1`/`augur export v2` using `--colors`. +To specify a mapping between discrete trait values and colors, you can pass a tab-delimited file to `augur export v2` using `--colors`. 
> _Note that it's not currently possible to specify color schemes for nucleotides, amino acids, or continuous data._ diff --git a/docs/faq/import-beast.md b/docs/faq/import-beast.md index fabf0a999..03fb5a105 100644 --- a/docs/faq/import-beast.md +++ b/docs/faq/import-beast.md @@ -55,9 +55,9 @@ posterior 273 0 ``` augur import beast --mcc data/MERS_CoV_mcc.tree --output-tree results/mers.new --output-node-data results/beast_data.json -augur export v1 --tree results/mers.new --node-data results/beast_data.json +augur export v2 --tree results/mers.new --node-data results/beast_data.json --auspice-config config/auspice_config.json - --output-tree auspice/mers_tree.json --output-meta auspice/mers_meta.json + --output auspice/mers.json ``` ``` diff --git a/docs/faq/metadata.rst b/docs/faq/metadata.rst index 622c045da..db5aa3214 100644 --- a/docs/faq/metadata.rst +++ b/docs/faq/metadata.rst @@ -72,16 +72,13 @@ Consistancy and Style ~~~~~~~~~~~~~~~~~~~~~ Check that your metadata is free from spelling mistakes and that values -are consistant. Augur doesn't know that 'UK' and 'United Kingdom' or +are consistent. Augur doesn't know that 'UK' and 'United Kingdom' or 'cat' and 'feline' are the same! -Previously, auspice 'prettified' traits by capitalizing them +Note: Previously, auspice 'prettified' traits by capitalizing them automatically, and removing the underscores that separated two-word -locations ('new_zealand' became 'New Zealand'). - -Auspice will still do this if you are exporting 'V1' type JSON files -(from augur v5 or augur v6 using ``export v1``), but will not do this if -you are using ``export v2`` (:ref:`read more `). +locations ('new_zealand' became 'New Zealand'). With ``augur export v2`` +this is no longer the case, see :ref:`read more `. 
Instead, you should update your metadata files so that traits look the same as you'd like them to display in Auspice (change 'new_zealand' to 'New Zealand' in your metadata, and in any additional latitude-longitude diff --git a/docs/redirects.yaml b/docs/redirects.yaml index b0b57d39a..e941923a3 100644 --- a/docs/redirects.yaml +++ b/docs/redirects.yaml @@ -82,7 +82,11 @@ - type: page from_url: /api/augur.export_v1.html - to_url: /api/developer/augur.export_v1.html + to_url: /releases/migrating-v5-v6.html + +- type: page + from_url: /api/developer/augur.export_v1.html + to_url: /releases/migrating-v5-v6.html - type: page from_url: /api/augur.export_v2.html