diff --git a/CHANGES.md b/CHANGES.md index 52447c001..4300ae1eb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,10 +11,13 @@ * index: Previously specifying a directory that does not exist in the path to `--output` would result in an incorrect error stating that the input file does not exist. It now shows the correct path responsible for the error. [#1644][] (@victorlin) * curate format-dates: Update help docs and improve failure messages to show use of `--expected-date-formats`. [#1653][] (@joverlee521) +* tests: Fix false negative tests due to incorrect use of DeepDiff's `--exclude-regex-paths` option. [#1665][] [#1666][] (@corneliusroemer) [#1644]: https://github.com/nextstrain/augur/issues/1644 [#1653]: https://github.com/nextstrain/augur/pull/1653 [#1656]: https://github.com/nextstrain/augur/pull/1656 +[#1665]: https://github.com/nextstrain/augur/issues/1665 +[#1666]: https://github.com/nextstrain/augur/pull/1666 ## 26.0.0 (17 September 2024) diff --git a/docs/contribute/DEV_DOCS.md b/docs/contribute/DEV_DOCS.md index 7ee843bd3..c5ea14db0 100644 --- a/docs/contribute/DEV_DOCS.md +++ b/docs/contribute/DEV_DOCS.md @@ -89,6 +89,8 @@ To compare JSON outputs with stochastic numerical values, use `scripts/diff_json Both tree and JSON comparison scripts rely on [deepdiff](https://deepdiff.readthedocs.io/en/latest/) for underlying comparisons. +When using `diff_jsons.py`'s `--exclude-regex-paths` argument, make sure to escape any special regex characters, in particular square brackets: do this `\['seqid'\]`, not this `['seqid']`. See [#1665](https://github.com/nextstrain/augur/issues/1665) for what happens if you don't. + #### When to use which type of test 1. Unit tests should be used for the [public API](https://docs.nextstrain.org/projects/augur/en/stable/api/public/index.html). 
diff --git a/scripts/diff_jsons.py b/scripts/diff_jsons.py index 4d0ffafb4..946f29665 100644 --- a/scripts/diff_jsons.py +++ b/scripts/diff_jsons.py @@ -3,6 +3,7 @@ import argparse import deepdiff import json +import re from augur.argparse_ import ExtendOverwriteDefault @@ -15,12 +16,22 @@ parser.add_argument("first_json", help="first JSON to compare") parser.add_argument("second_json", help="second JSON to compare") parser.add_argument("--significant-digits", type=int, default=5, help="number of significant digits to use when comparing numeric values") - parser.add_argument("--exclude-paths", nargs="+", action=ExtendOverwriteDefault, help="list of paths to exclude from consideration when performing a diff", default=["root['generated_by']['version']"]) + parser.add_argument("--exclude-paths", nargs="+", action=ExtendOverwriteDefault, help="list of paths to exclude from consideration when performing a diff", default=["root['generated_by']", "root['meta']['updated']"]) parser.add_argument("--exclude-regex-paths", nargs="+", action="extend", help="list of path regular expressions to exclude from consideration when performing a diff") parser.add_argument("--ignore-numeric-type-changes", action="store_true", help="ignore numeric type changes in the diff (e.g., int of 1 to float of 1.0)") args = parser.parse_args() + # Test for most fatal errors in regex path usage + # Exclude regexes should never match `'`, otherwise the diff is always going to pass + for regex in args.exclude_regex_paths or []: + result = re.compile(regex).search("'") + if result is not None: + raise Exception( + f"Exclude regex {regex} matches `'` which means this diff will always pass which is probably not what you want.\n" + "You probably forgot to escape something in your regex. 
See for example: https://stackoverflow.com/a/79173188/7483211" + ) + with open(args.first_json, "r") as fh: first_json = json.load(fh) diff --git a/tests/functional/ancestral/cram/ambiguous-positions.t b/tests/functional/ancestral/cram/ambiguous-positions.t index 0fc7b124e..2912f7561 100644 --- a/tests/functional/ancestral/cram/ambiguous-positions.t +++ b/tests/functional/ancestral/cram/ambiguous-positions.t @@ -36,7 +36,7 @@ Setup $ python3 "$SCRIPTS/diff_jsons.py" \ > expected.json "nt_muts.json" \ - > --exclude-regex-paths "root\['annotations'\]" "root\['generated_by'\]" "root\['reference'\]" + > --exclude-paths "annotations" "generated_by" "reference" {} $ python3 "$SCRIPTS/compare-json-vcf.py" \ diff --git a/tests/functional/ancestral/cram/case-sensitive.t b/tests/functional/ancestral/cram/case-sensitive.t index fbe35df75..6e2d105fe 100644 --- a/tests/functional/ancestral/cram/case-sensitive.t +++ b/tests/functional/ancestral/cram/case-sensitive.t @@ -15,8 +15,7 @@ Change the _reference_ to lowercase $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" \ > "$TESTDIR/../data/simple-genome/nt_muts.ref-seq.json" \ - > "nt_muts.ref-seq.json" \ - > --exclude-paths "root['generated_by']" + > "nt_muts.ref-seq.json" {} @@ -37,6 +36,5 @@ be lowecase which will be compared against the uppercase reference $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" \ > "$TESTDIR/../data/simple-genome/nt_muts.ref-seq.json" \ - > "nt_muts.ref-seq.json" \ - > --exclude-paths "root['generated_by']" + > "nt_muts.ref-seq.json" {} \ No newline at end of file diff --git a/tests/functional/ancestral/cram/general.t b/tests/functional/ancestral/cram/general.t index 9591db36a..6a791b376 100644 --- a/tests/functional/ancestral/cram/general.t +++ b/tests/functional/ancestral/cram/general.t @@ -19,8 +19,7 @@ node-data JSON we diff against. 
$ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" \ > "$TESTDIR/../data/simple-genome/nt_muts.ref-seq.json" \ - > "nt_muts.ref-seq.json" \ - > --exclude-paths "root['generated_by']" + > "nt_muts.ref-seq.json" {} Same as above but without providing a `--root-sequence`. The effect of this on behaviour is: @@ -39,6 +38,5 @@ mutations (as there's nothing to compare the root node to) $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" \ > "$TESTDIR/../data/simple-genome/nt_muts.no-ref-seq.json" \ - > "nt_muts.no-ref-seq.json" \ - > --exclude-paths "root['generated_by']" + > "nt_muts.no-ref-seq.json" {} diff --git a/tests/functional/ancestral/cram/infer-amino-acid-sequences-with-root-sequence.t b/tests/functional/ancestral/cram/infer-amino-acid-sequences-with-root-sequence.t index 0d61ca296..aa60db892 100644 --- a/tests/functional/ancestral/cram/infer-amino-acid-sequences-with-root-sequence.t +++ b/tests/functional/ancestral/cram/infer-amino-acid-sequences-with-root-sequence.t @@ -20,7 +20,7 @@ ancestor). 
Check that the reference length was correctly exported as the nuc annotation $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" \ - > --exclude-regex-paths "['seqid']" -- \ + > --exclude-regex-paths "\['seqid'\]" -- \ > "$TESTDIR/../data/ancestral_mutations_with_root_sequence.json" \ > "$CRAMTMP/$TESTFILE/ancestral_mutations.json" {} diff --git a/tests/functional/ancestral/cram/vcf-multi-allele.t b/tests/functional/ancestral/cram/vcf-multi-allele.t index dfa52a7f4..7423e620f 100644 --- a/tests/functional/ancestral/cram/vcf-multi-allele.t +++ b/tests/functional/ancestral/cram/vcf-multi-allele.t @@ -24,7 +24,7 @@ See for the bug this is testin $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" \ > "$DATA/nt_muts.ref-seq.json" \ > nt_muts.json \ - > --exclude-regex-paths "root\['nodes'\]\['.+'\]\['sequence'\]" "root\['generated_by'\]" + > --exclude-regex-paths "root\['nodes'\]\['.+'\]\['sequence'\]" {'iterable_item_added': {"root['nodes']['sample_B']['muts'][0]": 'A30G'}} $ cat > expected.vcf < "$DATA/nt_muts.ref-seq.json" \ > "nt_muts.vcf-input.ref-seq.json" \ - > --exclude-regex-paths "root\['nodes'\]\['.+'\]\['sequence'\]" "root\['generated_by'\]" + > --exclude-regex-paths "root\['nodes'\]\['.+'\]\['sequence'\]" {} Here's the same mutations as in $DATA/nt_muts.ref-seq.json, diff --git a/tests/functional/ancestral/data/ancestral_mutations_with_root_sequence.json b/tests/functional/ancestral/data/ancestral_mutations_with_root_sequence.json index dbbbaa0c6..a22cd1a7e 100644 --- a/tests/functional/ancestral/data/ancestral_mutations_with_root_sequence.json +++ b/tests/functional/ancestral/data/ancestral_mutations_with_root_sequence.json @@ -23,7 +23,7 @@ }, "generated_by": { "program": "augur", - "version": "22.1.0" + "version": "26.0.0" }, "masknodes": { @@ -224,6 +224,10 @@ "ENV": [], "PRO": [] }, + "aa_sequences": { + "ENV": 
"IRCIGVSNRDFVEGMSGGTWVDVVLEHGGCVTVMAQDKPTVDIELVTTTVSNMAEVRSYCYEASISDMASDSRCPTQGEAYLDKQSDTQYVCKRTLVDRGWGNGCGLFGKGSLVTCAKFACSKKMTGKSIQPENLEYRIMLSVHGSQHSGMIVNDTGHETDENRAKVEITPNSPRAEATLGGFGSLGLDCEPRTGLDFSDLYYLTMNNKHWLVHKEWFHDIPLPWHAGADTGTPHWNNKEALVEFKDAHAKRQTVVVLGSQEGAVHTALAGALEAEMDGAKGRLSSGHLKCRLKMDKLRLKGVSYSLCTAAFTFTKIPAETLHGTVTVEVQYAGTDGPCKVPAQMAVDMQTLTPVGRLITANPVITESTENSKMMLELDPPFGDSYIVIGVGEKKITHHWHRSGSTIGKAFEATVRGAKRMAVLGDTAWDFGSVGGALNSLGKGIHQIFGAAFKSLFGGMSWFSQILIGTLLMWLGLNTKNGSISLMCLALGGVLIFLSTAVSA", + "PRO": "AEVTRRGSAYYMYLDRNDAGEAISFPTTLGMNKCYIQIMDLGHMCDATMSYECPMLDEGVEPDDVDCWCNTTSTWVVYGTCHHKKGEARRSRR" + }, "muts": [ "A765G", "C859T", @@ -384,4 +388,4 @@ "PRO": "AEVTRRGSAYYMYLDRNDAGEAISFPTTLGMNKCYIQIMDLGHMCDATMSYECPMLDEGVEPDDVDCWCNTTSTWVVYGTCHHKKGEARRSRR", "nuc": "GAATCAGACTGCGACAGTTCGAGTTTGAAGCGAAAGCTAGCAACAGTATCAACAGGTTTTATTTGGATTTGGAAACGAGAGTTTCTGGTCATGAAAAACCCAAAAAAGAAATCCGGAGGATTCCGGATTGTCAATATGCTAAAACGCGGAGTAGCCCGTGTGAGCCCCTTTGGGGGCTTGAAGAGGCTGCCAGCCGGACTTCTGCTGGGTCATGGGCCCATCAGGATGGTCTTGGCGATTCTAGCCTTTTTGAGATTCACGGCAATCAAGCCATCACTGGGTCTCATCAATAGATGGGGTTCAGTGGGGAAAAAAGAGGCTATGGAAATAATAAAGAAGTTCAAGAAAGATCTGGCTGCCATGCTGAGAATAATCAATGCTAGGAAGGAGAAGAAGAGACGAGGCGCAGATACTAGTGTCGGAATTGTTGGCCTCCTGCTGACCACAGCTATGGCAGCGGAGGTCACTAGACGTGGGAGTGCATACTATATGTACTTGGACAGAAACGATGCTGGGGAGGCCATATCTTTTCCAACCACATTGGGGATGAATAAGTGTTATATACAGATCATGGATCTTGGACACATGTGTGATGCCACCATGAGCTATGAATGCCCTATGCTGGATGAGGGGGTGGAACCAGATGACGTCGATTGTTGGTGCAACACGACGTCAACTTGGGTTGTGTACGGAACCTGCCATCACAAAAAAGGTGAAGCACGGAGATCTAGAAGAGCTGTGACGCTCCCCTCCCATTCCACTAGAAAGCTGCAAACGCGGTCGCAAACCTGGTTGGAATCAAGAGAATACACAAAGCACTTGATTAGAGTCGAAAATTGGATATTCAGGAACCCTGGCCTCGCGTTAGCAGCAGCTGCCATCGCTTGGCTTTTGGGAAGCTCAACGAGCCAAAAAGTCATATACTTGGTCATGATACTGCTGATTGCCCCGGCATACAGCATCAGGTGCATAGGAGTCAGCAATAGGGACTTTGTGGAAGGTATGTCAGGTGGGACTTGGGTTGATGTTGTCTTGGAACATGGAGGTTGTGTCACCGTAATGGCACAGGACAAACCGACTGTCGACATAGAGCTGGTTACAACAACAGTCAGCAACATGGCGGAGGTAAGATCCTACTGCTATGAGGCATCAATATCAGACATGGCTTCGGACAGCCGCTGCCCAACACAAGGTGAAGCCTACCTTGACAAGCAATCAGACA
CTCAATATGTCTGCAAAAGAACGTTAGTGGACAGAGGCTGGGGAAATGGATGTGGACTTTTTGGCAAAGGGAGCCTGGTGACATGCGCTAAGTTTGCATGCTCCAAGAAAATGACCGGGAAGAGCATCCAGCCAGAGAATCTGGAGTACCGGATAATGCTGTCAGTTCATGGCTCCCAGCACAGTGGGATGATCGTTAATGACACAGGACATGAAACTGATGAGAATAGAGCGAAGGTTGAGATAACGCCCAATTCACCAAGAGCCGAAGCCACCCTGGGGGGTTTTGGAAGCCTAGGACTTGATTGTGAACCGAGGACAGGCCTTGACTTTTCAGATTTGTATTACTTGACTATGAATAACAAGCACTGGTTGGTTCACAAGGAGTGGTTCCACGACATTCCATTACCTTGGCACGCTGGGGCAGACACCGGAACTCCACACTGGAACAACAAAGAAGCACTGGTAGAGTTCAAGGACGCACATGCCAAAAGGCAAACTGTCGTGGTTCTAGGGAGTCAAGAAGGAGCAGTTCACACGGCCCTTGCTGGAGCTCTGGAGGCTGAGATGGATGGTGCAAAGGGAAGGCTGTCCTCTGGCCACTTGAAATGTCGCCTGAAAATGGATAAACTTAGATTGAAGGGCGTGTCATACTCCTTGTGTACCGCAGCGTTCACATTCACCAAGATCCCGGCTGAAACACTGCACGGGACAGTCACAGTGGAGGTACAGTACGCAGGGACAGATGGACCTTGCAAGGTTCCAGCTCAGATGGCGGTGGACATGCAAACTCTGACCCCAGTTGGGAGGTTGATAACCGCTAACCCCGTAATCACTGAAAGCACTGAGAACTCTAAGATGATGCTGGAACTTGATCCACCATTTGGGGACTCTTACATTGTCATAGGAGTCGGGGAGAAGAAGATCACCCACCACTGGCACAGGAGTGGCAGCACCATTGGAAAAGCATTTGAAGCCACTGTGAGAGGTGCCAAGAGAATGGCAGTCTTGGGAGACACAGCCTGGGACTTTGGATCAGTTGGAGGCGCTCTCAACTCATTGGGCAAGGGCATCCATCAAATTTTTGGAGCAGCTTTCAAATCATTGTTTGGAGGAATGTCCTGGTTCTCACAAATTCTCATTGGAACGTTGCTGATGTGGTTGGGTCTGAACACAAAGAATGGATCTATTTCCCTTATGTGCTTGGCCTTAGGGGGAGTGTTGATCTTCTTATCCACAGCCGTCTCTGCTGATGTGGGGTGCTCGGTGGACTTCTCAAAGAAGGAGACGAGATGCGGTACAGGGGTGTTCGTCTATAACGACGTTGAAGCCTGGAGGGACAGGTACAAGTACCATCCTGACTCCCCCCGTAGATTGGCAGCAGCAGTCAAGCAAGCCTGGGAAGATGGTATCTGTGGGATCTCCTCTGTTTCAAGAATGGAAAACATCATGTGGAGATCAGTAGAAGGGGAGCTCAACGCAATCCTGGAAGAGAATGGAGTTCAACTGACGGTCGTTGTGGGATCTGTAAAAAACCCCATGTGGAGAGGTCCACAGAGATTGCCCGTGCCTGTGAACGAGCTGCCCCACGGCTGGAAGGCTTGGGGGAAATCGTACTTCGTCAGAGCAGCAAAGACAAATAACAGCTTTGTCGTGGATGGTGACACACTGAAGGAATGCCCACTCGAACATAGAGCATGGAACAGCTTTCTTGTGGAGGATCATGGGTTCGGGGTATTTCACACTAGTGTCTGGCTCAAGGTTAGAGAAGATTATTCATTAGAGTGTGATCCAGCCGTTATTGGAACAGCTGTTAAGGGAAAGGAGGCTGTACACAGTGATCTAGGCTACTGGATTGAGAGTGAGAAGAATGACACATGGAGGCTGAAGAGGGCCCATCTGATCGAGATGAAAACATGTGAATGGCCAAAGTCCCACACATTGTGGACAGATGGAATAGAAGAGAGTGATCTGATCATACCCAAGTCTTTAGCTGGGCCA
CTCAGCCATCACAATACCAGAGAGGGCTACAGGACCCAAATGAAAGGGCCATGGCACAGTGAAGAGCTTGAAATTCGGTTTGAGGAATGCCCAGGCACTAAGGTCCACGTGGAGGAAACATGTGGAACAAGAGGACCATCTCTGAGATCAACCACTGCAAGCGGAAGGGTGATCGAGGAATGGTGCTGCAGGGAGTGCACAATGCCCCCACTGTCGTTCCGGGCTAAAGATGGCTGTTGGTATGGAATGGAGATAAGGCCCAGGAAAGAACCAGAAAGTAACTTAGTAAGGTCAATGGTGACTGCAGGATCAACTGATCACATGGATCACTTCTCCCTTGGAGTGCTTGTGATTCTGCTCATGGTGCAGGAAGGGCTGAAGAAGAGAATGACCACAAAGATCATCATAAGCACATCAATGGCAGTGCTGGTAGCTATGATCCTGGGAGGATTTTCAATGAGTGACCTGGCTAAGCTTGCAATTTTGATGGGTGCCACCTTCGCGGAAATGAACACTGGAGGAGATGTAGCTCATCTGGCGCTGATAGCGGCATTCAAAGTCAGACCAGCGTTGCTGGTATCTTTCATCTTCAGAGCTAATTGGACACCCCGTGAAAGCATGCTGCTGGCCTTGGCCTCGTGTCTTTTGCAAACTGCGATCTCCGCCTTGGAAGGCGACCTGATGGTTCTCATCAATGGTTTTGCTTTGGCCTGGTTGGCAATACGAGCGATGGTTGTTCCACGCACTGATAACATCACCTTGGCAATCCTGGCTGCTCTGACACCACTGGCCCGGGGCACACTGCTTGTGGCGTGGAGAGCAGGCCTTGCTACTTGCGGGGGGTTTATGCTCCTCTCTCTGAAGGGAAAAGGCAGTGTGAAGAAGAACTTACCATTTGTCATGGCCCTGGGACTAACCGCTGTGAGGCTGGTCGACCCCATCAACGTGGTGGGACTGCTGTTGCTCACAAGGAGTGGGAAGCGGAGCTGGCCCCCTAGCGAAGTACTCACAGCTGTTGGCCTGATATGCGCATTGGCTGGAGGGTTCGCCAAGGCAGATATAGAGATGGCTGGGCCCATGGCCGCGGTCGGTCTGCTAATTGTCAGTTACGTGGTCTCAGGAAAGAGTGTGGACATGTACATTGAAAGAGCAGGTGACATCACATGGGAAAAAGATGCGGAAGTCACTGGAAACAGTCCCCGGCTCGATGTGGCGCTAGATGAGAGTGGTGATTTCTCCCTGGTGGAGGATGACGGTCCCCCCATGAGAGAGATCATACTCAAGGTGGTCCTGATGACCATCTGTGGCATGAACCCAATAGCCATACCCTTTGCAGCTGGAGCGTGGTACGTATACGTGAAGACTGGAAAAAGGAGTGGTGCTCTATGGGATGTGCCTGCTCCCAAGGAAGTAAAAAAGGGGGAGACCACAGATGGAGTGTACAGAGTAATGACTCGTAGACTGCTAGGTTCAACACAAGTTGGAGTGGGAGTTATGCAAGAGGGGGTCTTTCACACTATGTGGCACGTCACAAAAGGATCCGCGCTGAGAAGCGGTGAAGGGAGACTTGATCCATACTGGGGAGATGTCAAGCAGGATCTGGTGTCATACTGTGGTCCATGGAAGCTAGATGCCGCCTGGGACGGGCACAGCGAGGTGCAGCTCTTGGCCGTGCCCCCCGGAGAGAGAGCGAGGAACATCCAGACTCTGCCCGGAATATTTAAGACAAAGGATGGGGACATTGGAGCGGTTGCGCTGGATTACCCAGCAGGAACTTCAGGATCTCCAATCCTAGACAAGTGTGGGAGAGTGATAGGACTTTATGGCAATGGGGTCGTGATCAAAAATGGGAGTTATGTTAGTGCCATCACCCAAGGGAGGAGGGAGGAAGAGACTCCTGTTGAGTGCTTCGAGCCTTCGATGCTGAAGAAGAAGCAGCTAACTGTCTTAGACTTGCATCCTGGAGCTGGGAAAACCAGGAGAGTTCTTCCTGAAATAGT
CCGTGAAGCCATAAAAACAAGACTCCGTACTGTGATCTTAGCTCCAACCAGGGTTGTCGCTGCTGAAATGGAGGAAGCCCTTAGAGGGCTTCCAGTGCGTTATATGACAACAGCAGTCAATGTCACCCACTCTGGAACAGAAATCGTCGACTTAATGTGCCATGCCACCTTCACTTCACGTCTACTACAGCCAATCAGAGTCCCCAACTATAATCTGTATATTATGGATGAGGCCCACTTCACAGATCCCTCAAGTATAGCAGCAAGAGGATACATTTCAACAAGGGTTGAGATGGGCGAGGCGGCTGCCATCTTCATGACCGCCACGCCACCAGGAACCCGTGACGCATTTCCGGACTCCAACTCACCAATTATGGACACCGAAGTGGAAGTCCCAGAGAGAGCCTGGAGCTCAGGCTTTGATTGGGTGACGGATCATTCTGGAAAAACAGTTTGGTTTGTTCCAAGCGTGAGGAACGGCAATGAGATCGCAGCTTGTCTGACAAAGGCTGGAAAACGGGTCATACAGCTCAGCAGAAAGACTTTTGAGACAGAGTTCCAGAAAACAAAACATCAAGAGTGGGACTTTGTCGTGACAACTGACATTTCAGAGATGGGCGCCAACTTTAAAGCTGACCGTGTCATAGATTCCAGGAGATGCCTAAAGCCGGTCATACTTGATGGCGAGAGAGTCATTCTGGCTGGACCCATGCCTGTCACACATGCCAGCGCTGCCCAGAGGAGGGGGCGCATAGGCAGGAATCCCAACAAACCTGGAGATGAGTATCTGTATGGAGGTGGGTGCGCAGAGACTGACGAAGACCATGCACACTGGCTTGAAGCAAGAATGCTCCTTGACAATATTTACCTCCAAGATGGCCTCATAGCCTCGCTCTATCGACCTGAGGCCGACAAAGTAGCAGCCATTGAGGGAGAGTTCAAGCTTAGGACGGAGCAAAGGAAGACCTTTGTGGAACTCATGAAAAGAGGAGATCTTCCTGTTTGGCTGGCCTATCAGGTTGCATCTGCCGGAATAACCTACACAGATAGAAGATGGTGCTTTGATGGCACGACCAACAACACCATAATGGAAGACAGTGTGCCGGCAGAGGTGTGGACCAGACACGGAGAGAAAAGAGTGCTCAAACCGAGGTGGATGGACGCCAGAGTTTGTTCAGATCATGCGGCCCTGAAGTCATTCAAGGAGTTTGCCGCTGGGAAAAGAGGAGCGGCTTTTGGAGTGATGGAAGCCCTGGGAACACTGCCAGGACACATGACAGAGAGATTCCAGGAAGCCATTGACAACCTCGCTGTGCTCATGCGGGCAGAGACTGGAAGCAGGCCTTACAAAGCCGCGGCGGCCCAATTGCCGGAGACCCTAGAGACCATTATGCTTTTGGGGTTGCTGGGAACAGTCTCGCTGGGAATCTTTTTCGTCTTGATGAGGAACAAGGGCATAGGGAAGATGGGCTTTGGAATGGTGACTCTTGGGGCCAGCGCATGGCTCATGTGGCTCTCGGAAATTGAGCCAGCCAGAATTGCATGTGTCCTCATTGTTGTGTTCCTATTGCTGGTGGTGCTCATACCTGAGCCAGAAAAGCAAAGATCTCCCCAGGACAACCAAATGGCAATCATCATCATGGTAGCAGTAGGTCTTCTGGGCTTGATTACCGCCAATGAACTCGGATGGTTGGAGAGAACAAAGAGTGACCTAAGCCATCTAATGGGAAGGAGAGAGGAGGGGGCAACCATAGGATTCTCAATGGACATTGACCTGCGGCCAGCCTCAGCTTGGGCCATCTATGCTGCCTTGACAACTTTCATTACCCCAGCCGTCCAACATGCAGTGACCACTTCATACAACAACTACTCCTTAATGGCGATGGCCACGCAAGCTGGAGTGTTGTTTGGTATGGGCAAAGGGATGCCATTCTACGCATGGGACTTTGGAGTCCCGCTGCTAATGATAGGTTGCTACTCACAATTAACACCCCTGACCC
TAATAGTGGCCATCATTTTGCTCGTGGCGCACTACATGTACTTGATCCCAGGGCTGCAGGCAGCAGCTGCGCGTGCTGCCCAGAAGAGAACGGCAGCTGGCATCATGAAGAACCCTGTTGTGGATGGAATAGTGGTGACTGACATTGACACAATGACAATTGACCCCCAAGTGGAGAAAAAGATGGGACAGGTGCTACTCATAGCAGTAGCCGTCTCCAGCGCCATACTGTCGCGGACCGCCTGGGGGTGGGGGGAGGCTGGGGCCCTGATCACAGCTGCAACTTCCACTTTGTGGGAAGGCTCTCCGAACAAGTACTGGAACTCCTCTACAGCCACTTCACTGTGTAACATTTTTAGGGGAAGTTACTTGGCTGGAGCTTCTCTAATCTACACAGTAACAAGAAACGCTGGCTTGGTCAAGAGACGTGGGGGTGGAACAGGAGAGACCCTGGGAGAGAAATGGAAGGCCCGCTTGAACCAGATGTCGGCCCTGGAGTTCTACTCCTACAAAAAGTCAGGCATCACCGAGGTGTGCAGAGAAGAGGCCCGCCGCGCCCTCAAGGACGGTGTGGCAACGGGAGGCCATGCTGTGTCCCGAGGAAGTGCAAAGCTGAGATGGTTGGTGGAGCGGGGATACCTGCAGCCCTATGGAAAGGTCATTGATCTTGGATGTGGCAGAGGGGGCTGGAGTTACTACGCCGCCACCATCCGCAAAGTTCAAGAAGTGAAAGGATACACAAAAGGAGGCCCTGGTCATGAAGAACCCATGTTGGTGCAAAGCTATGGGTGGAACATAGTCCGTCTTAAGAGTGGGGTGGACGTCTTTCATATGGCGGCTGAGCCGTGTGACACGTTGCTGTGTGACATAGGTGAGTCATCATCTAGTCCTGAAGTGGAAGAAGCACGGACGCTCAGAGTCCTCTCCATGGTGGGGGATTGGCTTGAAAAAAGACCAGGAGCCTTTTGTATAAAAGTGTTGTGCCCATACACCAGCACTATGATGGAAACCCTGGAGCGACTGCAGCGTAGGTATGGGGGAGGACTGGTCAGAGTGCCACTCTCCCGCAACTCTACACATGAGATGTACTGGGTCTCTGGAGCGAAAAGCAACACCATAAAAAGTGTGTCCACCACGAGCCAGCTCCTCTTGGGGCGCATGGACGGGCCCAGGAGGCCAGTGAAATATGAGGAGGATGTGAATCTCGGCTCTGGCACGCGGGCTGTGGTAAGCTGCGCTGAAGCTCCCAACATGAAGATCATTGGTAACCGCATTGAAAGGATCCGCAGTGAGCACGCGGAAACGTGGTTCTTTGACGAGAACCACCCATATAGGACATGGGCTTACCATGGAAGCTATGAGGCCCCCACACAAGGGTCAGCGTCCTCTCTAATAAACGGGGTTGTCAGGCTCCTGTCAAAACCCTGGGATGTGGTGACTGGAGTCACAGGAATAGCCATGACCGACACCACACCGTATGGTCAGCAAAGAGTTTTCAAGGAAAAAGTGGACACTAGGGTGCCAGACCCCCAAGAAGGCACTCGTCAGGTTATGAGCATGGTCTCTTCCTGGTTGTGGAAAGAGCTAGGCAAACACAAACGGCCACGAGTCTGTACCAAAGAAGAGTTCATCAACAAGGTTCGTAGCAATGCAGCATTAGGGGCAATATTTGAAGAGGAAAAAGAGTGGAAGACTGCAGTGGAAGCTGTGAACGATCCAAGGTTCTGGGCTCTAGTGGACAAGGAAAGAGAGCACCACCTGAGAGGAGAGTGCCAGAGTTGTGTGTACAACATGATGGGAAAAAGAGAAAAGAAACAAGGGGAATTTGGAAAGGCCAAGGGCAGCCGCGCCATCTGGTATATGTGGCTAGGGGCTAGATTTCTAGAGTTCGAAGCCCTTGGATTCTTGAACGAGGATCACTGGATGGGGAGAGAGAACTCAGGAGGTGGTGTTGAAGGGCTGGGATTACAAAGACTCGGATATGTCCTAGAAGAGATGAGT
CGCATACCAGGAGGAAGGATGTATGCAGATGACACTGCTGGCTGGGACACCCGCATCAGCAGGTTTGATCTGGAGAATGAAGCTCTAATCACCAACCAAATGGAGAAAGGGCACAGGGCCTTGGCATTGGCCATAATCAAGTACACATACCAAAACAAAGTGGTAAAGGTCCTTAGACCAGCTGAAAAAGGGAAGACAGTTATGGACATTATTTCGAGACAAGACCAAAGGGGGAGCGGACAAGTTGTCACTTACGCTCTTAACACATTTACCAACCTAGTGGTGCAACTCATTCGGAATATGGAGGCTGAGGAAGTTCTAGAGATGCAAGACTTGTGGCTGCTGCGGAGGTCAGAGAAAGTGACCAACTGGTTGCAGAGCAACGGATGGGATAGGCTCAAACGAATGGCAGTCAGTGGAGATGATTGCGTTGTGAAGCCAATTGATGATAGGTTTGCACATGCCCTCAGGTTCTTGAATGATATGGGAAAAGTTAGGAAGGACACACAAGAGTGGAAACCCTCAACTGGATGGGACAACTGGGAAGAAGTTCCGTTTTGCTCCCACCACTTCAACAAGCTCCATCTCAAGGACGGGAGGTCCATTGTGGTTCCCTGCCGCCACCAAGATGAACTGATTGGCCGGGCCCGCGTCTCTCCAGGGGCGGGATGGAGCATCCGGGAGACTGCTTGCCTAGCAAAATCATATGCGCAAATGTGGCAGCTCCTTTATTTCCACAGAAGGGACCTCCGACTGATGGCCAATGCCATTTGTTCATCTGTGCCAGTTGACTGGGTTCCAACTGGGAGAACTACCTGGTCAATCCATGGAAAGGGAGAATGGATGACCACTGAAGACATGCTTGTGGTGTGGAACAGAGTGTGGATTGAGGAGAACGACCACATGGAAGACAAGACCCCAGTTACGAAATGGACAGACATTCCCTATTTGGGAAAAAGGGAAGACTTGTGGTGTGGATCTCTCATAGGGCACAGACCGCGCACCACCTGGGCTGAGAACATTAAAAACACAGTCAACATGGTGCGCAGGATCATAGGTGATGAAGAAAAGTACATGGACTACCTATCCACCCAAGTTCGCTACTTGGGTGAAGAAGGGTCTACACCTGGAGTGCTGTAAGCACCAATCTTAGTGTTGTCAGGCCTGCTAGTCAGCCACAGCTTGGGGAAAGCTGTGCAGCCTGTGACCCCCCCAGGAGAAGCTGGGAAACCAAGCCTATAGTCAGGCCGAGAACGCCATGGCACGGAAGAAGCCATGCTGCCTGTGAGCCCCTCAGAGGACACTGAGTCAAAAAACCCCACGCGCTTGGAGGCGCAGGATGGGAAAAGAAGGTGGCGACCTTCCCCACCCTTCAATCTGGGGCCTGAACTGGAGATCAGCTGTGGATCTCCAGAAGAGGGACTAGTGGTTAGAGGAGACCCCCCGGAAAACGCAAAACAGCATATTGACGCTGGGAAAGACCAGAGACTCCATGAGTTTCCACCACGCTGGCCGCCAGGCACAGATCGCCGAATAGCGGCGGCCGG" } -} \ No newline at end of file +} diff --git a/tests/functional/clades/cram/membership-and-label.t b/tests/functional/clades/cram/membership-and-label.t index bd978fb77..36b593776 100644 --- a/tests/functional/clades/cram/membership-and-label.t +++ b/tests/functional/clades/cram/membership-and-label.t @@ -13,6 +13,5 @@ Test custom membership key + label key. 
The only change should be the key names $ cat clades_custom.json | sed "s/lineage/clade_membership/" | sed "s/origin/clade/" > clades_sed.json - $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" "$TESTDIR/../data/clades.json" clades_sed.json \ - > --exclude-paths "root['generated_by']" + $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" "$TESTDIR/../data/clades.json" clades_sed.json {} diff --git a/tests/functional/clades/cram/no-label.t b/tests/functional/clades/cram/no-label.t index c3ff5e9c6..2153df9fc 100644 --- a/tests/functional/clades/cram/no-label.t +++ b/tests/functional/clades/cram/no-label.t @@ -11,6 +11,5 @@ Test the ability to _not_ export a branch label (same logic as not exporting the > --label-name none \ > --output-node-data clades_no-labels.json &>/dev/null - $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" "$TESTDIR/../data/clades.json" clades_no-labels.json \ - > --exclude-paths "root['generated_by']" + $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" "$TESTDIR/../data/clades.json" clades_no-labels.json {'dictionary_item_removed': [root['branches']]} diff --git a/tests/functional/clades/cram/root-clade-identification.t b/tests/functional/clades/cram/root-clade-identification.t index 98f965cd5..ca9d823c5 100644 --- a/tests/functional/clades/cram/root-clade-identification.t +++ b/tests/functional/clades/cram/root-clade-identification.t @@ -12,8 +12,7 @@ This is an oversight and ideally would be fixed > --clades "$TESTDIR/../data/toy_clades_nuc.tsv" \ > --output-node-data toy_clades_1.json &>/dev/null - $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" "$TESTDIR/../data/toy_clades_1.json" toy_clades_1.json \ - > --exclude-paths "root['generated_by']" + $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" "$TESTDIR/../data/toy_clades_1.json" toy_clades_1.json {} A clade which exists at the root is identified (and correctly propogated) if the root sequence @@ -25,8 +24,7 @@ is explicitly set. 
> --clades "$TESTDIR/../data/toy_clades_nuc.tsv" \ > --output-node-data toy_clades_2a.json &>/dev/null - $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" "$TESTDIR/../data/toy_clades_2.json" toy_clades_2a.json \ - > --exclude-paths "root['generated_by']" + $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" "$TESTDIR/../data/toy_clades_2.json" toy_clades_2a.json {} A clade which exists at the root is identified (and correctly propogated) without a root sequence @@ -38,6 +36,5 @@ if the (branch leading to the) root has the clade-defining mutation. > --clades "$TESTDIR/../data/toy_clades_nuc.tsv" \ > --output-node-data toy_clades_2b.json &>/dev/null - $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" "$TESTDIR/../data/toy_clades_2.json" toy_clades_2b.json \ - > --exclude-paths "root['generated_by']" + $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" "$TESTDIR/../data/toy_clades_2.json" toy_clades_2b.json {} diff --git a/tests/functional/refine/cram/not-timetree-mutations-per-site.t b/tests/functional/refine/cram/not-timetree-mutations-per-site.t index faa08fceb..e3ab1337d 100644 --- a/tests/functional/refine/cram/not-timetree-mutations-per-site.t +++ b/tests/functional/refine/cram/not-timetree-mutations-per-site.t @@ -28,5 +28,5 @@ Confirm that trees match expected topology and branch lengths, given that the ou > "$TESTDIR/../data/mutations_per_site_branch_lengths.json" \ > branch_lengths.json \ > --significant-digits 0 \ - > --exclude-paths "root['generated_by']['version']" "root['input_tree']" + > --exclude-paths "generated_by" "input_tree" {} diff --git a/tests/functional/refine/cram/not-timetree-mutations.t b/tests/functional/refine/cram/not-timetree-mutations.t index dad22241b..3ccbcd3ee 100644 --- a/tests/functional/refine/cram/not-timetree-mutations.t +++ b/tests/functional/refine/cram/not-timetree-mutations.t @@ -29,5 +29,5 @@ Confirm that trees match expected topology and branch lengths, given that the ou > 
"$TESTDIR/../data/integer_branch_lengths.json" \ > branch_lengths.json \ > --significant-digits 0 \ - > --exclude-paths "root['generated_by']['version']" "root['input_tree']" "root['alignment']" + > --exclude-paths "generated_by" "input_tree" "alignment" {} diff --git a/tests/functional/translate/cram/general.t b/tests/functional/translate/cram/general.t index ff62a2c7e..01a085870 100644 --- a/tests/functional/translate/cram/general.t +++ b/tests/functional/translate/cram/general.t @@ -18,7 +18,7 @@ which validate the output will fail as it's missing a 'nuc' annotation. $ python3 "$SCRIPTS/diff_jsons.py" \ > "$DATA/aa_muts.json" \ > "aa_muts.json" \ - > --exclude-regex-paths "root\['annotations'\]\['.+'\]\['seqid'\]" "root['meta']['updated']" + > --exclude-regex-paths "root\['annotations'\]\['.+'\]\['seqid'\]" {} Same as above but using a GenBank file. This changes the 'type' of the annotations, @@ -33,5 +33,5 @@ but this is irrelevant for Auspice's use and simply reflects the reference sourc $ python3 "$SCRIPTS/diff_jsons.py" \ > "$DATA/aa_muts.json" \ > "aa_muts.genbank.json" \ - > --exclude-regex-paths "root\['annotations'\]\['.+'\]\['seqid'\]" "root\['annotations'\]\['.+'\]\['type'\]" "root['meta']['updated']" + > --exclude-regex-paths "root\['annotations'\]\['.+'\]\['(seqid|type)'\]" {} diff --git a/tests/functional/translate/cram/genes.t b/tests/functional/translate/cram/genes.t index 85ed36c3f..c36c25469 100644 --- a/tests/functional/translate/cram/genes.t +++ b/tests/functional/translate/cram/genes.t @@ -21,9 +21,9 @@ as a feature ('nuc' in this case) amino acid mutations written to .+ (re) $ python3 "$SCRIPTS/diff_jsons.py" \ - > "$DATA/aa_muts.json" \ - > "aa_muts.genes-args.json" \ - > --exclude-regex-paths "seqid" "gene1" "root['meta']['updated']" + > "$DATA/aa_muts.json" \ + > "aa_muts.genes-args.json" \ + > --exclude-regex-paths "seqid" "gene1" {} Using a text file rather than command line arguments @@ -43,7 +43,6 @@ Using a text file rather than 
command line arguments amino acid mutations written to .+ (re) $ python3 "$SCRIPTS/diff_jsons.py" \ - > "aa_muts.genes-args.json" \ - > "aa_muts.genes-txt.json" \ - > --exclude-paths "root['meta']['updated']" + > "aa_muts.genes-args.json" \ + > "aa_muts.genes-txt.json" {} diff --git a/tests/functional/translate/cram/gff.t b/tests/functional/translate/cram/gff.t index 85bba041d..33a4d20af 100644 --- a/tests/functional/translate/cram/gff.t +++ b/tests/functional/translate/cram/gff.t @@ -77,7 +77,7 @@ GFF file with 'region' removed, so the only genome information is the ##sequence $ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" \ > "$DATA/aa_muts.json" \ > "aa_muts.pragma-only.json" \ - > --exclude-regex-paths "root\['annotations'\]\['.+'\]\['seqid'\]" "root['meta']['updated']" + > --exclude-regex-paths "root\['annotations'\]\['.+'\]\['seqid'\]" {} GFF file with no genome coordinate information diff --git a/tests/functional/translate/cram/root-mutations.t b/tests/functional/translate/cram/root-mutations.t index de9c0f55d..2a8970a70 100644 --- a/tests/functional/translate/cram/root-mutations.t +++ b/tests/functional/translate/cram/root-mutations.t @@ -27,5 +27,5 @@ is unchanged (MPCG*). 
There is also a mutation E4G at the root node to compensat $ python3 "$SCRIPTS/diff_jsons.py" \ > "$DATA/aa_muts.json" \ > "aa_muts.json" \ - > --exclude-regex-paths "root\['annotations'\]\['.+'\]\['seqid'\]" "root['meta']['updated']" + > --exclude-regex-paths "root\['annotations'\]\['.+'\]\['seqid'\]" {'values_changed': {"root['reference']['gene1']": {'new_value': 'MPCE*', 'old_value': 'MPCG*'}}, 'iterable_item_added': {"root['nodes']['node_root']['aa_muts']['gene1'][0]": 'E4G'}} \ No newline at end of file diff --git a/tests/functional/translate/cram/translate-with-genbank.t b/tests/functional/translate/cram/translate-with-genbank.t index 262978a01..31fefa4ad 100644 --- a/tests/functional/translate/cram/translate-with-genbank.t +++ b/tests/functional/translate/cram/translate-with-genbank.t @@ -18,5 +18,5 @@ Translate amino acids for genes using a GenBank file. amino acid mutations written to .* (re) $ python3 "$SCRIPTS/diff_jsons.py" $DATA/zika/aa_muts_genbank.json aa_muts.json \ - > --exclude-regex-paths "root\['annotations'\]\['.+'\]\['seqid'\]" "root['meta']['updated']" + > --exclude-regex-paths "root\['annotations'\]\['.+'\]\['seqid'\]" {} diff --git a/tests/functional/translate/cram/translate-with-gff-and-gene-name.t b/tests/functional/translate/cram/translate-with-gff-and-gene-name.t index 9bb18ded7..06d8fe6a7 100644 --- a/tests/functional/translate/cram/translate-with-gff-and-gene-name.t +++ b/tests/functional/translate/cram/translate-with-gff-and-gene-name.t @@ -26,7 +26,7 @@ Other than the sequence ids which will include a temporary path, the JSONs should be identical. 
$ python3 "${SCRIPTS}/diff_jsons.py" \ - > --exclude-regex-paths "['seqid']" -- \ + > --exclude-regex-paths "\['seqid'\]" -- \ > "${DATA}/zika/aa_muts_gff.json" \ > aa_muts.json {} diff --git a/tests/functional/translate/cram/translate-with-gff-and-gene.t b/tests/functional/translate/cram/translate-with-gff-and-gene.t index 33ef6a3b3..5c8bbb4d3 100644 --- a/tests/functional/translate/cram/translate-with-gff-and-gene.t +++ b/tests/functional/translate/cram/translate-with-gff-and-gene.t @@ -23,7 +23,7 @@ Translate amino acids for genes using a GFF3 file where the gene names are store amino acid mutations written to .* (re) $ python3 "${SCRIPTS}/diff_jsons.py" \ - > --exclude-regex-paths "['seqid']" -- \ + > --exclude-regex-paths "\['seqid'\]" -- \ > "${DATA}/zika/aa_muts_gff.json" \ > aa_muts.json {} diff --git a/tests/functional/translate/cram/translate-with-gff-and-locus-tag.t b/tests/functional/translate/cram/translate-with-gff-and-locus-tag.t index 54ffde7a8..d4871ee07 100644 --- a/tests/functional/translate/cram/translate-with-gff-and-locus-tag.t +++ b/tests/functional/translate/cram/translate-with-gff-and-locus-tag.t @@ -23,7 +23,7 @@ This is an identical test setup as `translate-with-gff-and-gene.t` but using loc amino acid mutations written to .* (re) $ python3 "${SCRIPTS}/diff_jsons.py" \ - > --exclude-regex-paths "['seqid']" -- \ + > --exclude-regex-paths "\['seqid'\]" -- \ > "${DATA}/zika/aa_muts_gff.json" \ > aa_muts.json {} diff --git a/tests/functional/translate/cram/vcf-with-root-mutation.t b/tests/functional/translate/cram/vcf-with-root-mutation.t index 36b4a4f65..fcb3d5c3f 100644 --- a/tests/functional/translate/cram/vcf-with-root-mutation.t +++ b/tests/functional/translate/cram/vcf-with-root-mutation.t @@ -43,5 +43,5 @@ node is E (and so are all the other nodes)) $ python3 "$SCRIPTS/diff_jsons.py" \ > aa_muts.truth.json \ > aa_muts.json \ - > --exclude-regex-paths "root\['annotations'\]\['.+'\]\['seqid'\]" "root['meta']['updated']" + > 
--exclude-regex-paths "root\['annotations'\]\['.+'\]\['seqid'\]" {} \ No newline at end of file diff --git a/tests/functional/translate/cram/vcf.t b/tests/functional/translate/cram/vcf.t index 07563a635..861480a16 100644 --- a/tests/functional/translate/cram/vcf.t +++ b/tests/functional/translate/cram/vcf.t @@ -26,7 +26,7 @@ Setup $ python3 "$SCRIPTS/diff_jsons.py" \ > "$DATA/aa_muts.json" \ > aa_muts.json \ - > --exclude-regex-paths "root\['annotations'\]\['.+'\]\['seqid'\]" "root['meta']['updated']" + > --exclude-regex-paths "root\['annotations'\]\['.+'\]\['seqid'\]" {} ------------------------------ MISSING TEST ---------------------------------- diff --git a/tests/functional/translate/data/zika/aa_muts_gff.json b/tests/functional/translate/data/zika/aa_muts_gff.json index 1f3fdd38a..ff91ba545 100644 --- a/tests/functional/translate/data/zika/aa_muts_gff.json +++ b/tests/functional/translate/data/zika/aa_muts_gff.json @@ -13,11 +13,18 @@ "start": 457, "strand": "+", "type": "gene" + }, + "nuc": { + "end": 10769, + "seqid": "genemap.gff", + "start": 1, + "strand": "+", + "type": "##sequence-region pragma" } }, "generated_by": { "program": "augur", - "version": "16.0.3" + "version": "26.0.0" }, "nodes": { "BRA/2016/FC_6706": { @@ -91,7 +98,10 @@ } }, "NODE_0000006": { - "aa_muts": {}, + "aa_muts": { + "CA": [], + "PRO": [] + }, "aa_sequences": { "CA": "MKNPKKKSGGFRIVNMLKRGVARVSPFGGLKRLPAGLLLGHGPIRMVLAILAFLRFTAIKPSLGLINRWGSVGKKEAMEIIKKFKKDLAAMLRIINARKEKKRRGADTSVGIVGLLLTTAMA", "PRO": "AEVTRRGSAYYMYLDRNDAGEAISFPTTLGMNKCYIQIMDLGHMCDATMSYECPMLDEGVEPDDVDCWCNTTSTWVVYGTCHHKKGEARRSRR"