Skip to content

Commit a30b7f6

Browse files
authored
Merge pull request #1926 from nextstrain/export-url
export: support export of URL for non-string values
2 parents e452789 + b89691f commit a30b7f6

File tree

4 files changed: +50 -47 lines changed

CHANGES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
 ### Bug fixes

 * filter: Previously, `--query`, `--exclude-where`, and `--include-where` did not work for the id column (`strain`, `name`, or other from `--metadata-id-columns`). This has been fixed. [#1915][] (@corneliusroemer, @victorlin)
+* export v2: Support export of URLs for non-string values. [#1926][] (@joverlee521)

 [#1915]: https://github.com/nextstrain/augur/issues/1915
 [#1917]: https://github.com/nextstrain/augur/pull/1917
+[#1926]: https://github.com/nextstrain/augur/pull/1926

 ## 32.0.0 (21 October 2025)

augur/export_v2.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -772,7 +772,7 @@ def _transfer_additional_metadata_columns(node, raw_data):
772772
for col in additional_metadata_columns:
773773
if is_valid(value:=raw_data.get(col, None)):
774774
node["node_attrs"][col] = {"value": value}
775-
if type(value) is str and valid_url(url:=raw_data.get(url_name(col), None)):
775+
if valid_url(url:=raw_data.get(url_name(col), None)):
776776
node["node_attrs"][col]['url'] = url
777777

778778
def _transfer_vaccine_info(node, raw_data):
@@ -820,12 +820,11 @@ def _transfer_colorings_filters(node, raw_data):
820820
for key in trait_keys:
821821
value = raw_data.get(key, None)
822822
if is_valid(value):
823-
if is_numeric(value):
824-
node["node_attrs"][key] = {"value": format_number(value)}
825-
else:
826-
node["node_attrs"][key] = {"value": value}
827-
if valid_url(url:=raw_data.get(url_name(key), None)):
828-
node["node_attrs"][key]['url'] = url
823+
node["node_attrs"][key] = {"value": format_number(value) if is_numeric(value) else value}
824+
825+
if valid_url(url:=raw_data.get(url_name(key), None)):
826+
node["node_attrs"][key]['url'] = url
827+
829828
node["node_attrs"][key].update(attr_confidence(node["name"], raw_data, key))
830829

831830
def _transfer_author_data(node):

tests/functional/export_v2/cram/metadata-urls.t

Lines changed: 21 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@ Setup
55
Create files for testing.
66

77
$ cat >metadata.tsv <<~~
8-
> strain field_A field_A__url
9-
> tipA nextstrain https://nextstrain.org
10-
> tipB BB: not-a-url
11-
> tipC github https://github.com
12-
> tipD DD <not-a-url>
13-
> tipE EE invalid-url
14-
> tipF FF
8+
> strain field_A field_A__url field_B field_B__url
9+
> tipA nextstrain https://nextstrain.org 1 https://nextstrain.org
10+
> tipB BB: not-a-url 2
11+
> tipC github https://github.com 3 https://github.com
12+
> tipD DD <not-a-url> 4
13+
> tipE EE invalid-url 5 invalid-url
14+
> tipF FF 6
1515
> ~~
1616

1717
$ cat >tree.nwk <<~~
@@ -23,14 +23,9 @@ Check that URLs were extracted from metadata values when added as an "extra meta
2323
$ ${AUGUR} export v2 \
2424
> --tree tree.nwk \
2525
> --metadata metadata.tsv \
26-
> --metadata-columns "field_A" \
26+
> --metadata-columns "field_A" "field_B" \
2727
> --maintainers "Nextstrain Team" \
28-
> --output dataset.json
29-
Validating schema of 'dataset.json'...
30-
Validation of 'dataset.json' succeeded.
31-
Validating produced JSON
32-
Validating that the JSON is internally consistent...
33-
28+
> --output dataset.json &> /dev/null
3429

3530
$ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" "$TESTDIR/../data/dataset-with-parsed-urls.json" dataset.json \
3631
> --exclude-paths "root['meta']['updated']"
@@ -42,15 +37,9 @@ Check that URLs were extracted from metadata values when used as a coloring
4237
$ ${AUGUR} export v2 \
4338
> --tree tree.nwk \
4439
> --metadata metadata.tsv \
45-
> --color-by-metadata "field_A" \
40+
> --color-by-metadata "field_A" "field_B" \
4641
> --maintainers "Nextstrain Team" \
47-
> --output dataset2.json
48-
Validating schema of 'dataset2.json'...
49-
Validation of 'dataset2.json' succeeded.
50-
Trait 'field_A' was guessed as being type 'categorical'. Use a 'config' file if you'd like to set this yourself.
51-
Validating produced JSON
52-
Validating that the JSON is internally consistent...
53-
42+
> --output dataset2.json &> /dev/null
5443

5544
$ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" "$TESTDIR/../data/dataset-with-parsed-urls.json" dataset2.json \
5645
> --exclude-paths "root['meta']['updated']" "root['meta']['colorings']" "root['meta']['filters']"
@@ -62,12 +51,12 @@ The data is essentially the same, but tipB & tipD have empty-string URLs and tip
6251
$ cat >node-data.json <<~~
6352
> {"nodes":
6453
> {
65-
> "tipA": {"field_A": "nextstrain", "field_A__url": "https://nextstrain.org"},
66-
> "tipB": {"field_A": "BB: not-a-url", "field_A__url": ""},
67-
> "tipC": {"field_A": "github", "field_A__url": "https://github.com"},
68-
> "tipD": {"field_A": "DD <not-a-url>", "field_A__url": ""},
69-
> "tipE": {"field_A": "EE", "field_A__url": "invalid-url"},
70-
> "tipF": {"field_A": "FF"}
54+
> "tipA": {"field_A": "nextstrain", "field_A__url": "https://nextstrain.org", "field_B": 1, "field_B__url": "https://nextstrain.org"},
55+
> "tipB": {"field_A": "BB: not-a-url", "field_A__url": "", "field_B": 2, "field_B__url": ""},
56+
> "tipC": {"field_A": "github", "field_A__url": "https://github.com", "field_B": 3, "field_B__url": "https://github.com"},
57+
> "tipD": {"field_A": "DD <not-a-url>", "field_A__url": "", "field_B": 4, "field_B__url": ""},
58+
> "tipE": {"field_A": "EE", "field_A__url": "invalid-url", "field_B": 5, "field_B__url": "invalid-url"},
59+
> "tipF": {"field_A": "FF", "field_B": 6}
7160
> }
7261
> }
7362
> ~~
@@ -76,15 +65,8 @@ The data is essentially the same, but tipB & tipD have empty-string URLs and tip
7665
> --tree tree.nwk \
7766
> --node-data node-data.json \
7867
> --maintainers "Nextstrain Team" \
79-
> --output dataset3.json
80-
Validating schema of 'dataset3.json'...
81-
Validation of 'dataset3.json' succeeded.
82-
Trait 'field_A' was guessed as being type 'categorical'. Use a 'config' file if you'd like to set this yourself.
83-
Validating produced JSON
84-
Validating that the JSON is internally consistent...
85-
68+
> --output dataset3.json &> /dev/null
8669

87-
88-
$ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" "$TESTDIR/../data/dataset-with-parsed-urls.json" dataset.json \
89-
> --exclude-paths "root['meta']['updated']"
90-
{}
70+
$ python3 "$TESTDIR/../../../../scripts/diff_jsons.py" "$TESTDIR/../data/dataset-with-parsed-urls.json" dataset3.json \
71+
> --exclude-paths "root['meta']['updated']" "root['meta']['colorings']" "root['meta']['filters']"
72+
{}

tests/functional/export_v2/data/dataset-with-parsed-urls.json

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@
2727
"field_A": {
2828
"value": "nextstrain",
2929
"url": "https://nextstrain.org"
30+
},
31+
"field_B": {
32+
"value": 1,
33+
"url": "https://nextstrain.org"
3034
}
3135
},
3236
"branch_attrs": {}
@@ -44,6 +48,9 @@
4448
"div": 3.0,
4549
"field_A": {
4650
"value": "BB: not-a-url"
51+
},
52+
"field_B": {
53+
"value": 2
4754
}
4855
},
4956
"branch_attrs": {}
@@ -55,6 +62,10 @@
5562
"field_A": {
5663
"value": "github",
5764
"url": "https://github.com"
65+
},
66+
"field_B": {
67+
"value": 3,
68+
"url": "https://github.com"
5869
}
5970
},
6071
"branch_attrs": {}
@@ -74,6 +85,9 @@
7485
"div": 8.0,
7586
"field_A": {
7687
"value": "DD <not-a-url>"
88+
},
89+
"field_B": {
90+
"value": 4
7791
}
7892
},
7993
"branch_attrs": {}
@@ -84,6 +98,9 @@
8498
"div": 9.0,
8599
"field_A": {
86100
"value": "EE"
101+
},
102+
"field_B": {
103+
"value": 5
87104
}
88105
},
89106
"branch_attrs": {}
@@ -94,6 +111,9 @@
94111
"div": 6.0,
95112
"field_A": {
96113
"value": "FF"
114+
},
115+
"field_B": {
116+
"value": 6
97117
}
98118
},
99119
"branch_attrs": {}
@@ -102,4 +122,4 @@
102122
}
103123
]
104124
}
105-
}
125+
}

0 commit comments

Comments
 (0)