Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 44 additions & 4 deletions augur/dates/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import re
from functools import cache
from treetime.utils import numeric_date as tt_numeric_date, datetime_from_numeric
from typing import Any, Dict, Optional, Tuple, Union
from typing import Any, Dict, Literal, Optional, Tuple, Union
from augur.errors import AugurError
from .errors import InvalidDate

Expand Down Expand Up @@ -75,17 +75,49 @@ def numeric_date(date):

raise InvalidDate(date, f"""Ensure it is in one of the supported formats:\n{SUPPORTED_DATE_HELP_TEXT}""")

def numeric_date_type(date):
"""Wraps numeric_date() for argparse usage.
def numeric_date_type_min(date) -> float:
    """Argparse ``type=`` callable that resolves a date to its earliest numeric value.

    The input is resolved through get_single_numeric_date() with
    ``min_or_max="min"``, so a value that resolves to a range yields the
    lower end of that range.

    This raises an ArgumentTypeError from InvalidDate exceptions, otherwise the custom exception message won't be shown in console output due to:
    https://github.com/python/cpython/blob/5c4d1f6e0e192653560ae2941a6677fbf4fbd1f2/Lib/argparse.py#L2503-L2513

    >>> round(numeric_date_type_min("2018"), 3)
    2018.001
    """
    try:
        earliest = get_single_numeric_date(date, fmt="%Y-%m-%d", min_or_max="min")
    except InvalidDate as error:
        # Re-raise as the exception type argparse reports verbatim to the user.
        raise argparse.ArgumentTypeError(str(error)) from error
    return earliest

def numeric_date_type_max(date) -> float:
    """Argparse ``type=`` callable that resolves a date to its latest numeric value.

    The input is resolved through get_single_numeric_date() with
    ``min_or_max="max"``, so a value that resolves to a range yields the
    upper end of that range.

    This raises an ArgumentTypeError from InvalidDate exceptions, otherwise the custom exception message won't be shown in console output due to:
    https://github.com/python/cpython/blob/5c4d1f6e0e192653560ae2941a6677fbf4fbd1f2/Lib/argparse.py#L2503-L2513

    >>> round(numeric_date_type_max("2018"), 3)
    2018.999
    """
    try:
        # Only the range-aware helper is called here. A leftover
        # `return numeric_date(date)` ahead of this line would make it
        # unreachable and skip the "max" resolution promised above.
        return get_single_numeric_date(date, fmt="%Y-%m-%d", min_or_max="max")
    except InvalidDate as error:
        # Re-raise as the exception type argparse reports verbatim to the user.
        raise argparse.ArgumentTypeError(str(error)) from error

def get_single_numeric_date(value, fmt, min_or_max: Literal["min", "max"]) -> float:
    """Resolve *value* to one numeric date, collapsing a range to a single bound.

    *value* and *fmt* are passed straight to get_numerical_date_from_value().
    A float result is returned unchanged. A tuple result is reduced to its
    first element when *min_or_max* is ``"min"`` and to its second element
    when it is ``"max"``.

    Raises InvalidDate for any other outcome: a result that is neither a
    float nor a tuple, or a tuple combined with an unrecognised *min_or_max*.
    """
    # Named `resolved` so the module-level numeric_date() function is not shadowed.
    resolved = get_numerical_date_from_value(value, fmt)

    if isinstance(resolved, float):
        return resolved

    is_range = isinstance(resolved, tuple)
    if is_range and min_or_max == "min":
        return resolved[0]
    if is_range and min_or_max == "max":
        return resolved[1]

    raise InvalidDate(value, f"""Ensure it is in one of the supported formats:\n{SUPPORTED_DATE_HELP_TEXT}""")


def is_date_ambiguous(date, ambiguous_by):
"""
Returns whether a given date string in the format of YYYY-MM-DD is ambiguous by a given part of the date (e.g., day, month, year, or any parts).
Expand Down Expand Up @@ -209,6 +241,14 @@ def get_numerical_date_from_value(value, fmt, min_max_year=None) -> Union[float,
# closest in-bound value.
raise InvalidDate(value, str(error)) from error

# Check if value is an ISO 8601 duration treated as a backwards-looking relative date
try:
if not value.startswith('P'):
value = 'P' + value
return date_to_numeric(datetime.date.today() - isodate.parse_duration(value))
except (ValueError, isodate.ISO8601Error):
pass

# Return None (silent error) if the date does not match any of the checked formats.

return None
Expand Down
6 changes: 3 additions & 3 deletions augur/filter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
program vcftools must be available on PATH.
"""
from augur.argparse_ import ExtendOverwriteDefault, SKIP_AUTO_DEFAULT_IN_HELP
from augur.dates import numeric_date_type
from augur.dates import numeric_date_type_min, numeric_date_type_max
from augur.filter.arguments import descriptions
from augur.filter.io import column_type_pair
from augur.io.metadata import DEFAULT_DELIMITERS, DEFAULT_ID_COLUMNS
Expand All @@ -39,8 +39,8 @@ def register_arguments(parser):

metadata_filter_group.add_argument('--query', help=descriptions['query'])
metadata_filter_group.add_argument('--query-columns', type=column_type_pair, nargs="+", action=ExtendOverwriteDefault, help=descriptions['query_columns'])
metadata_filter_group.add_argument('--min-date', type=numeric_date_type, help=descriptions['min_date'])
metadata_filter_group.add_argument('--max-date', type=numeric_date_type, help=descriptions['max_date'])
metadata_filter_group.add_argument('--min-date', type=numeric_date_type_min, help=descriptions['min_date'])
metadata_filter_group.add_argument('--max-date', type=numeric_date_type_max, help=descriptions['max_date'])
metadata_filter_group.add_argument('--exclude-ambiguous-dates-by', choices=['any', 'day', 'month', 'year'], help=descriptions['exclude_ambiguous_dates_by'])
metadata_filter_group.add_argument('--exclude', type=str, nargs="+", action=ExtendOverwriteDefault, help=descriptions['exclude'])
metadata_filter_group.add_argument('--exclude-where', nargs='+', action=ExtendOverwriteDefault, help=descriptions['exclude_where'])
Expand Down
6 changes: 3 additions & 3 deletions augur/frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from .errors import AugurError
from .frequency_estimators import get_pivots, alignment_frequencies, tree_frequencies
from .frequency_estimators import AlignmentKdeFrequencies, TreeKdeFrequencies, TreeKdeFrequenciesError
from .dates import numeric_date_type, SUPPORTED_DATE_HELP_TEXT, get_numerical_dates
from .dates import numeric_date_type_min, numeric_date_type_max, SUPPORTED_DATE_HELP_TEXT, get_numerical_dates
from .io.file import open_file
from .io.metadata import DEFAULT_DELIMITERS, DEFAULT_ID_COLUMNS, METADATA_DATE_COLUMN, InvalidDelimiter, Metadata, read_metadata
from .utils import write_json
Expand All @@ -37,9 +37,9 @@ def register_parser(parent_subparsers):
help="number of units between pivots")
parser.add_argument("--pivot-interval-units", type=str, default="months", choices=['months', 'weeks'],
help="space pivots by months (default) or by weeks")
parser.add_argument('--min-date', type=numeric_date_type,
parser.add_argument('--min-date', type=numeric_date_type_min,
help=f"date to begin frequencies calculations; may be specified as: {SUPPORTED_DATE_HELP_TEXT}")
parser.add_argument('--max-date', type=numeric_date_type,
parser.add_argument('--max-date', type=numeric_date_type_max,
help=f"date to end frequencies calculations; may be specified as: {SUPPORTED_DATE_HELP_TEXT}")

# Tree-specific arguments
Expand Down
16 changes: 16 additions & 0 deletions tests/dates/test_dates.py
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some test matrix jobs are failing at tests/filter/test_relative_dates.py:

tests/filter/test_relative_dates.py:152: in test_filter_relative_dates
    assert output_sorted == output_sorted_expected
E   AssertionError: assert ['SEQ_1', 'SEQ_2', 'SEQ_3'] == ['SEQ_2', 'SEQ_3']
E     
E     At index 0 diff: 'SEQ_1' != 'SEQ_2'
E     Left contains one more item: 'SEQ_3'
E     
E     Full diff:
E       [
E     +     'SEQ_1',
E           'SEQ_2',
E           'SEQ_3',
E       ]
        argparse_params = '--min-date 1D'
…
================== 6 failed, 569 passed, 2 warnings in 14.95s ==================

This needs investigating. To narrow things down a bit, I noticed it happens in test (python=3.10 biopython=1.80 numpy=latest) and not test (python=3.10 biopython=1.80 numpy=1.26.4). I'm unable to reproduce locally with (python=3.13 biopython=1.85 numpy=2.3.2), so it's not just the numpy version.

(unrelated to this file, just needed to start a thread)

Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,19 @@ def test_get_numerical_dates_dict_error(self):
}
with pytest.raises(AugurError):
dates.get_numerical_dates(metadata, "%Y-%m-%d")

@freeze_time("2000-02-20")
def test_get_numerical_date_from_value_relative_dates(self):
    """Test that get_numerical_date_from_value handles ISO duration strings as relative dates."""
    # Durations without the leading "P", mapped to the numeric date expected
    # when counting backwards from the frozen "today" (2000-02-20).
    expected_by_duration = {
        "1D": 2000.135,
        "1W": 2000.119,
        "1M": 2000.053,
        "1Y": 1999.138,
        "1Y1M1W": 1999.034,
    }
    for duration, expected in expected_by_duration.items():
        resolved = dates.get_numerical_date_from_value(duration, "%Y-%m-%d")
        assert resolved == pytest.approx(expected, abs=1e-3)

@freeze_time("2000-02-20")
def test_get_numerical_date_from_value_relative_dates_with_p_prefix(self):
    """Test that get_numerical_date_from_value handles ISO duration strings with P prefix."""
    # Same frozen "today" (2000-02-20) and expected values as the durations
    # written without the prefix: an explicit "P" must not change the result.
    assert dates.get_numerical_date_from_value("P1D", "%Y-%m-%d") == pytest.approx(2000.135, abs=1e-3)
    assert dates.get_numerical_date_from_value("P1W", "%Y-%m-%d") == pytest.approx(2000.119, abs=1e-3)
    assert dates.get_numerical_date_from_value("P1M", "%Y-%m-%d") == pytest.approx(2000.053, abs=1e-3)
    # Year and multi-component durations, for parity with the non-prefixed test.
    assert dates.get_numerical_date_from_value("P1Y", "%Y-%m-%d") == pytest.approx(1999.138, abs=1e-3)
    assert dates.get_numerical_date_from_value("P1Y1M1W", "%Y-%m-%d") == pytest.approx(1999.034, abs=1e-3)
10 changes: 5 additions & 5 deletions tests/functional/filter/cram/filter-max-date.t
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@ Create metadata TSV file for testing.

$ cat >metadata.tsv <<~~
> strain date
> SEQ_1 2020-03-XX
> SEQ_2 2020-03-01
> SEQ_3 2020-03-02
> SEQ_1 2019-XX-XX
> SEQ_2 2019-12-31
> SEQ_3 2020-01-01
> ~~

Test that --max-date is inclusive.
Test that --max-date is inclusive even with ambiguity.

$ ${AUGUR} filter \
> --metadata metadata.tsv \
> --max-date 2020-03-01 \
> --max-date 2019 \
> --output-strains filtered_strains.txt 2>/dev/null
$ sort filtered_strains.txt
SEQ_1
Expand Down
Loading