Skip to content

Commit 662c3c8

Browse files
committed
WIP - shared smk to enforce declared deps
1 parent dfe4019 commit 662c3c8

File tree

4 files changed

+217
-1
lines changed

4 files changed

+217
-1
lines changed

nextstrain-pathogen.yaml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,26 @@
88
---
99
compatibility:
1010
nextstrain run: true
11+
# [Discussion] Realistically we won't implement buildpacks for a number of months
12+
# and so that can't block our dependency checking. We can try to second guess the
13+
# correct syntax or (easier) deliberately choose a top-level key name which won't
14+
# be used by buildpacks and then migrate to the buildpacks syntax when we implement
15+
# them for each repo.
16+
dependencies:
17+
# nextstrain-augur is the name of the python package, so this will check we have augur>=30 installed
18+
nextstrain-augur: ">=30"
19+
# augur is the name of the CLI (not a python package which I have), so this will check we have augur>=33
20+
# installed as a CLI *as long as* we don't have a python package called 'augur'
21+
# (We don't need to list 'augur' and 'nextstrain-augur' - this is for testing!)
22+
# Note: Augur 33 doesn't exist, so this is reported as a "Version incompatibilities"
23+
augur: ">=33"
24+
snakemake: ">=9,<10"
25+
nextclade: '>=3.15'
26+
nextstrain: '>=10.2'
27+
# The following program should be reported under "Not found dependencies"
28+
this-program-doesnt-exist: '>=1.1'
29+
# The following program should be reported under "Declaration errors" as '1' is not a valid specifier
30+
invalid-specifier: '1'
31+
# I have an executable program called 'program-which-exits-2' which exits 2 every time. This dependency
32+
# is reported under "Unexpected errors" for me, but may be "Not found dependencies" for you
33+
program-which-exits-2: '>3.0'

phylogenetic/Snakefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11

22
# Utility functions shared across all workflows.
33
include: "../shared/vendored/snakemake/config.smk"
4-
4+
include: "../shared/vendored/snakemake/versioning.smk"
55

66
# Use default configuration values. Extend with Snakemake's --configfile/--config options.
77
configfile: os.path.join(workflow.basedir, "defaults/config.yaml")
@@ -10,6 +10,7 @@ configfile: os.path.join(workflow.basedir, "defaults/config.yaml")
1010
if os.path.exists("config.yaml"):
1111
configfile: "config.yaml"
1212

13+
check_pathogen_required_versions()
1314

1415
rule all:
1516
input:

shared/vendored/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ Potential Nextstrain CLI scripts
122122
Snakemake workflow functions that are shared across many pathogen workflows that don’t really belong in any of our existing tools.
123123

124124
- [config.smk](snakemake/config.smk) - Shared functions for parsing workflow configs.
125+
- [versioning.smk](snakemake/versioning.smk) - Shared functions for enforcing dependency versions.
125126

126127
## Software requirements
127128

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
"""
2+
Shared functions to be used within a Snakemake workflow for enforcing
3+
versions of dependencies the repo defines within its `nextstrain-pathogen.yaml`
4+
"""
5+
6+
from os import path
7+
from sys import stderr
8+
from packaging.specifiers import SpecifierSet, InvalidSpecifier # snakemake dependency
9+
from packaging.version import Version, InvalidVersion # snakemake dependency
10+
from importlib.metadata import version as importlib_version, PackageNotFoundError
11+
from snakemake.common import __version__ as snakemake_version
12+
import subprocess
13+
from shutil import which
14+
import re
15+
16+
class ProgramNotFoundError(Exception):
    """Raised when no executable with the requested name can be located via `shutil.which`."""
18+
19+
class DependencyChecker:
    """
    Compares the dependencies declared in a parsed `nextstrain-pathogen.yaml`
    against what is installed and collects any problems for later reporting.

    Problems are accumulated (rather than raised) in four lists, one per entry
    of `self.error_attrs`:

    - `version_incompatibilities`: found, but the version doesn't satisfy the specifier
    - `not_found_dependencies`: neither a python package nor a CLI program
    - `declaration_errors`: the specifier in the YAML is invalid
    - `unexpected_errors`: the version couldn't be obtained or parsed

    Typical usage is `checker.check()` followed by `checker.report_errors()`.
    """

    # Finds the first version-like token in the output of `<name> --version`.
    #                                  1    . 2    . 3       alpha etc
    _VERSION_PATTERN = re.compile(r"\d+(\.\d+(\.\d+)?)?([.-][0-9A-Za-z]+)*")

    def __init__(self, registration):
        """
        *registration* is the deserialized `nextstrain-pathogen.yaml` (a dict).
        Declaration errors are recorded immediately while parsing it.
        """
        self.error_attrs = ["version_incompatibilities", "not_found_dependencies", "declaration_errors", "unexpected_errors"]
        for attr in self.error_attrs:
            setattr(self, attr, [])
        self.declared_dependencies = self.parse_dependencies(registration)

    def parse_dependencies(self, registration):
        """
        Return a dict of <name>: SpecifierSet for every validly declared dependency.

        A missing or empty (YAML null) `dependencies` key means nothing is declared.
        Specifiers which are not strings (e.g. an unquoted YAML number) or which
        can't be parsed are recorded under `declaration_errors` and left out of
        the returned dict. Raises WorkflowError if `dependencies` is not a dict.
        """
        declared_dependencies = {}
        dependencies = registration.get('dependencies', {})
        if dependencies is None:
            # `dependencies:` with nothing underneath deserializes to None
            dependencies = {}
        if not isinstance(dependencies, dict):
            raise WorkflowError(f"Within `nextstrain-pathogen.yaml` the dependencies must be a dict of <name>: <specifier>. You provided {type(dependencies).__name__}")

        for name, spec in dependencies.items():
            # We don't yet know whether *name* is a python package or a CLI
            # program, so the message refers to it simply as a dependency.
            invalid_msg = f"This pathogen declared an invalid version specification for dependency {name!r} of {spec!r}"
            if not isinstance(spec, str):
                # SpecifierSet() would crash on non-string input rather than
                # raising InvalidSpecifier, so reject it up front.
                self.declaration_errors.append(invalid_msg)
                continue
            try:
                declared_dependencies[name] = SpecifierSet(spec)
            except InvalidSpecifier:
                self.declaration_errors.append(invalid_msg)
        return declared_dependencies

    def check(self):
        """
        Check every declared dependency, recording problems on the instance.

        Each name is first looked up as a python package; if no such package is
        installed we fall back to treating it as a CLI program.
        """
        for name, specifier in self.declared_dependencies.items():
            try:  # First assume it's a python package
                self.check_python_package(name, specifier)
            except PackageNotFoundError:
                try:  # if it's not a python package, maybe it's a CLI?
                    self.check_cli_version(name, specifier)
                except ProgramNotFoundError:
                    self.not_found_dependencies.append(f"{name!r} is not installed as a python dependency nor a CLI program. This pathogen requires a version satisfying {specifier!r}")

    def report_errors(self) -> bool:
        """
        Print a summary of all recorded problems to stderr.

        Returns True if any problems were recorded, False otherwise.
        """
        if not any(getattr(self, attr) for attr in self.error_attrs):
            print("All dependencies declared by this pathogen satisfied", file=stderr)
            return False

        # Everything goes to stderr so that headings and messages stay together
        # even when stdout and stderr are redirected separately.
        print(file=stderr)
        print('_'*80, file=stderr)
        print("This pathogen declares dependencies which were not met.", file=stderr)
        for attr in self.error_attrs:
            errors = getattr(self, attr)
            if not errors:
                continue
            heading = attr.replace('_', ' ').capitalize() + ":"
            print(heading, file=stderr)
            print("-"*len(heading), file=stderr)
            for msg in errors:
                print(f"\t{msg}", file=stderr)
        print('_'*80, file=stderr)
        print(file=stderr)
        return True

    def check_python_package(self, name: str, specifier: SpecifierSet):
        """
        Check whether the installed python library *name* meets the specifier *specifier*.
        This uses importlib.metadata to check the available version which avoids importing
        the top-level import.

        If the package is found but the version is unparseable or doesn't satisfy the
        provided *specifier* we log an error. Raises `PackageNotFoundError` if the
        package is not found.
        """
        if name == 'snakemake':
            # in conda environments importlib reports a snakemake version of 0.0.0,
            # so follow the approach of Snakemake's own min_version function
            reported = snakemake_version
        else:
            reported = importlib_version(name)  # may raise PackageNotFoundError

        try:
            version = Version(reported)
        except InvalidVersion:  # <https://packaging.pypa.io/en/stable/version.html#packaging.version.InvalidVersion>
            self.unexpected_errors.append(f"Python dependency {name!r} reported a version of {reported} which we were unable to parse")
            return

        if not specifier.contains(version):
            self.version_incompatibilities.append(f"Python dependency {name!r} version incompatibility. You have {version} but this pathogen declares {specifier}")

    def check_cli_version(self, name: str, specifier: SpecifierSet) -> None:
        """
        Check whether the requested *name* is (a) installed and (b) reports a version
        which satisfies the *specifier*. Both (a) and (b) are achieved by calling
        `<name> --version`.

        If *name* isn't found (or is not executable) we raise a ProgramNotFoundError.
        If the program is found but fails to run, doesn't report a parseable version,
        or reports a version which doesn't satisfy the provided *specifier* we log
        an error.
        """
        if which(name) is None:
            raise ProgramNotFoundError()

        cmd = [name, "--version"]
        try:
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True)
        except subprocess.CalledProcessError as e:
            self.unexpected_errors.append(f"CLI program {name!r} exited code {e.returncode} when called using {' '.join(cmd)!r}")
            return
        except OSError as e:
            # e.g. the file is on the PATH and executable but can't actually be exec'd
            self.unexpected_errors.append(f"CLI program {name!r} could not be executed using {' '.join(cmd)!r}: {e}")
            return

        # Programs differ on whether the version is written to stdout or stderr
        output = ((proc.stdout or "") + " " + (proc.stderr or "")).strip()

        m = self._VERSION_PATTERN.search(output)
        if not m:
            self.unexpected_errors.append(f"CLI program {name!r} didn't report a parseable version when called via {' '.join(cmd)!r}")
            return

        try:
            version = Version(m.group(0))
        except InvalidVersion:  # <https://packaging.pypa.io/en/stable/version.html#packaging.version.InvalidVersion>
            self.unexpected_errors.append(f"CLI program {name!r} reported a version of {m.group(0)} which we were unable to parse")
            return  # without a parsed version there is nothing to compare

        if not specifier.contains(version):
            self.version_incompatibilities.append(f"CLI program {name!r} version incompatibility. You have {version} but this pathogen declares {specifier}")
130+
131+
132+
def _read_nextstrain_pathogen_yaml(path: str) -> dict:
    """
    Load the ``nextstrain-pathogen.yaml`` file at *path* and return its
    deserialized contents, which must be a dict (ValueError otherwise).

    Adapted from <https://github.com/nextstrain/cli/blob/4dbac262b22a3db9c48267e23f713ad56251ffd0/nextstrain/cli/pathogens.py#L843C1-L858C24>.
    (Note: pathogen repos don't need the nextstrain CLI to be installed and thus we can't import the code.)
    """
    import yaml

    with open(path, encoding="utf-8") as handle:
        contents = yaml.safe_load(handle)

    if isinstance(contents, dict):
        return contents

    raise ValueError(f"nextstrain-pathogen.yaml not a dict (got a {type(contents).__name__}): {str(path)!r}")
148+
149+
def pathogen_yaml(*, subdir_max=3):
    """
    Locate and parse the pathogen's `nextstrain-pathogen.yaml`.

    The file is looked for in `workflow.basedir` and then in successive parent
    directories; *subdir_max* is the total number of directories examined
    (so the default of 3 covers the workflow directory plus two levels up).

    Returns the deserialized contents of the first file found. Raises a
    WorkflowError if that file can't be read/parsed, or if no file is found
    (in which case the message lists every path that was searched).
    """
    searched_paths = []
    for depth in range(subdir_max):
        candidate = path.normpath(path.join(workflow.basedir, *['..']*depth, "nextstrain-pathogen.yaml"))
        searched_paths.append(candidate)
        if not path.isfile(candidate):
            continue
        try:
            return _read_nextstrain_pathogen_yaml(candidate)
        except Exception as e:
            # Re-raised as a WorkflowError; chaining keeps the original traceback available
            raise WorkflowError(f"Unable to parse {candidate} (as YAML). Error: {e}") from e

    # Put the diagnostic in the exception itself so it's reported alongside the
    # failure rather than being printed separately before an empty error.
    raise WorkflowError(
        "Could not find a nextstrain-pathogen.yaml file to check version dependencies.\n"
        "Searched paths:\n\t" + "\n\t".join(searched_paths)
    )
165+
166+
167+
def check_pathogen_required_versions(*, fatal=True):
    """
    Verify that the dependencies declared in the pathogen's
    'nextstrain-pathogen.yaml' are satisfied. They are declared like so:

        dependencies:
            <name>: <specification>

    where <specification> follows the syntax described at
    <https://packaging.python.org/en/latest/specifications/version-specifiers/#id5>

    Each <name> is first looked up as a python package. If no such package is
    installed we check whether it's an installed CLI and attempt to get the
    version by running `<name> --version`.

    The check is skipped entirely when the workflow config sets
    'skip_dependency_version_checking' to True.

    A report is always printed. If *fatal* is True (default) a WorkflowError
    is raised when any declared dependency is not satisfied.
    """
    skip_requested = config.get('skip_dependency_version_checking', False)
    if skip_requested is True:
        print("Skipping dependency version checking as per config setting", file=stderr)
        return

    registration = pathogen_yaml()
    checker = DependencyChecker(registration)
    checker.check()
    # report_errors() is always called so the summary is printed even when not fatal
    if checker.report_errors() and fatal:
        raise WorkflowError("Dependencies not satisfied")

0 commit comments

Comments
 (0)