Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add package #8

Merged
merged 4 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# CI workflow: lint and build the package on pushes and pull requests targeting main.
name: build

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  build:
    runs-on: ubuntu-latest
    env:
      # Expose the triggering event name to every step of the job.
      GITHUB_EVENT_NAME: ${{ github.event_name }}
    strategy:
      # Keep the remaining matrix entries running when one of them fails.
      fail-fast: false
      matrix:
        python-version: ["3.12"]
    timeout-minutes: 8

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      # Cache pre-commit environments; the key changes with the OS and the hook configuration.
      - uses: actions/cache@v3
        with:
          path: ~/.cache/pre-commit
          key: pre-commit-${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }}
      # The nox sessions invoked below are defined in noxfile.py.
      - run: pip install laminci
      - run: nox -s lint
      - run: nox -s build
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,4 @@ node_modules
# lamin
test.ipynb
*/test-perturbation
lamin/data
22 changes: 22 additions & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import nox
from laminci.nox import SYSTEM, build_docs, run, run_pre_commit, run_pytest

nox.options.default_venv_backend = "none"


@nox.session
def lint(session: nox.Session) -> None:
    """Lint the repository by delegating to ``laminci.nox.run_pre_commit``.

    Args:
        session: The nox session this task runs in.
    """
    run_pre_commit(session)


@nox.session
def build(session: nox.Session) -> None:
    """Install the package with its ``dev`` extra and run the tests without coverage.

    Args:
        session: The nox session this task runs in.
    """
    # ``SYSTEM`` is supplied by ``laminci.nox`` and spliced into the uv command line as-is.
    install_command = f"uv pip install {SYSTEM} .[dev]"
    run(session, install_command)
    run_pytest(session, coverage=False)


# Currently not enabled
@nox.session
def docs(session: nox.Session) -> None:
    """Initialise a lamin instance under ``./docsbuild`` and build the docs in strict mode.

    Args:
        session: The nox session this task runs in.
    """
    init_command = "lamin init --storage ./docsbuild --schema bionty,wetlab"
    run(session, init_command)
    build_docs(session, strict=True)
7 changes: 7 additions & 0 deletions pertpy_datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Package metadata.
__author__ = "Lukas Heumos"
__email__ = "[email protected]"
# pyproject.toml's [tool.hatch.version] points at this file, so this assignment is the
# package version that the build backend picks up.
__version__ = "0.1.0"

# Re-export the curator so it is importable directly from the package root.
from pertpy_datasets.perturbation_curator import PerturbationCurator

# The curator is the only name exported by ``from pertpy_datasets import *``.
__all__ = ["PerturbationCurator"]
104 changes: 104 additions & 0 deletions pertpy_datasets/perturbation_curator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
from typing import TYPE_CHECKING, Literal

import anndata as ad

if TYPE_CHECKING:
from lnschema_core import Record


class _PerturbationValidatorUnavailable:
    """Placeholder that this module binds to ``PerturbationCurator`` when the real class cannot be defined.

    The module falls back to this class when importing its dependencies raises ``ImportError``
    or django's ``ImproperlyConfigured``. Instantiating it always fails with an explanatory error.

    Raises:
        RuntimeError: Always, on instantiation.
    """

    def __init__(self):
        # Name the public class the caller actually tried to instantiate, not this placeholder.
        raise RuntimeError("PerturbationCurator can only be instantiated if connected to a lamindb instance.")


# Nested try because django might not be installed: the outer ``try`` turns a failed import
# (``ImportError``) into the placeholder class, the inner ``try`` does the same for django's
# ``ImproperlyConfigured``. The handlers cannot be merged into one ``except`` clause because
# the name ``ImproperlyConfigured`` only exists once the django import itself has succeeded.
try:
    from django.core.exceptions import ImproperlyConfigured

    try:
        import bionty as bt
        import wetlab as wl
        from cellxgene_lamin import CellxGeneFields, Curate
        from lamin_utils import logger
        from lamindb_setup.core.types import UPathStr
        from lnschema_core.types import FieldAttr

        class PerturbationCurator(Curate):
            """Curator flow for Perturbation data - see pertpy-datasets."""

            def __init__(
                self,
                adata: ad.AnnData | UPathStr,
                var_index: FieldAttr = bt.Gene.ensembl_gene_id,
                organism: Literal["human", "mouse"] = "human",
                *,
                verbosity: str = "hint",
                cxg_schema_version: Literal["5.0.0", "5.1.0"] = "5.1.0",
                using_key: str = "laminlabs/pertpy-datasets",
            ):
                """Curator flow for Perturbation data.

                The ``.obs`` categoricals, default values and extra sources are not configurable
                here. They are the CELLxGENE fields and defaults from ``CellxGeneFields``, extended
                with the perturbation columns ``cell_line``, ``genetic_treatments``,
                ``compound_treatments``, ``environmental_treatments`` and ``combination_treatments``.

                Args:
                    adata: Path to or AnnData object to curate against the CELLxGENE schema.
                    var_index: The registry field for mapping the ``.var`` index.
                    organism: The organism name. CELLxGENE restricts it to 'human' and 'mouse' and therefore so do we.
                    verbosity: The verbosity level.
                    cxg_schema_version: The CELLxGENE schema version to curate against.
                    using_key: A reference LaminDB instance.
                """
                # Defaults that are handed to the parent curator as ``defaults``.
                PT_DEFAULT_VALUES = CellxGeneFields.OBS_FIELD_DEFAULTS | {
                    "cell_line": "unknown",
                    "genetic_treatments": "",
                    "compound_treatments": "",
                    "environmental_treatments": "",
                    "combination_treatments": "",
                }

                # ``.obs`` column -> registry field, handed to the parent curator as ``categoricals``.
                PT_CATEGORICALS = CellxGeneFields.OBS_FIELDS | {
                    "cell_line": bt.CellLine.name,
                    "genetic_treatments": wl.GeneticTreatment.name,
                    "compound_treatments": wl.CompoundTreatment.name,
                    "environmental_treatments": wl.EnvironmentalTreatment.name,
                    "combination_treatments": wl.CombinationTreatment.name,
                }

                # Both cell line columns use the depmap source; look the record up once.
                # The chebi source for compounds is not listed here, it is registered through
                # ``wl.Compound.add_source`` below.
                depmap_source = bt.Source.filter(name="depmap").one()
                PT_SOURCES: dict[str, Record] = {
                    "depmap_id": depmap_source,
                    "cell_line": depmap_source,
                }

                self.organism = organism

                # Set the Compound source to chebi; we don't want output if the source has already been set
                with logger.mute():
                    chebi_source = bt.Source.filter(entity="Drug", name="chebi").first()
                    wl.Compound.add_source(chebi_source)

                super().__init__(
                    adata=adata,
                    var_index=var_index,
                    categoricals=PT_CATEGORICALS,
                    using_key=using_key,
                    defaults=PT_DEFAULT_VALUES,
                    verbosity=verbosity,
                    organism=self.organism,
                    extra_sources=PT_SOURCES,
                    schema_version=cxg_schema_version,
                )

            def validate(self) -> bool:
                """Validates the AnnData object against cellxgene and pertpy's requirements."""
                return super().validate()

    except ImproperlyConfigured:
        PerturbationCurator = _PerturbationValidatorUnavailable  # type: ignore

except ImportError:
    PerturbationCurator = _PerturbationValidatorUnavailable  # type: ignore
46 changes: 46 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,48 @@
[build-system]
build-backend = "hatchling.build"
requires = ["hatchling"]

[project]
name = "pertpy_datasets"
# The upper bound is exclusive on the next minor version: under PEP 440 "<=3.12" matches
# 3.12.0 only and rejects patch releases such as 3.12.1.
requires-python = ">=3.9,<3.13"
authors = [{name = "Lukas Heumos", email = "[email protected]"}]
readme = "README.md"
dynamic = ["version", "description"]
classifiers = [
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
]
dependencies = [
"cellxgene-lamin",
"findrefs",
"wetlab",
"requests",
]

[tool.hatch.version]
path = "pertpy_datasets/__init__.py"


[project.urls]
Home = "https://lamin.ai/laminlabs/pertpy-datasets"

[project.optional-dependencies]
dev = [
"pre-commit",
"nox",
"pytest>=6.0",
"pytest-cov",
"nbproject_test",
]

[tool.pytest.ini_options]
testpaths = [
"tests",
]

[tool.ruff]
line-length = 120

Expand Down Expand Up @@ -62,6 +107,7 @@ convention = "google"
"docs/*" = ["I"]
"tests/*" = ["D"]
"*/__init__.py" = ["F401"]
"noxfile.py" = ["D"]

[tool.mypy]
strict = false
Expand Down
Loading
Loading