-
Notifications
You must be signed in to change notification settings - Fork 448
Open
Labels
maebAudio extensionAudio extension
Description
We will need to run all MAEB tasks + models.
I have outlined a script for doing it below. Since retrieval is not ready yet, the script runs everything except retrieval.
"""Creates slurm jobs for running models on all tasks"""
from __future__ import annotations

import shlex
import subprocess
from collections.abc import Iterable
from pathlib import Path

import mteb
def create_slurm_job_file(
    model_name: str,
    task_name: str,
    results_folder: Path,
    slurm_prefix: str,
    slurm_jobs_folder: Path,
) -> Path:
    """Create a slurm job file for running a single model on a single task.

    The file consists of ``slurm_prefix`` followed by one ``mteb run`` command
    line and is written to ``slurm_jobs_folder`` (which must already exist).

    Args:
        model_name: Model identifier passed to ``mteb run -m``.
        task_name: Task name passed to ``mteb run -t``.
        results_folder: Folder passed as ``--output_folder``; it is resolved to
            an absolute path before being written into the script.
        slurm_prefix: Text placed at the top of the script (shebang and
            ``#SBATCH`` directives).
        slurm_jobs_folder: Folder the job file is written into.

    Returns:
        Path of the job file that was written.
    """
    command = [
        "mteb",
        "run",
        "-m",
        model_name,
        "-t",
        task_name,
        "--output_folder",
        str(results_folder.resolve()),
        "--co2_tracker",
        "true",
    ]
    # shlex.join quotes every argument that needs it, so a path or name with
    # spaces or shell metacharacters stays a single argument in the script.
    # Arguments made only of shell-safe characters are emitted unquoted.
    slurm_job = f"{slurm_prefix}\n{shlex.join(command)}"

    # "/" in model names (e.g. "org/model") would otherwise be read as a
    # directory separator in the file name.
    model_path_name = model_name.replace("/", "__")
    slurm_job_file = slurm_jobs_folder / f"{model_path_name}_{task_name}.sh"
    with open(slurm_job_file, "w", encoding="utf-8") as f:
        f.write(slurm_job)
    return slurm_job_file
def create_slurm_job_files(
    model_names: list[str],
    tasks: Iterable[mteb.AbsTask],
    results_folder: Path,
    slurm_prefix: str,
    slurm_jobs_folder: Path,
) -> list[Path]:
    """Create one slurm job file for every (model, task) combination.

    Args:
        model_names: Names of the models to run.
        tasks: Tasks to run; only ``task.metadata.name`` is read from each.
            Any iterable is accepted, including one-shot iterators.
        results_folder: Forwarded to ``create_slurm_job_file``.
        slurm_prefix: Forwarded to ``create_slurm_job_file``.
        slurm_jobs_folder: Forwarded to ``create_slurm_job_file``.

    Returns:
        Paths of the created job files, ordered by model first and by task
        within each model.
    """
    # Collect the task names once up front: ``tasks`` is only promised to be
    # an Iterable, and a generator would be exhausted after the first model,
    # silently producing no jobs for every later model.
    task_names = [task.metadata.name for task in tasks]

    return [
        create_slurm_job_file(
            model_name,
            task_name,
            results_folder,
            slurm_prefix,
            slurm_jobs_folder,
        )
        for model_name in model_names
        for task_name in task_names
    ]
def run_slurm_jobs(files: list[Path]) -> None:
    """Submit every given job file to slurm, one ``sbatch`` call per file.

    The exit status of ``sbatch`` is not inspected, so a rejected submission
    does not stop the remaining files from being submitted.
    """
    for job_script in files:
        submit_command = ["sbatch", job_script]
        subprocess.run(submit_command)
if __name__ == "__main__":
    # Script entry point: build one slurm job per (audio model, audio task)
    # pair, skipping retrieval tasks, and submit them all with sbatch.

    # TODO: SHOULD BE UPDATED
    slurm_prefix = """#!/bin/bash
#SBATCH --job-name=mteb
#SBATCH --nodes=1
#SBATCH --partition=a3
#SBATCH --gres=gpu:1 # number of gpus
#SBATCH --time 24:00:00 # maximum execution time (HH:MM:SS)
#SBATCH --output=/data/niklas/jobs/%x-%j.out # output file name
#SBATCH --exclusive
"""

    # Hard-coded, machine-specific results location; adjust it together with
    # the slurm prefix above. (A project-relative "results" folder used to be
    # computed here but was immediately overwritten by this value, so that
    # dead assignment has been removed.)
    results_folder = Path("/data/niklas/results")
    slurm_jobs_folder = Path(__file__).parent / "slurm_jobs"

    # Keep only models that list "audio" among their modalities and whose
    # name is a string.
    metas = mteb.get_model_metas()
    model_metas = [meta for meta in metas if "audio" in meta.modalities]
    model_names = [meta.name for meta in model_metas if isinstance(meta.name, str)]

    # expanding to a full list of tasks
    tasks = mteb.get_tasks(modalities=["audio"])

    # TODO: remove once retrieval tasks are fully supported
    excluded_task_types = ["Any2AnyRetrieval", "Any2AnyMultilingualRetrieval"]
    tasks = [
        task for task in tasks if task.metadata.task_type not in excluded_task_types
    ]

    slurm_jobs_folder.mkdir(exist_ok=True)
    files = create_slurm_job_files(
        model_names, tasks, results_folder, slurm_prefix, slurm_jobs_folder
    )
    run_slurm_jobs(files)
gowitheflow-1998
Metadata
Metadata
Assignees
Labels
maebAudio extensionAudio extension