EleutherAI
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 51 additions & 25 deletions
diff --git a/requirements.txt b/requirements.txt
Lines changed: 0 additions & 1 deletion
diff --git a/setup.py b/setup.py
Lines changed: 0 additions & 5 deletions
@@ -1,5 +1,5 @@
 [build-system]
-requires = ["setuptools>=40.8.0", "wheel"]
+requires = ["setuptools>=64.0"]
 build-backend = "setuptools.build_meta"
 
 [project]
@@ -13,19 +13,15 @@ readme = "README.md"
 classifiers = [
     "Development Status :: 3 - Alpha",
     "Programming Language :: Python :: 3",
-    "License :: OSI Approved :: MIT License",
     "Operating System :: OS Independent",
 ]
 requires-python = ">=3.10"
-license = { "text" = "MIT" }
+license = { text = "MIT" }
 dependencies = [
-    "evaluate",
     "datasets>=2.16.0",
     "evaluate>=0.4.0",
+    "jinja2",
     "jsonlines",
-    "numexpr",
-    "peft>=0.2.0",
-    "pybind11>=2.6.2",
     "pytablewriter",
     "rouge-score>=0.0.4",
     "sacrebleu>=1.5.0",
@@ -55,21 +51,21 @@ Repository = "https://github.com/EleutherAI/lm-evaluation-harness"
 [project.optional-dependencies]
 # Model backend dependencies
 api = ["requests", "aiohttp", "tenacity", "tqdm", "tiktoken"]
-hf = ["transformers>=4.1","torch>=1.8", "accelerate>=0.26.0"]
+hf = ["transformers>=4.1", "torch>=1.8", "accelerate>=0.26.0", "peft>=0.2.0"]
 vllm = ["vllm>=0.4.2"]
 gptq = ["auto-gptq[triton]>=0.6.0"]
 gptqmodel = ["gptqmodel>=1.0.9"]
-ipex = ["optimum"]
+ipex = ["optimum-intel"]
 ibm_watsonx_ai = ["ibm_watsonx_ai>=1.1.22", "python-dotenv"]
-#mamba = ["mamba_ssm", "causal-conv1d==1.0.2", "torch"] # build error
-neuronx = ["optimum[neuronx]>0.0.1"]
+# mamba requires CUDA (nvcc) - cannot build on macOS/CPU-only systems
+# mamba = ["mamba_ssm", "causal-conv1d==1.0.2", "torch"]
 optimum = ["optimum[openvino]"]
 sparsify = ["sparsify"]
 sae_lens = ["sae_lens"]
 # Task specific dependencies
 acpbench = ["lark>=1.1.9", "tarski[clingo]==0.8.2", "pddl==0.4.2", "kstar-planner==1.4.2"]
 audiolm_qwen = ["librosa", "soundfile"]
-dev = ["pytest", "pytest-cov", "pytest-xdist", "pre-commit", "requests", "aiohttp", "tenacity", "tqdm", "tiktoken", "sentencepiece"]
+dev = ["pytest", "pytest-cov", "pytest-xdist", "pre-commit", "requests", "aiohttp", "tenacity", "tqdm", "tiktoken", "sentencepiece", "ruff"]
 hf_transfer = ["hf_transfer"]
 ifeval = ["langdetect", "immutabledict", "nltk>=3.9.1"]
 japanese_leaderboard = ["emoji==2.14.0", "neologdn==0.5.3", "fugashi[unidic-lite]", "rouge_score>=0.1.2"]
@@ -92,12 +88,16 @@ tasks = [
     "lm_eval[japanese_leaderboard]",
     "lm_eval[longbench]",
     "lm_eval[libra]",
-    "lm_eval[mamba]",
     "lm_eval[math]",
     "lm_eval[multilingual]",
     "lm_eval[ruler]",
 ]
 
+[dependency-groups]
+dev = [
+    "lm_eval[api]", "lm_eval[dev]", "lm_eval[hf]", "sentencepiece",
+]
+
 [tool.uv]
 conflicts = [
     [
@@ -122,18 +122,44 @@ plugins.md028.enabled = false # no-blanks-blockquote
 plugins.md029.allow_extended_start_values = true # ol-prefix
 plugins.md034.enabled = false # no-bare-urls
 
-[tool.ruff.lint]
-extend-select = ["I", "W605"]
-
-[tool.ruff.lint.isort]
-lines-after-imports = 2
-known-first-party = ["lm_eval"]
+[tool.ruff.lint]
+extend-select = [
+    "I",      # isort
+    "UP",     # pyupgrade
+    "E",      # pycodestyle errors
+    "C419",   # unnecessary-comprehension-in-call
+    "F",      # pyflakes
+    "B",      # flake8-bugbear
+    "SIM",    # flake8-simplify
+    "RUF034", # useless-if-else
+    "W605",   # invalid-escape-sequence
+    "FURB",   # refurb
+]
+fixable = [
+    "I001",   # unsorted-imports
+    "F401",   # unused-import
+    "UP",     # pyupgrade fixes
+]
+ignore = [
+    "E402",   # module-import-not-at-top-of-file
+    "E731",   # lambda-assignment
+    "E501",   # line-too-long
+    "E111",   # indentation-with-invalid-multiple
+    "E114",   # indentation-with-invalid-multiple-comment
+    "E117",   # over-indented
+    "E741",   # ambiguous-variable-name
+    "E701",   # multiple-statements-on-one-line-colon
+]
 
 [tool.ruff.lint.extend-per-file-ignores]
-"__init__.py" = ["F401", "F402", "F403"]
-"utils.py" = ["F401"]
-
-[dependency-groups]
-dev = [
-    "lm_eval[api]", "lm_eval[dev]", "lm_eval[hf]","sentencepiece"
+"__init__.py" = [
+    "F401",   # unused-import
+    "F402",   # import-shadowed-by-loop-var
+    "F403",   # undefined-local-with-import-star
+    "F405",   # undefined-local-with-import-star-usage
 ]
+
+[tool.ruff.lint.isort]
+combine-as-imports = true
+known-first-party = ["lm_eval"]
+lines-after-imports = 2