Skip to content

Commit fa7f5fa

Browse files
willccbb and snimu authored
Support branching rollouts via trajectories; refactor state handling (#549)
* big chungus refactor for branching rollouts + cleaner state handling * tests passing * 3.11 fix; ruff * vllm logprob args * dict indexing for messages * remove generateinputs * optional truncation in trajectorystep for tokens * small tweaks * optional decorator rank for sorting order * minor tweak * change rank -> priority * add cleanup to is_completed * tool_env error handling, sandbox command timeout * handle updated context length msg * duplicate is_truncated field * add model/sampling to state * client/model/sampling in init_state * updated config * add kimi overlong prompt message * add kimi overlong prompt message * set_max_seq_len * Add numpy, sympy, and scipy to PythonEnv * pin prime-rl to will/trajectories branch * update prime-rl wiki-search config * ruff, ty * fix init_state tests * version, release notes * env version bumps * ty fixes * opt deps for ty CI * pin trajectories for configs * use verifiers commit for configs * ty for verifiers only * bump vllm version * process overlong prompt into trajectory * skip steps with None tokens --------- Co-authored-by: Sebastian <[email protected]>
1 parent 3f20f0f commit fa7f5fa

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+4365
-3428
lines changed

.github/workflows/style.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,12 @@ jobs:
2929
- name: Set up Python
3030
uses: actions/setup-python@v6
3131
with:
32-
python-version: '3.11'
32+
python-version: "3.11"
3333
- name: Install uv
3434
uses: astral-sh/setup-uv@v4
3535
with:
3636
version: "latest"
3737
- name: Install dependencies
38-
run: uv sync
38+
run: uv sync --extra rl
3939
- name: Run ty
40-
run: uv run ty check .
40+
run: uv run ty check verifiers

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ docs/build/
2929
*.pyc
3030

3131
# libraries
32-
prime-rl/
32+
prime-rl
3333

3434
# outputs
3535
wandb/

configs/prime-rl/wiki-search.toml

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
1-
inference_gpu_ids = [0]
2-
trainer_gpu_ids = [1]
1+
inference_gpu_ids = [0,1,2,3,4,5]
2+
trainer_gpu_ids = [6,7]
33

44
max_steps = 500
5+
max_async_level = 4
56

67
[model]
7-
name = "Qwen/Qwen3-4B-Instruct-2507"
8+
name = "Qwen/Qwen3-4B-Thinking-2507"
89

910
[wandb]
10-
project = "wiki-search"
11+
project = "wiki-search-debug"
1112
name = "wiki-search-4b"
1213

1314
[trainer.optim]
@@ -31,16 +32,17 @@ target_modules = [
3132
[orchestrator]
3233
batch_size = 512
3334
rollouts_per_example = 16
34-
seq_len = 4096
35+
seq_len = 16384
3536
mask_truncated_completions = false
3637
zero_truncated_completions = true
38+
oversampling_factor = 2.0
39+
3740

3841
[orchestrator.sampling]
39-
max_tokens = 512
42+
max_tokens = 4096
4043

4144
[orchestrator.buffer]
42-
type = "online-difficulty"
43-
oversampling_factor = 2.0
45+
online_difficulty_filtering = true
4446

4547
[[orchestrator.env]]
4648
id = "primeintellect/wiki-search"

configs/vf-rl/reasoning-gym.toml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,11 @@ num_eval_examples = 2000
1010
seed = 1
1111

1212
[inference]
13-
gpus = 4
14-
tensor_parallel_size = 2
15-
data_parallel_size = 2
13+
gpus = 6
1614
enforce_eager = true
1715

1816
[trainer]
19-
gpus = 4
17+
gpus = 2
2018
batch_size = 512
2119
micro_batch_size = 2
2220
max_seq_len = 4096

configs/vf-rl/wiki-search.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
model = "Qwen/Qwen3-4B-Instruct-2507"
22

33
[env]
4-
id = "primeintellect/wiki-search"
4+
id = "wiki-search"
55

66
[env.args]
77
max_turns = 10
@@ -20,7 +20,7 @@ gpus = 1
2020
run_name = "wiki-search"
2121
micro_batch_size = 4
2222
rollouts_per_example = 16
23-
batch_size = 1024
23+
batch_size = 512
2424
max_steps = 500
2525
max_tokens = 512
2626
max_seq_len = 4096

configs/vf-rl/wordle.toml

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,18 @@
11
model = "Qwen/Qwen3-4B-Instruct-2507"
22

33
[env]
4-
id = "will/wordle"
4+
id = "wordle"
55

66
[inference]
77
gpus = 1
88

9-
[inference.args]
10-
enforce_eager = true
11-
129
[trainer]
1310
gpus = 1
1411

1512
[trainer.args]
1613
lora_target_modules = "all-linear"
1714
run_name = "wordle"
18-
micro_batch_size = 8
15+
micro_batch_size = 4
1916
rollouts_per_example = 16
2017
batch_size = 512
2118
max_steps = 500

environments/math_group/pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
[project]
22
name = "math-group"
3-
version = "0.1.0"
3+
version = "0.1.1"
44
dependencies = [
5-
"verifiers>=0.1.4",
5+
"verifiers>=0.1.8",
66
"math-verify>=0.8.0",
77
]
88

environments/math_python/pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
name = "math-python"
33
description = "Solve math problems using Python in a sandbox environment"
44
tags = ["tool-use", "math", "sandbox", "train", "prime-sandboxes", "python", "coding"]
5-
version = "0.1.7"
5+
version = "0.1.8"
66
requires-python = ">=3.11"
77
dependencies = [
8-
"verifiers>=0.1.5.post0",
8+
"verifiers>=0.1.8",
99
"math-verify>=0.8.0",
1010
]
1111

environments/sentence_repeater/sentence_repeater.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import random
22
from copy import deepcopy
33
from difflib import SequenceMatcher
4-
from typing import List, Tuple
4+
from typing import List
55

66
from datasets import Dataset, load_dataset
77

@@ -75,19 +75,19 @@ class SentenceRepeaterEnv(vf.MultiTurnEnv):
7575
def __init__(self, **kwargs):
7676
super().__init__(**kwargs)
7777

78-
async def is_completed(self, messages: Messages, state: State, **kwargs) -> bool:
79-
max_turns_reached = await super().is_completed(messages, state, **kwargs)
80-
return state["turn"] >= len(state["info"]["questions"]) or max_turns_reached
78+
@vf.stop
79+
async def all_questions_answered(self, state: State) -> bool:
80+
return len(state["trajectory"]) >= len(state["info"]["questions"])
8181

8282
async def env_response(
8383
self, messages: Messages, state: State, **kwargs
84-
) -> Tuple[Messages, State]:
84+
) -> Messages:
8585
return [
8686
{
8787
"role": "user",
8888
"content": state["info"]["questions"][state["turn"]],
8989
}
90-
], state
90+
]
9191

9292

9393
def load_environment(**kwargs) -> vf.Environment:

environments/wiki_search/pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@ name = "wiki-search"
33
description = "Agentic RAG over Wikipedia pages for trivia Q&A"
44
tags = ["wikipedia", "multi-turn", "agentic-search", "rag", "train", "eval", "llm-judge"]
55
requires-python = ">=3.11"
6-
version = "0.1.20"
6+
version = "0.1.21"
77
dependencies = [
8-
"verifiers>=0.1.7",
8+
"verifiers>=0.1.8",
99
"chromadb",
1010
"datasets",
1111
"openai",

0 commit comments

Comments (0)