EleutherAI
diff --git a/.github/workflows/build.yml
Lines changed: 61 additions & 0 deletions b/.github/workflows/build.yml
Lines changed: 61 additions & 0 deletions
diff --git a/.github/workflows/tests.yml
Lines changed: 0 additions & 34 deletions b/.github/workflows/tests.yml
Lines changed: 0 additions & 34 deletions
diff --git a/.gitignore
Lines changed: 0 additions & 1 deletion b/.gitignore
Lines changed: 0 additions & 1 deletion
diff --git a/.pre-commit-config.yaml
Lines changed: 1 addition & 1 deletion b/.pre-commit-config.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/CHANGELOG.md
Lines changed: 30 additions & 0 deletions b/CHANGELOG.md
Lines changed: 30 additions & 0 deletions
diff --git a/README.md
Lines changed: 2 additions & 0 deletions b/README.md
Lines changed: 2 additions & 0 deletions
diff --git a/__init__.py b/__init__.py
diff --git a/delphi/__init__.py
Lines changed: 1 addition & 0 deletions b/delphi/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/delphi/__main__.py
Lines changed: 15 additions & 3 deletions b/delphi/__main__.py
Lines changed: 15 additions & 3 deletions
diff --git a/delphi/clients/offline.py
Lines changed: 1 addition & 1 deletion b/delphi/clients/offline.py
Lines changed: 1 addition & 1 deletion
diff --git a/delphi/clients/openrouter.py
Lines changed: 3 additions & 2 deletions b/delphi/clients/openrouter.py
Lines changed: 3 additions & 2 deletions
diff --git a/delphi/config.py
Lines changed: 7 additions & 2 deletions b/delphi/config.py
Lines changed: 7 additions & 2 deletions
diff --git a/delphi/explainers/contrastive_explainer.py
Lines changed: 1 addition & 1 deletion b/delphi/explainers/contrastive_explainer.py
Lines changed: 1 addition & 1 deletion
diff --git a/delphi/explainers/explainer.py
Lines changed: 2 additions & 2 deletions b/delphi/explainers/explainer.py
Lines changed: 2 additions & 2 deletions
diff --git a/delphi/latents/cache.py
Lines changed: 2 additions & 1 deletion b/delphi/latents/cache.py
Lines changed: 2 additions & 1 deletion
diff --git a/delphi/latents/constructors.py
Lines changed: 2 additions & 2 deletions b/delphi/latents/constructors.py
Lines changed: 2 additions & 2 deletions
@@ -0,0 +1,61 @@
+name: build
+
+env:
+  HF_HUB_DOWNLOAD_TIMEOUT: 100
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e ".[dev,visualize]"
+      - name: Run tests
+        run: pytest
+      - name: build
+        run: pip wheel --no-deps -w dist .
+  release:
+    needs: build
+    permissions:
+      contents: write
+      id-token: write
+    if: github.event_name == 'push' && github.ref == 'refs/heads/main' && !contains(github.event.head_commit.message, 'chore(release):')
+    runs-on: ubuntu-latest
+    concurrency: release
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+      - name: Install dependencies
+        run: pip install build twine
+      - name: Semantic Release
+        id: release
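+        uses: python-semantic-release/python-semantic-release@v9.21.0  # NOTE(review): tag was lost to e-mail obfuscation in extraction; confirm the pinned version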
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+      - name: Build package
+        run: python -m build
+        if: steps.release.outputs.released == 'true'
+      - name: Publish package distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        if: steps.release.outputs.released == 'true'
+      - name: Publish package distributions to GitHub Releases
+        uses: python-semantic-release/publish-action@main
+        if: steps.release.outputs.released == 'true'
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
@@ -8,7 +8,6 @@ latents/*
 results/*
 extras/*
 temp/*
-tests/*
 saved*
 .nfs*
 
 
@@ -12,7 +12,7 @@ repos:
     hooks:
     -   id: black
 -   repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: 'v0.11.0'
+    rev: 'v0.11.5'
     hooks:
         - id: ruff
           args: [--fix, --exit-non-zero-on-fix]
@@ -0,0 +1,30 @@
+# CHANGELOG
+
+
+## v0.0.1 (2025-04-21)
+
+### Bug Fixes
+
+- Add missing libraries
+  ([`d6f7c72`](https://github.com/EleutherAI/delphi/commit/d6f7c72c0b9dd4fd12dc78aa75fd77d146b0199b))
+
+- Add missing libraries
+  ([`853d8ca`](https://github.com/EleutherAI/delphi/commit/853d8ca93256b8bda1395bab08199de45eb63926))
+
+- Scope issue with sae causing it to not be loaded properly
+  ([`94f40cc`](https://github.com/EleutherAI/delphi/commit/94f40cc8def426baa9be682e22c96d4c31a8b5ed))
+
+- Scope issue with sae causing it to not be loaded properly
+  ([`23caed7`](https://github.com/EleutherAI/delphi/commit/23caed746e0536da5a0739b6f0cdf12c678be467))
+
+### Documentation
+
+- Update README.md
+  ([`4385b0b`](https://github.com/EleutherAI/delphi/commit/4385b0b3a9ee99fdbf5713a3f990ab5721b12d1e))
+
+acessed -> accessed
+
+- Update README.md
+  ([`31b6896`](https://github.com/EleutherAI/delphi/commit/31b6896ae412903ab797da429f8c834a26a8dfed))
+
+acessed -> accessed
@@ -280,6 +280,8 @@ Run an end-to-end test:
 
 ```python -m delphi.tests.e2e```
 
+We use [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/) for releases.
+
 ## License
 
 Copyright 2024 the EleutherAI Institute
 
@@ -0,0 +1 @@
+__version__ = "0.0.1"
@@ -55,7 +55,12 @@ def load_artifacts(run_cfg: RunConfig):
         compile=True,
     )
 
-    return run_cfg.hookpoints, hookpoint_to_sparse_encode, model, transcode
+    return (
+        list(hookpoint_to_sparse_encode.keys()),
+        hookpoint_to_sparse_encode,
+        model,
+        transcode,
+    )
 
 
 def create_neighbours(
@@ -212,7 +217,7 @@ def scorer_postprocess(result, score_dir):
                 client,
                 n_examples_shown=run_cfg.num_examples_per_scorer_prompt,
                 verbose=run_cfg.verbose,
-                log_prob=False,
+                log_prob=run_cfg.log_probs,
             ),
             preprocess=scorer_preprocess,
             postprocess=partial(scorer_postprocess, score_dir=detection_scores_path),
@@ -222,7 +227,7 @@ def scorer_postprocess(result, score_dir):
                 client,
                 n_examples_shown=run_cfg.num_examples_per_scorer_prompt,
                 verbose=run_cfg.verbose,
-                log_prob=False,
+                log_prob=run_cfg.log_probs,
             ),
             preprocess=scorer_preprocess,
             postprocess=partial(scorer_postprocess, score_dir=fuzz_scores_path),
@@ -235,6 +240,13 @@ def scorer_postprocess(result, score_dir):
         scorer_pipe,
     )
 
+    if run_cfg.pipeline_num_proc > 1 and run_cfg.explainer_provider == "openrouter":
+        print(
+            "OpenRouter does not support multiprocessing,"
+            " setting pipeline_num_proc to 1"
+        )
+        run_cfg.pipeline_num_proc = 1
+
     await pipeline.run(run_cfg.pipeline_num_proc)
 
 
 
@@ -222,7 +222,7 @@ async def _process_batches(self):
                     if not future.done():
                         future.set_result(result)
             except Exception as e:
-                logger.error(f"Batch processing failed: {e}")
+                logger.error(f"Batch processing failed: {repr(e)}")
                 for future in batch_futures:
                     if not future.done():
                         future.set_exception(e)
 
@@ -28,7 +28,8 @@ def __init__(
         self.headers = {"Authorization": f"Bearer {api_key}"}
 
         self.url = base_url
-        self.client = httpx.AsyncClient()
+        timeout_config = httpx.Timeout(5.0)
+        self.client = httpx.AsyncClient(timeout=timeout_config)
 
     def postprocess(self, response):
         response_json = response.json()
@@ -66,7 +67,7 @@ async def generate(  # type: ignore
                 )
 
             except Exception as e:
-                logger.warning(f"Attempt {attempt + 1}: {str(e)}, retrying...")
+                logger.warning(f"Attempt {attempt + 1}: {repr(e)}, retrying...")
 
             await sleep(1)
 
 
@@ -66,7 +66,7 @@ class ConstructorConfig(Serializable):
 
 @dataclass
 class CacheConfig(Serializable):
-    dataset_repo: str = "EleutherAI/fineweb-edu-dedup-10b"
+    dataset_repo: str = "EleutherAI/SmolLM2-135M-10B"
     """Dataset repository to use for generating latent activations."""
 
     dataset_split: str = "train[:1%]"
@@ -142,10 +142,15 @@ class RunConfig(Serializable):
     filter_bos: bool = False
     """Whether to filter out BOS tokens from the cache."""
 
+    log_probs: bool = False
+    """Whether to attempt to gather log probabilities for each scorer prompt."""
+
     load_in_8bit: bool = False
     """Load the model in 8-bit mode."""
 
-    hf_token: str | None = None
+    # Use a dummy encoding function to prevent the token from being saved
+    # to disk in plain text
+    hf_token: str | None = field(default=None, encoding_fn=lambda _: None)
     """Huggingface API token for downloading models."""
 
     pipeline_num_proc: int = field(
 
@@ -66,7 +66,7 @@ async def __call__(self, record: LatentRecord) -> ExplainerResult:
         except Exception as e:
             from ..logger import logger
 
-            logger.error(f"Explanation parsing failed: {e}")
+            logger.error(f"Explanation parsing failed: {repr(e)}")
             return ExplainerResult(
                 record=record, explanation="Explanation could not be parsed."
             )
 
@@ -54,7 +54,7 @@ async def __call__(self, record: LatentRecord) -> ExplainerResult:
 
             return ExplainerResult(record=record, explanation=explanation)
         except Exception as e:
-            logger.error(f"Explanation parsing failed: {e}")
+            logger.error(f"Explanation parsing failed: {repr(e)}")
             return ExplainerResult(
                 record=record, explanation="Explanation could not be parsed."
             )
@@ -67,7 +67,7 @@ def parse_explanation(self, text: str) -> str:
             else:
                 return "Explanation could not be parsed."
         except Exception as e:
-            logger.error(f"Explanation parsing regex failed: {e}")
+            logger.error(f"Explanation parsing regex failed: {repr(e)}")
             raise
 
     def _highlight(self, str_toks: list[str], activations: list[float]) -> str:
 
@@ -474,7 +474,8 @@ def generate_statistics_cache(
     # torch always sorts for unique, so we might as well do it
     sorted_latents, latent_indices = latents.sort()
     sorted_activations = activations[latent_indices]
-    sorted_tokens = tokens[latent_locations[latent_indices]]
+    locations = latent_locations[latent_indices]
+    sorted_tokens = tokens[locations[:, 0], locations[:, 1]]
 
     unique_latents, counts = torch.unique_consecutive(
         sorted_latents, return_counts=True
 
@@ -351,7 +351,7 @@ def faiss_non_activation_windows(
             index = faiss.read_index(str(non_activating_cache_file), faiss.IO_FLAG_MMAP)
             print(f"Loaded non-activating index from {non_activating_cache_file}")
         except Exception as e:
-            print(f"Error loading cached embeddings: {e}")
+            print(f"Error loading cached embeddings: {repr(e)}")
 
     if index is None:
         print("Decoding non-activating tokens...")
@@ -378,7 +378,7 @@ def faiss_non_activation_windows(
             activating_embeddings = np.load(activating_cache_file)
             print(f"Loaded cached activating embeddings from {activating_cache_file}")
         except Exception as e:
-            print(f"Error loading cached embeddings: {e}")
+            print(f"Error loading cached embeddings: {repr(e)}")
     # Compute embeddings for activating examples if not cached
     if activating_embeddings is None:
         print("Computing activating embeddings...")
Original file line number	Diff line number	Diff line change
`@@ -66,7 +66,7 @@ async def __call__(self, record: LatentRecord) -> ExplainerResult:`
`66`	`66`	`except Exception as e:`
`67`	`67`	`from ..logger import logger`
`68`	`68`
`69`		`- logger.error(f"Explanation parsing failed: {e}")`
	`69`	`+ logger.error(f"Explanation parsing failed: {repr(e)}")`
`70`	`70`	`return ExplainerResult(`
`71`	`71`	`record=record, explanation="Explanation could not be parsed."`
`72`	`72`	`)`