Skip to content

Commit d75f7d0

Browse files
author
SrGonao
committed
Merge branch 'main' of https://github.com/EleutherAI/delphi into frequency_estimation
2 parents 661ad15 + 400066d commit d75f7d0

25 files changed

+422
-312
lines changed

.github/workflows/build.yml

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
name: build
2+
3+
env:
4+
HF_HUB_DOWNLOAD_TIMEOUT: 100
5+
6+
on:
7+
push:
8+
branches:
9+
- main
10+
pull_request:
11+
branches:
12+
- main
13+
jobs:
14+
build:
15+
runs-on: ubuntu-latest
16+
steps:
17+
- uses: actions/checkout@v4
18+
- uses: actions/setup-python@v5
19+
with:
20+
python-version: "3.10"
21+
- name: Install dependencies
22+
run: |
23+
python -m pip install --upgrade pip
24+
pip install -e ".[dev,visualize]"
25+
- name: Run tests
26+
run: pytest
27+
- name: build
28+
run: pip wheel --no-deps -w dist .
29+
release:
30+
needs: build
31+
permissions:
32+
contents: write
33+
id-token: write
34+
if: github.event_name == 'push' && github.ref == 'refs/heads/main' && !contains(github.event.head_commit.message, 'chore(release):')
35+
runs-on: ubuntu-latest
36+
concurrency: release
37+
steps:
38+
- uses: actions/checkout@v4
39+
with:
40+
fetch-depth: 0
41+
- uses: actions/setup-python@v5
42+
with:
43+
python-version: "3.10"
44+
- name: Install dependencies
45+
run: pip install build twine
46+
- name: Semantic Release
47+
id: release
48+
uses: python-semantic-release/python-semantic-release@<version>
49+
with:
50+
github_token: ${{ secrets.GITHUB_TOKEN }}
51+
- name: Build package
52+
run: python -m build
53+
if: steps.release.outputs.released == 'true'
54+
- name: Publish package distributions to PyPI
55+
uses: pypa/gh-action-pypi-publish@release/v1
56+
if: steps.release.outputs.released == 'true'
57+
- name: Publish package distributions to GitHub Releases
58+
uses: python-semantic-release/publish-action@main
59+
if: steps.release.outputs.released == 'true'
60+
with:
61+
github_token: ${{ secrets.GITHUB_TOKEN }}

.github/workflows/tests.yml

-34
This file was deleted.

.gitignore

-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ latents/*
88
results/*
99
extras/*
1010
temp/*
11-
tests/*
1211
saved*
1312
.nfs*
1413

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ repos:
1212
hooks:
1313
- id: black
1414
- repo: https://github.com/astral-sh/ruff-pre-commit
15-
rev: 'v0.11.0'
15+
rev: 'v0.11.5'
1616
hooks:
1717
- id: ruff
1818
args: [--fix, --exit-non-zero-on-fix]

CHANGELOG.md

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# CHANGELOG
2+
3+
4+
## v0.0.1 (2025-04-21)
5+
6+
### Bug Fixes
7+
8+
- Add missing libraries
9+
([`d6f7c72`](https://github.com/EleutherAI/delphi/commit/d6f7c72c0b9dd4fd12dc78aa75fd77d146b0199b))
10+
11+
- Add missing libraries
12+
([`853d8ca`](https://github.com/EleutherAI/delphi/commit/853d8ca93256b8bda1395bab08199de45eb63926))
13+
14+
- Scope issue with sae causing it to not be loaded properly
15+
([`94f40cc`](https://github.com/EleutherAI/delphi/commit/94f40cc8def426baa9be682e22c96d4c31a8b5ed))
16+
17+
- Scope issue with sae causing it to not be loaded properly
18+
([`23caed7`](https://github.com/EleutherAI/delphi/commit/23caed746e0536da5a0739b6f0cdf12c678be467))
19+
20+
### Documentation
21+
22+
- Update README.md
23+
([`4385b0b`](https://github.com/EleutherAI/delphi/commit/4385b0b3a9ee99fdbf5713a3f990ab5721b12d1e))
24+
25+
acessed -> accessed
26+
27+
- Update README.md
28+
([`31b6896`](https://github.com/EleutherAI/delphi/commit/31b6896ae412903ab797da429f8c834a26a8dfed))
29+
30+
acessed -> accessed

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,8 @@ Run an end-to-end test:
280280

281281
```python -m delphi.tests.e2e```
282282

283+
We use [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/) for releases.
284+
283285
## License
284286

285287
Copyright 2024 the EleutherAI Institute

__init__.py

Whitespace-only changes.

delphi/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__version__ = "0.0.1"

delphi/__main__.py

+15-3
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,12 @@ def load_artifacts(run_cfg: RunConfig):
5555
compile=True,
5656
)
5757

58-
return run_cfg.hookpoints, hookpoint_to_sparse_encode, model, transcode
58+
return (
59+
list(hookpoint_to_sparse_encode.keys()),
60+
hookpoint_to_sparse_encode,
61+
model,
62+
transcode,
63+
)
5964

6065

6166
def create_neighbours(
@@ -212,7 +217,7 @@ def scorer_postprocess(result, score_dir):
212217
client,
213218
n_examples_shown=run_cfg.num_examples_per_scorer_prompt,
214219
verbose=run_cfg.verbose,
215-
log_prob=False,
220+
log_prob=run_cfg.log_probs,
216221
),
217222
preprocess=scorer_preprocess,
218223
postprocess=partial(scorer_postprocess, score_dir=detection_scores_path),
@@ -222,7 +227,7 @@ def scorer_postprocess(result, score_dir):
222227
client,
223228
n_examples_shown=run_cfg.num_examples_per_scorer_prompt,
224229
verbose=run_cfg.verbose,
225-
log_prob=False,
230+
log_prob=run_cfg.log_probs,
226231
),
227232
preprocess=scorer_preprocess,
228233
postprocess=partial(scorer_postprocess, score_dir=fuzz_scores_path),
@@ -235,6 +240,13 @@ def scorer_postprocess(result, score_dir):
235240
scorer_pipe,
236241
)
237242

243+
if run_cfg.pipeline_num_proc > 1 and run_cfg.explainer_provider == "openrouter":
244+
print(
245+
"OpenRouter does not support multiprocessing,"
246+
" setting pipeline_num_proc to 1"
247+
)
248+
run_cfg.pipeline_num_proc = 1
249+
238250
await pipeline.run(run_cfg.pipeline_num_proc)
239251

240252

delphi/clients/offline.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ async def _process_batches(self):
222222
if not future.done():
223223
future.set_result(result)
224224
except Exception as e:
225-
logger.error(f"Batch processing failed: {e}")
225+
logger.error(f"Batch processing failed: {repr(e)}")
226226
for future in batch_futures:
227227
if not future.done():
228228
future.set_exception(e)

delphi/clients/openrouter.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ def __init__(
2828
self.headers = {"Authorization": f"Bearer {api_key}"}
2929

3030
self.url = base_url
31-
self.client = httpx.AsyncClient()
31+
timeout_config = httpx.Timeout(5.0)
32+
self.client = httpx.AsyncClient(timeout=timeout_config)
3233

3334
def postprocess(self, response):
3435
response_json = response.json()
@@ -66,7 +67,7 @@ async def generate( # type: ignore
6667
)
6768

6869
except Exception as e:
69-
logger.warning(f"Attempt {attempt + 1}: {str(e)}, retrying...")
70+
logger.warning(f"Attempt {attempt + 1}: {repr(e)}, retrying...")
7071

7172
await sleep(1)
7273

delphi/config.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ class ConstructorConfig(Serializable):
6666

6767
@dataclass
6868
class CacheConfig(Serializable):
69-
dataset_repo: str = "EleutherAI/fineweb-edu-dedup-10b"
69+
dataset_repo: str = "EleutherAI/SmolLM2-135M-10B"
7070
"""Dataset repository to use for generating latent activations."""
7171

7272
dataset_split: str = "train[:1%]"
@@ -142,10 +142,15 @@ class RunConfig(Serializable):
142142
filter_bos: bool = False
143143
"""Whether to filter out BOS tokens from the cache."""
144144

145+
log_probs: bool = False
146+
"""Whether to attempt to gather log probabilities for each scorer prompt."""
147+
145148
load_in_8bit: bool = False
146149
"""Load the model in 8-bit mode."""
147150

148-
hf_token: str | None = None
151+
# Use a dummy encoding function to prevent the token from being saved
152+
# to disk in plain text
153+
hf_token: str | None = field(default=None, encoding_fn=lambda _: None)
149154
"""Huggingface API token for downloading models."""
150155

151156
pipeline_num_proc: int = field(

delphi/explainers/contrastive_explainer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ async def __call__(self, record: LatentRecord) -> ExplainerResult:
6666
except Exception as e:
6767
from ..logger import logger
6868

69-
logger.error(f"Explanation parsing failed: {e}")
69+
logger.error(f"Explanation parsing failed: {repr(e)}")
7070
return ExplainerResult(
7171
record=record, explanation="Explanation could not be parsed."
7272
)

delphi/explainers/explainer.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ async def __call__(self, record: LatentRecord) -> ExplainerResult:
5454

5555
return ExplainerResult(record=record, explanation=explanation)
5656
except Exception as e:
57-
logger.error(f"Explanation parsing failed: {e}")
57+
logger.error(f"Explanation parsing failed: {repr(e)}")
5858
return ExplainerResult(
5959
record=record, explanation="Explanation could not be parsed."
6060
)
@@ -67,7 +67,7 @@ def parse_explanation(self, text: str) -> str:
6767
else:
6868
return "Explanation could not be parsed."
6969
except Exception as e:
70-
logger.error(f"Explanation parsing regex failed: {e}")
70+
logger.error(f"Explanation parsing regex failed: {repr(e)}")
7171
raise
7272

7373
def _highlight(self, str_toks: list[str], activations: list[float]) -> str:

delphi/latents/cache.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,8 @@ def generate_statistics_cache(
474474
# torch always sorts for unique, so we might as well do it
475475
sorted_latents, latent_indices = latents.sort()
476476
sorted_activations = activations[latent_indices]
477-
sorted_tokens = tokens[latent_locations[latent_indices]]
477+
locations = latent_locations[latent_indices]
478+
sorted_tokens = tokens[locations[:, 0], locations[:, 1]]
478479

479480
unique_latents, counts = torch.unique_consecutive(
480481
sorted_latents, return_counts=True

delphi/latents/constructors.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ def faiss_non_activation_windows(
351351
index = faiss.read_index(str(non_activating_cache_file), faiss.IO_FLAG_MMAP)
352352
print(f"Loaded non-activating index from {non_activating_cache_file}")
353353
except Exception as e:
354-
print(f"Error loading cached embeddings: {e}")
354+
print(f"Error loading cached embeddings: {repr(e)}")
355355

356356
if index is None:
357357
print("Decoding non-activating tokens...")
@@ -378,7 +378,7 @@ def faiss_non_activation_windows(
378378
activating_embeddings = np.load(activating_cache_file)
379379
print(f"Loaded cached activating embeddings from {activating_cache_file}")
380380
except Exception as e:
381-
print(f"Error loading cached embeddings: {e}")
381+
print(f"Error loading cached embeddings: {repr(e)}")
382382
# Compute embeddings for activating examples if not cached
383383
if activating_embeddings is None:
384384
print("Computing activating embeddings...")

0 commit comments

Comments
 (0)