Skip to content

Commit f3e86f2

Browse files
caporalCoder authored and copybara-github committed
Add safe_pickle module for secure loading of pickled data.
This module provides functions to load pickled data with enhanced security. It restricts unpickling to a small set of allowed builtin types. This mitigates the risks of unpickling untrusted data while keeping pickle's loading performance for AlphaFold 3's generated artifacts (pickle is 5x faster than JSON).

PiperOrigin-RevId: 827973163
Change-Id: I405ac3ebc0035df8bf6bf9c66b48e5894278b98f
1 parent c0ab1c4 commit f3e86f2

File tree

4 files changed

+72
-6
lines changed

4 files changed

+72
-6
lines changed
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Copyright 2025 DeepMind Technologies Limited
2+
#
3+
# AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4+
# this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5+
#
6+
# To request access to the AlphaFold 3 model parameters, follow the process set
7+
# out at https://github.com/google-deepmind/alphafold3. You may only use these
8+
# if received directly from Google. Use is subject to terms of use available at
9+
# https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10+
11+
"""Restricted, safer wrapper around pickle for loading trusted data.
12+
13+
This prevents arbitrary object instantiation during unpickling by only
14+
allowing a small allowlist of built-in, innocuous types.
15+
16+
Intended for loading pickled constant data that ships with the repository.
17+
If the pickle is tampered with, an UnpicklingError will be raised instead
18+
of silently executing attacker-controlled bytecode.
19+
"""
20+
21+
from collections.abc import Collection
22+
import pickle
23+
from typing import Any, BinaryIO, Final
24+
25+
26+
# Builtin types expected from AlphaFold 3 generated data. Entries are matched
# by name against globals requested from the `builtins` module during
# unpickling (see `_RestrictedUnpickler.find_class`).
_ALLOWED_BUILTINS: Final[Collection[str]] = frozenset({
    "NoneType",
    "bool",
    "bytes",
    "dict",
    "float",
    "frozenset",
    "int",
    "list",
    "set",
    "str",
    "tuple",
})


class _RestrictedUnpickler(pickle.Unpickler):
  """A pickle `Unpickler` that forbids loading arbitrary global classes.

  Only globals named in `_ALLOWED_BUILTINS` and living in the `builtins`
  module can be resolved; every other global reference in the pickle stream
  is rejected with `pickle.UnpicklingError`.
  """

  def find_class(self, module: str, name: str) -> Any:
    """Returns the class for `module` and `name` if allowed.

    Args:
      module: Module name recorded in the pickle stream.
      name: Global name recorded in the pickle stream.

    Returns:
      The allowlisted builtin type.

    Raises:
      pickle.UnpicklingError: If `module.name` is not on the allowlist.
    """
    if module != "builtins" or name not in _ALLOWED_BUILTINS:
      raise pickle.UnpicklingError(
          f"Can't unpickle disallowed '{module}.{name}'"
      )
    if name == "NoneType":
      # `builtins` has no `NoneType` attribute, so delegating to the default
      # lookup would raise AttributeError for an allowlisted name. Resolve it
      # directly instead.
      return type(None)
    return super().find_class(module, name)


def load(file_obj: BinaryIO) -> Any:
  """Safely loads pickle data from an already-opened binary file handle.

  Only built-in container/primitive types listed in `_ALLOWED_BUILTINS` are
  permitted. Any attempt to load other types raises `pickle.UnpicklingError`.
  The caller keeps ownership of `file_obj`; it is not closed here.

  Args:
    file_obj: A binary file-like object open for reading.

  Returns:
    The unpickled data.

  Raises:
    pickle.UnpicklingError: If the stream references a global that is not on
      the allowlist.
  """
  unpickler = _RestrictedUnpickler(file_obj)
  return unpickler.load()

src/alphafold3/constants/chemical_component_sets.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,17 @@
1010

1111
"""Sets of chemical components."""
1212

13-
import pickle
1413
from typing import Final
1514

1615
from alphafold3.common import resources
17-
16+
from alphafold3.common import safe_pickle
1817

1918
_CCD_SETS_CCD_PICKLE_FILE = resources.filename(
2019
resources.ROOT / 'constants/converters/chemical_component_sets.pickle'
2120
)
2221

23-
_CCD_SET = pickle.load(open(_CCD_SETS_CCD_PICKLE_FILE, 'rb'))
22+
with open(_CCD_SETS_CCD_PICKLE_FILE, 'rb') as f:
23+
_CCD_SET = safe_pickle.load(f)
2424

2525
# Glycan (or 'Saccharide') ligands.
2626
# _chem_comp.type containing 'saccharide' and 'linking' (when lower-case).

src/alphafold3/constants/chemical_components.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414
import dataclasses
1515
import functools
1616
import os
17-
import pickle
1817

1918
from alphafold3.common import resources
19+
from alphafold3.common import safe_pickle
2020
from alphafold3.cpp import cif_dict
2121

2222

@@ -31,7 +31,7 @@ def _load_ccd_pickle_cached(
3131
) -> dict[str, Mapping[str, Sequence[str]]]:
3232
"""Loads the CCD pickle file and caches it so that it is only loaded once."""
3333
with open(path, 'rb') as f:
34-
return pickle.loads(f.read())
34+
return safe_pickle.load(f)
3535

3636

3737
class Ccd(Mapping[str, Mapping[str, Sequence[str]]]):

src/alphafold3/constants/converters/chemical_component_sets_gen.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import sys
1818

1919
from alphafold3.common import resources
20+
from alphafold3.common import safe_pickle
2021
import tqdm
2122

2223

@@ -65,7 +66,7 @@ def main(argv: Sequence[str]) -> None:
6566

6667
print(f'Loading {_CCD_PICKLE_FILE}', flush=True)
6768
with open(_CCD_PICKLE_FILE, 'rb') as f:
68-
ccd: Mapping[str, Mapping[str, Sequence[str]]] = pickle.load(f)
69+
ccd: Mapping[str, Mapping[str, Sequence[str]]] = safe_pickle.load(f)
6970
output_path = pathlib.Path(argv[1])
7071
output_path.parent.mkdir(exist_ok=True)
7172
print('Finding ions and glycans', flush=True)

0 commit comments

Comments
 (0)