Skip to content
This repository was archived by the owner on Mar 31, 2025. It is now read-only.

Commit 0355d91

Browse files
committed
Dramatically speed up YAML parsing for setenv.sh
If available, this change makes YAML parsing use the C bindings from LibYAML. This is strongly recommended by the PyYAML team, since the LibYAML bindings are "much faster than the pure Python version." This change speeds up the `./setenv.sh` script considerably: on my system, the runtime of `mk_shell_variables.py` drops from 450ms to 70ms, and that script accounts for the overwhelming majority of the execution time of `setenv.sh`. On Ubuntu systems, LibYAML can be installed from apt with `apt install libyaml-dev`; Homebrew users can run `brew install libyaml`. After doing so, `pyyaml` should be re-installed. This change preserves the safe loading of files, which should *always* be done when loading any untrusted YAML (and is good practice even if the source is trusted). The change also doesn't *require* the C bindings: when they are not available, loading falls back to the pure-Python loader.
1 parent 92103f8 commit 0355d91

File tree

4 files changed

+28
-6
lines changed

4 files changed

+28
-6
lines changed

clrenv/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import yaml
22

33
from .lazy_env import get_env, LazyEnv
4+
from .load import safe_load
45
from .path import find_environment_path
56

67

78
# Opens the file at find_environment_path(), parses its full text as YAML and
# returns the value stored under the top-level 'mapping' key.
# In this diff hunk the `-` line is the previous call (yaml.safe_load) and the
# `+` line is its replacement (safe_load imported from .load).
def mapping():
89
with open(find_environment_path()) as f:
9-
return yaml.safe_load(f.read())['mapping']
10+
return safe_load(f.read())['mapping']
1011

1112
env = LazyEnv()
1213
get_env = get_env

clrenv/lazy_env.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
from botocore.exceptions import EndpointConnectionError
1313

1414
from munch import Munch, munchify
15-
import yaml
1615

16+
from .load import safe_load
1717
from .path import find_environment_path, find_user_environment_paths
1818
from functools import reduce
1919

@@ -59,7 +59,7 @@ def get_env(*mode):
5959
if not mode in _env:
6060
y = (_load_current_environment(),)
6161
upaths = find_user_environment_paths()
62-
y = tuple(yaml.safe_load(open(p).read()) for p in upaths if os.path.isfile(p)) + y
62+
y = tuple(safe_load(open(p).read()) for p in upaths if os.path.isfile(p)) + y
6363

6464
assignments = tuple(m for m in mode if m.find('=') != -1)
6565
mode = tuple(m for m in mode if m.find('=') == -1)
@@ -78,7 +78,7 @@ def get_env(*mode):
7878
e = _merged(*dicts)
7979

8080
for k, v in overrides:
81-
for pytype in (yaml.safe_load, eval, int, float, str):
81+
for pytype in (safe_load, eval, int, float, str):
8282
try:
8383
pyval = pytype(v)
8484
break
@@ -146,7 +146,7 @@ def _setattr_rec(d, k, v):
146146

147147
# Opens the file at find_environment_path(), parses its full text as YAML and
# returns the parsed result unchanged.
# In this diff hunk the `-` line is the previous call (yaml.safe_load) and the
# `+` line is its replacement (safe_load imported from .load).
def _load_current_environment():
148148
with open(find_environment_path()) as f:
149-
environment = yaml.safe_load(f.read())
149+
environment = safe_load(f.read())
150150
return environment
151151

152152
_kf_dict_cache = {}

clrenv/load.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
"""
2+
This module exists to provide a faster, yet equally secure version of `safe_load`
3+
"""
4+
import yaml
5+
6+
try:
7+
# If available, use the C bindings for far, far faster loading
8+
# See: https://pyyaml.org/wiki/PyYAMLDocumentation
9+
from yaml import CSafeLoader as SafeLoader
10+
except ImportError:
11+
# If the C bindings aren't available, fall back to the "much slower" Python bindings
12+
from yaml import SafeLoader
13+
14+
15+
def safe_load(str_content):
    """Parse YAML safely, using the LibYAML C bindings when they are present.

    By default, `yaml.safe_load()` uses the (slower) Python bindings. This
    function is a drop-in replacement that hands this module's `SafeLoader`
    to `yaml.load` instead; that name refers to `CSafeLoader` when it could
    be imported and to the pure-Python `SafeLoader` otherwise.

    :param str_content: YAML content to parse; forwarded unchanged to
        `yaml.load`.
    :return: whatever `yaml.load` produces for that content.
    """
    loader_cls = SafeLoader
    parsed = yaml.load(str_content, Loader=loader_cls)
    return parsed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88

99
setup(name = "clrenv",
10-
version = "0.1.6",
10+
version = "0.1.7",
1111
description = "A tool to give easy access to environment yaml file to python.",
1212
author = "Color Genomics",
1313
author_email = "[email protected]",

0 commit comments

Comments
 (0)