Skip to content

Commit 32428a2

Browse files
committed
Avoid redundant downloading and decompressing across processes
1 parent 036f593 commit 32428a2

File tree

2 files changed

+19
-6
lines changed

2 files changed

+19
-6
lines changed

hanlp/utils/io_util.py

+18-5
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,13 @@ def tempdir_human():
9090
return tempdir(now_filename())
9191

9292

93+
def temp_lock(path):
94+
from filelock import FileLock
95+
import hashlib
96+
lock = FileLock(f"{tempdir()}/.{hashlib.md5(path.encode('utf8')).hexdigest()}.lock")
97+
return lock
98+
99+
93100
def hanlp_home_default():
94101
"""Default data directory depending on the platform and environment variables"""
95102
if windows():
@@ -292,6 +299,7 @@ def get_resource(path: str, save_dir=hanlp_home(), extract=True, prefix=HANLP_UR
292299
The real path to the resource.
293300
294301
"""
302+
_path = path
295303
path = hanlp.pretrained.ALL.get(path, path)
296304
anchor: str = None
297305
compressed = None
@@ -333,12 +341,17 @@ def get_resource(path: str, save_dir=hanlp_home(), extract=True, prefix=HANLP_UR
333341
# realpath is the path where the resource resides after extraction
334342
if compressed:
335343
realpath += compressed
336-
if not os.path.isfile(realpath):
337-
path = download(url=path, save_path=realpath, verbose=verbose)
338-
else:
339-
path = realpath
344+
with temp_lock(path):
345+
if not os.path.isfile(realpath):
346+
path = download(url=path, save_path=realpath, verbose=verbose)
347+
else:
348+
path = realpath
340349
if extract and compressed:
341-
path = uncompress(path, verbose=verbose)
350+
with temp_lock(path):
351+
if os.path.isfile(path):
352+
path = uncompress(path, verbose=verbose)
353+
else: # another process must have already decompressed it and deleted it
354+
return get_resource(_path, save_dir, extract, prefix, append_location, verbose)
342355
if anchor:
343356
path = path_join(path, anchor)
344357

hanlp/version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# Author: hankcs
33
# Date: 2019-12-28 19:26
44

5-
__version__ = '2.1.0-beta.61'
5+
__version__ = '2.1.0-beta.62'
66
"""HanLP version"""
77

88

0 commit comments

Comments
 (0)