Skip to content

Commit 5c2b70c

Browse files
authored
Add exclusive write (#651)
1 parent 290f572 commit 5c2b70c

File tree

3 files changed

+66
-19
lines changed

3 files changed

+66
-19
lines changed

gcsfs/core.py

+51-19
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,8 @@ def _location():
115115
-------
116116
valid http location
117117
"""
118-
_emulator_location = os.getenv("STORAGE_EMULATOR_HOST", None)
119-
if _emulator_location:
118+
_emulator_location = os.getenv("STORAGE_EMULATOR_HOST", "")
119+
if _emulator_location not in {"default", "", None}:
120120
if not any(
121121
_emulator_location.startswith(scheme) for scheme in ("http://", "https://")
122122
):
@@ -222,6 +222,10 @@ class GCSFileSystem(asyn.AsyncFileSystem):
222222
In the default case the cache is never expired. This may be controlled via the ``cache_timeout``
223223
GCSFileSystem parameter or via explicit calls to ``GCSFileSystem.invalidate_cache``.
224224
225+
NOTE on "exclusive" mode: mode=="create" (in pipe and put) and open(mode="xb") are supported on an
226+
experimental basis. The test harness does not currently support this, so use at your
227+
own risk.
228+
225229
Parameters
226230
----------
227231
project : string
@@ -1332,13 +1336,14 @@ async def _pipe_file(
13321336
content_type="application/octet-stream",
13331337
fixed_key_metadata=None,
13341338
chunksize=50 * 2**20,
1339+
mode="overwrite",
13351340
):
13361341
# enforce blocksize should be a multiple of 2**18
13371342
consistency = consistency or self.consistency
13381343
bucket, key, generation = self.split_path(path)
13391344
size = len(data)
13401345
out = None
1341-
if size < 5 * 2**20:
1346+
if size < chunksize:
13421347
location = await simple_upload(
13431348
self,
13441349
bucket,
@@ -1348,6 +1353,7 @@ async def _pipe_file(
13481353
consistency,
13491354
content_type,
13501355
fixed_key_metadata=fixed_key_metadata,
1356+
mode=mode,
13511357
)
13521358
else:
13531359
location = await initiate_upload(
@@ -1357,12 +1363,20 @@ async def _pipe_file(
13571363
content_type,
13581364
metadata,
13591365
fixed_key_metadata=fixed_key_metadata,
1366+
mode=mode,
13601367
)
1361-
for offset in range(0, len(data), chunksize):
1362-
bit = data[offset : offset + chunksize]
1363-
out = await upload_chunk(
1364-
self, location, bit, offset, size, content_type
1368+
try:
1369+
for offset in range(0, len(data), chunksize):
1370+
bit = data[offset : offset + chunksize]
1371+
out = await upload_chunk(
1372+
self, location, bit, offset, size, content_type
1373+
)
1374+
except Exception:
1375+
await self._call(
1376+
"DELETE",
1377+
location.replace("&ifGenerationMatch=0", ""),
13651378
)
1379+
raise
13661380

13671381
checker = get_consistency_checker(consistency)
13681382
checker.update(data)
@@ -1381,6 +1395,7 @@ async def _put_file(
13811395
chunksize=50 * 2**20,
13821396
callback=None,
13831397
fixed_key_metadata=None,
1398+
mode="overwrite",
13841399
**kwargs,
13851400
):
13861401
# enforce blocksize should be a multiple of 2**18
@@ -1407,6 +1422,7 @@ async def _put_file(
14071422
metadatain=metadata,
14081423
content_type=content_type,
14091424
fixed_key_metadata=fixed_key_metadata,
1425+
mode=mode,
14101426
)
14111427
callback.absolute_update(size)
14121428

@@ -1418,18 +1434,26 @@ async def _put_file(
14181434
content_type,
14191435
metadata=metadata,
14201436
fixed_key_metadata=fixed_key_metadata,
1437+
mode=mode,
14211438
)
14221439
offset = 0
1423-
while True:
1424-
bit = f0.read(chunksize)
1425-
if not bit:
1426-
break
1427-
out = await upload_chunk(
1428-
self, location, bit, offset, size, content_type
1440+
try:
1441+
while True:
1442+
bit = f0.read(chunksize)
1443+
if not bit:
1444+
break
1445+
out = await upload_chunk(
1446+
self, location, bit, offset, size, content_type
1447+
)
1448+
offset += len(bit)
1449+
callback.absolute_update(offset)
1450+
checker.update(bit)
1451+
except Exception:
1452+
await self._call(
1453+
"DELETE",
1454+
location.replace("&ifGenerationMatch=0", ""),
14291455
)
1430-
offset += len(bit)
1431-
callback.absolute_update(offset)
1432-
checker.update(bit)
1456+
raise
14331457

14341458
checker.validate_json_response(out)
14351459

@@ -1780,7 +1804,7 @@ def __init__(
17801804
self.fixed_key_metadata = _convert_fixed_key_metadata(det, from_google=True)
17811805
self.fixed_key_metadata.update(fixed_key_metadata or {})
17821806
self.timeout = timeout
1783-
if mode == "wb":
1807+
if mode in {"wb", "xb"}:
17841808
if self.blocksize < GCS_MIN_BLOCK_SIZE:
17851809
warnings.warn("Setting block size to minimum value, 2**18")
17861810
self.blocksize = GCS_MIN_BLOCK_SIZE
@@ -1886,6 +1910,7 @@ def _initiate_upload(self):
18861910
self.content_type,
18871911
self.metadata,
18881912
self.fixed_key_metadata,
1913+
mode="create" if "x" in self.mode else "overwrite",
18891914
timeout=self.timeout,
18901915
)
18911916

@@ -1898,7 +1923,7 @@ def discard(self):
18981923
return
18991924
self.gcsfs.call(
19001925
"DELETE",
1901-
self.location,
1926+
self.location.replace("&ifGenerationMatch=0", ""),
19021927
)
19031928
self.location = None
19041929
self.closed = True
@@ -1918,6 +1943,7 @@ def _simple_upload(self):
19181943
self.consistency,
19191944
self.content_type,
19201945
self.fixed_key_metadata,
1946+
mode="create" if "x" in self.mode else "overwrite",
19211947
timeout=self.timeout,
19221948
)
19231949

@@ -1989,17 +2015,20 @@ async def initiate_upload(
19892015
content_type="application/octet-stream",
19902016
metadata=None,
19912017
fixed_key_metadata=None,
2018+
mode="overwrite",
19922019
):
19932020
j = {"name": key}
19942021
if metadata:
19952022
j["metadata"] = metadata
2023+
kw = {"ifGenerationMatch": "0"} if mode == "create" else {}
19962024
j.update(_convert_fixed_key_metadata(fixed_key_metadata))
19972025
headers, _ = await fs._call(
19982026
method="POST",
1999-
path=f"{fs._location}/upload/storage/v1/b/{quote(bucket)}/o",
2027+
path=f"{fs._location}/upload/storage/v1/b/{quote(bucket)}/o?name={quote(key)}",
20002028
uploadType="resumable",
20012029
json=j,
20022030
headers={"X-Upload-Content-Type": content_type},
2031+
**kw,
20032032
)
20042033
loc = headers["Location"]
20052034
out = loc[0] if isinstance(loc, list) else loc # <- for CVR responses
@@ -2017,12 +2046,14 @@ async def simple_upload(
20172046
consistency=None,
20182047
content_type="application/octet-stream",
20192048
fixed_key_metadata=None,
2049+
mode="overwrite",
20202050
):
20212051
checker = get_consistency_checker(consistency)
20222052
path = f"{fs._location}/upload/storage/v1/b/{quote(bucket)}/o"
20232053
metadata = {"name": key}
20242054
if metadatain is not None:
20252055
metadata["metadata"] = metadatain
2056+
kw = {"ifGenerationMatch": "0"} if mode == "create" else {}
20262057
metadata.update(_convert_fixed_key_metadata(fixed_key_metadata))
20272058
metadata = json.dumps(metadata)
20282059
template = (
@@ -2039,6 +2070,7 @@ async def simple_upload(
20392070
headers={"Content-Type": 'multipart/related; boundary="==0=="'},
20402071
data=UnclosableBytesIO(data),
20412072
json_out=True,
2073+
**kw,
20422074
)
20432075
checker.update(datain)
20442076
checker.validate_json_response(j)

gcsfs/retry.py

+2
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ def validate_response(status, content, path, args=None):
109109

110110
if status == 403:
111111
raise OSError(f"Forbidden: {path}\n{msg}")
112+
elif status == 412:
113+
raise FileExistsError(path)
112114
elif status == 502:
113115
raise requests.exceptions.ProxyError()
114116
elif "invalid" in str(msg):

gcsfs/tests/test_core.py

+13
Original file line numberDiff line numberDiff line change
@@ -1532,3 +1532,16 @@ def test_sign(gcs, monkeypatch):
15321532

15331533
response = requests.get(result)
15341534
assert response.text == "This is a test string"
1535+
1536+
1537+
@pytest.mark.xfail(reason="emulator does not support condition")
1538+
def test_write_x_mpu(gcs):
1539+
fn = TEST_BUCKET + "/test.file"
1540+
with gcs.open(fn, mode="xb", block_size=5 * 2**20) as f:
1541+
assert f.mode == "xb"
1542+
f.write(b"0" * 5 * 2**20)
1543+
f.write(b"done")
1544+
with pytest.raises(FileExistsError):
1545+
with gcs.open(fn, mode="xb", block_size=5 * 2**20) as f:
1546+
f.write(b"0" * 5 * 2**20)
1547+
f.write(b"done")

0 commit comments

Comments
 (0)