Commit 736ee5a

Platob and efiop authored Jan 25, 2022
persist metadata on append (#575)
Co-authored-by: Ruslan Kuprieiev <kupruser@gmail.com>
1 parent 3545677 commit 736ee5a

3 files changed: +78, -2 lines changed

.gitignore (+1)

@@ -7,3 +7,4 @@ __pycache__
 dist/
 *.egg-info
 build/
+venv/

s3fs/core.py (+45, -2)
@@ -52,6 +52,25 @@ def setup_logging(level=None):
 
 _VALID_FILE_MODES = {"r", "w", "a", "rb", "wb", "ab"}
 
+_PRESERVE_KWARGS = [
+    "CacheControl",
+    "ContentDisposition",
+    "ContentEncoding",
+    "ContentLanguage",
+    "ContentLength",
+    "ContentType",
+    "Expires",
+    "WebsiteRedirectLocation",
+    "ServerSideEncryption",
+    "SSECustomerAlgorithm",
+    "SSEKMSKeyId",
+    "BucketKeyEnabled",
+    "StorageClass",
+    "ObjectLockMode",
+    "ObjectLockRetainUntilDate",
+    "ObjectLockLegalHoldStatus",
+    "Metadata",
+]
 
 key_acls = {
     "private",
@@ -1047,7 +1066,7 @@ async def _info(self, path, bucket=None, key=None, refresh=False, version_id=None):
                 "size": out["ContentLength"],
                 "name": "/".join([bucket, key]),
                 "type": "file",
-                "StorageClass": "STANDARD",
+                "StorageClass": out.get("StorageClass", "STANDARD"),
                 "VersionId": out.get("VersionId"),
                 "ContentType": out.get("ContentType"),
             }
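
Two notes on the changes above. First, _PRESERVE_KWARGS enumerates the
head_object response fields that are also valid inputs to put_object, so they
can be replayed when an object is rewritten. A minimal sketch of the filtering
step, using a hand-picked subset of the list and made-up response values
(everything in this sketch is illustrative, not from the commit):

    # Illustrative subset of _PRESERVE_KWARGS (see the full list above).
    _PRESERVE_KWARGS = ["ContentType", "ContentLength", "Metadata"]

    # Made-up head_object response; ETag is not in the preserve list.
    head = {
        "ContentType": "text/csv",
        "ContentLength": 1024,
        "ETag": '"abc123"',
    }
    # Fields the caller passed explicitly are never overridden.
    s3_additional_kwargs = {"ContentType": "application/json"}

    head = {
        key: value
        for key, value in head.items()
        if key in _PRESERVE_KWARGS and key not in s3_additional_kwargs
    }
    assert head == {"ContentLength": 1024}

Second, info() now reports the storage class actually returned by head_object
instead of hard-coding "STANDARD"; the fallback remains because S3 omits the
StorageClass field for standard-class objects. For example (bucket and key are
placeholders), s3.info("my-bucket/archive.csv")["StorageClass"] can now come
back as "REDUCED_REDUNDANCY".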
@@ -1817,14 +1836,38 @@ def __init__(
         self.append_block = False
 
         if "a" in mode and s3.exists(path):
-            loc = s3.info(path)["size"]
+            # See:
+            # put: https://boto3.amazonaws.com/v1/documentation/api/latest
+            # /reference/services/s3.html#S3.Client.put_object
+            #
+            # head: https://boto3.amazonaws.com/v1/documentation/api/latest
+            # /reference/services/s3.html#S3.Client.head_object
+            head = self._call_s3(
+                "head_object",
+                self.kwargs,
+                Bucket=bucket,
+                Key=key,
+                **version_id_kw(version_id),
+                **self.req_kw,
+            )
+
+            head = {
+                key: value
+                for key, value in head.items()
+                if key in _PRESERVE_KWARGS and key not in self.s3_additional_kwargs
+            }
+
+            loc = head.pop("ContentLength")
             if loc < 5 * 2 ** 20:
                 # existing file too small for multi-upload: download
                 self.write(self.fs.cat(self.path))
             else:
                 self.append_block = True
             self.loc = loc
 
+            # Reflect head
+            self.s3_additional_kwargs.update(head)
+
         if "r" in mode and "ETag" in self.details:
             self.req_kw["IfMatch"] = self.details["ETag"]

s3fs/tests/test_s3fs.py (+32)
@@ -15,6 +15,8 @@
 import moto
 from itertools import chain
 import fsspec.core
+from dateutil.tz import tzutc
+
 import s3fs.core
 from s3fs.core import S3FileSystem
 from s3fs.utils import ignoring, SSEParams
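
The tzutc import supports the test below: S3 (and moto) return the Expires
header as a timezone-aware datetime, so the expected value must carry tzinfo
to compare equal. For instance:

    import datetime
    from dateutil.tz import tzutc

    # Naive and aware datetimes never compare equal, hence tzinfo=tzutc().
    expires = datetime.datetime(2015, 1, 1, 0, 0, tzinfo=tzutc())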
@@ -1290,6 +1292,36 @@ def test_append(s3):
         assert f.tell() == 20 * 2 ** 20 + 5
     assert s3.cat(a) == b"a" * 10 * 2 ** 20 + b"extra" + b"b" * 10 * 2 ** 20
 
+    # Keep Head Metadata
+    head = dict(
+        CacheControl="public",
+        ContentDisposition="string",
+        ContentEncoding="gzip",
+        ContentLanguage="ru-RU",
+        ContentType="text/csv",
+        Expires=datetime.datetime(2015, 1, 1, 0, 0, tzinfo=tzutc()),
+        Metadata={"string": "string"},
+        ServerSideEncryption="AES256",
+        StorageClass="REDUCED_REDUNDANCY",
+        WebsiteRedirectLocation="https://www.example.com/",
+        BucketKeyEnabled=False,
+    )
+    with s3.open(a, "wb", **head) as f:
+        f.write(b"data")
+
+    with s3.open(a, "ab") as f:
+        f.write(b"other")
+
+    with s3.open(a) as f:
+        filehead = {
+            k: v
+            for k, v in f._call_s3(
+                "head_object", f.kwargs, Bucket=f.bucket, Key=f.key
+            ).items()
+            if k in head
+        }
+        assert filehead == head
+
 
 def test_bigger_than_block_read(s3):
     with s3.open(test_bucket_name + "/2014-01-01.csv", "rb", block_size=3) as f:
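
The new assertions extend the existing test_append case; with the moto-backed
s3 fixture they can be run in isolation via, for example,
pytest s3fs/tests/test_s3fs.py -k test_append.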
