33import errno
44import io
55import logging
6+ import math
67import mimetypes
78import os
89import socket
@@ -69,6 +70,8 @@ def setup_logging(level=None):
6970 ResponseParserError ,
7071)
7172
73+ MAX_UPLOAD_PARTS = 10_000 # maximum number of parts for S3 multipart upload
74+
7275if ClientPayloadError is not None :
7376 S3_RETRYABLE_ERRORS += (ClientPayloadError ,)
7477
@@ -174,6 +177,18 @@ def _coalesce_version_id(*args):
174177 return version_ids .pop ()
175178
176179
def calculate_chunksize(filesize, chunksize=None, max_parts=MAX_UPLOAD_PARTS) -> int:
    """
    Choose a multipart-upload part size that respects a part-count limit.

    Parameters
    ----------
    filesize : int
        Total number of bytes to be uploaded.
    chunksize : int, optional
        Preferred part size in bytes. When None, 50 MiB is used.
    max_parts : int
        Largest number of parts the upload may be split into. Defaults to
        the module-level ``MAX_UPLOAD_PARTS``.

    Returns
    -------
    int
        ``chunksize`` unchanged if ``filesize`` fits into ``max_parts`` parts
        of that size; otherwise ``filesize / max_parts`` rounded up, i.e. the
        smallest part size that keeps the part count within ``max_parts``.

    Raises
    ------
    ZeroDivisionError
        If ``chunksize`` or (when resizing is needed) ``max_parts`` is zero.
    """
    if chunksize is None:
        chunksize = 50 * 2**20  # default chunksize set to 50 MiB

    # Ceiling division in pure integer arithmetic: -(-a // b) == ceil(a / b).
    # Byte counts never pass through float, so the result is exact for any
    # file size.
    required_chunks = -(-filesize // chunksize)

    if required_chunks > max_parts:
        # Too many parts at the requested size, so grow the parts just enough
        # to fit the limit. For scale: a 5 TiB object split into 10,000 parts
        # needs parts of roughly 524 MiB.
        chunksize = -(-filesize // max_parts)
    return chunksize
190+
191+
177192class S3FileSystem (AsyncFileSystem ):
178193 """
179194 Access S3 as if it were a file system.
@@ -1242,7 +1257,7 @@ async def _put_file(
12421257 lpath ,
12431258 rpath ,
12441259 callback = _DEFAULT_CALLBACK ,
1245- chunksize = 50 * 2 ** 20 ,
1260+ chunksize = None ,
12461261 max_concurrency = None ,
12471262 mode = "overwrite" ,
12481263 ** kwargs ,
@@ -1270,6 +1285,7 @@ async def _put_file(
12701285 if content_type is not None :
12711286 kwargs ["ContentType" ] = content_type
12721287
1288+ chunksize = calculate_chunksize (size , chunksize = chunksize )
12731289 with open (lpath , "rb" ) as f0 :
12741290 if size < min (5 * 2 ** 30 , 2 * chunksize ):
12751291 chunk = f0 .read ()
@@ -1288,8 +1304,8 @@ async def _put_file(
12881304 key ,
12891305 mpu ,
12901306 f0 ,
1307+ chunksize ,
12911308 callback = callback ,
1292- chunksize = chunksize ,
12931309 max_concurrency = max_concurrency ,
12941310 )
12951311 parts = [
@@ -1317,8 +1333,8 @@ async def _upload_file_part_concurrent(
13171333 key ,
13181334 mpu ,
13191335 f0 ,
1336+ chunksize ,
13201337 callback = _DEFAULT_CALLBACK ,
1321- chunksize = 50 * 2 ** 20 ,
13221338 max_concurrency = None ,
13231339 ):
13241340 max_concurrency = max_concurrency or self .max_concurrency
0 commit comments