From d8933533474afb7f697a4f3151864949378216de Mon Sep 17 00:00:00 2001 From: Jon Pentland Date: Mon, 26 Jun 2023 11:48:08 +0100 Subject: [PATCH 01/11] Rename tus temporary file to temporary blob location instead of iterating data --- src/plone/restapi/deserializer/dxfields.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/plone/restapi/deserializer/dxfields.py b/src/plone/restapi/deserializer/dxfields.py index 534291b872..6a52340739 100644 --- a/src/plone/restapi/deserializer/dxfields.py +++ b/src/plone/restapi/deserializer/dxfields.py @@ -31,6 +31,7 @@ import codecs import dateutil import html as html_parser +import os @implementer(IFieldDeserializer) @@ -238,6 +239,7 @@ class NamedFieldDeserializer(DefaultFieldDeserializer): def __call__(self, value): content_type = "application/octet-stream" filename = None + tus_filepath = None if isinstance(value, dict): if "data" not in value: # We are probably pushing the contents of a previous GET @@ -257,10 +259,11 @@ def __call__(self, value): elif isinstance(value, TUSUpload): content_type = value.metadata().get("content-type", content_type) filename = value.metadata().get("filename", filename) - data = value.open() + # Put an single byte in place. Blob file is moved below. + data = b'0' + tus_filepath = value.filepath else: data = value - # Convert if we have data if data: value = self.field._type( @@ -269,6 +272,15 @@ def __call__(self, value): else: value = None + # If it si a TUS upload, we rename the temporary file to the temp blob + # file. If the two files are on the same disk volume this will be a + # very quick operation + if tus_filepath: + try: + os.rename(tus_filepath, value._blob._p_blob_uncommitted) + except OSError: + os.copy(tus_filepath, value._blob._p_blob_uncommitted) + # Always validate to check for required fields self.field.validate(value) return value From 31d5b0a9bae5a7f9e290d46b8718f0c64ee19422 Mon Sep 17 00:00:00 2001 From: Jon Pentland Date: Mon, 26 Jun 2023 11:59:21 +0100 Subject: [PATCH 02/11] black/flake8 fixes --- src/plone/restapi/deserializer/dxfields.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plone/restapi/deserializer/dxfields.py b/src/plone/restapi/deserializer/dxfields.py index 6a52340739..c39da6e2be 100644 --- a/src/plone/restapi/deserializer/dxfields.py +++ b/src/plone/restapi/deserializer/dxfields.py @@ -260,7 +260,7 @@ def __call__(self, value): content_type = value.metadata().get("content-type", content_type) filename = value.metadata().get("filename", filename) # Put an single byte in place. Blob file is moved below. - data = b'0' + data = b"0" tus_filepath = value.filepath else: data = value @@ -273,7 +273,7 @@ def __call__(self, value): value = None # If it si a TUS upload, we rename the temporary file to the temp blob - # file. If the two files are on the same disk volume this will be a + # file. If the two files are on the same disk volume this will be a # very quick operation if tus_filepath: try: From 36eee20c2cc3eeba8efafaaa2d28d1cef39ab8ae Mon Sep 17 00:00:00 2001 From: Jon Pentland Date: Mon, 26 Jun 2023 12:57:52 +0100 Subject: [PATCH 03/11] Use zodb rename_or_copy blob to handle final blob move --- src/plone/restapi/deserializer/dxfields.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/plone/restapi/deserializer/dxfields.py b/src/plone/restapi/deserializer/dxfields.py index c39da6e2be..bc4d199233 100644 --- a/src/plone/restapi/deserializer/dxfields.py +++ b/src/plone/restapi/deserializer/dxfields.py @@ -27,11 +27,11 @@ from zope.schema.interfaces import ITime from zope.schema.interfaces import ITimedelta from zope.schema.interfaces import IVocabularyTokenized +from ZODB.blob import rename_or_copy_blob import codecs import dateutil import html as html_parser -import os @implementer(IFieldDeserializer) @@ -276,10 +276,7 @@ def __call__(self, value): # file. If the two files are on the same disk volume this will be a # very quick operation if tus_filepath: - try: - os.rename(tus_filepath, value._blob._p_blob_uncommitted) - except OSError: - os.copy(tus_filepath, value._blob._p_blob_uncommitted) + rename_or_copy_blob(tus_filepath, value._blob._p_blob_uncommitted) # Always validate to check for required fields self.field.validate(value) From a5ae68f4a4c7836bddde3f2226a3b30d500c8697 Mon Sep 17 00:00:00 2001 From: Jon Pentland Date: Mon, 3 Jul 2023 15:29:43 +0100 Subject: [PATCH 04/11] =?UTF-8?q?Move=20the=20blob=20after=20validation=20?= =?UTF-8?q?to=20save=20time/memory.=20Move=20blob-move=20=C2=A0code=20to?= =?UTF-8?q?=20TUSUpload=20object?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plone/restapi/deserializer/dxfields.py | 18 ++++++++++-------- src/plone/restapi/services/content/tus.py | 5 +++++ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/plone/restapi/deserializer/dxfields.py b/src/plone/restapi/deserializer/dxfields.py index bc4d199233..04c3c28ce9 100644 --- a/src/plone/restapi/deserializer/dxfields.py +++ b/src/plone/restapi/deserializer/dxfields.py @@ -27,7 +27,6 @@ from zope.schema.interfaces import ITime from zope.schema.interfaces import ITimedelta from zope.schema.interfaces import IVocabularyTokenized -from ZODB.blob import rename_or_copy_blob import codecs import dateutil @@ -239,7 +238,7 @@ class NamedFieldDeserializer(DefaultFieldDeserializer): def __call__(self, value): content_type = "application/octet-stream" filename = None - tus_filepath = None + tus = None if isinstance(value, dict): if "data" not in value: # We are probably pushing the contents of a previous GET @@ -261,7 +260,7 @@ def __call__(self, value): filename = value.metadata().get("filename", filename) # Put an single byte in place. Blob file is moved below. data = b"0" - tus_filepath = value.filepath + tus = value else: data = value # Convert if we have data @@ -272,14 +271,17 @@ def __call__(self, value): else: value = None - # If it si a TUS upload, we rename the temporary file to the temp blob - # file. If the two files are on the same disk volume this will be a - # very quick operation - if tus_filepath: - rename_or_copy_blob(tus_filepath, value._blob._p_blob_uncommitted) + # Always validate to check for required fields self.field.validate(value) + + # If it is a TUS upload, we rename the temporary file to the temp blob + # file. If the two files are on the same disk volume this will be a + # very quick operation + if tus: + tus.process_blob(value._blob) + return value diff --git a/src/plone/restapi/services/content/tus.py b/src/plone/restapi/services/content/tus.py index cd85343321..667050d853 100644 --- a/src/plone/restapi/services/content/tus.py +++ b/src/plone/restapi/services/content/tus.py @@ -22,6 +22,7 @@ from zope.lifecycleevent import ObjectCreatedEvent from zope.publisher.interfaces import IPublishTraverse from zope.publisher.interfaces import NotFound +from ZODB.blob import rename_or_copy_blob import json import os @@ -394,3 +395,7 @@ def expires(self): else: expiration = time.time() + self.expiration_period return formatdate(expiration, False, True) + + def process_blob(self, blob): + rename_or_copy_blob(self.filepath, blob._p_blob_uncommitted) + From 18e98bc0d2f3a328e0bc13466959954f122ad871 Mon Sep 17 00:00:00 2001 From: Dylan Jay Date: Thu, 23 Nov 2023 14:46:19 +0700 Subject: [PATCH 05/11] use storage adapter and put in test that shows validation reads file into memory --- src/plone/restapi/deserializer/dxfields.py | 19 +++---- .../restapi/services/content/configure.zcml | 6 +++ src/plone/restapi/services/content/tus.py | 13 ++++- src/plone/restapi/tests/test_tus.py | 50 +++++++++++++++++++ 4 files changed, 74 insertions(+), 14 deletions(-) diff --git a/src/plone/restapi/deserializer/dxfields.py b/src/plone/restapi/deserializer/dxfields.py index 04c3c28ce9..89a7a91b69 100644 --- a/src/plone/restapi/deserializer/dxfields.py +++ b/src/plone/restapi/deserializer/dxfields.py @@ -238,7 +238,7 @@ class NamedFieldDeserializer(DefaultFieldDeserializer): def __call__(self, value): content_type = "application/octet-stream" filename = None - tus = None + size = None if isinstance(value, dict): if "data" not in value: # We are probably pushing the contents of a previous GET @@ -258,9 +258,9 @@ def __call__(self, value): elif isinstance(value, TUSUpload): content_type = value.metadata().get("content-type", content_type) filename = value.metadata().get("filename", filename) - # Put an single byte in place. Blob file is moved below. - data = b"0" - tus = value + size = value.length + # Note there is a special TUSUploadStorage that will move instead reread the data + data = value else: data = value # Convert if we have data @@ -268,20 +268,15 @@ def __call__(self, value): value = self.field._type( data=data, contentType=content_type, filename=filename ) + if size is not None: + # Shortcut so NamedBlobFile doesn't have to reopen the file again + value.__dict__['size'] = size else: value = None - - # Always validate to check for required fields self.field.validate(value) - # If it is a TUS upload, we rename the temporary file to the temp blob - # file. If the two files are on the same disk volume this will be a - # very quick operation - if tus: - tus.process_blob(value._blob) - return value diff --git a/src/plone/restapi/services/content/configure.zcml b/src/plone/restapi/services/content/configure.zcml index 72073faafb..7f18aa5314 100644 --- a/src/plone/restapi/services/content/configure.zcml +++ b/src/plone/restapi/services/content/configure.zcml @@ -140,4 +140,10 @@ name="@tus-upload" /> + + diff --git a/src/plone/restapi/services/content/tus.py b/src/plone/restapi/services/content/tus.py index 667050d853..5baa1c4597 100644 --- a/src/plone/restapi/services/content/tus.py +++ b/src/plone/restapi/services/content/tus.py @@ -23,6 +23,8 @@ from zope.publisher.interfaces import IPublishTraverse from zope.publisher.interfaces import NotFound from ZODB.blob import rename_or_copy_blob +from plone.namedfile.interfaces import IStorage +from plone.namedfile.interfaces import NotStorable import json import os @@ -396,6 +398,13 @@ def expires(self): expiration = time.time() + self.expiration_period return formatdate(expiration, False, True) - def process_blob(self, blob): - rename_or_copy_blob(self.filepath, blob._p_blob_uncommitted) + +@implementer(IStorage) +class TUSUploadStorable: + """ Special storage to skip reading and writing of the upload since it's already on disk """ + def store(self, data, blob): + if not isinstance(data, TUSUpload): + raise NotStorable('Could not store data (not of "FileUpload").') + + rename_or_copy_blob(data.filepath, blob._p_blob_uncommitted) diff --git a/src/plone/restapi/tests/test_tus.py b/src/plone/restapi/tests/test_tus.py index bece6c679b..e5c86d3bf6 100644 --- a/src/plone/restapi/tests/test_tus.py +++ b/src/plone/restapi/tests/test_tus.py @@ -395,6 +395,56 @@ def test_tus_can_upload_text_file(self): ) self.assertEqual(response.status_code, 204) + def test_tus_can_upload_with_rereading(self): + # if tmp dir is on the same filesystem then we shouldn't have delay caused by reading and writing large file + + import ZODB.blob + old_open = ZODB.blob.Blob.open + blob_read = 0 + blob_write = 0 + + def count_open(self, mode="r"): + nonlocal blob_read, blob_write + blob_read += 1 if "r" in mode else 0 + blob_write += 1 if "w" in mode else 0 + return old_open(self, mode) + ZODB.blob.Blob.open = count_open + + # TODO: count NamedBlobFile._getData calls + + pdf_file_path = os.path.join(os.path.dirname(__file__), UPLOAD_PDF_FILENAME) + pdf_file_size = os.path.getsize(pdf_file_path) + metadata = _prepare_metadata(UPLOAD_PDF_FILENAME, UPLOAD_PDF_MIMETYPE) + response = self.api_session.post( + self.upload_url, + headers={ + "Tus-Resumable": "1.0.0", + "Upload-Length": str(pdf_file_size), + "Upload-Metadata": metadata, + }, + ) + self.assertEqual(response.status_code, 201) + location = response.headers["Location"] + + # upload the data with PATCH + with open(pdf_file_path, "rb") as pdf_file: + response = self.api_session.patch( + location, + headers={ + "Content-Type": "application/offset+octet-stream", + "Upload-Offset": "0", + "Tus-Resumable": "1.0.0", + }, + data=pdf_file, + ) + self.assertEqual(response.status_code, 204) + + self.assertEqual(blob_write, 1, "Slow write to blob instead of os rename. Should be only 1 on init") + self.assertEqual(blob_read, 0, "Validation is reading the whole blob in memory") + # TODO: would be better test to patch file read instead and ensure its not called? + + ZODB.blob.Blob.open = old_open + def test_tus_can_replace_pdf_file(self): # Create a test file self.file = api.content.create( From 833cbe62ca1462a50e295330ba041dbf5533bd17 Mon Sep 17 00:00:00 2001 From: Dylan Jay Date: Thu, 23 Nov 2023 16:02:25 +0700 Subject: [PATCH 06/11] fix pep8 --- src/plone/restapi/services/content/tus.py | 1 - src/plone/restapi/tests/test_tus.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/src/plone/restapi/services/content/tus.py b/src/plone/restapi/services/content/tus.py index 5baa1c4597..9819f6b2b0 100644 --- a/src/plone/restapi/services/content/tus.py +++ b/src/plone/restapi/services/content/tus.py @@ -407,4 +407,3 @@ def store(self, data, blob): raise NotStorable('Could not store data (not of "FileUpload").') rename_or_copy_blob(data.filepath, blob._p_blob_uncommitted) - diff --git a/src/plone/restapi/tests/test_tus.py b/src/plone/restapi/tests/test_tus.py index e5c86d3bf6..a4870933be 100644 --- a/src/plone/restapi/tests/test_tus.py +++ b/src/plone/restapi/tests/test_tus.py @@ -410,8 +410,6 @@ def count_open(self, mode="r"): return old_open(self, mode) ZODB.blob.Blob.open = count_open - # TODO: count NamedBlobFile._getData calls - pdf_file_path = os.path.join(os.path.dirname(__file__), UPLOAD_PDF_FILENAME) pdf_file_size = os.path.getsize(pdf_file_path) metadata = _prepare_metadata(UPLOAD_PDF_FILENAME, UPLOAD_PDF_MIMETYPE) From c2ec9ec28ba6b53864b57ed50ff729ef4aaedd5f Mon Sep 17 00:00:00 2001 From: Dylan Jay Date: Thu, 23 Nov 2023 16:05:10 +0700 Subject: [PATCH 07/11] format with black --- src/plone/restapi/services/content/tus.py | 6 ++---- src/plone/restapi/tests/test_tus.py | 10 +++++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/plone/restapi/services/content/tus.py b/src/plone/restapi/services/content/tus.py index 9819f6b2b0..da2c365629 100644 --- a/src/plone/restapi/services/content/tus.py +++ b/src/plone/restapi/services/content/tus.py @@ -167,7 +167,6 @@ class UploadHead(UploadFileBase): """TUS upload endpoint for handling HEAD requests""" def reply(self): - tus_upload = self.tus_upload() if tus_upload is None: return self.error("Not Found", "", 404) @@ -191,7 +190,6 @@ class UploadPatch(UploadFileBase): """TUS upload endpoint for handling PATCH requests""" def reply(self): - tus_upload = self.tus_upload() if tus_upload is None: return self.error("Not Found", "", 404) @@ -288,7 +286,6 @@ def create_or_modify_content(self, tus_upload): class TUSUpload: - file_prefix = "tus_upload_" expiration_period = 60 * 60 finished = False @@ -401,7 +398,8 @@ def expires(self): @implementer(IStorage) class TUSUploadStorable: - """ Special storage to skip reading and writing of the upload since it's already on disk """ + """Special storage to skip reading and writing of the upload since it's already on disk""" + def store(self, data, blob): if not isinstance(data, TUSUpload): raise NotStorable('Could not store data (not of "FileUpload").') diff --git a/src/plone/restapi/tests/test_tus.py b/src/plone/restapi/tests/test_tus.py index a4870933be..bba8597d6b 100644 --- a/src/plone/restapi/tests/test_tus.py +++ b/src/plone/restapi/tests/test_tus.py @@ -51,7 +51,6 @@ def _prepare_metadata(filename, content_type): class TestTUS(unittest.TestCase): - layer = PLONE_RESTAPI_DX_FUNCTIONAL_TESTING def setUp(self): @@ -399,6 +398,7 @@ def test_tus_can_upload_with_rereading(self): # if tmp dir is on the same filesystem then we shouldn't have delay caused by reading and writing large file import ZODB.blob + old_open = ZODB.blob.Blob.open blob_read = 0 blob_write = 0 @@ -408,6 +408,7 @@ def count_open(self, mode="r"): blob_read += 1 if "r" in mode else 0 blob_write += 1 if "w" in mode else 0 return old_open(self, mode) + ZODB.blob.Blob.open = count_open pdf_file_path = os.path.join(os.path.dirname(__file__), UPLOAD_PDF_FILENAME) @@ -437,7 +438,11 @@ def count_open(self, mode="r"): ) self.assertEqual(response.status_code, 204) - self.assertEqual(blob_write, 1, "Slow write to blob instead of os rename. Should be only 1 on init") + self.assertEqual( + blob_write, + 1, + "Slow write to blob instead of os rename. Should be only 1 on init", + ) self.assertEqual(blob_read, 0, "Validation is reading the whole blob in memory") # TODO: would be better test to patch file read instead and ensure its not called? @@ -616,7 +621,6 @@ class CORSTestPolicy(CORSPolicy): class TestTUSUploadWithCORS(unittest.TestCase): - layer = PLONE_RESTAPI_DX_FUNCTIONAL_TESTING def setUp(self): From a559fcae46c011a98d1a6a16c8891370b116b3e9 Mon Sep 17 00:00:00 2001 From: Dylan Jay Date: Thu, 23 Nov 2023 16:06:38 +0700 Subject: [PATCH 08/11] more black --- src/plone/restapi/deserializer/dxfields.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plone/restapi/deserializer/dxfields.py b/src/plone/restapi/deserializer/dxfields.py index 89a7a91b69..188a4da8dd 100644 --- a/src/plone/restapi/deserializer/dxfields.py +++ b/src/plone/restapi/deserializer/dxfields.py @@ -270,7 +270,7 @@ def __call__(self, value): ) if size is not None: # Shortcut so NamedBlobFile doesn't have to reopen the file again - value.__dict__['size'] = size + value.__dict__["size"] = size else: value = None From 90bd5c4f408cca1f931011106120441f9a9a4519 Mon Sep 17 00:00:00 2001 From: Dylan Jay Date: Thu, 23 Nov 2023 16:07:38 +0700 Subject: [PATCH 09/11] zpretty --- base.cfg | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/base.cfg b/base.cfg index 8d85e44ff7..8e867da4cc 100644 --- a/base.cfg +++ b/base.cfg @@ -13,6 +13,7 @@ parts = sphinx-python deploy-to-heroku omelette + vscode zpretty develop = . sources-dir = extras @@ -172,6 +173,14 @@ input = inline: output = ${buildout:directory}/bin/zpretty-run mode = 755 +[vscode] +recipe = collective.recipe.vscode >= 0.1.8 +eggs = + Plone + Pillow + plone.app.debugtoolbar + plone.restapi [test] + [sources] plone.rest = git git://github.com/plone/plone.rest.git pushurl=git@github.com:plone/plone.rest.git branch=master plone.schema = git git://github.com/plone/plone.schema.git pushurl=git@github.com:plone/plone.schema.git branch=master From dabf6d91537579f086cc4abc53e1fb209eaf9078 Mon Sep 17 00:00:00 2001 From: Dylan Jay Date: Fri, 24 Nov 2023 09:54:49 +0700 Subject: [PATCH 10/11] fix lint --- src/plone/restapi/services/content/configure.zcml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plone/restapi/services/content/configure.zcml b/src/plone/restapi/services/content/configure.zcml index 7f18aa5314..29418041ba 100644 --- a/src/plone/restapi/services/content/configure.zcml +++ b/src/plone/restapi/services/content/configure.zcml @@ -141,8 +141,8 @@ /> From 0792e4771b685759a6bd6dc95e774d851cc2354e Mon Sep 17 00:00:00 2001 From: Dylan Jay Date: Sun, 7 Jul 2024 14:27:52 +0700 Subject: [PATCH 11/11] use mock --- src/plone/restapi/tests/test_tus.py | 63 ++++++++++++++--------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/src/plone/restapi/tests/test_tus.py b/src/plone/restapi/tests/test_tus.py index bba8597d6b..f157e6fd18 100644 --- a/src/plone/restapi/tests/test_tus.py +++ b/src/plone/restapi/tests/test_tus.py @@ -409,44 +409,41 @@ def count_open(self, mode="r"): blob_write += 1 if "w" in mode else 0 return old_open(self, mode) - ZODB.blob.Blob.open = count_open + with unittest.mock.patch.object(ZODB.blob.Blob, 'open', count_open): - pdf_file_path = os.path.join(os.path.dirname(__file__), UPLOAD_PDF_FILENAME) - pdf_file_size = os.path.getsize(pdf_file_path) - metadata = _prepare_metadata(UPLOAD_PDF_FILENAME, UPLOAD_PDF_MIMETYPE) - response = self.api_session.post( - self.upload_url, - headers={ - "Tus-Resumable": "1.0.0", - "Upload-Length": str(pdf_file_size), - "Upload-Metadata": metadata, - }, - ) - self.assertEqual(response.status_code, 201) - location = response.headers["Location"] - - # upload the data with PATCH - with open(pdf_file_path, "rb") as pdf_file: - response = self.api_session.patch( - location, + pdf_file_path = os.path.join(os.path.dirname(__file__), UPLOAD_PDF_FILENAME) + pdf_file_size = os.path.getsize(pdf_file_path) + metadata = _prepare_metadata(UPLOAD_PDF_FILENAME, UPLOAD_PDF_MIMETYPE) + response = self.api_session.post( + self.upload_url, headers={ - "Content-Type": "application/offset+octet-stream", - "Upload-Offset": "0", "Tus-Resumable": "1.0.0", + "Upload-Length": str(pdf_file_size), + "Upload-Metadata": metadata, }, - data=pdf_file, ) - self.assertEqual(response.status_code, 204) - - self.assertEqual( - blob_write, - 1, - "Slow write to blob instead of os rename. Should be only 1 on init", - ) - self.assertEqual(blob_read, 0, "Validation is reading the whole blob in memory") - # TODO: would be better test to patch file read instead and ensure its not called? - - ZODB.blob.Blob.open = old_open + self.assertEqual(response.status_code, 201) + location = response.headers["Location"] + + # upload the data with PATCH + with open(pdf_file_path, "rb") as pdf_file: + response = self.api_session.patch( + location, + headers={ + "Content-Type": "application/offset+octet-stream", + "Upload-Offset": "0", + "Tus-Resumable": "1.0.0", + }, + data=pdf_file, + ) + self.assertEqual(response.status_code, 204) + + self.assertEqual( + blob_write, + 1, + "Slow write to blob instead of os rename. Should be only 1 on init", + ) + self.assertEqual(blob_read, 0, "Validation is reading the whole blob in memory") def test_tus_can_replace_pdf_file(self): # Create a test file