Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TUS temporary file optimization (mv instead of read on same filesystem) #1690

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
9 changes: 9 additions & 0 deletions base.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ parts =
sphinx-python
deploy-to-heroku
omelette
vscode
zpretty
develop = .
sources-dir = extras
Expand Down Expand Up @@ -172,6 +173,14 @@ input = inline:
output = ${buildout:directory}/bin/zpretty-run
mode = 755

[vscode]
recipe = collective.recipe.vscode >= 0.1.8
eggs =
Plone
Pillow
plone.app.debugtoolbar
plone.restapi [test]

[sources]
plone.rest = git git://github.com/plone/plone.rest.git [email protected]:plone/plone.rest.git branch=master
plone.schema = git git://github.com/plone/plone.schema.git [email protected]:plone/plone.schema.git branch=master
Expand Down
10 changes: 8 additions & 2 deletions src/plone/restapi/deserializer/dxfields.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ class NamedFieldDeserializer(DefaultFieldDeserializer):
def __call__(self, value):
content_type = "application/octet-stream"
filename = None
size = None
if isinstance(value, dict):
if "data" not in value:
# We are probably pushing the contents of a previous GET
Expand All @@ -257,20 +258,25 @@ def __call__(self, value):
elif isinstance(value, TUSUpload):
content_type = value.metadata().get("content-type", content_type)
filename = value.metadata().get("filename", filename)
data = value.open()
size = value.length
# Note there is a special TUSUploadStorage that will move instead reread the data
data = value
else:
data = value

# Convert if we have data
if data:
value = self.field._type(
data=data, contentType=content_type, filename=filename
)
if size is not None:
# Shortcut so NamedBlobFile doesn't have to reopen the file again
value.__dict__["size"] = size
else:
value = None

# Always validate to check for required fields
self.field.validate(value)

return value


Expand Down
6 changes: 6 additions & 0 deletions src/plone/restapi/services/content/configure.zcml
Original file line number Diff line number Diff line change
Expand Up @@ -140,4 +140,10 @@
name="@tus-upload"
/>

<utility
provides="plone.namedfile.interfaces.IStorage"
name="plone.restapi.services.content.tus.TUSUpload"
factory=".tus.TUSUploadStorable"
/>

</configure>
17 changes: 14 additions & 3 deletions src/plone/restapi/services/content/tus.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
from zope.lifecycleevent import ObjectCreatedEvent
from zope.publisher.interfaces import IPublishTraverse
from zope.publisher.interfaces import NotFound
from ZODB.blob import rename_or_copy_blob
from plone.namedfile.interfaces import IStorage
from plone.namedfile.interfaces import NotStorable

import json
import os
Expand Down Expand Up @@ -164,7 +167,6 @@ class UploadHead(UploadFileBase):
"""TUS upload endpoint for handling HEAD requests"""

def reply(self):

tus_upload = self.tus_upload()
if tus_upload is None:
return self.error("Not Found", "", 404)
Expand All @@ -188,7 +190,6 @@ class UploadPatch(UploadFileBase):
"""TUS upload endpoint for handling PATCH requests"""

def reply(self):

tus_upload = self.tus_upload()
if tus_upload is None:
return self.error("Not Found", "", 404)
Expand Down Expand Up @@ -285,7 +286,6 @@ def create_or_modify_content(self, tus_upload):


class TUSUpload:

file_prefix = "tus_upload_"
expiration_period = 60 * 60
finished = False
Expand Down Expand Up @@ -394,3 +394,14 @@ def expires(self):
else:
expiration = time.time() + self.expiration_period
return formatdate(expiration, False, True)


@implementer(IStorage)
class TUSUploadStorable:
"""Special storage to skip reading and writing of the upload since it's already on disk"""

def store(self, data, blob):
if not isinstance(data, TUSUpload):
raise NotStorable('Could not store data (not of "FileUpload").')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
raise NotStorable('Could not store data (not of "FileUpload").')
raise NotStorable('Could not store data (not of type "TUSUpload").')

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@djay Now that I'm looking at things more carefully, I'm wondering if this special storage adapter is really needed.

Previously, an open file was passed into the NamedBlobFile constructor. This should end up using the FileDescriptorStorable which uses blob.consumeFile, which is already using rename_or_copy_blob.


rename_or_copy_blob(data.filepath, blob._p_blob_uncommitted)
56 changes: 54 additions & 2 deletions src/plone/restapi/tests/test_tus.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def _prepare_metadata(filename, content_type):


class TestTUS(unittest.TestCase):

layer = PLONE_RESTAPI_DX_FUNCTIONAL_TESTING

def setUp(self):
Expand Down Expand Up @@ -395,6 +394,60 @@ def test_tus_can_upload_text_file(self):
)
self.assertEqual(response.status_code, 204)

def test_tus_can_upload_with_rereading(self):
# if tmp dir is on the same filesystem then we shouldn't have delay caused by reading and writing large file

import ZODB.blob

old_open = ZODB.blob.Blob.open
blob_read = 0
blob_write = 0

def count_open(self, mode="r"):
nonlocal blob_read, blob_write
blob_read += 1 if "r" in mode else 0
blob_write += 1 if "w" in mode else 0
return old_open(self, mode)

ZODB.blob.Blob.open = count_open

pdf_file_path = os.path.join(os.path.dirname(__file__), UPLOAD_PDF_FILENAME)
pdf_file_size = os.path.getsize(pdf_file_path)
metadata = _prepare_metadata(UPLOAD_PDF_FILENAME, UPLOAD_PDF_MIMETYPE)
response = self.api_session.post(
self.upload_url,
headers={
"Tus-Resumable": "1.0.0",
"Upload-Length": str(pdf_file_size),
"Upload-Metadata": metadata,
},
)
self.assertEqual(response.status_code, 201)
location = response.headers["Location"]

# upload the data with PATCH
with open(pdf_file_path, "rb") as pdf_file:
response = self.api_session.patch(
location,
headers={
"Content-Type": "application/offset+octet-stream",
"Upload-Offset": "0",
"Tus-Resumable": "1.0.0",
},
data=pdf_file,
)
self.assertEqual(response.status_code, 204)

self.assertEqual(
blob_write,
1,
"Slow write to blob instead of os rename. Should be only 1 on init",
)
self.assertEqual(blob_read, 0, "Validation is reading the whole blob in memory")
# TODO: would be better test to patch file read instead and ensure its not called?

ZODB.blob.Blob.open = old_open
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should go in a finally block in case there's an error during the test.


def test_tus_can_replace_pdf_file(self):
# Create a test file
self.file = api.content.create(
Expand Down Expand Up @@ -568,7 +621,6 @@ class CORSTestPolicy(CORSPolicy):


class TestTUSUploadWithCORS(unittest.TestCase):

layer = PLONE_RESTAPI_DX_FUNCTIONAL_TESTING

def setUp(self):
Expand Down