Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support optional hash output in save method #1454

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions jupyter_server/services/contents/filemanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,8 +475,14 @@ def _save_directory(self, os_path, model, path=""):
else:
self.log.debug("Directory %r already exists", os_path)

def save(self, model, path=""):
"""Save the file model and return the model with no content."""
def save(self, model, path="", require_hash=False):
"""Save the file model and return the model with no content.

Parameters
----------
require_hash: bool, optional
Whether to include the hash of the file contents.
"""
path = path.strip("/")

self.run_pre_save_hooks(model=model, path=path)
Expand Down Expand Up @@ -519,7 +525,7 @@ def save(self, model, path=""):
self.validate_notebook_model(model, validation_error=validation_error)
validation_message = model.get("message", None)

model = self.get(path, content=False)
model = self.get(path, content=False, require_hash=require_hash)
if validation_message:
model["message"] = validation_message

Expand Down Expand Up @@ -941,8 +947,14 @@ async def _save_directory(self, os_path, model, path=""):
else:
self.log.debug("Directory %r already exists", os_path)

async def save(self, model, path=""):
"""Save the file model and return the model with no content."""
async def save(self, model, path="", require_hash=False):
"""Save the file model and return the model with no content.

Parameters
----------
require_hash: bool, optional
Whether to include the hash of the file contents.
"""
path = path.strip("/")

self.run_pre_save_hooks(model=model, path=path)
Expand Down Expand Up @@ -982,7 +994,7 @@ async def save(self, model, path=""):
self.validate_notebook_model(model, validation_error=validation_error)
validation_message = model.get("message", None)

model = await self.get(path, content=False)
model = await self.get(path, content=False, require_hash=require_hash)
if validation_message:
model["message"] = validation_message

Expand Down
15 changes: 12 additions & 3 deletions jupyter_server/services/contents/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,12 +238,14 @@ async def _new_untitled(self, path, type="", ext=""):
validate_model(model)
self._finish_model(model)

async def _save(self, model, path):
async def _save(self, model, path, require_hash):
"""Save an existing file."""
chunk = model.get("chunk", None)
if not chunk or chunk == -1: # Avoid tedious log information
self.log.info("Saving file at %s", path)
model = await ensure_async(self.contents_manager.save(model, path))
model = await ensure_async(
self.contents_manager.save(model, path, require_hash=require_hash)
)
Comment on lines +246 to +248
Copy link
Collaborator

@krassowski krassowski Feb 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this should fix the backward-compatibility issue @Zsailer had in mind. The import needs to be moved to the top of the file, but the GitHub UI does not allow adding a suggestion on an unchanged part of the file.

Suggested change
model = await ensure_async(
self.contents_manager.save(model, path, require_hash=require_hash)
)
import inspect
save = self.contents_manager.save
signature = inspect.signature(save)
parameters = signature.parameters
has_kwargs = any(p.kind == p.VAR_KEYWORD for p in parameters)
if "require_hash" in parameters or has_kwargs:
save_coroutine = save(model, path, require_hash=require_hash)
else:
save_coroutine = save(model, path)
model = await ensure_async(save_coroutine)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just re-read Zach's comment and it is proposing a more radical approach:

inspecting the method signature of a ContentsManager.save method before it is instantiated in the ServerApp, wrap their method and overload it with the new argument, and raise a FutureWarning telling folks with custom ContentsManagers they will need to update their class soon (before 3.x release someday)

I don't know which approach is better, so I will yield to the maintainers here. In any case, I hope the suggestion is helpful.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we've used backcall for this in the past for IPython hooks to safely add arguments without breaking anything. I don't have a strong argument for the implementation details, but the pattern is well established and friendly to both maintainers and extension developers. The FutureWarning is good if we want to force this on all ContentsManager implementations, or we can skip it if not supporting the hash feature is a reasonable approach to take (I expect it is).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(backcall was removed from IPython in ipython/ipython#14216)

validate_model(model)
self._finish_model(model)

Expand Down Expand Up @@ -304,6 +306,13 @@ async def put(self, path=""):
model = self.get_json_body()
cm = self.contents_manager

hash_str = self.get_query_argument("hash", default="0")
if hash_str not in {"0", "1"}:
raise web.HTTPError(
400, f"Hash argument {hash_str!r} is invalid. It must be '0' or '1'."
)
require_hash = int(hash_str)

if model:
if model.get("copy_from"):
raise web.HTTPError(400, "Cannot copy with PUT, only POST")
Expand All @@ -318,7 +327,7 @@ async def put(self, path=""):
# fall back to file if unknown type
model["type"] = "file"
if exists:
await self._save(model, path)
await self._save(model, path, require_hash=require_hash)
else:
await self._upload(model, path)
else:
Expand Down
28 changes: 20 additions & 8 deletions jupyter_server/services/contents/largefilemanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,14 @@
class LargeFileManager(FileContentsManager):
"""Handle large file upload."""

def save(self, model, path=""):
"""Save the file model and return the model with no content."""
def save(self, model, path="", require_hash=False):
"""Save the file model and return the model with no content.

Parameters
----------
require_hash: bool, optional
Whether to include the hash of the file contents.
"""
chunk = model.get("chunk", None)
if chunk is not None:
path = path.strip("/")
Expand Down Expand Up @@ -49,15 +55,15 @@ def save(self, model, path=""):
self.log.error("Error while saving file: %s %s", path, e, exc_info=True)
raise web.HTTPError(500, f"Unexpected error while saving file: {path} {e}") from e

model = self.get(path, content=False)
model = self.get(path, content=False, require_hash=require_hash)

# Last chunk
if chunk == -1:
self.run_post_save_hooks(model=model, os_path=os_path)
self.emit(data={"action": "save", "path": path})
return model
else:
return super().save(model, path)
return super().save(model, path, require_hash=require_hash)

def _save_large_file(self, os_path, content, format):
"""Save content of a generic file."""
Expand Down Expand Up @@ -85,8 +91,14 @@ def _save_large_file(self, os_path, content, format):
class AsyncLargeFileManager(AsyncFileContentsManager):
"""Handle large file upload asynchronously"""

async def save(self, model, path=""):
"""Save the file model and return the model with no content."""
async def save(self, model, path="", require_hash=False):
"""Save the file model and return the model with no content.

Parameters
----------
require_hash: bool, optional
Whether to include the hash of the file contents.
"""
chunk = model.get("chunk", None)
if chunk is not None:
path = path.strip("/")
Expand Down Expand Up @@ -121,7 +133,7 @@ async def save(self, model, path=""):
self.log.error("Error while saving file: %s %s", path, e, exc_info=True)
raise web.HTTPError(500, f"Unexpected error while saving file: {path} {e}") from e

model = await self.get(path, content=False)
model = await self.get(path, content=False, require_hash=require_hash)

# Last chunk
if chunk == -1:
Expand All @@ -130,7 +142,7 @@ async def save(self, model, path=""):
self.emit(data={"action": "save", "path": path})
return model
else:
return await super().save(model, path)
return await super().save(model, path, require_hash=require_hash)

async def _save_large_file(self, os_path, content, format):
"""Save content of a generic file."""
Expand Down
14 changes: 12 additions & 2 deletions jupyter_server/services/contents/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,10 +460,15 @@ def get(self, path, content=True, type=None, format=None, require_hash=False):
"""
raise NotImplementedError

def save(self, model, path):
def save(self, model, path, require_hash=False):
"""
Save a file or directory model to path.

Parameters
----------
require_hash : bool
Whether the file hash must be returned or not.

Should return the saved model with no content. Save implementations
should call self.run_pre_save_hook(model=model, path=path) prior to
writing any data.
Expand Down Expand Up @@ -870,10 +875,15 @@ async def get(self, path, content=True, type=None, format=None, require_hash=Fal
"""
raise NotImplementedError

async def save(self, model, path):
async def save(self, model, path, require_hash=False):
"""
Save a file or directory model to path.

Parameters
----------
require_hash : bool
Whether the file hash must be returned or not.

Should return the saved model with no content. Save implementations
should call self.run_pre_save_hook(model=model, path=path) prior to
writing any data.
Expand Down
Loading