From 3166fbb05de911fb71947a7711346534c98e18d4 Mon Sep 17 00:00:00 2001 From: Andrew Pikul Date: Sun, 15 Dec 2024 14:41:24 -0500 Subject: [PATCH] Reconstruct tempfile logic in own class --- choreographer/__init__.py | 4 + choreographer/browser.py | 31 ++++--- choreographer/tempfile.py | 167 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 189 insertions(+), 13 deletions(-) create mode 100644 choreographer/tempfile.py diff --git a/choreographer/__init__.py b/choreographer/__init__.py index 00ad5ae9..d133b772 100644 --- a/choreographer/__init__.py +++ b/choreographer/__init__.py @@ -3,6 +3,8 @@ from .browser import get_browser_path from .cli_utils import get_browser from .cli_utils import get_browser_sync +from .tempfile import TempDirectory +from .tempfile import TempDirWarning __all__ = [ Browser, @@ -10,4 +12,6 @@ get_browser_sync, browser_which, get_browser_path, + TempDirectory, + TempDirWarning, ] diff --git a/choreographer/browser.py b/choreographer/browser.py index 5457de27..28d53cf8 100644 --- a/choreographer/browser.py +++ b/choreographer/browser.py @@ -19,6 +19,8 @@ from .system import which_browser from .tab import Tab from .target import Target +from .tempfile import TempDirectory +from .tempfile import TempDirWarning class UnhandledMessageWarning(UserWarning): @@ -33,9 +35,6 @@ class BrowserClosedError(RuntimeError): pass -with_onexc = bool(sys.version_info[:3] >= (3, 12)) - - def get_browser_path(): return os.environ.get("BROWSER_PATH", which_browser()) @@ -69,14 +68,17 @@ def __init__( self._loop_hack = False # see _check_loop self.lock = None # TODO where else is this set self.tabs = OrderedDict() + self.sandboxed = False # this is if our processes can't use /tmp # Browser Configuration if not path: path = get_browser_path() if not path: raise BrowserFailedError( - "Could not find an acceptable browser. Please set environmental variable BROWSER_PATH or pass `path=/path/to/browser` into the Browser() constructor. See documentation for downloading browser from python.", + "Could not find an acceptable browser. Please call `choreo.get_browser()`, set environmental variable BROWSER_PATH or pass `path=/path/to/browser` into the Browser() constructor. The latter two work with Edge.", ) + if "snap" in str(path): + self.sandboxed = True self._env["BROWSER_PATH"] = str(path) self.headless = headless if headless: @@ -90,8 +92,9 @@ def __init__( self._env["SANDBOX_ENABLED"] = "true" # Expert Configuration - self._tmp_path = kwargs.pop("tmp_path", None) - # TODO: stub, tempfile, must create + tmp_path = kwargs.pop("tmp_path", None) + self.tmp_dir = TempDirectory(tmp_path, sneak=self.sandboxed) + try: self.loop = kwargs.pop("loop", asyncio.get_running_loop()) except Exception: @@ -386,7 +389,7 @@ async def close_task(): except ProcessLookupError: pass self.pipe.close() - self._clean_temp() + self.temp_dir.clean() return asyncio.create_task(close_task()) else: @@ -395,7 +398,7 @@ async def close_task(): except ProcessLookupError: pass self.pipe.close() - self._clean_temp() + self.temp_dir.clean() async def _watchdog(self): self._watchdog_healthy = True @@ -410,11 +413,13 @@ async def _watchdog(self): await self.close() await asyncio.sleep(1) with warnings.catch_warnings(): - # we'll ignore warnings here because - # if the user sloppy-closes the browsers - # they may leave processes up still trying to create temporary files - # warnings.filterwarnings("ignore", category=TempDirWarning) #TODO - self._retry_delete_manual(self._temp_dir_name, delete=True) + # ignore warnings here because + # watchdog killing is last resort + # and can leaves stuff in weird state + warnings.filterwarnings("ignore", category=TempDirWarning) + self.temp_dir.clean() + if self.temp_dir.exists: + self.temp_dir.delete_manually() def __exit__(self, type, value, traceback): self.close() diff --git a/choreographer/tempfile.py b/choreographer/tempfile.py new file mode 100644 index 00000000..20650c5e --- /dev/null +++ b/choreographer/tempfile.py @@ -0,0 +1,167 @@ +import os +import platform +import shutil +import stat +import sys +import tempfile +import time +import warnings +from pathlib import Path +from threading import Thread + + +class TempDirWarning(UserWarning): + pass + + +# Python's built-in temporary directory functions are lacking +# In short, they don't handle removal well, and there's lots of API changes over recent versions. +# Here we have our own class to deal with it. +class TempDirectory: + def __init__(self, path=None, sneak=False): + self._with_onexc = bool(sys.version_info[:3] >= (3, 12)) + args = {} + + if path: + args = dict(dir=path) + elif sneak: + args = dict(prefix=".choreographer-", dir=Path.home()) + + if platform.system() != "Windows": + self.temp_dir = tempfile.TemporaryDirectory(**args) + else: # is windows + vinfo = sys.version_info[:3] + if vinfo >= (3, 12): + self.temp_dir = tempfile.TemporaryDirectory( + delete=False, + ignore_cleanup_errors=True, + **args, + ) + elif vinfo >= (3, 10): + self.temp_dir = tempfile.TemporaryDirectory( + ignore_cleanup_errors=True, + **args, + ) + else: + self.temp_dir = tempfile.TemporaryDirectory(**args) + + self.path = self.temp_dir.name + self.exists = True + if self.debug: + print(f"TEMP DIR NAME: {self._temp_dir_name}", file=sys.stderr) + + def delete_manually(self, check_only=False): + if not os.path.exists(self.path): + self.exists = False + if self.debug: + print( + "No retry delete manual necessary, path doesn't exist", + file=sys.stderr, + ) + return 0, 0, [] + n_dirs = 0 + n_files = 0 + errors = [] + for root, dirs, files in os.walk(self.path, topdown=False): + n_dirs += len(dirs) + n_files += len(files) + if not check_only: + for f in files: + fp = os.path.join(root, f) + if self.debug: + print(f"Removing file: {fp}", file=sys.stderr) + try: + os.chmod(fp, stat.S_IWUSR) + os.remove(fp) + if self.debug: + print("Success", file=sys.stderr) + except BaseException as e: + errors.append((fp, e)) + for d in dirs: + fp = os.path.join(root, d) + if self.debug: + print(f"Removing dir: {fp}", file=sys.stderr) + try: + os.chmod(fp, stat.S_IWUSR) + os.rmdir(fp) + if self.debug: + print("Success", file=sys.stderr) + except BaseException as e: + errors.append((fp, e)) + + # clean up directory + if not check_only: + try: + os.chmod(self.path, stat.S_IWUSR) + os.rmdir(self.path) + except BaseException as e: + errors.append((self.path, e)) + + if check_only: + if n_dirs or n_files: + self.exists = True + else: + self.exists = False + elif errors: + warnings.warn( + f"The temporary directory could not be deleted, execution will continue. errors: {errors}", + TempDirWarning, + ) + self.exists = True + else: + self.exists = False + + return n_dirs, n_files, errors + + def clean(self): + try: + # no faith in this python implementation, always fails with windows + # very unstable recently as well, lots new arguments in tempfile package + self.temp_dir.cleanup() + self.exists = False + return + except BaseException as e: + if self.debug: + print( + f"First tempdir deletion failed: TempDirWarning: {str(e)}", + file=sys.stderr, + ) + + def remove_readonly(func, path, excinfo): + try: + os.chmod(path, stat.S_IWUSR) + func(path) + except FileNotFoundError: + pass + + try: + if self._with_onexc: + shutil.rmtree(self.path, onexc=remove_readonly) + else: + shutil.rmtree(self.path, onerror=remove_readonly) + self.exists = False + del self.temp_dir + return + except FileNotFoundError: + pass # it worked! + except BaseException as e: + if self.debug: + print( + f"Second tmpdir deletion failed (shutil.rmtree): {str(e)}", + file=sys.stderr, + ) + self.delete_manually(check_only=True) + if not self.exists: + return + + def extra_clean(): + time.sleep(3) + self.delete_manually() + + t = Thread(target=extra_clean) + t.run() + if self.debug: + print( + f"Tempfile still exists?: {bool(os.path.exists(str(self.path)))}", + file=sys.stderr, + )