From 7800637f931a2ccf054001c435501e804b249678 Mon Sep 17 00:00:00 2001 From: William Moore Date: Thu, 20 Apr 2023 14:46:05 +0100 Subject: [PATCH 1/9] Add scripts/managed_repo_symlinks.py --- scripts/managed_repo_symlinks.py | 103 +++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 scripts/managed_repo_symlinks.py diff --git a/scripts/managed_repo_symlinks.py b/scripts/managed_repo_symlinks.py new file mode 100644 index 00000000..ee013ab9 --- /dev/null +++ b/scripts/managed_repo_symlinks.py @@ -0,0 +1,103 @@ +import argparse +import sys +import os +import shutil + +import omero.clients +from omero.cli import cli_login +from omero.gateway import BlitzGateway + + +def create_symlinks(conn, fileset, args): + + template_path = os.path.join(args.repo, fileset.templatePrefix) + + if args.report: + print("\nFileset:", fileset.id, template_path) + # /data/OMERO/ManagedRepository/demo_2/Blitz-0-Ice.ThreadPool.Server-8/2023-04/07/13-29-24.048/ + + # find files/dirs in Fileset template_path that are also in the symlink dir... + fs_contents = os.listdir(template_path) + if args.report: + print("fs_contents", fs_contents) + + for fs_item in fs_contents: + # check if item is in target dir + symlink_target = os.path.join(args.target, fs_item) + if not os.path.exists(symlink_target): + print("Symlink target not found:", symlink_target) + continue + symlink_source = os.path.join(template_path, fs_item) + if args.report: + print(f"Link from {symlink_source} to {symlink_target}") + if not args.dry_run: + # delete directory + try: + shutil.rmtree(symlink_source) + except: + # in case the source is already a symlink (or a file) + os.remove(symlink_source) + # replace with symlink + target_is_directory = os.path.isdir(symlink_target) + os.symlink(symlink_target, symlink_source, target_is_directory) + + +def get_object(conn, obj_string): + for dtype in ["Screen", "Plate", "Image", "Fileset", "Dataset"]: + if obj_string.startswith(dtype): + obj_id = int(obj_string.replace(dtype + ":", "")) + obj = conn.getObject(dtype, obj_id) + if obj is None: + print(obj_string, "not found!") + return obj + + +def get_fileset(conn, obj_string): + """obj_string is Image:123 or Fileset:123 or Plate:123""" + + obj = get_object(conn, obj_string) + if obj_string.startswith("Fileset:"): + return obj + if obj_string.startswith("Image:"): + return obj.getFileset() + if obj_string.startswith("Plate:"): + well = list(obj.listChildren())[0] + image = list(well.listChildren())[0].getImage() + return image.getFileset() + + +def main(argv): + """ + We go through all Filesets under the top-level Object, and check for items in the + templatePrefix dir under ManagedRepository. If these items are also found in + the target directory, we delete the item from the Managed Repo and replace it with + a symlink to the equivalent dir (or file) in the target dir. + """ + parser = argparse.ArgumentParser() + parser.add_argument('object', help='Object:ID where Object is Screen, Plate, Dataset, Image, Fileset') + parser.add_argument("target", help="path to dir that contains symlink targets") + parser.add_argument("--repo", help="Managed Repo absolute path", default="/data/OMERO/ManagedRepository") + parser.add_argument("--report", action="store_true", help="Print logs") + parser.add_argument("--dry-run", action="store_true", help="Don't save any changes") + args = parser.parse_args(argv) + object_str = args.object + + with cli_login() as cli: + conn = BlitzGateway(client_obj=cli._client) + assert ":" in object_str + + obj_strings = [object_str] + if "Screen" in object_str: + screen = get_object(conn, object_str) + obj_strings = [f"Plate:{plate.id}" for plate in screen.listChildren()] + elif "Dataset" in object_str: + dataset = get_object(conn, object_str) + obj_strings = [f"Image:{image.id}" for image in dataset.listChildren()] + + for object_str in obj_strings: + fileset = get_fileset(conn, object_str) + create_symlinks(conn, fileset, args) + + +if __name__ == '__main__': + main(sys.argv[1:]) From e4bf3882035944157dc6be681cc94612831a48d2 Mon Sep 17 00:00:00 2001 From: William Moore Date: Wed, 26 Apr 2023 12:47:00 +0100 Subject: [PATCH 2/9] Preview image before updating symlinks --- scripts/managed_repo_symlinks.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/managed_repo_symlinks.py b/scripts/managed_repo_symlinks.py index ee013ab9..e33ba65b 100644 --- a/scripts/managed_repo_symlinks.py +++ b/scripts/managed_repo_symlinks.py @@ -10,6 +10,7 @@ def create_symlinks(conn, fileset, args): + preview_image(fileset) template_path = os.path.join(args.repo, fileset.templatePrefix) if args.report: @@ -42,6 +43,11 @@ def create_symlinks(conn, fileset, args): os.symlink(symlink_target, symlink_source, target_is_directory) +def preview_image(fileset): + first_image = list(fileset.copyImages())[0] + first_image.renderJpeg() + + def get_object(conn, obj_string): for dtype in ["Screen", "Plate", "Image", "Fileset", "Dataset"]: if obj_string.startswith(dtype): From 177a9eb1e6b89c8d9be1b9ab342e2fc1118cff3b Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 28 Apr 2023 11:13:00 +0100 Subject: [PATCH 3/9] Never load all Images and Files for a Fileset --- scripts/managed_repo_symlinks.py | 41 +++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/scripts/managed_repo_symlinks.py b/scripts/managed_repo_symlinks.py index e33ba65b..ede3be7f 100644 --- a/scripts/managed_repo_symlinks.py +++ b/scripts/managed_repo_symlinks.py @@ -8,13 +8,14 @@ from omero.gateway import BlitzGateway -def create_symlinks(conn, fileset, args): +def create_symlinks(conn, fileset_id, args): - preview_image(fileset) - template_path = os.path.join(args.repo, fileset.templatePrefix) + preview_image(conn, fileset_id, args) + fileset = conn.getQueryService().get("Fileset", fileset_id, conn.SERVICE_OPTS) + template_path = os.path.join(args.repo, fileset.templatePrefix.val) if args.report: - print("\nFileset:", fileset.id, template_path) + print("\nFileset:", fileset.id.val, template_path) # /data/OMERO/ManagedRepository/demo_2/Blitz-0-Ice.ThreadPool.Server-8/2023-04/07/13-29-24.048/ # find files/dirs in Fileset template_path that are also in the symlink dir... @@ -43,9 +44,18 @@ def create_symlinks(conn, fileset, args): os.symlink(symlink_target, symlink_source, target_is_directory) -def preview_image(fileset): - first_image = list(fileset.copyImages())[0] - first_image.renderJpeg() +def preview_image(conn, fileset_id, args): + + params = omero.sys.ParametersI() + params.addId(fileset_id) + params.page(0, 1) + query = "select img from Image img where img.fileset.id=:id" + first_image = conn.getQueryService().findAllByQuery(query, params, conn.SERVICE_OPTS)[0] + + if args.report: + print("Render Image", first_image.id.val) + image = conn.getObject("Image", first_image.id.val) + image.renderJpeg() def get_object(conn, obj_string): @@ -58,18 +68,21 @@ def get_object(conn, obj_string): return obj -def get_fileset(conn, obj_string): +def get_fileset_id(conn, obj_string): """obj_string is Image:123 or Fileset:123 or Plate:123""" - obj = get_object(conn, obj_string) + # We never do conn.getObject("Fileset") as it loads + # all original files and images - can be too slow if obj_string.startswith("Fileset:"): - return obj + return int(obj_string.replace("Fileset:", "")) + + obj = get_object(conn, obj_string) if obj_string.startswith("Image:"): - return obj.getFileset() + return obj.fileset.id.val if obj_string.startswith("Plate:"): well = list(obj.listChildren())[0] image = list(well.listChildren())[0].getImage() - return image.getFileset() + return image.fileset.id.val def main(argv): @@ -101,8 +114,8 @@ def main(argv): obj_strings = [f"Image:{image.id}" for image in dataset.listChildren()] for object_str in obj_strings: - fileset = get_fileset(conn, object_str) - create_symlinks(conn, fileset, args) + fileset_id = get_fileset_id(conn, object_str) + create_symlinks(conn, fileset_id, args) if __name__ == '__main__': From d74a42030957fbb5db1950de14ee811bfc406133 Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 28 Apr 2023 11:20:43 +0100 Subject: [PATCH 4/9] Render image for Fileset after printing fileset --- scripts/managed_repo_symlinks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/managed_repo_symlinks.py b/scripts/managed_repo_symlinks.py index ede3be7f..9d707ee0 100644 --- a/scripts/managed_repo_symlinks.py +++ b/scripts/managed_repo_symlinks.py @@ -10,7 +10,6 @@ def create_symlinks(conn, fileset_id, args): - preview_image(conn, fileset_id, args) fileset = conn.getQueryService().get("Fileset", fileset_id, conn.SERVICE_OPTS) template_path = os.path.join(args.repo, fileset.templatePrefix.val) @@ -18,6 +17,8 @@ def create_symlinks(conn, fileset_id, args): print("\nFileset:", fileset.id.val, template_path) # /data/OMERO/ManagedRepository/demo_2/Blitz-0-Ice.ThreadPool.Server-8/2023-04/07/13-29-24.048/ + preview_image(conn, fileset_id, args) + # find files/dirs in Fileset template_path that are also in the symlink dir... fs_contents = os.listdir(template_path) if args.report: From 2ce54a1eeeba661a22489c38d20b435c7d6807b9 Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 29 May 2023 13:33:25 +0100 Subject: [PATCH 5/9] Support --fileset-mappings csv file --- scripts/managed_repo_symlinks.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/scripts/managed_repo_symlinks.py b/scripts/managed_repo_symlinks.py index 9d707ee0..4a7903de 100644 --- a/scripts/managed_repo_symlinks.py +++ b/scripts/managed_repo_symlinks.py @@ -1,4 +1,5 @@ import argparse +import csv import sys import os import shutil @@ -19,6 +20,14 @@ def create_symlinks(conn, fileset_id, args): preview_image(conn, fileset_id, args) + fileset_dirs = {} + # handle fileset_mappings + if args.fileset_mappings: + with open(args.fileset_mappings, newline='') as csvfile: + csvreader = csv.reader(csvfile, delimiter=' ', quotechar='|') + for row in csvreader: + fileset_dirs[row[0]] = row[1] + # find files/dirs in Fileset template_path that are also in the symlink dir... fs_contents = os.listdir(template_path) if args.report: @@ -26,7 +35,8 @@ def create_symlinks(conn, fileset_id, args): for fs_item in fs_contents: # check if item is in target dir - symlink_target = os.path.join(args.target, fs_item) + target_dir = fileset_dirs.get(fs_item, fs_item) + symlink_target = os.path.join(args.target, target_dir) if not os.path.exists(symlink_target): print("Symlink target not found:", symlink_target) continue @@ -92,10 +102,14 @@ def main(argv): templatePrefix dir under ManagedRepository. If these items are also found in the target directory, we delete the item from the Managed Repo and replace it with a symlink to the equivalent dir (or file) in the target dir. + If fileset-mappings csv file is given, we map from fileset name e.g. plate1.zarr to a differently + named directory within the `target`, e.g. abc123/abc123.zarr (pattern used by BioStudies). + Each row of csv should be e.g. "plate1.zarr, abc123/abc123.zarr" """ parser = argparse.ArgumentParser() parser.add_argument('object', help='Object:ID where Object is Screen, Plate, Dataset, Image, Fileset') parser.add_argument("target", help="path to dir that contains symlink targets") + parser.add_argument("--fileset-mappings", help="Optional path/to/file.csv where each row is: fs_name, symlink_target") parser.add_argument("--repo", help="Managed Repo absolute path", default="/data/OMERO/ManagedRepository") parser.add_argument("--report", action="store_true", help="Print logs") parser.add_argument("--dry-run", action="store_true", help="Don't save any changes") From 42f42c5f6396ae2f9400846644ca4b39e021fc6c Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 29 May 2023 13:57:30 +0100 Subject: [PATCH 6/9] Fix csv delimiter --- scripts/managed_repo_symlinks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/managed_repo_symlinks.py b/scripts/managed_repo_symlinks.py index 4a7903de..397e9159 100644 --- a/scripts/managed_repo_symlinks.py +++ b/scripts/managed_repo_symlinks.py @@ -24,9 +24,11 @@ def create_symlinks(conn, fileset_id, args): # handle fileset_mappings if args.fileset_mappings: with open(args.fileset_mappings, newline='') as csvfile: - csvreader = csv.reader(csvfile, delimiter=' ', quotechar='|') + csvreader = csv.reader(csvfile, delimiter=',') for row in csvreader: fileset_dirs[row[0]] = row[1] + if args.report: + print("fileset_dirs", fileset_dirs) # find files/dirs in Fileset template_path that are also in the symlink dir... fs_contents = os.listdir(template_path) From 163530818a11c6d7bfa42c276579cdb8bbc55d84 Mon Sep 17 00:00:00 2001 From: William Moore Date: Tue, 30 May 2023 17:33:42 +0100 Subject: [PATCH 7/9] Only read the fileset_mappings csv once --- scripts/managed_repo_symlinks.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/scripts/managed_repo_symlinks.py b/scripts/managed_repo_symlinks.py index 397e9159..19fb0140 100644 --- a/scripts/managed_repo_symlinks.py +++ b/scripts/managed_repo_symlinks.py @@ -9,7 +9,8 @@ from omero.gateway import BlitzGateway -def create_symlinks(conn, fileset_id, args): +def create_symlinks(conn, fileset_id, fileset_dirs, args): + """ fileset_dirs is dict of 'filset_dir.zarr': 's3_dir_name.zarr' """ fileset = conn.getQueryService().get("Fileset", fileset_id, conn.SERVICE_OPTS) template_path = os.path.join(args.repo, fileset.templatePrefix.val) @@ -20,16 +21,6 @@ def create_symlinks(conn, fileset_id, args): preview_image(conn, fileset_id, args) - fileset_dirs = {} - # handle fileset_mappings - if args.fileset_mappings: - with open(args.fileset_mappings, newline='') as csvfile: - csvreader = csv.reader(csvfile, delimiter=',') - for row in csvreader: - fileset_dirs[row[0]] = row[1] - if args.report: - print("fileset_dirs", fileset_dirs) - # find files/dirs in Fileset template_path that are also in the symlink dir... fs_contents = os.listdir(template_path) if args.report: @@ -118,6 +109,16 @@ def main(argv): args = parser.parse_args(argv) object_str = args.object + fileset_dirs = {} + # handle fileset_mappings + if args.fileset_mappings: + with open(args.fileset_mappings, newline='') as csvfile: + csvreader = csv.reader(csvfile, delimiter=',') + for row in csvreader: + fileset_dirs[row[0]] = row[1] + if args.report: + print("fileset_dirs", fileset_dirs) + with cli_login() as cli: conn = BlitzGateway(client_obj=cli._client) assert ":" in object_str @@ -132,7 +133,7 @@ def main(argv): for object_str in obj_strings: fileset_id = get_fileset_id(conn, object_str) - create_symlinks(conn, fileset_id, args) + create_symlinks(conn, fileset_id, fileset_dirs, args) if __name__ == '__main__': From 2888b21c2751421ecf24a845c0b0da2aa6662184 Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 20 Oct 2023 17:05:48 +0100 Subject: [PATCH 8/9] TEMP add /ngff/idr0125 to templatePrefix for each fileset --- scripts/managed_repo_symlinks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/managed_repo_symlinks.py b/scripts/managed_repo_symlinks.py index 19fb0140..638a235d 100644 --- a/scripts/managed_repo_symlinks.py +++ b/scripts/managed_repo_symlinks.py @@ -13,7 +13,7 @@ def create_symlinks(conn, fileset_id, fileset_dirs, args): """ fileset_dirs is dict of 'filset_dir.zarr': 's3_dir_name.zarr' """ fileset = conn.getQueryService().get("Fileset", fileset_id, conn.SERVICE_OPTS) - template_path = os.path.join(args.repo, fileset.templatePrefix.val) + template_path = os.path.join(args.repo, fileset.templatePrefix.val, "ngff", "idr0125") if args.report: print("\nFileset:", fileset.id.val, template_path) From 2485c0050da9a5f2bb0544a436b695701d669f2d Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 23 Oct 2023 13:21:34 +0100 Subject: [PATCH 9/9] Revert "TEMP add /ngff/idr0125 to templatePrefix for each fileset" This reverts commit 2888b21c2751421ecf24a845c0b0da2aa6662184. --- scripts/managed_repo_symlinks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/managed_repo_symlinks.py b/scripts/managed_repo_symlinks.py index 638a235d..19fb0140 100644 --- a/scripts/managed_repo_symlinks.py +++ b/scripts/managed_repo_symlinks.py @@ -13,7 +13,7 @@ def create_symlinks(conn, fileset_id, fileset_dirs, args): """ fileset_dirs is dict of 'filset_dir.zarr': 's3_dir_name.zarr' """ fileset = conn.getQueryService().get("Fileset", fileset_id, conn.SERVICE_OPTS) - template_path = os.path.join(args.repo, fileset.templatePrefix.val, "ngff", "idr0125") + template_path = os.path.join(args.repo, fileset.templatePrefix.val) if args.report: print("\nFileset:", fileset.id.val, template_path)