From a23f59e62fb755fcf55d57dbc2078701e775830d Mon Sep 17 00:00:00 2001
From: Dominik Lindner <d.lindner@dundee.ac.uk>
Date: Tue, 22 Aug 2023 11:36:31 +0100
Subject: [PATCH 1/3] Add script to fix duplicated URL annotations

---
 scripts/annotate/fix_annotations.py | 71 +++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 scripts/annotate/fix_annotations.py
diff --git a/scripts/annotate/fix_annotations.py b/scripts/annotate/fix_annotations.py
new file mode 100644
index 00000000..65b2e1e5
--- /dev/null
+++ b/scripts/annotate/fix_annotations.py
@@ -0,0 +1,71 @@
+import argparse
+import re
+
+import omero.cli
+
+parser = argparse.ArgumentParser(description="Generate SQL script to fix duplicated URL annotations")
+parser.add_argument("url", help="Regex of the URL to check for (incl a named group to match the ID!)"
+                                ", e.g. \".+ncbi\.nlm\.nih\.gov\/gene\/(?P<ID>.+)\"")
+parser.add_argument("namespace", help="The namespace of the annotations, e.g. openmicroscopy.org/mapr/gene")
+
+url_names = {"openmicroscopy.org/mapr/gene": "Gene Identifier URL",
+             "openmicroscopy.org/mapr/compound": "Compound Name URL"}
+
+pref_urls = [re.compile(r"^https:\/\/(?!www).+"),\
+             re.compile(r"^https:\/\/.+")] # prefer https:// without www over with www
+
+def load_annotations(conn, namespace):
+    metadataService = conn.getMetadataService()
+    annotations = metadataService.loadSpecifiedAnnotations(
+        'omero.model.MapAnnotation', [namespace], None, None)
+    for ann in annotations:
+        yield ann
+
+def get_urls(ann, pattern):
+    urls = []
+    ids = set()
+    symbol = ""
+    for nv in ann._mapValue:
+        if nv.name == url_names[ann._ns._val]:
+            m = pattern.match(nv.value)
+            if m:
+                urls.append(nv.value)
+                ids.add(m.group("ID"))
+        if "Symbol" in nv.name:
+            symbol = nv.value
+        elif "Name" in nv.name:
+            symbol = nv.value
+    if len(ids) > 1:
+        raise Exception(f"IDs don't match! ({ann._id._val}, {symbol}, {urls})")
+    return symbol, urls
+
+
+def check_annotations(conn, args):
+    pattern = re.compile(f"{args.url}")
+    for ann in load_annotations(conn, args.namespace):
+        symbol, urls = get_urls(ann, pattern)
+        if (len(urls) > 1):
+            url_to_keep = None
+            for url in urls:
+                for pref_url in pref_urls:
+                    if pref_url.match(url):
+                        url_to_keep = url
+                        break
+                if url_to_keep:
+                    break
+            if url_to_keep:
+                urls_to_delete = []
+                for url in urls:
+                    if url != url_to_keep:
+                        urls_to_delete.append(url)
+                print(f"-- Symbol: {symbol} - Annotation ID: {ann._id._val}")
+                print(f"-- URLs: {urls} - keep: {url_to_keep}")
+                for url in urls_to_delete:
+                    print(f"DELETE FROM annotation_mapvalue mv WHERE mv.annotation_id = {ann._id._val} AND mv.value = '{url}';\n")
+
+
+args = parser.parse_args()
+with omero.cli.cli_login() as c:
+    conn = omero.gateway.BlitzGateway(client_obj=c.get_client())
+    check_annotations(conn, args)
+

From 995a0a5f717aa573ec365e02291e99457493091f Mon Sep 17 00:00:00 2001
From: Dominik Lindner <d.lindner@dundee.ac.uk>
Date: Tue, 22 Aug 2023 14:29:58 +0100
Subject: [PATCH 2/3] Group urls by id not annotation id

---
 scripts/annotate/fix_annotations.py | 71 +++++++++++++++--------------
 1 file changed, 38 insertions(+), 33 deletions(-)

diff --git a/scripts/annotate/fix_annotations.py b/scripts/annotate/fix_annotations.py
index 65b2e1e5..7594460b 100644
--- a/scripts/annotate/fix_annotations.py
+++ b/scripts/annotate/fix_annotations.py
@@ -1,67 +1,72 @@
 import argparse
 import re
-
 import omero.cli
 
+
 parser = argparse.ArgumentParser(description="Generate SQL script to fix duplicated URL annotations")
 parser.add_argument("url", help="Regex of the URL to check for (incl a named group to match the ID!)"
                                 ", e.g. \".+ncbi\.nlm\.nih\.gov\/gene\/(?P<ID>.+)\"")
 parser.add_argument("namespace", help="The namespace of the annotations, e.g. openmicroscopy.org/mapr/gene")
 
 url_names = {"openmicroscopy.org/mapr/gene": "Gene Identifier URL",
-             "openmicroscopy.org/mapr/compound": "Compound Name URL"}
+             "openmicroscopy.org/mapr/compound": "Compound Name URL",
+             "openmicroscopy.org/mapr/phenotype": "Phenotype Term Accession URL"}
 
 pref_urls = [re.compile(r"^https:\/\/(?!www).+"),\
              re.compile(r"^https:\/\/.+")] # prefer https:// without www over with www
 
-def load_annotations(conn, namespace):
+
+def get_annotations(conn, namespace):
+    """
+    Load the map annotations with the given namespace and yield them one by one
+    :param conn: Reference to the BlitzGateway (used to get the metadata service)
+    :param namespace: The namespace the map annotations are loaded for
+    :return: Generator yielding the loaded map annotations
+    """
     metadataService = conn.getMetadataService()
     annotations = metadataService.loadSpecifiedAnnotations(
         'omero.model.MapAnnotation', [namespace], None, None)
     for ann in annotations:
         yield ann
 
+
 def get_urls(ann, pattern):
-    urls = []
-    ids = set()
-    symbol = ""
+    """
+    Collect the URLs of a map annotation that match the pattern, grouped by ID
+    :param ann: The map annotation; only values named url_names[<its namespace>] are read
+    :param pattern: Compiled regex capturing the ID in a named group called "ID"
+    :return: Dictionary mapping each captured ID to the list of matching URLs
+    """
+    urls = dict()
     for nv in ann._mapValue:
         if nv.name == url_names[ann._ns._val]:
             m = pattern.match(nv.value)
             if m:
-                urls.append(nv.value)
-                ids.add(m.group("ID"))
-        if "Symbol" in nv.name:
-            symbol = nv.value
-        elif "Name" in nv.name:
-            symbol = nv.value
-    if len(ids) > 1:
-        raise Exception(f"IDs don't match! ({ann._id._val}, {symbol}, {urls})")
-    return symbol, urls
+                if m.group("ID") not in urls:  # first URL seen for this ID
+                    urls[m.group("ID")] = []
+                urls[m.group("ID")].append(nv.value)
+    return urls
 
 
 def check_annotations(conn, args):
     pattern = re.compile(f"{args.url}")
-    for ann in load_annotations(conn, args.namespace):
-        symbol, urls = get_urls(ann, pattern)
-        if (len(urls) > 1):
-            url_to_keep = None
-            for url in urls:
-                for pref_url in pref_urls:
-                    if pref_url.match(url):
-                        url_to_keep = url
+    for ann in get_annotations(conn, args.namespace):
+        for url_id, urls in get_urls(ann, pattern).items():
+            if (len(urls) > 1):  # only IDs with more than one URL need fixing
+                url_to_keep = None  # stays None (nothing is printed) if no URL matches pref_urls
+                for pref_url in pref_urls:  # outer loop: patterns, most preferred first
+                    for url in urls:  # keep the first URL matching the current pattern
+                        if pref_url.match(url):
+                            url_to_keep = url
+                            break
+                    if url_to_keep:
                         break
                 if url_to_keep:
-                    break
-            if url_to_keep:
-                urls_to_delete = []
-                for url in urls:
-                    if url != url_to_keep:
-                        urls_to_delete.append(url)
-                print(f"-- Symbol: {symbol} - Annotation ID: {ann._id._val}")
-                print(f"-- URLs: {urls} - keep: {url_to_keep}")
-                for url in urls_to_delete:
-                    print(f"DELETE FROM annotation_mapvalue mv WHERE mv.annotation_id = {ann._id._val} AND mv.value = '{url}';\n")
+                    print(f"-- ID: {url_id} - Annotation ID: {ann._id._val}")
+                    print(f"-- URLs: {urls} - keep: {url_to_keep}")
+                    for url in urls:
+                        if url != url_to_keep:
+                            print(f"DELETE FROM annotation_mapvalue mv WHERE mv.annotation_id = {ann._id._val} AND mv.value = '{url}';\n")
 
 
 args = parser.parse_args()

From f11273eec27cdbefb4692f950b4bb5d354b52d46 Mon Sep 17 00:00:00 2001
From: Dominik Lindner <d.lindner@dundee.ac.uk>
Date: Tue, 22 Aug 2023 14:48:58 +0100
Subject: [PATCH 3/3] Add antibody

---
 scripts/annotate/fix_annotations.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/annotate/fix_annotations.py b/scripts/annotate/fix_annotations.py
index 7594460b..3ec241a0 100644
--- a/scripts/annotate/fix_annotations.py
+++ b/scripts/annotate/fix_annotations.py
@@ -10,7 +10,8 @@
 
 url_names = {"openmicroscopy.org/mapr/gene": "Gene Identifier URL",
              "openmicroscopy.org/mapr/compound": "Compound Name URL",
-             "openmicroscopy.org/mapr/phenotype": "Phenotype Term Accession URL"}
+             "openmicroscopy.org/mapr/phenotype": "Phenotype Term Accession URL",
+             "openmicroscopy.org/mapr/antibody": "Antibody Identifier URL"}
 
 pref_urls = [re.compile(r"^https:\/\/(?!www).+"),\
              re.compile(r"^https:\/\/.+")] # prefer https:// without www over with www