VedaWebProject
diff --git a/‎Tekst-API/demo/contents.json
Lines changed: 204 additions & 174 deletions b/‎Tekst-API/demo/contents.json
Lines changed: 204 additions & 174 deletions
diff --git a/‎Tekst-API/demo/resources.json
Lines changed: 17 additions & 0 deletions b/‎Tekst-API/demo/resources.json
Lines changed: 17 additions & 0 deletions
diff --git a/‎Tekst-API/openapi.json
Lines changed: 12 additions & 42 deletions b/‎Tekst-API/openapi.json
Lines changed: 12 additions & 42 deletions
diff --git a/‎Tekst-API/pyproject.toml
Lines changed: 1 addition & 1 deletion b/‎Tekst-API/pyproject.toml
Lines changed: 1 addition & 1 deletion
diff --git a/‎Tekst-API/tekst/db/migrations/migration_0_19_0a0.py
Lines changed: 20 additions & 0 deletions b/‎Tekst-API/tekst/db/migrations/migration_0_19_0a0.py
Lines changed: 20 additions & 0 deletions
diff --git a/‎Tekst-API/tekst/resources/text_annotation.py
Lines changed: 21 additions & 84 deletions b/‎Tekst-API/tekst/resources/text_annotation.py
Lines changed: 21 additions & 84 deletions
@@ -481,6 +481,10 @@
           "multi_value_delimiter": "/",
           "anno_integration": {
             "groups": [
+              {
+                "key": "form",
+                "translations": [{ "locale": "*", "translation": "Form" }]
+              },
               {
                 "key": "ne",
                 "translations": [
@@ -497,6 +501,19 @@
               }
             ],
             "item_props": [
+              {
+                "key": "form",
+                "translations": [{ "locale": "*", "translation": "Form" }],
+                "group": "form"
+              },
+              {
+                "key": "eol",
+                "translations": [
+                  { "locale": "enUS", "translation": "End-of-line" },
+                  { "locale": "deDE", "translation": "Zeilenende" }
+                ],
+                "group": "misc"
+              },
               {
                 "key": "len",
                 "translations": [
 
@@ -4,7 +4,7 @@
     "title": "Tekst-Dev",
     "summary": "An online text research platform",
     "contact": {},
-    "version": "0.18.9a0"
+    "version": "0.19.0a0"
   },
   "servers": [
     {
@@ -4956,15 +4956,9 @@
             "in": "query",
             "required": false,
             "schema": {
-              "enum": [
-                "json",
-                "tekst-json",
-                "csv"
-              ],
-              "type": "string",
+              "$ref": "#/components/schemas/ResourceExportFormat",
               "description": "Export format",
-              "default": "json",
-              "title": "Format"
+              "default": "json"
             },
             "description": "Export format"
           },
@@ -15623,6 +15617,14 @@
         ],
         "title": "ResourceDescriptionTranslation"
       },
+      "ResourceExportFormat": {
+        "type": "string",
+        "enum": [
+          "json",
+          "tekst-json",
+          "csv"
+        ]
+      },
       "ResourceSearchQuery": {
         "properties": {
           "cmn": {
@@ -17875,22 +17877,6 @@
             "title": "Type",
             "description": "Type of the resource to search in"
           },
-          "token": {
-            "type": "string",
-            "maxLength": 512,
-            "minLength": 0,
-            "title": "Token",
-            "description": "Token search query",
-            "default": "",
-            "optionalNullable": true
-          },
-          "twc": {
-            "type": "boolean",
-            "title": "Twc",
-            "description": "Whether to interpret wildcards in the token query",
-            "default": false,
-            "optionalNullable": true
-          },
           "anno": {
             "items": {
               "$ref": "#/components/schemas/TextAnnotationQueryEntry"
@@ -17927,34 +17913,18 @@
       },
       "TextAnnotationToken": {
         "properties": {
-          "token": {
-            "type": "string",
-            "maxLength": 4096,
-            "minLength": 1,
-            "title": "Token",
-            "description": "Text token"
-          },
           "annotations": {
             "items": {
               "$ref": "#/components/schemas/TextAnnotationEntry"
             },
             "type": "array",
             "maxItems": 128,
             "title": "Annotations",
-            "description": "List of annotations on this token",
+            "description": "List of annotations on a token",
             "default": []
-          },
-          "lb": {
-            "type": "boolean",
-            "title": "Lb",
-            "description": "Whether this token ends a line",
-            "default": false
           }
         },
         "type": "object",
-        "required": [
-          "token"
-        ],
         "title": "TextAnnotationToken"
       },
       "TextAnnotationValue": {
 
@@ -1,6 +1,6 @@
 [project]
 name = "tekst"
-version = "0.18.9a0"
+version = "0.19.0a0"
 description = "An online text research platform"
 readme = "README.md"
 authors = [
 
@@ -0,0 +1,20 @@
+from tekst.db import Database
+
+
+async def migration(db: Database) -> None:
+    """Move the legacy token fields of text annotation contents into annotations.
+
+    For every content document with ``resource_type == "textAnnotation"``:
+
+    - a non-empty ``token`` field of a token becomes a ``form`` annotation
+    - a truthy ``lb`` field of a token becomes an ``eol`` annotation
+    - both legacy fields are removed from the token
+
+    The legacy fields are removed in the same document replacement that adds
+    the annotations, so each document is either fully migrated or untouched.
+    Repeating the migration after an interrupted run therefore does not add
+    the ``form``/``eol`` annotations a second time.
+
+    Args:
+        db: Database handle; only its ``contents`` collection is used.
+    """
+    async for content in db.contents.find({"resource_type": "textAnnotation"}):
+        changed = False
+        # tolerate documents that were stored without a "tokens" list
+        for token in content.get("tokens") or []:
+            if "token" not in token and "lb" not in token:
+                # no legacy fields left on this token (already migrated)
+                continue
+            changed = True
+            # tolerate tokens that were stored without an "annotations" list
+            annotations = token.setdefault("annotations", [])
+            # - add "token.token" to "token.annotations" as "form"
+            if token_form := token.pop("token", None):
+                annotations.append({"key": "form", "value": [token_form]})
+            # - add "token.lb" to "token.annotations" as "eol"
+            if token.pop("lb", False):
+                annotations.append({"key": "eol", "value": ["true"]})
+        if changed:
+            # replace with updated content doc (legacy fields already removed)
+            await db.contents.replace_one({"_id": content["_id"]}, content)
@@ -54,16 +54,6 @@ def _rtype_index_mappings(
             "tokens": {
                 "type": "nested",
                 "properties": {
-                    "token": {
-                        "type": "keyword",
-                        "normalizer": "no_diacritics_normalizer",
-                        "fields": {
-                            "strict": {
-                                "type": "keyword",
-                                "normalizer": "lowercase_normalizer",
-                            }
-                        },
-                    },
                     "annotations": {
                         "type": "nested",
                         "properties": {
@@ -101,10 +91,14 @@ def _rtype_index_doc(
         cls,
         content: "TextAnnotationContent",
     ) -> dict[str, Any] | None:
+        token_forms = []
+        for token in content.tokens:
+            for anno in token.annotations:
+                if anno.key == "form":
+                    token_forms.append("/".join(anno.value))
         return {
             "tokens": [
                 {
-                    "token": token.token or "",
                     "annotations": [
                         {
                             "key": anno.key,
@@ -119,7 +113,7 @@ def _rtype_index_doc(
                 }
                 for token in content.tokens
             ],
-            "tokens_concat": "; ".join(token.token or "" for token in content.tokens),
+            "tokens_concat": "; ".join(token_forms),
         }
 
     @classmethod
@@ -134,31 +128,9 @@ def rtype_es_queries(
         res_id = str(query.common.resource_id)
         q_id = str(uuid4())
 
-        token_usr_q = (query.resource_type_specific.token or "").strip(" ") or None
-        token_es_q = []
-        token_wc = query.resource_type_specific.token_wildcards
         annos_usr_q = query.resource_type_specific.annotations or []
         annos_es_q = []
 
-        # process token query
-        if token_usr_q and token_usr_q.strip("* "):
-            # handle actual token query with content
-            token_es_q.append(
-                {
-                    "wildcard": {
-                        f"resources.{res_id}.tokens.token{strict_suffix}": {
-                            "value": token_usr_q,
-                        }
-                    }
-                }
-                if token_wc
-                else {
-                    "term": {
-                        f"resources.{res_id}.tokens.token{strict_suffix}": token_usr_q
-                    }
-                }
-            )
-
         # process annotation queries
         for anno_q in annos_usr_q:
             if anno_q.key and not anno_q.value:
@@ -176,6 +148,7 @@ def rtype_es_queries(
                 )
             elif anno_q.key and anno_q.value:
                 # both key and value are set: query for specific key/value combination
+                anno_v = anno_q.value.strip()
                 anno_k_q = {
                     "term": {f"resources.{res_id}.tokens.annotations.key": anno_q.key}
                 }
@@ -186,7 +159,7 @@ def rtype_es_queries(
                                 f"resources.{res_id}.tokens.annotations"
                                 f".value{strict_suffix}"
                             ): {
-                                "value": anno_q.value,
+                                "value": anno_v,
                             }
                         }
                     }
@@ -196,7 +169,7 @@ def rtype_es_queries(
                             (
                                 f"resources.{res_id}.tokens.annotations"
                                 f".value{strict_suffix}"
-                            ): anno_q.value
+                            ): anno_v
                         }
                     }
                 )
@@ -214,20 +187,19 @@ def rtype_es_queries(
                 )
 
         # add token and annotation queries to the ES queries
-        if token_es_q or annos_es_q:
-            es_sub_queries = [*token_es_q, *annos_es_q]
+        if annos_es_q:
             es_queries.append(
                 {
                     "nested": {
                         "path": f"resources.{res_id}.tokens",
                         "inner_hits": {"name": q_id},
                         "query": {
                             "bool": {
-                                "must": es_sub_queries,
+                                "must": annos_es_q,
                             },
                         }
-                        if len(es_sub_queries) > 1
-                        else es_sub_queries[0],
+                        if len(annos_es_q) > 1
+                        else annos_es_q[0],
                     }
                 }
             )
@@ -243,14 +215,13 @@ def _highlights_generator(hit: dict[str, Any]) -> list[str]:
                     hl_strings.extend(hl_v)
             for ih in hit.get("inner_hits", {}).values():
                 for ih_hit in ih.get("hits", {}).get("hits", []):
-                    token = ih_hit["_source"]["token"]
-                    annos = ih_hit["_source"]["annotations"]
-                    values = [a["value"] for a in annos] if annos else []
-                    values_strings = []
-                    for v in values:
-                        values_strings.extend(v if isinstance(v, list) else [v])
-                    annos = f" ({'; '.join(values_strings)})"
-                    hl_strings.append(f"{token} {annos}")
+                    values = [
+                        a["value"] for a in ih_hit["_source"]["annotations"] or []
+                    ]
+                    values_strings = [
+                        ", ".join(v) if isinstance(v, list) else v for v in values
+                    ]
+                    hl_strings.append("; ".join(values_strings))
             return hl_strings
 
         return _highlights_generator
@@ -302,7 +273,6 @@ async def _export_csv(
                 [
                     "LOCATION",
                     "POSITION",
-                    "TOKEN",
                     *anno_keys,
                     "AUTHORS_COMMENT",
                     "EDITORS_COMMENT",
@@ -323,7 +293,6 @@ async def _export_csv(
                         [
                             full_location_labels.get(str(content.location_id), ""),
                             i,
-                            token.token,
                             *csv_annos,
                             content.authors_comment,
                             content.editors_comment,
@@ -529,26 +498,13 @@ class TextAnnotationEntry(ModelBase):
 
 
 class TextAnnotationToken(ModelBase):
+    # A single token of a text annotation content. After this change the model
+    # holds nothing but the token's annotations: the dedicated "token" (text)
+    # and "lb" (ends a line) fields are removed here, and the accompanying
+    # migration stores their values as "form" and "eol" annotations instead.
-    token: Annotated[
-        ConStr(
-            max_length=4096,
-            cleanup="oneline",
-        ),
-        Field(
-            description="Text token",
-        ),
-    ]
+    # Annotations of this token; at most 128 entries, empty by default.
     annotations: Annotated[
         list[TextAnnotationEntry],
         Field(
-            description="List of annotations on this token",
+            description="List of annotations on a token",
             max_length=128,
         ),
     ] = []
-    lb: Annotated[
-        bool,
-        Field(description="Whether this token ends a line"),
-    ] = False
 
 
 class TextAnnotationContent(ContentBase):
@@ -603,25 +559,6 @@ class TextAnnotationSearchQuery(ModelBase):
             description="Type of the resource to search in",
         ),
     ]
-    token: Annotated[
-        ConStr(
-            min_length=0,
-            max_length=512,
-            cleanup="oneline",
-        ),
-        Field(
-            description="Token search query",
-        ),
-        SchemaOptionalNullable,
-    ] = ""
-    token_wildcards: Annotated[
-        bool,
-        Field(
-            alias="twc",
-            description="Whether to interpret wildcards in the token query",
-        ),
-        SchemaOptionalNullable,
-    ] = False
     annotations: Annotated[
         list[TextAnnotationQueryEntry],
         Field(