
Commit fd99fbe

more work

1 parent 1f052bd, commit fd99fbe

7 files changed (+215, -50 lines)

.DS_Store (binary file, 0 bytes, not shown)

README.md (+126, -2)

@@ -557,15 +557,139 @@ pip install https://huggingface.co/emiltj/da_multi_dupli_rater_1_onto/resolve/ma
 python src/predict_single/predict_rater_2-9.py
 ```
 
-# GOTTEN TO HERE
-
 - **Assess agreement between rater and model**
   - Make assessment fine-grained, and assess for each type of ent, in prodigy using the review recipe
+  For rater 3:
+  Cases where ents are same between predicted and model: 638
+  Cases where ents are NOT same between preds and model: 888
+  For rater 4:
+  Cases where ents are same between predicted and model: 1114
+  Cases where ents are NOT same between preds and model: 1363
+  For rater 5:
+  Cases where ents are same between predicted and model: 422
+  Cases where ents are NOT same between preds and model: 980
+  For rater 6:
+  Cases where ents are same between predicted and model: 1046
+  Cases where ents are NOT same between preds and model: 1213
+  For rater 7:
+  Cases where ents are same between predicted and model: 754
+  Cases where ents are NOT same between preds and model: 1148
+  For rater 8:
+  Cases where ents are same between predicted and model: 622
+  Cases where ents are NOT same between preds and model: 1076
+  For rater 9:
+  Cases where ents are same between predicted and model: 906
+  Cases where ents are NOT same between preds and model: 1203
+  Total cases where ents are same 5502
+  Total cases where ents are NOT same 7871
 ```bash
 # Go through script manually:
 # src/data_assessment/model_and_raters_agreement.ipynb
 ```
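The notebook behind these counts loops over raters, loads the prediction and annotation `.spacy` DocBins, and counts documents whose entity sets match exactly. A minimal pure-Python sketch of that document-level criterion, with made-up spans standing in for the repo's data:

```python
# Each doc's entities as (start_char, end_char, label) tuples; the data here is
# invented for illustration -- the real pipeline loads .spacy DocBin files.
preds = [
    [(0, 4, "PERSON"), (11, 17, "GPE")],  # model predictions, doc 1
    [(7, 10, "MONEY")],                   # model predictions, doc 2
]
annotations = [
    [(0, 4, "PERSON"), (11, 17, "GPE")],  # rater annotation, doc 1 (identical)
    [(7, 13, "MONEY")],                   # rater annotation, doc 2 (longer span)
]

# A doc counts as "same" only when every span (offsets and label) matches
# exactly; a single differing entity makes the whole doc count as "NOT same".
same = sum(p == a for p, a in zip(preds, annotations))
print(f"Cases where ents are same between predicted and model: {same}")
print(f"Cases where ents are NOT same between preds and model: {len(preds) - same}")
```

Note this is an all-or-nothing measure per document; it says nothing about which entity types disagree, which is why the fine-grained per-type review in Prodigy is the next step.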
 
+- **Add predictions to db**
+  - Creates in db:
+    - rater_"$i"_single_unprocessed_preds
+  - Creates in folders:
+    - ./data/single/unprocessed/rater_$i/rater_"$i"_preds.jsonl
+```bash
+# tools/raters_preds_to_db.sh
+prodigy drop rater_2_single_unprocessed_preds
+prodigy drop rater_10_single_unprocessed_preds
+```
+
+# GOTTEN TO HERE
+
+- **Review raters 3, 4, 5, 6, 7, 8, 9**
+```bash
+prodigy review rater_3_single_gold_all rater_3_single_unprocessed,rater_3_single_unprocessed_preds --label PERSON,NORP,FACILITY,ORGANIZATION,LOCATION,EVENT,LAW,DATE,TIME,PERCENT,MONEY,QUANTITY,ORDINAL,CARDINAL,GPE,WORK\ OF\ ART,LANGUAGE,PRODUCT -S -A
+
+prodigy review rater_4_single_gold_all rater_4_single_unprocessed,rater_4_single_unprocessed_preds --label PERSON,NORP,FACILITY,ORGANIZATION,LOCATION,EVENT,LAW,DATE,TIME,PERCENT,MONEY,QUANTITY,ORDINAL,CARDINAL,GPE,WORK\ OF\ ART,LANGUAGE,PRODUCT -S -A
+
+prodigy review rater_5_single_gold_all rater_5_single_unprocessed,rater_5_single_unprocessed_preds --label PERSON,NORP,FACILITY,ORGANIZATION,LOCATION,EVENT,LAW,DATE,TIME,PERCENT,MONEY,QUANTITY,ORDINAL,CARDINAL,GPE,WORK\ OF\ ART,LANGUAGE,PRODUCT -S -A
+
+prodigy review rater_6_single_gold_all rater_6_single_unprocessed,rater_6_single_unprocessed_preds --label PERSON,NORP,FACILITY,ORGANIZATION,LOCATION,EVENT,LAW,DATE,TIME,PERCENT,MONEY,QUANTITY,ORDINAL,CARDINAL,GPE,WORK\ OF\ ART,LANGUAGE,PRODUCT -S -A
+
+prodigy review rater_7_single_gold_all rater_7_single_unprocessed,rater_7_single_unprocessed_preds --label PERSON,NORP,FACILITY,ORGANIZATION,LOCATION,EVENT,LAW,DATE,TIME,PERCENT,MONEY,QUANTITY,ORDINAL,CARDINAL,GPE,WORK\ OF\ ART,LANGUAGE,PRODUCT -S -A
+
+prodigy review rater_8_single_gold_all rater_8_single_unprocessed,rater_8_single_unprocessed_preds --label PERSON,NORP,FACILITY,ORGANIZATION,LOCATION,EVENT,LAW,DATE,TIME,PERCENT,MONEY,QUANTITY,ORDINAL,CARDINAL,GPE,WORK\ OF\ ART,LANGUAGE,PRODUCT -S -A
+
+prodigy review rater_9_single_gold_all rater_9_single_unprocessed,rater_9_single_unprocessed_preds --label PERSON,NORP,FACILITY,ORGANIZATION,LOCATION,EVENT,LAW,DATE,TIME,PERCENT,MONEY,QUANTITY,ORDINAL,CARDINAL,GPE,WORK\ OF\ ART,LANGUAGE,PRODUCT -S -A
+```
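The seven `prodigy review` invocations differ only in the rater number, so they can be generated instead of hand-edited. A sketch (quoting the whole label list is shell-equivalent to the backslash-escaped spaces above):

```python
# Print one `prodigy review` command per rater instead of maintaining seven
# near-identical lines by hand.
LABELS = ("PERSON,NORP,FACILITY,ORGANIZATION,LOCATION,EVENT,LAW,DATE,TIME,"
          "PERCENT,MONEY,QUANTITY,ORDINAL,CARDINAL,GPE,WORK OF ART,LANGUAGE,PRODUCT")

for r in [3, 4, 5, 6, 7, 8, 9]:
    print(
        f"prodigy review rater_{r}_single_gold_all "
        f"rater_{r}_single_unprocessed,rater_{r}_single_unprocessed_preds "
        f"--label '{LABELS}' -S -A"
    )
```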
+
+- **Split the rater_{r}_single_gold_all datasets**
+  - Creates new files:
+    - ./data/single/gold/rater_{r}/rater_{r}_single_gold_all.jsonl
+  - Creates new in db:
+    - rater_{r}_single_gold_accepted
+    - rater_{r}_single_gold_ignored
+    - rater_{r}_single_gold_rejected
+```bash
+python src/preprocessing/split_by_answer_rater_3_9_single_gold.py
+```
+
+- **Resolve ignored cases in rater_{r}_single_gold_ignored**
+  - Creates in db:
+    - rater_{r}_single_gold_ignored_resolved
+```bash
+prodigy mark rater_3_single_gold_ignored_resolved dataset:rater_3_single_gold_ignored --view-id review --label PERSON,NORP,FACILITY,ORGANIZATION,LOCATION,EVENT,LAW,DATE,TIME,PERCENT,MONEY,QUANTITY,ORDINAL,CARDINAL,GPE,WORK\ OF\ ART,LANGUAGE,PRODUCT
+
+prodigy mark rater_4_single_gold_ignored_resolved dataset:rater_4_single_gold_ignored --view-id review --label PERSON,NORP,FACILITY,ORGANIZATION,LOCATION,EVENT,LAW,DATE,TIME,PERCENT,MONEY,QUANTITY,ORDINAL,CARDINAL,GPE,WORK\ OF\ ART,LANGUAGE,PRODUCT
+
+prodigy mark rater_5_single_gold_ignored_resolved dataset:rater_5_single_gold_ignored --view-id review --label PERSON,NORP,FACILITY,ORGANIZATION,LOCATION,EVENT,LAW,DATE,TIME,PERCENT,MONEY,QUANTITY,ORDINAL,CARDINAL,GPE,WORK\ OF\ ART,LANGUAGE,PRODUCT
+
+prodigy mark rater_6_single_gold_ignored_resolved dataset:rater_6_single_gold_ignored --view-id review --label PERSON,NORP,FACILITY,ORGANIZATION,LOCATION,EVENT,LAW,DATE,TIME,PERCENT,MONEY,QUANTITY,ORDINAL,CARDINAL,GPE,WORK\ OF\ ART,LANGUAGE,PRODUCT
+
+prodigy mark rater_7_single_gold_ignored_resolved dataset:rater_7_single_gold_ignored --view-id review --label PERSON,NORP,FACILITY,ORGANIZATION,LOCATION,EVENT,LAW,DATE,TIME,PERCENT,MONEY,QUANTITY,ORDINAL,CARDINAL,GPE,WORK\ OF\ ART,LANGUAGE,PRODUCT
+
+prodigy mark rater_8_single_gold_ignored_resolved dataset:rater_8_single_gold_ignored --view-id review --label PERSON,NORP,FACILITY,ORGANIZATION,LOCATION,EVENT,LAW,DATE,TIME,PERCENT,MONEY,QUANTITY,ORDINAL,CARDINAL,GPE,WORK\ OF\ ART,LANGUAGE,PRODUCT
+
+prodigy mark rater_9_single_gold_ignored_resolved dataset:rater_9_single_gold_ignored --view-id review --label PERSON,NORP,FACILITY,ORGANIZATION,LOCATION,EVENT,LAW,DATE,TIME,PERCENT,MONEY,QUANTITY,ORDINAL,CARDINAL,GPE,WORK\ OF\ ART,LANGUAGE,PRODUCT
+```
+
+- **Dump the rater_{r}_single_gold_ignored**
+```bash
+prodigy db-out rater_{r}_single_gold_ignored data/single/gold
+```
+
+- **Merge the rater_{r}_single_gold_ignored and the rater_{r}_single_gold_accepted**
+```bash
+prodigy db-merge rater_{r}_single_gold_accepted,rater_{r}_single_gold_ignored rater_{r}_single_gold
+```
+
+# Only written out steps to here(!)
+# Below, add:
+- Merge all single gold for all raters
+- Add language and product predictions to single gold combined
+- Resolve them
+- Overwrite the resolved cases in single gold combined (see above way of doing it)
+- Merge the single-gold-combined with extra lang+prod into the gold-multi-and-gold-rater-1-single
+- Have it be NER manual instead (see above way of doing it)
+...???
+
+- **Add Language and Product predictions on the gold-multi dataset**
+  - Use tner/roberta-large-ontonotes5
+  - Only adds one (wrong) label, so I'll skip it
+  - Perhaps it makes sense to mention it in methods regardless
+```bash
+# gold-multi-training/datasets/lang_product_predict_gold_multi.py
+```
+
+- **Merge all gold datasets in db**
+
 - **Potentially. Make appropriate changes on gold-standard-multi data based on the assessment between rater and model**
 
 - **Potentially. Re-train model on new gold-standard-multi data**

src/data_assessment/model_and_raters_agreement.ipynb (+31, -45)

@@ -2,7 +2,7 @@
 "cells": [
 {
 "cell_type": "code",
-"execution_count": 56,
+"execution_count": 1,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -15,7 +15,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 51,
+"execution_count": 2,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -24,60 +24,42 @@
 },
 {
 "cell_type": "code",
-"execution_count": 52,
+"execution_count": 4,
 "metadata": {},
 "outputs": [
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Cases where ents are same between predicted and model: 638\n",
-"Cases where ents are NOT same between preds and model: 888\n"
-]
-}
-],
-"source": [
-"# testing for single rater\n",
-"preds = list(db.from_disk(\"data/single/unprocessed/rater_3/rater_3_preds.spacy\").get_docs(nlp.vocab))\n",
-"annotations = list(db.from_disk(\"data/single/unprocessed/rater_3/train.spacy\").get_docs(nlp.vocab))\n",
-"counter = sum(\n",
-" preds[i].ents == annotations[i].ents for i in range(len(preds))\n",
-")\n",
-"print(f\"Cases where ents are same between predicted and model: {counter}\")\n",
-"print(f\"Cases where ents are NOT same between preds and model: {len(preds) - counter}\")"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 55,
-"metadata": {},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"For rater 1:\n",
-"Cases where ents are same between predicted and model: 508\n",
-"Cases where ents are NOT same between preds and model: 904\n",
 "For rater 3:\n",
 "Cases where ents are same between predicted and model: 638\n",
-"Cases where ents are NOT same between preds and model: 888\n"
-]
-},
-{
-"ename": "IndexError",
-"evalue": "list index out of range",
-"output_type": "error",
-"traceback": [
-"… (ANSI-escaped IndexError traceback from the failed In[55] run) …"
+"Cases where ents are NOT same between preds and model: 888\n",
+"For rater 4:\n",
+"Cases where ents are same between predicted and model: 1114\n",
+"Cases where ents are NOT same between preds and model: 1363\n",
+"For rater 5:\n",
+"Cases where ents are same between predicted and model: 422\n",
+"Cases where ents are NOT same between preds and model: 980\n",
+"For rater 6:\n",
+"Cases where ents are same between predicted and model: 1046\n",
+"Cases where ents are NOT same between preds and model: 1213\n",
+"For rater 7:\n",
+"Cases where ents are same between predicted and model: 754\n",
+"Cases where ents are NOT same between preds and model: 1148\n",
+"For rater 8:\n",
+"Cases where ents are same between predicted and model: 622\n",
+"Cases where ents are NOT same between preds and model: 1076\n",
+"For rater 9:\n",
+"Cases where ents are same between predicted and model: 906\n",
+"Cases where ents are NOT same between preds and model: 1203\n",
+"Total cases where ents are same 5502\n",
+"Total cases where ents are NOT same 7871\n"
 ]
 }
 ],
 "source": [
+"docs_not_same = 0\n",
+"docs_same = 0\n",
 "for r in raters:\n",
 " preds = list(db.from_disk(f\"data/single/unprocessed/rater_{r}/rater_{r}_preds.spacy\").get_docs(nlp.vocab))\n",
 " annotations = list(db.from_disk(f\"data/single/unprocessed/rater_{r}/train.spacy\").get_docs(nlp.vocab))\n",
@@ -86,7 +68,11 @@
 " )\n",
 " print(f'For rater {r}:')\n",
 " print(f\"Cases where ents are same between predicted and model: {counter}\")\n",
-" print(f\"Cases where ents are NOT same between preds and model: {len(preds) - counter}\")"
+" print(f\"Cases where ents are NOT same between preds and model: {len(preds) - counter}\")\n",
+" docs_not_same += len(preds) - counter\n",
+" docs_same += counter\n",
+"print(f\"Total cases where ents are same {docs_same}\")\n",
+"print(f\"Total cases where ents are NOT same {docs_not_same}\")"
 ]
 },
 {

src/predict_single/predict_rater_2-9.py (+2, -1)

@@ -6,12 +6,12 @@
 
 # Load rater_2-9 data
 db = DocBin()
-db2 = DocBin()
 raters = [3, 4, 5, 6, 7, 8, 9]
 nlp = spacy.blank("da")
 print("Loading model ...")
 nlp2 = spacy.load("da_multi_dupli_rater_1_onto")
 print("Model loaded, predicting on raters")
+
 # For each rater
 for r in [3, 4, 5, 6, 7, 8, 9]:  # raters:
     print(f"Predicting on rater {r} ...")
@@ -20,6 +20,7 @@
     texts = [doc.text for doc in r_docs]
     predicted_docs = [nlp2(text) for text in texts]
     savepath = f"data/single/unprocessed/rater_{r}/rater_{r}_preds.spacy"
+    db2 = DocBin()
     for doc in predicted_docs:
         db2.add(doc)
     db2.to_disk(savepath)
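Moving `db2 = DocBin()` inside the loop fixes an accumulation bug: with a single DocBin created outside the loop, each rater's output file would also contain every earlier rater's docs. The effect, sketched with plain lists standing in for DocBins:

```python
# Sketch of the bug: one shared accumulator across loop iterations means later
# "files" also contain earlier raters' docs. Lists stand in for spaCy DocBins.
docs_by_rater = {3: ["doc3a", "doc3b"], 4: ["doc4a"]}  # toy data

saved_buggy = {}
db2 = []                        # created once, outside the loop (the old code)
for r, docs in docs_by_rater.items():
    db2.extend(docs)
    saved_buggy[r] = list(db2)  # what would be written to rater_{r}_preds.spacy

saved_fixed = {}
for r, docs in docs_by_rater.items():
    db2 = []                    # fresh accumulator per rater (the committed fix)
    db2.extend(docs)
    saved_fixed[r] = list(db2)

print(saved_buggy[4])  # ['doc3a', 'doc3b', 'doc4a'] -- rater 3's docs leak in
print(saved_fixed[4])  # ['doc4a']
```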

src/preprocessing/split_by_answer_rater_1_single_gold.py (+2, -2)

@@ -6,11 +6,11 @@
 srsly.write_jsonl("./data/single/gold/rater_1/gold_all.jsonl", examples)
 
 accepted = [e for e in examples if e["answer"] == "accept"]
-srsly.write_jsonl("./data/single/gold/rater_1/gold_ignored.jsonl", accepted)
+srsly.write_jsonl("./data/single/gold/rater_1/gold_accepted.jsonl", accepted)
 db.add_examples(accepted, ["rater_1_single_gold_accepted"])
 
 ignored = [e for e in examples if e["answer"] == "ignore"]
-srsly.write_jsonl("./data/single/gold/rater_1/gold_accepted.jsonl", ignored)
+srsly.write_jsonl("./data/single/gold/rater_1/gold_ignored.jsonl", ignored)
 db.add_examples(ignored, ["rater_1_single_gold_ignored"])
 
 rejected = [e for e in examples if e["answer"] == "reject"]
src/preprocessing/split_by_answer_rater_3_9_single_gold.py (+47, new file)

@@ -0,0 +1,47 @@
+from prodigy.components.db import connect
+import srsly
+
+db = connect()
+
+raters = [3, 4, 5, 6, 7, 8, 9]
+
+for r in raters:
+    print(f"Splitting rater {r}")
+
+    examples = db.get_dataset(f"rater_{r}_single_gold_all")
+    srsly.write_jsonl(
+        f"./data/single/gold/rater_{r}/rater_{r}_single_gold_all.jsonl", examples
+    )
+    print(
+        f"New file has been created: ./data/single/gold/rater_{r}/rater_{r}_single_gold_all.jsonl"
+    )
+
+    accepted = [e for e in examples if e["answer"] == "accept"]
+    srsly.write_jsonl(
+        f"./data/single/gold/rater_{r}/rater_{r}_single_gold_accepted.jsonl", accepted
+    )
+    db.add_examples(accepted, [f"rater_{r}_single_gold_accepted"])
+    print(
+        f"New file has been created: ./data/single/gold/rater_{r}/rater_{r}_single_gold_accepted.jsonl"
+    )
+    print(f"New dataset has been added to db: rater_{r}_single_gold_accepted")
+
+    ignored = [e for e in examples if e["answer"] == "ignore"]
+    srsly.write_jsonl(
+        f"./data/single/gold/rater_{r}/rater_{r}_single_gold_ignored.jsonl", ignored
+    )
+    db.add_examples(ignored, [f"rater_{r}_single_gold_ignored"])
+    print(
+        f"New file has been created: ./data/single/gold/rater_{r}/rater_{r}_single_gold_ignored.jsonl"
+    )
+    print(f"New dataset has been added to db: rater_{r}_single_gold_ignored")
+
+    rejected = [e for e in examples if e["answer"] == "reject"]
+    srsly.write_jsonl(
+        f"./data/single/gold/rater_{r}/rater_{r}_single_gold_rejected.jsonl", rejected
+    )
+    db.add_examples(rejected, [f"rater_{r}_single_gold_rejected"])
+    print(
+        f"New file has been created: ./data/single/gold/rater_{r}/rater_{r}_single_gold_rejected.jsonl"
+    )
+    print(f"New dataset has been added to db: rater_{r}_single_gold_rejected")

tools/raters_preds_to_db.sh (+7, new file)

@@ -0,0 +1,7 @@
+for i in {1..10}
+do
+    echo "Exporting rater_"$i"_preds to jsonl"
+    python ./src/preprocessing/load_docbin_as_jsonl.py ./data/single/unprocessed/rater_$i/rater_"$i"_preds.spacy blank:da --ner > ./data/single/unprocessed/rater_$i/rater_"$i"_preds.jsonl
+    echo "Importing rater_"$i"_preds to db"
+    prodigy db-in rater_"$i"_single_unprocessed_preds ./data/single/unprocessed/rater_$i/rater_"$i"_preds.jsonl
+done
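The loop depends on `src/preprocessing/load_docbin_as_jsonl.py`, which this commit does not show. A hypothetical stand-in for that conversion step, with the record layout assumed from Prodigy's usual `text`/`spans` convention (not taken from the repo) and plain tuples in place of a `.spacy` DocBin:

```python
# Hypothetical sketch: turn (text, entity spans) pairs into Prodigy-style JSONL
# records. The real script reads a .spacy DocBin; plain tuples stand in here.
import json

def doc_to_record(text, ents):
    # ents: iterable of (start_char, end_char, label) tuples
    return {
        "text": text,
        "spans": [{"start": s, "end": e, "label": l} for s, e, l in ents],
    }

rec = doc_to_record("Anna bor i Aarhus", [(0, 4, "PERSON"), (11, 17, "GPE")])
print(json.dumps(rec))
```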
