From 77459a9eb70e3e1bd9c933d8d37fbc1f610a5c42 Mon Sep 17 00:00:00 2001 From: Ishvinder Sethi Date: Mon, 8 Jul 2024 15:22:35 +0530 Subject: [PATCH 01/12] Allow org owners to manipulate user details --- backend/users/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/users/views.py b/backend/users/views.py index 320a06901..64aa7709f 100644 --- a/backend/users/views.py +++ b/backend/users/views.py @@ -858,7 +858,7 @@ def user_profile_image_update(self, request, pk=None): @swagger_auto_schema(request_body=UserUpdateSerializer) @action(detail=True, methods=["patch"], url_path="edit_user_details") def user_details_update(self, request, pk=None): - if request.user.role != User.ADMIN: + if request.user.role not in [User.ADMIN, User.ORGANIZATION_OWNER]: return Response( {"message": "Not Authorized"}, status=status.HTTP_403_FORBIDDEN ) From 062dceec6903958d94ce731d51aad2bf52acd8df Mon Sep 17 00:00:00 2001 From: Ishvinder Sethi Date: Mon, 8 Jul 2024 15:25:04 +0530 Subject: [PATCH 02/12] Update views.py --- backend/users/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/users/views.py b/backend/users/views.py index 64aa7709f..b600a3dde 100644 --- a/backend/users/views.py +++ b/backend/users/views.py @@ -804,7 +804,7 @@ def enable_email(self, request): @swagger_auto_schema(responses={200: UserProfileSerializer, 403: "Not Authorized"}) @action(detail=False, methods=["get"], url_path="user_details") def user_details(self, request): - if request.user.role == User.ADMIN: + if request.user.role in [User.ADMIN, User.ORGANIZATION_OWNER]: user_details = User.objects.all() serializer = UserProfileSerializer(user_details, many=True) return Response(serializer.data, status=status.HTTP_200_OK) From 64df2db771b4d154d6017db7dfd5959acc841898 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Mon, 8 Jul 2024 10:12:50 +0000 Subject: [PATCH 03/12] added wer scores --- backend/projects/utils.py | 4 +- backend/workspaces/tasks.py | 88 ++++++++++++++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 3 deletions(-) diff --git a/backend/projects/utils.py b/backend/projects/utils.py index 9408d44ce..678515b47 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -222,7 +222,7 @@ def calculate_word_error_rate_between_two_audio_transcription_annotation( annotation_result2_text = "" for result in annotation_result1: - if result["from_name"] in ["transcribed_json", "verbatim_transcribed_json"]: + if "type" in result and result["type"] == "textarea": try: for s in result["value"]["text"]: annotation_result1_text += s @@ -230,7 +230,7 @@ def calculate_word_error_rate_between_two_audio_transcription_annotation( pass for result in annotation_result2: - if result["from_name"] in ["transcribed_json", "verbatim_transcribed_json"]: + if "type" in result and result["type"] == "textarea": try: for s in result["value"]["text"]: annotation_result2_text += s diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index e63273dea..9ffd8afb1 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -13,6 +13,11 @@ ANNOTATOR_ANNOTATION, REVIEWER_ANNOTATION, SUPER_CHECKER_ANNOTATION, + ACCEPTED, + ACCEPTED_WITH_MINOR_CHANGES, + ACCEPTED_WITH_MAJOR_CHANGES, + VALIDATED, + VALIDATED_WITH_CHANGES, ) from .models import Workspace from users.models import User @@ -66,6 +71,45 @@ def get_all_annotation_reports( completed_by=userid, updated_at__range=[start_date, end_date], ) + number_of_tasks_contributed_for_ar_wer, 
number_of_tasks_contributed_for_as_wer = ( + 0, + 0, + ) + ar_wer_score, as_wer_score = 0, 0 + for ann in submitted_tasks: + all_annotations = Annotation.objects.filter(task_id=ann.task_id) + ar_done, as_done = False, False # for duplicate annotations + for a in all_annotations: + rev_ann, sup_ann = "", "" + if a.annotation_type == REVIEWER_ANNOTATION and a.annotation_status in [ + ACCEPTED, + ACCEPTED_WITH_MINOR_CHANGES, + ACCEPTED_WITH_MAJOR_CHANGES, + ]: + rev_ann = a + elif ( + a.annotation_type == SUPER_CHECKER_ANNOTATION + and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES] + ): + sup_ann = a + if rev_ann and not ar_done: + try: + ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( + rev_ann.result, ann.result + ) + number_of_tasks_contributed_for_ar_wer += 1 + ar_done = True + except Exception as e: + pass + if sup_ann and not as_done: + try: + as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( + sup_ann.result, ann.result + ) + number_of_tasks_contributed_for_as_wer += 1 + as_done = True + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() @@ -120,6 +164,20 @@ def get_all_annotation_reports( "Word Count": total_word_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, + "Average Word Error Rate Annotator Vs Reviewer": ar_wer_score + / number_of_tasks_contributed_for_ar_wer + if number_of_tasks_contributed_for_ar_wer + else 0, + "Cumulative Word Error Rate Annotator Vs Reviewer": ar_wer_score + if number_of_tasks_contributed_for_ar_wer + else 0, + "Average Word Error Rate Annotator Vs Superchecker": as_wer_score + / number_of_tasks_contributed_for_as_wer + if number_of_tasks_contributed_for_as_wer + else 0, + "Cumulative Word Error Rate Annotator Vs Superchecker": as_wer_score + if number_of_tasks_contributed_for_as_wer + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": @@ -187,7 +245,27 @@ def get_all_review_reports( annotation_type=REVIEWER_ANNOTATION, updated_at__range=[start_date, end_date], ) - + number_of_tasks_contributed_for_rs_wer = 0 + rs_wer_score = 0 + for ann in submitted_tasks: + all_annotations = Annotation.objects.filter(task_id=ann.task_id) + rs_done = False # for duplicate annotations + for a in all_annotations: + sup_ann = "" + if ( + a.annotation_type == SUPER_CHECKER_ANNOTATION + and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES] + ): + sup_ann = a + if sup_ann and not rs_done: + try: + rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( + sup_ann.result, ann.result + ) + number_of_tasks_contributed_for_rs_wer += 1 + rs_done = True + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() project_type_lower = project_type.lower() @@ -240,6 +318,13 @@ def get_all_review_reports( "Word Count": total_word_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, + "Average Word Error Rate Annotator Vs Superchecker": rs_wer_score + / number_of_tasks_contributed_for_rs_wer + if number_of_tasks_contributed_for_rs_wer + else 0, + "Cumulative Word Error Rate Annotator Vs Superchecker": rs_wer_score + if number_of_tasks_contributed_for_rs_wer + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": @@ -509,6 +594,7 @@ def send_user_reports_mail_ws( final_reports = sorted(final_reports, key=lambda x: x["Name"], reverse=False) df = pd.DataFrame.from_dict(final_reports) + df = 
df.fillna("NA") content = df.to_csv(index=False) content_type = "text/csv" From 9589f3ed8ffaf0384616877d35c047f2ecb19b36 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Mon, 8 Jul 2024 11:43:20 +0000 Subject: [PATCH 04/12] minor fix --- backend/projects/utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/backend/projects/utils.py b/backend/projects/utils.py index 678515b47..71c8fa982 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -215,8 +215,13 @@ def audio_word_count(annotation_result): def calculate_word_error_rate_between_two_audio_transcription_annotation( annotation_result1, annotation_result2 ): - annotation_result1 = sorted(annotation_result1, key=lambda i: (i["value"]["end"])) - annotation_result2 = sorted(annotation_result2, key=lambda i: (i["value"]["end"])) + if "end" in annotation_result1[0]["value"]: + annotation_result1 = sorted( + annotation_result1, key=lambda i: (i["value"]["end"]) + ) + annotation_result2 = sorted( + annotation_result2, key=lambda i: (i["value"]["end"]) + ) annotation_result1_text = "" annotation_result2_text = "" From 5345aede03fef89c79cba451c0f1540b5257e5f6 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Wed, 10 Jul 2024 04:56:09 +0000 Subject: [PATCH 05/12] skipped the blank data tasks --- backend/projects/views.py | 55 +++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/backend/projects/views.py b/backend/projects/views.py index 579e26ee1..3ea5f5abd 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -4092,35 +4092,38 @@ def download(self, request, pk=None, *args, **kwargs): ) is_OCRSegmentCategorization = project_type == "OCRSegmentCategorization" for task in tasks: - curr_task = process_task( - task, - export_type, - include_input_data_metadata_json, - dataset_model, - is_audio_project_type, - ) - if ( - is_ConversationTranslation - or is_ConversationTranslationEditing - or is_ConversationVerification - ): - process_conversation_tasks( - curr_task, - is_ConversationTranslation, - is_ConversationVerification, + try: + curr_task = process_task( + task, + export_type, + include_input_data_metadata_json, + dataset_model, + is_audio_project_type, ) - elif dataset_type in ["SpeechConversation", "OCRDocument"]: - is_SpeechConversation = dataset_type == "SpeechConversation" - if is_SpeechConversation: - process_speech_tasks( - curr_task, is_AudioSegmentation, project_type - ) - else: - process_ocr_tasks( + if ( + is_ConversationTranslation + or is_ConversationTranslationEditing + or is_ConversationVerification + ): + process_conversation_tasks( curr_task, - is_OCRSegmentCategorization, - is_OCRSegmentCategorizationEditing, + is_ConversationTranslation, + is_ConversationVerification, ) + elif dataset_type in ["SpeechConversation", "OCRDocument"]: + is_SpeechConversation = dataset_type == "SpeechConversation" + if is_SpeechConversation: + process_speech_tasks( + curr_task, is_AudioSegmentation, project_type + ) + else: + process_ocr_tasks( + curr_task, + is_OCRSegmentCategorization, + is_OCRSegmentCategorizationEditing, + ) + except Exception as e: + continue tasks_list.append(curr_task) download_resources = True export_stream, content_type, filename = DataExport.generate_export_file( From efac1e1fc66bb308b0429f2f2ca3b5ec9e70a17d Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Wed, 10 Jul 2024 09:48:19 +0000 Subject: [PATCH 06/12] fix_calculate_word_error_rate_between_two_audio_transcription_annotation --- 
backend/dataset/views.py | 2 ++ backend/functions/tasks.py | 8 ++++-- backend/projects/utils.py | 50 +++++++++++++++++++++++++++++-------- backend/projects/views.py | 8 +++--- backend/workspaces/tasks.py | 20 ++++++++------- backend/workspaces/views.py | 2 ++ 6 files changed, 64 insertions(+), 26 deletions(-) diff --git a/backend/dataset/views.py b/backend/dataset/views.py index 5c9e1fa21..7e6b4227c 100644 --- a/backend/dataset/views.py +++ b/backend/dataset/views.py @@ -1038,6 +1038,7 @@ def project_analytics(self, request, pk=None): calculate_word_error_rate_between_two_audio_transcription_annotation( review_annotation.result, review_annotation.parent_annotation.result, + project_type, ) ) except: @@ -1067,6 +1068,7 @@ def project_analytics(self, request, pk=None): calculate_word_error_rate_between_two_audio_transcription_annotation( supercheck_annotation.result, supercheck_annotation.parent_annotation.result, + project_type, ) ) except: diff --git a/backend/functions/tasks.py b/backend/functions/tasks.py index c235ad1ca..f6bf51261 100644 --- a/backend/functions/tasks.py +++ b/backend/functions/tasks.py @@ -1371,6 +1371,7 @@ def get_stats_helper( get_most_recent_annotation( ann_obj.parent_annotation ).result, + project_type, ) ) except Exception as error: @@ -1425,6 +1426,7 @@ def get_stats_helper( get_most_recent_annotation( ann_obj.parent_annotation.parent_annotation ).result, + project_type, ) ) except Exception as error: @@ -1436,6 +1438,7 @@ def get_stats_helper( get_most_recent_annotation( ann_obj.parent_annotation ).result, + project_type, ) ) except Exception as error: @@ -1447,6 +1450,7 @@ def get_stats_helper( get_most_recent_annotation( ann_obj.parent_annotation.parent_annotation ).result, + project_type, ) ) except Exception as error: @@ -1518,10 +1522,10 @@ def calculate_ced_between_two_annotations(annotation1, annotation2): return ced_list -def calculate_wer_between_two_annotations(annotation1, annotation2): +def calculate_wer_between_two_annotations(annotation1, annotation2, project_type): try: return calculate_word_error_rate_between_two_audio_transcription_annotation( - annotation1, annotation2 + annotation1, annotation2, project_type ) except Exception as e: return 0 diff --git a/backend/projects/utils.py b/backend/projects/utils.py index 71c8fa982..ab1342162 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -213,7 +213,7 @@ def audio_word_count(annotation_result): def calculate_word_error_rate_between_two_audio_transcription_annotation( - annotation_result1, annotation_result2 + annotation_result1, annotation_result2, project_type ): if "end" in annotation_result1[0]["value"]: annotation_result1 = sorted( @@ -228,19 +228,47 @@ def calculate_word_error_rate_between_two_audio_transcription_annotation( for result in annotation_result1: if "type" in result and result["type"] == "textarea": - try: - for s in result["value"]["text"]: - annotation_result1_text += s - except: - pass + if project_type in [ + "AcousticNormalisedTranscriptionEditing", + "StandardizedTranscriptionEditing", + ]: + if ( + "from_name" in result + and result["from_name"] == "verbatim_transcribed_json" + ): + try: + for s in result["value"]["text"]: + annotation_result1_text += s + except: + pass + else: + try: + for s in result["value"]["text"]: + annotation_result1_text += s + except: + pass for result in annotation_result2: if "type" in result and result["type"] == "textarea": - try: - for s in result["value"]["text"]: - annotation_result2_text += s - except: - pass + if 
project_type in [ + "AcousticNormalisedTranscriptionEditing", + "StandardizedTranscriptionEditing", + ]: + if ( + "from_name" in result + and result["from_name"] == "verbatim_transcribed_json" + ): + try: + for s in result["value"]["text"]: + annotation_result1_text += s + except: + pass + else: + try: + for s in result["value"]["text"]: + annotation_result1_text += s + except: + pass if len(annotation_result1_text) == 0 or len(annotation_result2_text) == 0: return 0 return wer(annotation_result1_text, annotation_result2_text) diff --git a/backend/projects/views.py b/backend/projects/views.py index 3ea5f5abd..68e48c472 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -300,7 +300,7 @@ def get_review_reports(proj_id, userid, start_date, end_date): try: total_word_error_rate_rs_list.append( calculate_word_error_rate_between_two_audio_transcription_annotation( - anno.result, anno.parent_annotation.result + anno.result, anno.parent_annotation.result, proj_type ) ) except: @@ -309,7 +309,7 @@ def get_review_reports(proj_id, userid, start_date, end_date): try: total_word_error_rate_ar_list.append( calculate_word_error_rate_between_two_audio_transcription_annotation( - anno.result, anno.parent_annotation.result + anno.result, anno.parent_annotation.result, proj_type ) ) except: @@ -603,7 +603,7 @@ def get_supercheck_reports(proj_id, userid, start_date, end_date): try: total_word_error_rate_list.append( calculate_word_error_rate_between_two_audio_transcription_annotation( - anno.result, anno.parent_annotation.result + anno.result, anno.parent_annotation.result, proj_type ) ) except: @@ -3370,7 +3370,7 @@ def get_analytics(self, request, pk=None, *args, **kwargs): try: total_word_error_rate_ar_list.append( calculate_word_error_rate_between_two_audio_transcription_annotation( - anno.result, anno.parent_annotation.result + anno.result, anno.parent_annotation.result, project_type ) ) except: diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index 9ffd8afb1..e12c5ff8d 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -95,7 +95,7 @@ def get_all_annotation_reports( if rev_ann and not ar_done: try: ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( - rev_ann.result, ann.result + rev_ann.result, ann.result, project_type ) number_of_tasks_contributed_for_ar_wer += 1 ar_done = True @@ -104,7 +104,7 @@ def get_all_annotation_reports( if sup_ann and not as_done: try: as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( - sup_ann.result, ann.result + sup_ann.result, ann.result, project_type ) number_of_tasks_contributed_for_as_wer += 1 as_done = True @@ -260,7 +260,7 @@ def get_all_review_reports( if sup_ann and not rs_done: try: rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( - sup_ann.result, ann.result + sup_ann.result, ann.result, project_type ) number_of_tasks_contributed_for_rs_wer += 1 rs_done = True @@ -318,11 +318,11 @@ def get_all_review_reports( "Word Count": total_word_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, - "Average Word Error Rate Annotator Vs Superchecker": rs_wer_score + "Average Word Error Rate Reviewer Vs Superchecker": rs_wer_score / number_of_tasks_contributed_for_rs_wer if number_of_tasks_contributed_for_rs_wer else 0, - "Cumulative Word Error Rate Annotator Vs Superchecker": rs_wer_score + "Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score if 
number_of_tasks_contributed_for_rs_wer else 0, } @@ -844,6 +844,7 @@ def send_project_analysis_reports_mail_ws( calculate_word_error_rate_between_two_audio_transcription_annotation( review_annotation.result, review_annotation.parent_annotation.result, + project_type, ) ) except: @@ -878,6 +879,7 @@ def send_project_analysis_reports_mail_ws( calculate_word_error_rate_between_two_audio_transcription_annotation( supercheck_annotation.result, supercheck_annotation.parent_annotation.result, + project_type, ) ) except: @@ -1190,7 +1192,7 @@ def get_supercheck_reports(proj_ids, userid, start_date, end_date, project_type= try: total_word_error_rate_rs_list.append( calculate_word_error_rate_between_two_audio_transcription_annotation( - anno.result, anno.parent_annotation.result + anno.result, anno.parent_annotation.result, project_type ) ) total_raw_audio_duration_list.append( @@ -1202,7 +1204,7 @@ def get_supercheck_reports(proj_ids, userid, start_date, end_date, project_type= try: total_word_error_rate_rs_list.append( calculate_word_error_rate_between_two_audio_transcription_annotation( - anno.result, anno.parent_annotation.result + anno.result, anno.parent_annotation.result, project_type ) ) except: @@ -1469,7 +1471,7 @@ def get_review_reports( ) total_word_error_rate_ar_list.append( calculate_word_error_rate_between_two_audio_transcription_annotation( - anno.result, anno.parent_annotation.result + anno.result, anno.parent_annotation.result, project_type ) ) except: @@ -1478,7 +1480,7 @@ def get_review_reports( try: total_word_error_rate_rs_list.append( calculate_word_error_rate_between_two_audio_transcription_annotation( - anno.result, anno.parent_annotation.result + anno.result, anno.parent_annotation.result, project_type ) ) except: diff --git a/backend/workspaces/views.py b/backend/workspaces/views.py index ccc3fd269..e5cd136d9 100644 --- a/backend/workspaces/views.py +++ b/backend/workspaces/views.py @@ -648,6 +648,7 @@ def project_analytics(self, request, pk=None): calculate_word_error_rate_between_two_audio_transcription_annotation( review_annotation.result, review_annotation.parent_annotation.result, + project_type, ) ) except: @@ -682,6 +683,7 @@ def project_analytics(self, request, pk=None): calculate_word_error_rate_between_two_audio_transcription_annotation( supercheck_annotation.result, supercheck_annotation.parent_annotation.result, + project_type, ) ) except: From f6734d094338a8a3afd25090125b31c4e0997edf Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Thu, 11 Jul 2024 07:03:49 +0000 Subject: [PATCH 07/12] fix wer --- backend/projects/utils.py | 38 +++++++++---------------------------- backend/workspaces/tasks.py | 22 ++++++++++++--------- 2 files changed, 22 insertions(+), 38 deletions(-) diff --git a/backend/projects/utils.py b/backend/projects/utils.py index ab1342162..4987ed878 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -228,20 +228,10 @@ def calculate_word_error_rate_between_two_audio_transcription_annotation( for result in annotation_result1: if "type" in result and result["type"] == "textarea": - if project_type in [ - "AcousticNormalisedTranscriptionEditing", - "StandardizedTranscriptionEditing", - ]: - if ( - "from_name" in result - and result["from_name"] == "verbatim_transcribed_json" - ): - try: - for s in result["value"]["text"]: - annotation_result1_text += s - except: - pass - else: + if ( + "from_name" in result + and result["from_name"] != "acoustic_normalised_transcribed_json" + ): try: for s in result["value"]["text"]: 
annotation_result1_text += s @@ -250,23 +240,13 @@ def calculate_word_error_rate_between_two_audio_transcription_annotation( for result in annotation_result2: if "type" in result and result["type"] == "textarea": - if project_type in [ - "AcousticNormalisedTranscriptionEditing", - "StandardizedTranscriptionEditing", - ]: - if ( - "from_name" in result - and result["from_name"] == "verbatim_transcribed_json" - ): - try: - for s in result["value"]["text"]: - annotation_result1_text += s - except: - pass - else: + if ( + "from_name" in result + and result["from_name"] != "acoustic_normalised_transcribed_json" + ): try: for s in result["value"]["text"]: - annotation_result1_text += s + annotation_result2_text += s except: pass if len(annotation_result1_text) == 0 or len(annotation_result2_text) == 0: diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index e12c5ff8d..11ad34828 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -78,9 +78,9 @@ def get_all_annotation_reports( ar_wer_score, as_wer_score = 0, 0 for ann in submitted_tasks: all_annotations = Annotation.objects.filter(task_id=ann.task_id) - ar_done, as_done = False, False # for duplicate annotations + ar_done, as_done = False, False + ann_ann, rev_ann, sup_ann = "", "", "" for a in all_annotations: - rev_ann, sup_ann = "", "" if a.annotation_type == REVIEWER_ANNOTATION and a.annotation_status in [ ACCEPTED, ACCEPTED_WITH_MINOR_CHANGES, @@ -92,19 +92,21 @@ def get_all_annotation_reports( and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES] ): sup_ann = a - if rev_ann and not ar_done: + elif a.annotation_type == ANNOTATOR_ANNOTATION: + ann_ann = a + if ann_ann and rev_ann and not ar_done: try: ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( - rev_ann.result, ann.result, project_type + rev_ann.result, ann_ann.result, project_type ) number_of_tasks_contributed_for_ar_wer += 1 ar_done = True except Exception as e: pass - if sup_ann and not as_done: + if ann_ann and sup_ann and not as_done: try: as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( - sup_ann.result, ann.result, project_type + sup_ann.result, ann_ann.result, project_type ) number_of_tasks_contributed_for_as_wer += 1 as_done = True @@ -250,17 +252,19 @@ def get_all_review_reports( for ann in submitted_tasks: all_annotations = Annotation.objects.filter(task_id=ann.task_id) rs_done = False # for duplicate annotations + sup_ann, rev_ann = "", "" for a in all_annotations: - sup_ann = "" if ( a.annotation_type == SUPER_CHECKER_ANNOTATION and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES] ): sup_ann = a - if sup_ann and not rs_done: + elif a.annotation_type == REVIEWER_ANNOTATION: + rev_ann = a + if rev_ann and sup_ann and not rs_done: try: rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( - sup_ann.result, ann.result, project_type + sup_ann.result, rev_ann.result, project_type ) number_of_tasks_contributed_for_rs_wer += 1 rs_done = True From b82a915e6d53d9f61785029c9c1178f6022ddf42 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Fri, 12 Jul 2024 05:17:44 +0000 Subject: [PATCH 08/12] added org level wer scores --- backend/organizations/tasks.py | 93 +++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index a364f876d..1a300178c 100644 --- a/backend/organizations/tasks.py +++ 
b/backend/organizations/tasks.py @@ -12,6 +12,11 @@ ANNOTATOR_ANNOTATION, REVIEWER_ANNOTATION, SUPER_CHECKER_ANNOTATION, + ACCEPTED, + ACCEPTED_WITH_MINOR_CHANGES, + ACCEPTED_WITH_MAJOR_CHANGES, + VALIDATED, + VALIDATED_WITH_CHANGES, ) from .models import Organization from users.models import User @@ -23,6 +28,7 @@ get_audio_transcription_duration, get_audio_segments_count, ocr_word_count, + calculate_word_error_rate_between_two_audio_transcription_annotation, ) from workspaces.tasks import ( un_pack_annotation_tasks, @@ -68,6 +74,47 @@ def get_all_annotation_reports( completed_by=userid, updated_at__range=[start_date, end_date], ) + number_of_tasks_contributed_for_ar_wer, number_of_tasks_contributed_for_as_wer = ( + 0, + 0, + ) + ar_wer_score, as_wer_score = 0, 0 + for ann in submitted_tasks: + all_annotations = Annotation.objects.filter(task_id=ann.task_id) + ar_done, as_done = False, False + ann_ann, rev_ann, sup_ann = "", "", "" + for a in all_annotations: + if a.annotation_type == REVIEWER_ANNOTATION and a.annotation_status in [ + ACCEPTED, + ACCEPTED_WITH_MINOR_CHANGES, + ACCEPTED_WITH_MAJOR_CHANGES, + ]: + rev_ann = a + elif ( + a.annotation_type == SUPER_CHECKER_ANNOTATION + and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES] + ): + sup_ann = a + elif a.annotation_type == ANNOTATOR_ANNOTATION: + ann_ann = a + if ann_ann and rev_ann and not ar_done: + try: + ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( + rev_ann.result, ann_ann.result, project_type + ) + number_of_tasks_contributed_for_ar_wer += 1 + ar_done = True + except Exception as e: + pass + if ann_ann and sup_ann and not as_done: + try: + as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( + sup_ann.result, ann_ann.result, project_type + ) + number_of_tasks_contributed_for_as_wer += 1 + as_done = True + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() @@ -122,6 +169,20 @@ def get_all_annotation_reports( "Word Count": total_word_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, + "Average Word Error Rate Annotator Vs Reviewer": ar_wer_score + / number_of_tasks_contributed_for_ar_wer + if number_of_tasks_contributed_for_ar_wer + else 0, + "Cumulative Word Error Rate Annotator Vs Reviewer": ar_wer_score + if number_of_tasks_contributed_for_ar_wer + else 0, + "Average Word Error Rate Annotator Vs Superchecker": as_wer_score + / number_of_tasks_contributed_for_as_wer + if number_of_tasks_contributed_for_as_wer + else 0, + "Cumulative Word Error Rate Annotator Vs Superchecker": as_wer_score + if number_of_tasks_contributed_for_as_wer + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": @@ -189,7 +250,29 @@ def get_all_review_reports( annotation_type=REVIEWER_ANNOTATION, updated_at__range=[start_date, end_date], ) - + number_of_tasks_contributed_for_rs_wer = 0 + rs_wer_score = 0 + for ann in submitted_tasks: + all_annotations = Annotation.objects.filter(task_id=ann.task_id) + rs_done = False # for duplicate annotations + sup_ann, rev_ann = "", "" + for a in all_annotations: + if ( + a.annotation_type == SUPER_CHECKER_ANNOTATION + and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES] + ): + sup_ann = a + elif a.annotation_type == REVIEWER_ANNOTATION: + rev_ann = a + if rev_ann and sup_ann and not rs_done: + try: + rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( + sup_ann.result, rev_ann.result, project_type + ) 
+ number_of_tasks_contributed_for_rs_wer += 1 + rs_done = True + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() project_type_lower = project_type.lower() @@ -243,6 +326,13 @@ def get_all_review_reports( "Word Count": total_word_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, + "Average Word Error Rate Reviewer Vs Superchecker": rs_wer_score + / number_of_tasks_contributed_for_rs_wer + if number_of_tasks_contributed_for_rs_wer + else 0, + "Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score + if number_of_tasks_contributed_for_rs_wer + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": @@ -512,6 +602,7 @@ def send_user_reports_mail_org( final_reports = sorted(final_reports, key=lambda x: x["Name"], reverse=False) df = pd.DataFrame.from_dict(final_reports) + df = df.fillna("NA") content = df.to_csv(index=False) content_type = "text/csv" From 8aa7b03c114068e1b5882a3e2b214dd02bf1b9a9 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Fri, 12 Jul 2024 12:34:05 +0000 Subject: [PATCH 09/12] added rej score and bleu score --- backend/tasks/views.py | 71 ++++++++++++++--- backend/workspaces/tasks.py | 149 +++++++++++++++++++++++++++++++++++- 2 files changed, 207 insertions(+), 13 deletions(-) diff --git a/backend/tasks/views.py b/backend/tasks/views.py index 50de59c54..43ebf9339 100644 --- a/backend/tasks/views.py +++ b/backend/tasks/views.py @@ -2517,11 +2517,18 @@ def calculate_bleu_score(self, request): sentence1 = request["sentence1"] sentence2 = request["sentence2"] except: - return Response( - {"message": "Invalid parameters in request body!"}, - status=status.HTTP_400_BAD_REQUEST, - ) - + try: + annotation_result1 = request.data.get["annotation_result1"] + annotation_result2 = request.data.get["annotation_result2"] + except: + try: + annotation_result1 = request["annotation_result1"] + annotation_result2 = request["annotation_result2"] + except: + return Response( + {"message": "Invalid parameters in request body!"}, + status=status.HTTP_400_BAD_REQUEST, + ) try: sentence1 = [sentence1] sentence2 = [[sentence2]] @@ -2535,10 +2542,56 @@ def calculate_bleu_score(self, request): status=status.HTTP_200_OK, ) except: - return Response( - {"message": "Invalid parameters in request body!"}, - status=status.HTTP_400_BAD_REQUEST, - ) + try: + if "end" in annotation_result1[0]["value"]: + annotation_result1 = sorted( + annotation_result1, key=lambda i: (i["value"]["end"]) + ) + annotation_result2 = sorted( + annotation_result2, key=lambda i: (i["value"]["end"]) + ) + + annotation_result1_text = "" + annotation_result2_text = "" + + for result in annotation_result1: + if "type" in result and result["type"] == "textarea": + if ( + "from_name" in result + and result["from_name"] + != "acoustic_normalised_transcribed_json" + ): + try: + for s in result["value"]["text"]: + annotation_result1_text += s + except: + pass + + for result in annotation_result2: + if "type" in result and result["type"] == "textarea": + if ( + "from_name" in result + and result["from_name"] + != "acoustic_normalised_transcribed_json" + ): + try: + for s in result["value"]["text"]: + annotation_result2_text += s + except: + pass + bleu = sacrebleu.corpus_bleu( + [annotation_result1_text], [[annotation_result2_text]] + ) + bleu_score = bleu.score + return Response( + {"bleu_score": str(bleu_score)}, + status=status.HTTP_200_OK, + ) + except: + return Response( + {"message": "Invalid parameters in request body!"}, + 
status=status.HTTP_400_BAD_REQUEST, + ) @swagger_auto_schema( diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index 11ad34828..1568121ee 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -31,6 +31,7 @@ get_audio_segments_count, ocr_word_count, ) +from tasks.views import SentenceOperationViewSet def get_all_annotation_reports( @@ -71,13 +72,33 @@ def get_all_annotation_reports( completed_by=userid, updated_at__range=[start_date, end_date], ) - number_of_tasks_contributed_for_ar_wer, number_of_tasks_contributed_for_as_wer = ( + ( + number_of_tasks_contributed_for_ar_wer, + number_of_tasks_contributed_for_as_wer, + number_of_tasks_contributed_for_ar_bleu, + number_of_tasks_contributed_for_as_bleu, + ) = ( + 0, + 0, 0, 0, ) - ar_wer_score, as_wer_score = 0, 0 + ar_wer_score, as_wer_score, ar_bleu_score, as_bleu_score = 0, 0, 0, 0 + tasks_and_rejection_count_map_ar, number_of_tasks_that_has_review_annotations = ( + {}, + 0, + ) for ann in submitted_tasks: all_annotations = Annotation.objects.filter(task_id=ann.task_id) + try: + task = ann.task + revision_loop_count = task.revision_loop_count + r_count = revision_loop_count["review_count"] + tasks_and_rejection_count_map_ar[r_count] = ( + tasks_and_rejection_count_map_ar.get(r_count, 0) + 1 + ) + except Exception as e: + pass ar_done, as_done = False, False ann_ann, rev_ann, sup_ann = "", "", "" for a in all_annotations: @@ -94,6 +115,8 @@ def get_all_annotation_reports( sup_ann = a elif a.annotation_type == ANNOTATOR_ANNOTATION: ann_ann = a + if a.annotation_type == REVIEWER_ANNOTATION: + number_of_tasks_that_has_review_annotations += 1 if ann_ann and rev_ann and not ar_done: try: ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( @@ -103,6 +126,18 @@ def get_all_annotation_reports( ar_done = True except Exception as e: pass + try: + s1 = SentenceOperationViewSet() + sampleRequest = { + "annotation_result1": rev_ann.result, + "annotation_result2": ann_ann.result, + } + ar_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ + "bleu_score" + ] + number_of_tasks_contributed_for_ar_bleu += 1 + except Exception as e: + pass if ann_ann and sup_ann and not as_done: try: as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( @@ -112,6 +147,18 @@ def get_all_annotation_reports( as_done = True except Exception as e: pass + try: + s1 = SentenceOperationViewSet() + sampleRequest = { + "annotation_result1": sup_ann.result, + "annotation_result2": ann_ann.result, + } + as_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ + "bleu_score" + ] + number_of_tasks_contributed_for_as_bleu += 1 + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() @@ -154,6 +201,10 @@ def get_all_annotation_reports( total_raw_audio_duration = convert_seconds_to_hours( sum(total_raw_audio_duration_list) ) + cumulative_rejection_score_ar = 0 + if tasks_and_rejection_count_map_ar: + for task, rc in tasks_and_rejection_count_map_ar.items(): + cumulative_rejection_score_ar += task * rc result = { "Name": userName, @@ -180,6 +231,18 @@ def get_all_annotation_reports( "Cumulative Word Error Rate Annotator Vs Superchecker": as_wer_score if number_of_tasks_contributed_for_as_wer else 0, + "Average Bleu Score Annotator Vs Reviewer": ar_bleu_score + / number_of_tasks_contributed_for_ar_bleu + if number_of_tasks_contributed_for_ar_bleu + else 0, + "Average Bleu Score Annotator Vs Superchecker": as_bleu_score + / 
number_of_tasks_contributed_for_as_bleu + if number_of_tasks_contributed_for_as_bleu + else 0, + "Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar + / number_of_tasks_that_has_review_annotations + if number_of_tasks_that_has_review_annotations + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": @@ -247,10 +310,34 @@ def get_all_review_reports( annotation_type=REVIEWER_ANNOTATION, updated_at__range=[start_date, end_date], ) - number_of_tasks_contributed_for_rs_wer = 0 - rs_wer_score = 0 + number_of_tasks_contributed_for_rs_wer, number_of_tasks_contributed_for_rs_bleu = ( + 0, + 0, + ) + rs_wer_score, rs_bleu_score = 0, 0 + ( + tasks_and_rejection_count_map_ar, + tasks_and_rejection_count_map_rs, + number_of_tasks_that_has_sup_annotations, + ) = ({}, {}, 0) for ann in submitted_tasks: all_annotations = Annotation.objects.filter(task_id=ann.task_id) + task = ann.task + revision_loop_count = task.revision_loop_count + try: + r_count = revision_loop_count["review_count"] + tasks_and_rejection_count_map_ar[r_count] = ( + tasks_and_rejection_count_map_ar.get(r_count, 0) + 1 + ) + except Exception as e: + pass + try: + s_count = revision_loop_count["super_check_count"] + tasks_and_rejection_count_map_rs[s_count] = ( + tasks_and_rejection_count_map_rs.get(s_count, 0) + 1 + ) + except Exception as e: + pass rs_done = False # for duplicate annotations sup_ann, rev_ann = "", "" for a in all_annotations: @@ -261,6 +348,8 @@ def get_all_review_reports( sup_ann = a elif a.annotation_type == REVIEWER_ANNOTATION: rev_ann = a + if a.annotation_type == SUPER_CHECKER_ANNOTATION: + number_of_tasks_that_has_sup_annotations += 1 if rev_ann and sup_ann and not rs_done: try: rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( @@ -270,6 +359,18 @@ def get_all_review_reports( rs_done = True except Exception as e: pass + try: + s1 = SentenceOperationViewSet() + sampleRequest = { + "annotation_result1": sup_ann.result, + "annotation_result2": rev_ann.result, + } + rs_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ + "bleu_score" + ] + number_of_tasks_contributed_for_rs_bleu += 1 + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() project_type_lower = project_type.lower() @@ -310,6 +411,15 @@ def get_all_review_reports( total_raw_audio_duration = convert_seconds_to_hours( sum(total_raw_audio_duration_list) ) + cumulative_rejection_score_ar = 0 + if tasks_and_rejection_count_map_ar: + for task, rc in tasks_and_rejection_count_map_ar.items(): + cumulative_rejection_score_ar += task * rc + + cumulative_rejection_score_rs = 0 + if tasks_and_rejection_count_map_rs: + for task, rc in tasks_and_rejection_count_map_rs.items(): + cumulative_rejection_score_rs += task * rc result = { "Name": userName, @@ -329,6 +439,18 @@ def get_all_review_reports( "Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score if number_of_tasks_contributed_for_rs_wer else 0, + "Average Bleu Score Reviewer Vs Superchecker": rs_bleu_score + / number_of_tasks_contributed_for_rs_bleu + if number_of_tasks_contributed_for_rs_bleu + else 0, + "Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar + / submitted_tasks_count + if submitted_tasks_count + else 0, + "Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs + / number_of_tasks_that_has_sup_annotations + if number_of_tasks_that_has_sup_annotations + else 0, } if project_type in 
get_audio_project_types() or project_type == "AllAudioProjects": @@ -381,6 +503,17 @@ def get_all_supercheck_reports( annotation_type=SUPER_CHECKER_ANNOTATION, updated_at__range=[start_date, end_date], ) + tasks_and_rejection_count_map_rs = {} + for ann in submitted_tasks: + task = ann.task + revision_loop_count = task.revision_loop_count + try: + s_count = revision_loop_count["super_check_count"] + tasks_and_rejection_count_map_rs[s_count] = ( + tasks_and_rejection_count_map_rs.get(s_count, 0) + 1 + ) + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() @@ -427,6 +560,10 @@ def get_all_supercheck_reports( validated_raw_audio_duration = convert_seconds_to_hours( sum(validated_raw_audio_duration_list) ) + cumulative_rejection_score_rs = 0 + if tasks_and_rejection_count_map_rs: + for task, rc in tasks_and_rejection_count_map_rs.items(): + cumulative_rejection_score_rs += task * rc result = { "Name": userName, @@ -439,6 +576,10 @@ def get_all_supercheck_reports( "Word Count": validated_word_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, + "Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs + / submitted_tasks_count + if submitted_tasks_count + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": From 5db327eae38930c12cd0ed44a61205ed4d729b27 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Mon, 15 Jul 2024 06:12:05 +0000 Subject: [PATCH 10/12] added changes to org level --- backend/organizations/tasks.py | 150 +++++++++++++++++++++++++++++++-- backend/workspaces/tasks.py | 2 +- 2 files changed, 145 insertions(+), 7 deletions(-) diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index 1a300178c..85f329735 100644 --- a/backend/organizations/tasks.py +++ b/backend/organizations/tasks.py @@ -74,13 +74,33 @@ def get_all_annotation_reports( completed_by=userid, updated_at__range=[start_date, end_date], ) - number_of_tasks_contributed_for_ar_wer, number_of_tasks_contributed_for_as_wer = ( + ( + number_of_tasks_contributed_for_ar_wer, + number_of_tasks_contributed_for_as_wer, + number_of_tasks_contributed_for_ar_bleu, + number_of_tasks_contributed_for_as_bleu, + ) = ( + 0, + 0, 0, 0, ) - ar_wer_score, as_wer_score = 0, 0 + ar_wer_score, as_wer_score, ar_bleu_score, as_bleu_score = 0, 0, 0, 0 + tasks_and_rejection_count_map_ar, number_of_tasks_that_has_review_annotations = ( + {}, + 0, + ) for ann in submitted_tasks: all_annotations = Annotation.objects.filter(task_id=ann.task_id) + try: + task = ann.task + revision_loop_count = task.revision_loop_count + r_count = revision_loop_count["review_count"] + tasks_and_rejection_count_map_ar[r_count] = ( + tasks_and_rejection_count_map_ar.get(r_count, 0) + 1 + ) + except Exception as e: + pass ar_done, as_done = False, False ann_ann, rev_ann, sup_ann = "", "", "" for a in all_annotations: @@ -97,6 +117,8 @@ def get_all_annotation_reports( sup_ann = a elif a.annotation_type == ANNOTATOR_ANNOTATION: ann_ann = a + if a.annotation_type == REVIEWER_ANNOTATION: + number_of_tasks_that_has_review_annotations += 1 if ann_ann and rev_ann and not ar_done: try: ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( @@ -106,6 +128,18 @@ def get_all_annotation_reports( ar_done = True except Exception as e: pass + try: + s1 = SentenceOperationViewSet() + sampleRequest = { + "annotation_result1": rev_ann.result, + "annotation_result2": ann_ann.result, + } + ar_bleu_score += 
s1.calculate_bleu_score(sampleRequest).data[ + "bleu_score" + ] + number_of_tasks_contributed_for_ar_bleu += 1 + except Exception as e: + pass if ann_ann and sup_ann and not as_done: try: as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( @@ -115,6 +149,18 @@ def get_all_annotation_reports( as_done = True except Exception as e: pass + try: + s1 = SentenceOperationViewSet() + sampleRequest = { + "annotation_result1": sup_ann.result, + "annotation_result2": ann_ann.result, + } + as_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ + "bleu_score" + ] + number_of_tasks_contributed_for_as_bleu += 1 + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() @@ -157,7 +203,10 @@ def get_all_annotation_reports( total_raw_audio_duration = convert_seconds_to_hours( sum(total_raw_audio_duration_list) ) - + cumulative_rejection_score_ar = 0 + if tasks_and_rejection_count_map_ar: + for task, rc in tasks_and_rejection_count_map_ar.items(): + cumulative_rejection_score_ar += task * rc result = { "Name": userName, "Email": email, @@ -183,6 +232,18 @@ def get_all_annotation_reports( "Cumulative Word Error Rate Annotator Vs Superchecker": as_wer_score if number_of_tasks_contributed_for_as_wer else 0, + "Average Bleu Score Annotator Vs Reviewer": ar_bleu_score + / number_of_tasks_contributed_for_ar_bleu + if number_of_tasks_contributed_for_ar_bleu + else 0, + "Average Bleu Score Annotator Vs Superchecker": as_bleu_score + / number_of_tasks_contributed_for_as_bleu + if number_of_tasks_contributed_for_as_bleu + else 0, + "Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar + / number_of_tasks_that_has_review_annotations + if number_of_tasks_that_has_review_annotations + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": @@ -250,10 +311,34 @@ def get_all_review_reports( annotation_type=REVIEWER_ANNOTATION, updated_at__range=[start_date, end_date], ) - number_of_tasks_contributed_for_rs_wer = 0 - rs_wer_score = 0 + number_of_tasks_contributed_for_rs_wer, number_of_tasks_contributed_for_rs_bleu = ( + 0, + 0, + ) + rs_wer_score, rs_bleu_score = 0, 0 + ( + tasks_and_rejection_count_map_ar, + tasks_and_rejection_count_map_rs, + number_of_tasks_that_has_sup_annotations, + ) = ({}, {}, 0) for ann in submitted_tasks: all_annotations = Annotation.objects.filter(task_id=ann.task_id) + task = ann.task + revision_loop_count = task.revision_loop_count + try: + r_count = revision_loop_count["review_count"] + tasks_and_rejection_count_map_ar[r_count] = ( + tasks_and_rejection_count_map_ar.get(r_count, 0) + 1 + ) + except Exception as e: + pass + try: + s_count = revision_loop_count["super_check_count"] + tasks_and_rejection_count_map_rs[s_count] = ( + tasks_and_rejection_count_map_rs.get(s_count, 0) + 1 + ) + except Exception as e: + pass rs_done = False # for duplicate annotations sup_ann, rev_ann = "", "" for a in all_annotations: @@ -264,6 +349,8 @@ def get_all_review_reports( sup_ann = a elif a.annotation_type == REVIEWER_ANNOTATION: rev_ann = a + if a.annotation_type == SUPER_CHECKER_ANNOTATION: + number_of_tasks_that_has_sup_annotations += 1 if rev_ann and sup_ann and not rs_done: try: rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( @@ -273,6 +360,18 @@ def get_all_review_reports( rs_done = True except Exception as e: pass + try: + s1 = SentenceOperationViewSet() + sampleRequest = { + "annotation_result1": sup_ann.result, + "annotation_result2": 
rev_ann.result, + } + rs_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ + "bleu_score" + ] + number_of_tasks_contributed_for_rs_bleu += 1 + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() project_type_lower = project_type.lower() @@ -314,6 +413,15 @@ def get_all_review_reports( total_raw_audio_duration = convert_seconds_to_hours( sum(total_raw_audio_duration_list) ) + cumulative_rejection_score_ar = 0 + if tasks_and_rejection_count_map_ar: + for task, rc in tasks_and_rejection_count_map_ar.items(): + cumulative_rejection_score_ar += task * rc + + cumulative_rejection_score_rs = 0 + if tasks_and_rejection_count_map_rs: + for task, rc in tasks_and_rejection_count_map_rs.items(): + cumulative_rejection_score_rs += task * rc result = { "Name": userName, @@ -333,6 +441,18 @@ def get_all_review_reports( "Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score if number_of_tasks_contributed_for_rs_wer else 0, + "Average Bleu Score Reviewer Vs Superchecker": rs_bleu_score + / number_of_tasks_contributed_for_rs_bleu + if number_of_tasks_contributed_for_rs_bleu + else 0, + "Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar + / submitted_tasks_count + if submitted_tasks_count + else 0, + "Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs + / number_of_tasks_that_has_sup_annotations + if number_of_tasks_that_has_sup_annotations + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": @@ -385,7 +505,17 @@ def get_all_supercheck_reports( annotation_type=SUPER_CHECKER_ANNOTATION, updated_at__range=[start_date, end_date], ) - + tasks_and_rejection_count_map_rs = {} + for ann in submitted_tasks: + task = ann.task + revision_loop_count = task.revision_loop_count + try: + s_count = revision_loop_count["super_check_count"] + tasks_and_rejection_count_map_rs[s_count] = ( + tasks_and_rejection_count_map_rs.get(s_count, 0) + 1 + ) + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() project_type_lower = project_type.lower() @@ -431,6 +561,10 @@ def get_all_supercheck_reports( validated_raw_audio_duration = convert_seconds_to_hours( sum(validated_raw_audio_duration_list) ) + cumulative_rejection_score_rs = 0 + if tasks_and_rejection_count_map_rs: + for task, rc in tasks_and_rejection_count_map_rs.items(): + cumulative_rejection_score_rs += task * rc result = { "Name": userName, @@ -443,6 +577,10 @@ def get_all_supercheck_reports( "Word Count": validated_word_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, + "Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs + / submitted_tasks_count + if submitted_tasks_count + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index 1568121ee..27e0b7c15 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -439,7 +439,7 @@ def get_all_review_reports( "Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score if number_of_tasks_contributed_for_rs_wer else 0, - "Average Bleu Score Reviewer Vs Superchecker": rs_bleu_score + "Average Bleu Score Reviewer Vs Superchecker": rs_bleu_score / number_of_tasks_contributed_for_rs_bleu if number_of_tasks_contributed_for_rs_bleu else 0, From 644d2d7b8b33eafd210f5ff0040c6741dd4deefa Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Tue, 16 Jul 2024 12:24:17 +0000 Subject: 
[PATCH 11/12] added minor fixes for bleu score --- backend/workspaces/tasks.py | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index 27e0b7c15..4720d6a6d 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -76,14 +76,12 @@ def get_all_annotation_reports( number_of_tasks_contributed_for_ar_wer, number_of_tasks_contributed_for_as_wer, number_of_tasks_contributed_for_ar_bleu, - number_of_tasks_contributed_for_as_bleu, ) = ( 0, 0, 0, - 0, ) - ar_wer_score, as_wer_score, ar_bleu_score, as_bleu_score = 0, 0, 0, 0 + ar_wer_score, as_wer_score, ar_bleu_score = 0, 0, 0 tasks_and_rejection_count_map_ar, number_of_tasks_that_has_review_annotations = ( {}, 0, @@ -132,9 +130,9 @@ def get_all_annotation_reports( "annotation_result1": rev_ann.result, "annotation_result2": ann_ann.result, } - ar_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ - "bleu_score" - ] + ar_bleu_score += float( + s1.calculate_bleu_score(sampleRequest).data["bleu_score"] + ) number_of_tasks_contributed_for_ar_bleu += 1 except Exception as e: pass @@ -147,18 +145,6 @@ def get_all_annotation_reports( as_done = True except Exception as e: pass - try: - s1 = SentenceOperationViewSet() - sampleRequest = { - "annotation_result1": sup_ann.result, - "annotation_result2": ann_ann.result, - } - as_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ - "bleu_score" - ] - number_of_tasks_contributed_for_as_bleu += 1 - except Exception as e: - pass submitted_tasks_count = submitted_tasks.count() @@ -235,10 +221,6 @@ def get_all_annotation_reports( / number_of_tasks_contributed_for_ar_bleu if number_of_tasks_contributed_for_ar_bleu else 0, - "Average Bleu Score Annotator Vs Superchecker": as_bleu_score - / number_of_tasks_contributed_for_as_bleu - if number_of_tasks_contributed_for_as_bleu - else 0, "Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar / number_of_tasks_that_has_review_annotations if number_of_tasks_that_has_review_annotations @@ -365,9 +347,9 @@ def get_all_review_reports( "annotation_result1": sup_ann.result, "annotation_result2": rev_ann.result, } - rs_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ - "bleu_score" - ] + rs_bleu_score += float( + s1.calculate_bleu_score(sampleRequest).data["bleu_score"] + ) number_of_tasks_contributed_for_rs_bleu += 1 except Exception as e: pass From d2b11bd7c5aa752a83737ad56f3b2ad73777fd0c Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Tue, 16 Jul 2024 12:30:05 +0000 Subject: [PATCH 12/12] fix for org reports --- backend/organizations/tasks.py | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index 85f329735..b9141e82b 100644 --- a/backend/organizations/tasks.py +++ b/backend/organizations/tasks.py @@ -78,14 +78,12 @@ def get_all_annotation_reports( number_of_tasks_contributed_for_ar_wer, number_of_tasks_contributed_for_as_wer, number_of_tasks_contributed_for_ar_bleu, - number_of_tasks_contributed_for_as_bleu, ) = ( 0, 0, 0, - 0, ) - ar_wer_score, as_wer_score, ar_bleu_score, as_bleu_score = 0, 0, 0, 0 + ar_wer_score, as_wer_score, ar_bleu_score = 0, 0, 0 tasks_and_rejection_count_map_ar, number_of_tasks_that_has_review_annotations = ( {}, 0, @@ -134,9 +132,9 @@ def get_all_annotation_reports( "annotation_result1": rev_ann.result, "annotation_result2": ann_ann.result, } - ar_bleu_score += 
s1.calculate_bleu_score(sampleRequest).data[ - "bleu_score" - ] + ar_bleu_score += float( + s1.calculate_bleu_score(sampleRequest).data["bleu_score"] + ) number_of_tasks_contributed_for_ar_bleu += 1 except Exception as e: pass @@ -149,18 +147,6 @@ def get_all_annotation_reports( as_done = True except Exception as e: pass - try: - s1 = SentenceOperationViewSet() - sampleRequest = { - "annotation_result1": sup_ann.result, - "annotation_result2": ann_ann.result, - } - as_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ - "bleu_score" - ] - number_of_tasks_contributed_for_as_bleu += 1 - except Exception as e: - pass submitted_tasks_count = submitted_tasks.count() @@ -236,10 +222,6 @@ def get_all_annotation_reports( / number_of_tasks_contributed_for_ar_bleu if number_of_tasks_contributed_for_ar_bleu else 0, - "Average Bleu Score Annotator Vs Superchecker": as_bleu_score - / number_of_tasks_contributed_for_as_bleu - if number_of_tasks_contributed_for_as_bleu - else 0, "Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar / number_of_tasks_that_has_review_annotations if number_of_tasks_that_has_review_annotations @@ -366,9 +348,9 @@ def get_all_review_reports( "annotation_result1": sup_ann.result, "annotation_result2": rev_ann.result, } - rs_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ - "bleu_score" - ] + rs_bleu_score += float( + s1.calculate_bleu_score(sampleRequest).data["bleu_score"] + ) number_of_tasks_contributed_for_rs_bleu += 1 except Exception as e: pass
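
Taken together, PATCH 03 through PATCH 07 converge on a single extraction rule for the WER metrics: collect every "textarea" result whose from_name is not the acoustic-normalised transcript, sort time-aligned segments by their "end" offset, concatenate the text, and hand the two strings to wer(). The sketch below is a minimal standalone restatement of that rule, not the shipped helper: it assumes wer() comes from the jiwer package (the helper calls a bare wer(), so the import is an inference), the result dicts are illustrative Label Studio-style segments, and the project_type argument is omitted because after PATCH 07 it no longer affects the extraction.

    from jiwer import wer

    def transcript_text(annotation_result):
        # Sort time-aligned segments by their end offset, as the helper
        # does when the first result carries an "end" value.
        if annotation_result and "end" in annotation_result[0]["value"]:
            annotation_result = sorted(
                annotation_result, key=lambda i: i["value"]["end"]
            )
        text = ""
        for result in annotation_result:
            # Keep every textarea except the acoustic-normalised transcript;
            # results without a from_name are skipped, as in PATCH 07.
            if (
                result.get("type") == "textarea"
                and "from_name" in result
                and result["from_name"] != "acoustic_normalised_transcribed_json"
            ):
                for s in result["value"].get("text", []):
                    text += s
        return text

    def wer_between(result1, result2):
        t1, t2 = transcript_text(result1), transcript_text(result2)
        if len(t1) == 0 or len(t2) == 0:
            return 0  # the helper returns 0 when either transcript is empty
        return wer(t1, t2)  # reference first, hypothesis second

    # Illustrative reviewer-vs-annotator pair: one of two words substituted.
    rev = [{"type": "textarea", "from_name": "verbatim_transcribed_json",
            "value": {"end": 4.2, "text": ["hello word"]}}]
    ann = [{"type": "textarea", "from_name": "verbatim_transcribed_json",
            "value": {"end": 4.2, "text": ["hello world"]}}]
    print(wer_between(rev, ann))  # 0.5

The report code accumulates this score per task pair (annotator vs. reviewer, annotator vs. superchecker, reviewer vs. superchecker) and divides by the number of contributing tasks, so the "Average" columns are plain per-task means and the "Cumulative" columns are the raw sums.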
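PATCH 09 also introduces "Average Rejection Count" columns derived from task.revision_loop_count: each report pass histograms the review (or super-check) loop counts into tasks_and_rejection_count_map and then takes a frequency-weighted sum divided by a task count. A minimal sketch of that aggregation, with an illustrative list standing in for the queryset values:

    from collections import Counter

    def average_rejection_count(loop_counts):
        # tasks_and_rejection_count_map in the patches: {loop_count: n_tasks}
        histogram = Counter(loop_counts)
        # cumulative_rejection_score: frequency-weighted sum over the map
        cumulative = sum(count * n for count, n in histogram.items())
        return cumulative / len(loop_counts) if loop_counts else 0

    # Four submitted tasks whose review_count values were 0, 1, 1 and 3:
    print(average_rejection_count([0, 1, 1, 3]))  # 1.25

When the denominator used in the report matches the number of tasks that were histogrammed, this weighted sum is just the arithmetic mean of the revision-loop counts. Separately, PATCH 11 and PATCH 12 wrap the calculate_bleu_score response in float(...) — the endpoint serialises the score as str(bleu_score) in PATCH 09, so the earlier += on the raw response value raised inside the try block and the BLEU columns silently stayed at zero — and they drop the annotator-vs-superchecker BLEU column from both the workspace and organization reports.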