Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update dev to latest master #1099

Merged
merged 23 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
72ed6a1
Merge pull request #1072 from AI4Bharat/dev
aparna-aa May 6, 2024
77459a9
Allow org owners to manipulate user details
ishvindersethi22 Jul 8, 2024
062dcee
Update views.py
ishvindersethi22 Jul 8, 2024
64df2db
added wer scores
KunalTiwary Jul 8, 2024
33526af
Merge pull request #1091 from AI4Bharat/admin-changes
aparna-aa Jul 8, 2024
9589f3e
minor fix
KunalTiwary Jul 8, 2024
dfc3b6f
Merge branch 'master' into wer_fix
KunalTiwary Jul 8, 2024
cb187b7
Merge pull request #1093 from AI4Bharat/wer_fix
ishvindersethi22 Jul 8, 2024
5345aed
skipped the blank data tasks
KunalTiwary Jul 10, 2024
c1d27fd
Merge branch 'master' into download_fix
KunalTiwary Jul 10, 2024
627d2fe
Merge pull request #1097 from AI4Bharat/download_fix
ishvindersethi22 Jul 10, 2024
efac1e1
fix_calculate_word_error_rate_between_two_audio_transcription_annotation
KunalTiwary Jul 10, 2024
51c46a1
Merge pull request #1098 from AI4Bharat/minor_fix_wer_audio
ishvindersethi22 Jul 10, 2024
f6734d0
fix wer
KunalTiwary Jul 11, 2024
4764095
Merge pull request #1101 from AI4Bharat/wer_fix_2
ishvindersethi22 Jul 11, 2024
b82a915
added org level wer scores
KunalTiwary Jul 12, 2024
4c228fd
Merge pull request #1102 from AI4Bharat/wer_fix_org
ishvindersethi22 Jul 12, 2024
8aa7b03
added rej score and bleu score
KunalTiwary Jul 12, 2024
5db327e
added changes to org level
KunalTiwary Jul 15, 2024
644d2d7
added minor fixes for bleu score
KunalTiwary Jul 16, 2024
d2b11bd
fix for org reports
KunalTiwary Jul 16, 2024
97af43a
Merge pull request #1103 from AI4Bharat/rej_fix
ishvindersethi22 Jul 24, 2024
ec10755
Merge branch 'dev' into master
KunalTiwary Jul 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions backend/dataset/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1038,6 +1038,7 @@ def project_analytics(self, request, pk=None):
calculate_word_error_rate_between_two_audio_transcription_annotation(
review_annotation.result,
review_annotation.parent_annotation.result,
project_type,
)
)
except:
Expand Down Expand Up @@ -1067,6 +1068,7 @@ def project_analytics(self, request, pk=None):
calculate_word_error_rate_between_two_audio_transcription_annotation(
supercheck_annotation.result,
supercheck_annotation.parent_annotation.result,
project_type,
)
)
except:
Expand Down
8 changes: 6 additions & 2 deletions backend/functions/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1372,6 +1372,7 @@ def get_stats_helper(
get_most_recent_annotation(
ann_obj.parent_annotation
).result,
project_type,
)
)
except Exception as error:
Expand Down Expand Up @@ -1426,6 +1427,7 @@ def get_stats_helper(
get_most_recent_annotation(
ann_obj.parent_annotation.parent_annotation
).result,
project_type,
)
)
except Exception as error:
Expand All @@ -1437,6 +1439,7 @@ def get_stats_helper(
get_most_recent_annotation(
ann_obj.parent_annotation
).result,
project_type,
)
)
except Exception as error:
Expand All @@ -1448,6 +1451,7 @@ def get_stats_helper(
get_most_recent_annotation(
ann_obj.parent_annotation.parent_annotation
).result,
project_type,
)
)
except Exception as error:
Expand Down Expand Up @@ -1519,10 +1523,10 @@ def calculate_ced_between_two_annotations(annotation1, annotation2):
return ced_list


def calculate_wer_between_two_annotations(annotation1, annotation2, project_type):
    """Return the word error rate (WER) between two annotation results.

    Thin, non-raising wrapper around
    ``calculate_word_error_rate_between_two_audio_transcription_annotation``:
    any failure (e.g. malformed annotation payloads) yields 0 so that
    report generation is never aborted by a single bad task.

    Args:
        annotation1: result payload of the first annotation (e.g. reviewer).
        annotation2: result payload of the second annotation (e.g. annotator).
        project_type: project type string forwarded to the WER helper,
            which uses it to decide how to extract transcriptions.

    Returns:
        The computed WER, or 0 if the computation fails for any reason.
    """
    try:
        return calculate_word_error_rate_between_two_audio_transcription_annotation(
            annotation1, annotation2, project_type
        )
    except Exception:
        # Best-effort metric: swallow errors and report 0 rather than
        # failing the caller's whole report. (Dropped the unused `as e`
        # binding from the original.)
        return 0
Expand Down
217 changes: 214 additions & 3 deletions backend/organizations/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
ANNOTATOR_ANNOTATION,
REVIEWER_ANNOTATION,
SUPER_CHECKER_ANNOTATION,
ACCEPTED,
ACCEPTED_WITH_MINOR_CHANGES,
ACCEPTED_WITH_MAJOR_CHANGES,
VALIDATED,
VALIDATED_WITH_CHANGES,
)
from .models import Organization
from users.models import User
Expand All @@ -24,6 +29,7 @@
get_audio_transcription_duration,
get_audio_segments_count,
ocr_word_count,
calculate_word_error_rate_between_two_audio_transcription_annotation,
)
from workspaces.tasks import (
un_pack_annotation_tasks,
Expand Down Expand Up @@ -69,6 +75,79 @@ def get_all_annotation_reports(
completed_by=userid,
updated_at__range=[start_date, end_date],
)
(
number_of_tasks_contributed_for_ar_wer,
number_of_tasks_contributed_for_as_wer,
number_of_tasks_contributed_for_ar_bleu,
) = (
0,
0,
0,
)
ar_wer_score, as_wer_score, ar_bleu_score = 0, 0, 0
tasks_and_rejection_count_map_ar, number_of_tasks_that_has_review_annotations = (
{},
0,
)
for ann in submitted_tasks:
all_annotations = Annotation.objects.filter(task_id=ann.task_id)
try:
task = ann.task
revision_loop_count = task.revision_loop_count
r_count = revision_loop_count["review_count"]
tasks_and_rejection_count_map_ar[r_count] = (
tasks_and_rejection_count_map_ar.get(r_count, 0) + 1
)
except Exception as e:
pass
ar_done, as_done = False, False
ann_ann, rev_ann, sup_ann = "", "", ""
for a in all_annotations:
if a.annotation_type == REVIEWER_ANNOTATION and a.annotation_status in [
ACCEPTED,
ACCEPTED_WITH_MINOR_CHANGES,
ACCEPTED_WITH_MAJOR_CHANGES,
]:
rev_ann = a
elif (
a.annotation_type == SUPER_CHECKER_ANNOTATION
and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES]
):
sup_ann = a
elif a.annotation_type == ANNOTATOR_ANNOTATION:
ann_ann = a
if a.annotation_type == REVIEWER_ANNOTATION:
number_of_tasks_that_has_review_annotations += 1
if ann_ann and rev_ann and not ar_done:
try:
ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
rev_ann.result, ann_ann.result, project_type
)
number_of_tasks_contributed_for_ar_wer += 1
ar_done = True
except Exception as e:
pass
try:
s1 = SentenceOperationViewSet()
sampleRequest = {
"annotation_result1": rev_ann.result,
"annotation_result2": ann_ann.result,
}
ar_bleu_score += float(
s1.calculate_bleu_score(sampleRequest).data["bleu_score"]
)
number_of_tasks_contributed_for_ar_bleu += 1
except Exception as e:
pass
if ann_ann and sup_ann and not as_done:
try:
as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
sup_ann.result, ann_ann.result, project_type
)
number_of_tasks_contributed_for_as_wer += 1
as_done = True
except Exception as e:
pass

submitted_tasks_count = submitted_tasks.count()

Expand Down Expand Up @@ -111,7 +190,10 @@ def get_all_annotation_reports(
total_raw_audio_duration = convert_seconds_to_hours(
sum(total_raw_audio_duration_list)
)

cumulative_rejection_score_ar = 0
if tasks_and_rejection_count_map_ar:
for task, rc in tasks_and_rejection_count_map_ar.items():
cumulative_rejection_score_ar += task * rc
result = {
"Name": userName,
"Email": email,
Expand All @@ -123,6 +205,28 @@ def get_all_annotation_reports(
"Word Count": total_word_count,
"Submitted Tasks": submitted_tasks_count,
"Language": user_lang,
"Average Word Error Rate Annotator Vs Reviewer": ar_wer_score
/ number_of_tasks_contributed_for_ar_wer
if number_of_tasks_contributed_for_ar_wer
else 0,
"Cumulative Word Error Rate Annotator Vs Reviewer": ar_wer_score
if number_of_tasks_contributed_for_ar_wer
else 0,
"Average Word Error Rate Annotator Vs Superchecker": as_wer_score
/ number_of_tasks_contributed_for_as_wer
if number_of_tasks_contributed_for_as_wer
else 0,
"Cumulative Word Error Rate Annotator Vs Superchecker": as_wer_score
if number_of_tasks_contributed_for_as_wer
else 0,
"Average Bleu Score Annotator Vs Reviewer": ar_bleu_score
/ number_of_tasks_contributed_for_ar_bleu
if number_of_tasks_contributed_for_ar_bleu
else 0,
"Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar
/ number_of_tasks_that_has_review_annotations
if number_of_tasks_that_has_review_annotations
else 0,
}

if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
Expand Down Expand Up @@ -190,7 +294,67 @@ def get_all_review_reports(
annotation_type=REVIEWER_ANNOTATION,
updated_at__range=[start_date, end_date],
)

number_of_tasks_contributed_for_rs_wer, number_of_tasks_contributed_for_rs_bleu = (
0,
0,
)
rs_wer_score, rs_bleu_score = 0, 0
(
tasks_and_rejection_count_map_ar,
tasks_and_rejection_count_map_rs,
number_of_tasks_that_has_sup_annotations,
) = ({}, {}, 0)
for ann in submitted_tasks:
all_annotations = Annotation.objects.filter(task_id=ann.task_id)
task = ann.task
revision_loop_count = task.revision_loop_count
try:
r_count = revision_loop_count["review_count"]
tasks_and_rejection_count_map_ar[r_count] = (
tasks_and_rejection_count_map_ar.get(r_count, 0) + 1
)
except Exception as e:
pass
try:
s_count = revision_loop_count["super_check_count"]
tasks_and_rejection_count_map_rs[s_count] = (
tasks_and_rejection_count_map_rs.get(s_count, 0) + 1
)
except Exception as e:
pass
rs_done = False # for duplicate annotations
sup_ann, rev_ann = "", ""
for a in all_annotations:
if (
a.annotation_type == SUPER_CHECKER_ANNOTATION
and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES]
):
sup_ann = a
elif a.annotation_type == REVIEWER_ANNOTATION:
rev_ann = a
if a.annotation_type == SUPER_CHECKER_ANNOTATION:
number_of_tasks_that_has_sup_annotations += 1
if rev_ann and sup_ann and not rs_done:
try:
rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
sup_ann.result, rev_ann.result, project_type
)
number_of_tasks_contributed_for_rs_wer += 1
rs_done = True
except Exception as e:
pass
try:
s1 = SentenceOperationViewSet()
sampleRequest = {
"annotation_result1": sup_ann.result,
"annotation_result2": rev_ann.result,
}
rs_bleu_score += float(
s1.calculate_bleu_score(sampleRequest).data["bleu_score"]
)
number_of_tasks_contributed_for_rs_bleu += 1
except Exception as e:
pass
submitted_tasks_count = submitted_tasks.count()

project_type_lower = project_type.lower()
Expand Down Expand Up @@ -232,6 +396,15 @@ def get_all_review_reports(
total_raw_audio_duration = convert_seconds_to_hours(
sum(total_raw_audio_duration_list)
)
cumulative_rejection_score_ar = 0
if tasks_and_rejection_count_map_ar:
for task, rc in tasks_and_rejection_count_map_ar.items():
cumulative_rejection_score_ar += task * rc

cumulative_rejection_score_rs = 0
if tasks_and_rejection_count_map_rs:
for task, rc in tasks_and_rejection_count_map_rs.items():
cumulative_rejection_score_rs += task * rc

result = {
"Name": userName,
Expand All @@ -244,6 +417,25 @@ def get_all_review_reports(
"Word Count": total_word_count,
"Submitted Tasks": submitted_tasks_count,
"Language": user_lang,
"Average Word Error Rate Reviewer Vs Superchecker": rs_wer_score
/ number_of_tasks_contributed_for_rs_wer
if number_of_tasks_contributed_for_rs_wer
else 0,
"Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score
if number_of_tasks_contributed_for_rs_wer
else 0,
"Average Bleu Score Reviewer Vs Superchecker": rs_bleu_score
/ number_of_tasks_contributed_for_rs_bleu
if number_of_tasks_contributed_for_rs_bleu
else 0,
"Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar
/ submitted_tasks_count
if submitted_tasks_count
else 0,
"Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs
/ number_of_tasks_that_has_sup_annotations
if number_of_tasks_that_has_sup_annotations
else 0,
}

if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
Expand Down Expand Up @@ -296,7 +488,17 @@ def get_all_supercheck_reports(
annotation_type=SUPER_CHECKER_ANNOTATION,
updated_at__range=[start_date, end_date],
)

tasks_and_rejection_count_map_rs = {}
for ann in submitted_tasks:
task = ann.task
revision_loop_count = task.revision_loop_count
try:
s_count = revision_loop_count["super_check_count"]
tasks_and_rejection_count_map_rs[s_count] = (
tasks_and_rejection_count_map_rs.get(s_count, 0) + 1
)
except Exception as e:
pass
submitted_tasks_count = submitted_tasks.count()

project_type_lower = project_type.lower()
Expand Down Expand Up @@ -342,6 +544,10 @@ def get_all_supercheck_reports(
validated_raw_audio_duration = convert_seconds_to_hours(
sum(validated_raw_audio_duration_list)
)
cumulative_rejection_score_rs = 0
if tasks_and_rejection_count_map_rs:
for task, rc in tasks_and_rejection_count_map_rs.items():
cumulative_rejection_score_rs += task * rc

result = {
"Name": userName,
Expand All @@ -354,6 +560,10 @@ def get_all_supercheck_reports(
"Word Count": validated_word_count,
"Submitted Tasks": submitted_tasks_count,
"Language": user_lang,
"Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs
/ submitted_tasks_count
if submitted_tasks_count
else 0,
}

if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
Expand Down Expand Up @@ -513,6 +723,7 @@ def send_user_reports_mail_org(
final_reports = sorted(final_reports, key=lambda x: x["Name"], reverse=False)

df = pd.DataFrame.from_dict(final_reports)
df = df.fillna("NA")

content = df.to_csv(index=False)
content_type = "text/csv"
Expand Down
Loading
Loading