AI4Bharat
diff --git a/backend/dataset/views.py b/backend/dataset/views.py
Lines changed: 2 additions & 0 deletions
diff --git a/backend/functions/tasks.py b/backend/functions/tasks.py
Lines changed: 6 additions & 2 deletions
diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py
Lines changed: 214 additions & 3 deletions
@@ -1038,6 +1038,7 @@ def project_analytics(self, request, pk=None):
                                 calculate_word_error_rate_between_two_audio_transcription_annotation(
                                     review_annotation.result,
                                     review_annotation.parent_annotation.result,
+                                    project_type,
                                 )
                             )
                         except:
@@ -1067,6 +1068,7 @@ def project_analytics(self, request, pk=None):
                                 calculate_word_error_rate_between_two_audio_transcription_annotation(
                                     supercheck_annotation.result,
                                     supercheck_annotation.parent_annotation.result,
+                                    project_type,
                                 )
                             )
                         except:
 
@@ -1372,6 +1372,7 @@ def get_stats_helper(
                             get_most_recent_annotation(
                                 ann_obj.parent_annotation
                             ).result,
+                            project_type,
                         )
                     )
                 except Exception as error:
@@ -1426,6 +1427,7 @@ def get_stats_helper(
                             get_most_recent_annotation(
                                 ann_obj.parent_annotation.parent_annotation
                             ).result,
+                            project_type,
                         )
                     )
                 except Exception as error:
@@ -1437,6 +1439,7 @@ def get_stats_helper(
                             get_most_recent_annotation(
                                 ann_obj.parent_annotation
                             ).result,
+                            project_type,
                         )
                     )
                 except Exception as error:
@@ -1448,6 +1451,7 @@ def get_stats_helper(
                             get_most_recent_annotation(
                                 ann_obj.parent_annotation.parent_annotation
                             ).result,
+                            project_type,
                         )
                     )
                 except Exception as error:
@@ -1519,10 +1523,10 @@ def calculate_ced_between_two_annotations(annotation1, annotation2):
     return ced_list
 
 
-def calculate_wer_between_two_annotations(annotation1, annotation2):
+def calculate_wer_between_two_annotations(annotation1, annotation2, project_type):
     try:
         return calculate_word_error_rate_between_two_audio_transcription_annotation(
-            annotation1, annotation2
+            annotation1, annotation2, project_type
         )
     except Exception as e:
         return 0
 
@@ -13,6 +13,11 @@
     ANNOTATOR_ANNOTATION,
     REVIEWER_ANNOTATION,
     SUPER_CHECKER_ANNOTATION,
+    ACCEPTED,
+    ACCEPTED_WITH_MINOR_CHANGES,
+    ACCEPTED_WITH_MAJOR_CHANGES,
+    VALIDATED,
+    VALIDATED_WITH_CHANGES,
 )
 from .models import Organization
 from users.models import User
@@ -24,6 +29,7 @@
     get_audio_transcription_duration,
     get_audio_segments_count,
     ocr_word_count,
+    calculate_word_error_rate_between_two_audio_transcription_annotation,
 )
 from workspaces.tasks import (
     un_pack_annotation_tasks,
@@ -69,6 +75,79 @@ def get_all_annotation_reports(
             completed_by=userid,
             updated_at__range=[start_date, end_date],
         )
+    (
+        number_of_tasks_contributed_for_ar_wer,
+        number_of_tasks_contributed_for_as_wer,
+        number_of_tasks_contributed_for_ar_bleu,
+    ) = (
+        0,
+        0,
+        0,
+    )
+    ar_wer_score, as_wer_score, ar_bleu_score = 0, 0, 0
+    tasks_and_rejection_count_map_ar, number_of_tasks_that_has_review_annotations = (
+        {},
+        0,
+    )
+    for ann in submitted_tasks:
+        all_annotations = Annotation.objects.filter(task_id=ann.task_id)
+        try:
+            task = ann.task
+            revision_loop_count = task.revision_loop_count
+            r_count = revision_loop_count["review_count"]
+            tasks_and_rejection_count_map_ar[r_count] = (
+                tasks_and_rejection_count_map_ar.get(r_count, 0) + 1
+            )
+        except Exception as e:
+            pass
+        ar_done, as_done = False, False
+        ann_ann, rev_ann, sup_ann = "", "", ""
+        for a in all_annotations:
+            if a.annotation_type == REVIEWER_ANNOTATION and a.annotation_status in [
+                ACCEPTED,
+                ACCEPTED_WITH_MINOR_CHANGES,
+                ACCEPTED_WITH_MAJOR_CHANGES,
+            ]:
+                rev_ann = a
+            elif (
+                a.annotation_type == SUPER_CHECKER_ANNOTATION
+                and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES]
+            ):
+                sup_ann = a
+            elif a.annotation_type == ANNOTATOR_ANNOTATION:
+                ann_ann = a
+            if a.annotation_type == REVIEWER_ANNOTATION:
+                number_of_tasks_that_has_review_annotations += 1
+            if ann_ann and rev_ann and not ar_done:
+                try:
+                    ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
+                        rev_ann.result, ann_ann.result, project_type
+                    )
+                    number_of_tasks_contributed_for_ar_wer += 1
+                    ar_done = True
+                except Exception as e:
+                    pass
+                try:
+                    s1 = SentenceOperationViewSet()
+                    sampleRequest = {
+                        "annotation_result1": rev_ann.result,
+                        "annotation_result2": ann_ann.result,
+                    }
+                    ar_bleu_score += float(
+                        s1.calculate_bleu_score(sampleRequest).data["bleu_score"]
+                    )
+                    number_of_tasks_contributed_for_ar_bleu += 1
+                except Exception as e:
+                    pass
+            if ann_ann and sup_ann and not as_done:
+                try:
+                    as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
+                        sup_ann.result, ann_ann.result, project_type
+                    )
+                    number_of_tasks_contributed_for_as_wer += 1
+                    as_done = True
+                except Exception as e:
+                    pass
 
     submitted_tasks_count = submitted_tasks.count()
 
@@ -111,7 +190,10 @@ def get_all_annotation_reports(
     total_raw_audio_duration = convert_seconds_to_hours(
         sum(total_raw_audio_duration_list)
     )
-
+    cumulative_rejection_score_ar = 0
+    if tasks_and_rejection_count_map_ar:
+        for task, rc in tasks_and_rejection_count_map_ar.items():
+            cumulative_rejection_score_ar += task * rc
     result = {
         "Name": userName,
         "Email": email,
@@ -123,6 +205,28 @@ def get_all_annotation_reports(
         "Word Count": total_word_count,
         "Submitted Tasks": submitted_tasks_count,
         "Language": user_lang,
+        "Average Word Error Rate Annotator Vs Reviewer": ar_wer_score
+        / number_of_tasks_contributed_for_ar_wer
+        if number_of_tasks_contributed_for_ar_wer
+        else 0,
+        "Cumulative Word Error Rate Annotator Vs Reviewer": ar_wer_score
+        if number_of_tasks_contributed_for_ar_wer
+        else 0,
+        "Average Word Error Rate Annotator Vs Superchecker": as_wer_score
+        / number_of_tasks_contributed_for_as_wer
+        if number_of_tasks_contributed_for_as_wer
+        else 0,
+        "Cumulative Word Error Rate Annotator Vs Superchecker": as_wer_score
+        if number_of_tasks_contributed_for_as_wer
+        else 0,
+        "Average Bleu Score Annotator Vs Reviewer": ar_bleu_score
+        / number_of_tasks_contributed_for_ar_bleu
+        if number_of_tasks_contributed_for_ar_bleu
+        else 0,
+        "Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar
+        / number_of_tasks_that_has_review_annotations
+        if number_of_tasks_that_has_review_annotations
+        else 0,
     }
 
     if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
@@ -190,7 +294,67 @@ def get_all_review_reports(
             annotation_type=REVIEWER_ANNOTATION,
             updated_at__range=[start_date, end_date],
         )
-
+    number_of_tasks_contributed_for_rs_wer, number_of_tasks_contributed_for_rs_bleu = (
+        0,
+        0,
+    )
+    rs_wer_score, rs_bleu_score = 0, 0
+    (
+        tasks_and_rejection_count_map_ar,
+        tasks_and_rejection_count_map_rs,
+        number_of_tasks_that_has_sup_annotations,
+    ) = ({}, {}, 0)
+    for ann in submitted_tasks:
+        all_annotations = Annotation.objects.filter(task_id=ann.task_id)
+        task = ann.task
+        revision_loop_count = task.revision_loop_count
+        try:
+            r_count = revision_loop_count["review_count"]
+            tasks_and_rejection_count_map_ar[r_count] = (
+                tasks_and_rejection_count_map_ar.get(r_count, 0) + 1
+            )
+        except Exception as e:
+            pass
+        try:
+            s_count = revision_loop_count["super_check_count"]
+            tasks_and_rejection_count_map_rs[s_count] = (
+                tasks_and_rejection_count_map_rs.get(s_count, 0) + 1
+            )
+        except Exception as e:
+            pass
+        rs_done = False  # for duplicate annotations
+        sup_ann, rev_ann = "", ""
+        for a in all_annotations:
+            if (
+                a.annotation_type == SUPER_CHECKER_ANNOTATION
+                and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES]
+            ):
+                sup_ann = a
+            elif a.annotation_type == REVIEWER_ANNOTATION:
+                rev_ann = a
+            if a.annotation_type == SUPER_CHECKER_ANNOTATION:
+                number_of_tasks_that_has_sup_annotations += 1
+            if rev_ann and sup_ann and not rs_done:
+                try:
+                    rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
+                        sup_ann.result, rev_ann.result, project_type
+                    )
+                    number_of_tasks_contributed_for_rs_wer += 1
+                    rs_done = True
+                except Exception as e:
+                    pass
+                try:
+                    s1 = SentenceOperationViewSet()
+                    sampleRequest = {
+                        "annotation_result1": sup_ann.result,
+                        "annotation_result2": rev_ann.result,
+                    }
+                    rs_bleu_score += float(
+                        s1.calculate_bleu_score(sampleRequest).data["bleu_score"]
+                    )
+                    number_of_tasks_contributed_for_rs_bleu += 1
+                except Exception as e:
+                    pass
     submitted_tasks_count = submitted_tasks.count()
 
     project_type_lower = project_type.lower()
@@ -232,6 +396,15 @@ def get_all_review_reports(
     total_raw_audio_duration = convert_seconds_to_hours(
         sum(total_raw_audio_duration_list)
     )
+    cumulative_rejection_score_ar = 0
+    if tasks_and_rejection_count_map_ar:
+        for task, rc in tasks_and_rejection_count_map_ar.items():
+            cumulative_rejection_score_ar += task * rc
+
+    cumulative_rejection_score_rs = 0
+    if tasks_and_rejection_count_map_rs:
+        for task, rc in tasks_and_rejection_count_map_rs.items():
+            cumulative_rejection_score_rs += task * rc
 
     result = {
         "Name": userName,
@@ -244,6 +417,25 @@ def get_all_review_reports(
         "Word Count": total_word_count,
         "Submitted Tasks": submitted_tasks_count,
         "Language": user_lang,
+        "Average Word Error Rate Reviewer Vs Superchecker": rs_wer_score
+        / number_of_tasks_contributed_for_rs_wer
+        if number_of_tasks_contributed_for_rs_wer
+        else 0,
+        "Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score
+        if number_of_tasks_contributed_for_rs_wer
+        else 0,
+        "Average Bleu Score Reviewer Vs Superchecker": rs_bleu_score
+        / number_of_tasks_contributed_for_rs_bleu
+        if number_of_tasks_contributed_for_rs_bleu
+        else 0,
+        "Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar
+        / submitted_tasks_count
+        if submitted_tasks_count
+        else 0,
+        "Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs
+        / number_of_tasks_that_has_sup_annotations
+        if number_of_tasks_that_has_sup_annotations
+        else 0,
     }
 
     if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
@@ -296,7 +488,17 @@ def get_all_supercheck_reports(
             annotation_type=SUPER_CHECKER_ANNOTATION,
             updated_at__range=[start_date, end_date],
         )
-
+    tasks_and_rejection_count_map_rs = {}
+    for ann in submitted_tasks:
+        task = ann.task
+        revision_loop_count = task.revision_loop_count
+        try:
+            s_count = revision_loop_count["super_check_count"]
+            tasks_and_rejection_count_map_rs[s_count] = (
+                tasks_and_rejection_count_map_rs.get(s_count, 0) + 1
+            )
+        except Exception as e:
+            pass
     submitted_tasks_count = submitted_tasks.count()
 
     project_type_lower = project_type.lower()
@@ -342,6 +544,10 @@ def get_all_supercheck_reports(
     validated_raw_audio_duration = convert_seconds_to_hours(
         sum(validated_raw_audio_duration_list)
     )
+    cumulative_rejection_score_rs = 0
+    if tasks_and_rejection_count_map_rs:
+        for task, rc in tasks_and_rejection_count_map_rs.items():
+            cumulative_rejection_score_rs += task * rc
 
     result = {
         "Name": userName,
@@ -354,6 +560,10 @@ def get_all_supercheck_reports(
         "Word Count": validated_word_count,
         "Submitted Tasks": submitted_tasks_count,
         "Language": user_lang,
+        "Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs
+        / submitted_tasks_count
+        if submitted_tasks_count
+        else 0,
     }
 
     if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
@@ -513,6 +723,7 @@ def send_user_reports_mail_org(
     final_reports = sorted(final_reports, key=lambda x: x["Name"], reverse=False)
 
     df = pd.DataFrame.from_dict(final_reports)
+    df = df.fillna("NA")
 
     content = df.to_csv(index=False)
     content_type = "text/csv"
Original file line number	Diff line number	Diff line change
`@@ -1038,6 +1038,7 @@ def project_analytics(self, request, pk=None):`
`1038`	`1038`	`calculate_word_error_rate_between_two_audio_transcription_annotation(`
`1039`	`1039`	`review_annotation.result,`
`1040`	`1040`	`review_annotation.parent_annotation.result,`
	`1041`	`+ project_type,`
`1041`	`1042`	`)`
`1042`	`1043`	`)`
`1043`	`1044`	`except:`
`@@ -1067,6 +1068,7 @@ def project_analytics(self, request, pk=None):`
`1067`	`1068`	`calculate_word_error_rate_between_two_audio_transcription_annotation(`
`1068`	`1069`	`supercheck_annotation.result,`
`1069`	`1070`	`supercheck_annotation.parent_annotation.result,`
	`1071`	`+ project_type,`
`1070`	`1072`	`)`
`1071`	`1073`	`)`
`1072`	`1074`	`except:`
Original file line number	Diff line number	Diff line change
`@@ -1372,6 +1372,7 @@ def get_stats_helper(`
`1372`	`1372`	`get_most_recent_annotation(`
`1373`	`1373`	`ann_obj.parent_annotation`
`1374`	`1374`	`).result,`
	`1375`	`+ project_type,`
`1375`	`1376`	`)`
`1376`	`1377`	`)`
`1377`	`1378`	`except Exception as error:`
`@@ -1426,6 +1427,7 @@ def get_stats_helper(`
`1426`	`1427`	`get_most_recent_annotation(`
`1427`	`1428`	`ann_obj.parent_annotation.parent_annotation`
`1428`	`1429`	`).result,`
	`1430`	`+ project_type,`
`1429`	`1431`	`)`
`1430`	`1432`	`)`
`1431`	`1433`	`except Exception as error:`
`@@ -1437,6 +1439,7 @@ def get_stats_helper(`
`1437`	`1439`	`get_most_recent_annotation(`
`1438`	`1440`	`ann_obj.parent_annotation`
`1439`	`1441`	`).result,`
	`1442`	`+ project_type,`
`1440`	`1443`	`)`
`1441`	`1444`	`)`
`1442`	`1445`	`except Exception as error:`
`@@ -1448,6 +1451,7 @@ def get_stats_helper(`
`1448`	`1451`	`get_most_recent_annotation(`
`1449`	`1452`	`ann_obj.parent_annotation.parent_annotation`
`1450`	`1453`	`).result,`
	`1454`	`+ project_type,`
`1451`	`1455`	`)`
`1452`	`1456`	`)`
`1453`	`1457`	`except Exception as error:`
`@@ -1519,10 +1523,10 @@ def calculate_ced_between_two_annotations(annotation1, annotation2):`
`1519`	`1523`	`return ced_list`
`1520`	`1524`
`1521`	`1525`
`1522`		`-def calculate_wer_between_two_annotations(annotation1, annotation2):`
	`1526`	`+def calculate_wer_between_two_annotations(annotation1, annotation2, project_type):`
`1523`	`1527`	`try:`
`1524`	`1528`	`return calculate_word_error_rate_between_two_audio_transcription_annotation(`
`1525`		`- annotation1, annotation2`
	`1529`	`+ annotation1, annotation2, project_type`
`1526`	`1530`	`)`
`1527`	`1531`	`except Exception as e:`
`1528`	`1532`	`return 0`