From 77459a9eb70e3e1bd9c933d8d37fbc1f610a5c42 Mon Sep 17 00:00:00 2001 From: Ishvinder Sethi Date: Mon, 8 Jul 2024 15:22:35 +0530 Subject: [PATCH 01/12] Allow org owners to manipulate user details --- backend/users/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/users/views.py b/backend/users/views.py index 320a06901..64aa7709f 100644 --- a/backend/users/views.py +++ b/backend/users/views.py @@ -858,7 +858,7 @@ def user_profile_image_update(self, request, pk=None): @swagger_auto_schema(request_body=UserUpdateSerializer) @action(detail=True, methods=["patch"], url_path="edit_user_details") def user_details_update(self, request, pk=None): - if request.user.role != User.ADMIN: + if request.user.role not in [User.ADMIN, User.ORGANIZATION_OWNER]: return Response( {"message": "Not Authorized"}, status=status.HTTP_403_FORBIDDEN ) From 062dceec6903958d94ce731d51aad2bf52acd8df Mon Sep 17 00:00:00 2001 From: Ishvinder Sethi Date: Mon, 8 Jul 2024 15:25:04 +0530 Subject: [PATCH 02/12] Update views.py --- backend/users/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/users/views.py b/backend/users/views.py index 64aa7709f..b600a3dde 100644 --- a/backend/users/views.py +++ b/backend/users/views.py @@ -804,7 +804,7 @@ def enable_email(self, request): @swagger_auto_schema(responses={200: UserProfileSerializer, 403: "Not Authorized"}) @action(detail=False, methods=["get"], url_path="user_details") def user_details(self, request): - if request.user.role == User.ADMIN: + if request.user.role in [User.ADMIN, User.ORGANIZATION_OWNER]: user_details = User.objects.all() serializer = UserProfileSerializer(user_details, many=True) return Response(serializer.data, status=status.HTTP_200_OK) From 64df2db771b4d154d6017db7dfd5959acc841898 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Mon, 8 Jul 2024 10:12:50 +0000 Subject: [PATCH 03/12] added wer scores --- backend/projects/utils.py | 4 +- backend/workspaces/tasks.py | 88 ++++++++++++++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 3 deletions(-) diff --git a/backend/projects/utils.py b/backend/projects/utils.py index 9408d44ce..678515b47 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -222,7 +222,7 @@ def calculate_word_error_rate_between_two_audio_transcription_annotation( annotation_result2_text = "" for result in annotation_result1: - if result["from_name"] in ["transcribed_json", "verbatim_transcribed_json"]: + if "type" in result and result["type"] == "textarea": try: for s in result["value"]["text"]: annotation_result1_text += s @@ -230,7 +230,7 @@ def calculate_word_error_rate_between_two_audio_transcription_annotation( pass for result in annotation_result2: - if result["from_name"] in ["transcribed_json", "verbatim_transcribed_json"]: + if "type" in result and result["type"] == "textarea": try: for s in result["value"]["text"]: annotation_result2_text += s diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index e63273dea..9ffd8afb1 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -13,6 +13,11 @@ ANNOTATOR_ANNOTATION, REVIEWER_ANNOTATION, SUPER_CHECKER_ANNOTATION, + ACCEPTED, + ACCEPTED_WITH_MINOR_CHANGES, + ACCEPTED_WITH_MAJOR_CHANGES, + VALIDATED, + VALIDATED_WITH_CHANGES, ) from .models import Workspace from users.models import User @@ -66,6 +71,45 @@ def get_all_annotation_reports( completed_by=userid, updated_at__range=[start_date, end_date], ) + number_of_tasks_contributed_for_ar_wer, 
number_of_tasks_contributed_for_as_wer = ( + 0, + 0, + ) + ar_wer_score, as_wer_score = 0, 0 + for ann in submitted_tasks: + all_annotations = Annotation.objects.filter(task_id=ann.task_id) + ar_done, as_done = False, False # for duplicate annotations + for a in all_annotations: + rev_ann, sup_ann = "", "" + if a.annotation_type == REVIEWER_ANNOTATION and a.annotation_status in [ + ACCEPTED, + ACCEPTED_WITH_MINOR_CHANGES, + ACCEPTED_WITH_MAJOR_CHANGES, + ]: + rev_ann = a + elif ( + a.annotation_type == SUPER_CHECKER_ANNOTATION + and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES] + ): + sup_ann = a + if rev_ann and not ar_done: + try: + ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( + rev_ann.result, ann.result + ) + number_of_tasks_contributed_for_ar_wer += 1 + ar_done = True + except Exception as e: + pass + if sup_ann and not as_done: + try: + as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( + sup_ann.result, ann.result + ) + number_of_tasks_contributed_for_as_wer += 1 + as_done = True + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() @@ -120,6 +164,20 @@ def get_all_annotation_reports( "Word Count": total_word_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, + "Average Word Error Rate Annotator Vs Reviewer": ar_wer_score + / number_of_tasks_contributed_for_ar_wer + if number_of_tasks_contributed_for_ar_wer + else 0, + "Cumulative Word Error Rate Annotator Vs Reviewer": ar_wer_score + if number_of_tasks_contributed_for_ar_wer + else 0, + "Average Word Error Rate Annotator Vs Superchecker": as_wer_score + / number_of_tasks_contributed_for_as_wer + if number_of_tasks_contributed_for_as_wer + else 0, + "Cumulative Word Error Rate Annotator Vs Superchecker": as_wer_score + if number_of_tasks_contributed_for_as_wer + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": @@ -187,7 +245,27 @@ def get_all_review_reports( annotation_type=REVIEWER_ANNOTATION, updated_at__range=[start_date, end_date], ) - + number_of_tasks_contributed_for_rs_wer = 0 + rs_wer_score = 0 + for ann in submitted_tasks: + all_annotations = Annotation.objects.filter(task_id=ann.task_id) + rs_done = False # for duplicate annotations + for a in all_annotations: + sup_ann = "" + if ( + a.annotation_type == SUPER_CHECKER_ANNOTATION + and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES] + ): + sup_ann = a + if sup_ann and not rs_done: + try: + rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( + sup_ann.result, ann.result + ) + number_of_tasks_contributed_for_rs_wer += 1 + rs_done = True + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() project_type_lower = project_type.lower() @@ -240,6 +318,13 @@ def get_all_review_reports( "Word Count": total_word_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, + "Average Word Error Rate Annotator Vs Superchecker": rs_wer_score + / number_of_tasks_contributed_for_rs_wer + if number_of_tasks_contributed_for_rs_wer + else 0, + "Cumulative Word Error Rate Annotator Vs Superchecker": rs_wer_score + if number_of_tasks_contributed_for_rs_wer + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": @@ -509,6 +594,7 @@ def send_user_reports_mail_ws( final_reports = sorted(final_reports, key=lambda x: x["Name"], reverse=False) df = pd.DataFrame.from_dict(final_reports) + df = 
df.fillna("NA") content = df.to_csv(index=False) content_type = "text/csv" From 9589f3ed8ffaf0384616877d35c047f2ecb19b36 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Mon, 8 Jul 2024 11:43:20 +0000 Subject: [PATCH 04/12] minor fix --- backend/projects/utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/backend/projects/utils.py b/backend/projects/utils.py index 678515b47..71c8fa982 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -215,8 +215,13 @@ def audio_word_count(annotation_result): def calculate_word_error_rate_between_two_audio_transcription_annotation( annotation_result1, annotation_result2 ): - annotation_result1 = sorted(annotation_result1, key=lambda i: (i["value"]["end"])) - annotation_result2 = sorted(annotation_result2, key=lambda i: (i["value"]["end"])) + if "end" in annotation_result1[0]["value"]: + annotation_result1 = sorted( + annotation_result1, key=lambda i: (i["value"]["end"]) + ) + annotation_result2 = sorted( + annotation_result2, key=lambda i: (i["value"]["end"]) + ) annotation_result1_text = "" annotation_result2_text = "" From 5345aede03fef89c79cba451c0f1540b5257e5f6 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Wed, 10 Jul 2024 04:56:09 +0000 Subject: [PATCH 05/12] skipped the blank data tasks --- backend/projects/views.py | 55 +++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/backend/projects/views.py b/backend/projects/views.py index 579e26ee1..3ea5f5abd 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -4092,35 +4092,38 @@ def download(self, request, pk=None, *args, **kwargs): ) is_OCRSegmentCategorization = project_type == "OCRSegmentCategorization" for task in tasks: - curr_task = process_task( - task, - export_type, - include_input_data_metadata_json, - dataset_model, - is_audio_project_type, - ) - if ( - is_ConversationTranslation - or is_ConversationTranslationEditing - or is_ConversationVerification - ): - process_conversation_tasks( - curr_task, - is_ConversationTranslation, - is_ConversationVerification, + try: + curr_task = process_task( + task, + export_type, + include_input_data_metadata_json, + dataset_model, + is_audio_project_type, ) - elif dataset_type in ["SpeechConversation", "OCRDocument"]: - is_SpeechConversation = dataset_type == "SpeechConversation" - if is_SpeechConversation: - process_speech_tasks( - curr_task, is_AudioSegmentation, project_type - ) - else: - process_ocr_tasks( + if ( + is_ConversationTranslation + or is_ConversationTranslationEditing + or is_ConversationVerification + ): + process_conversation_tasks( curr_task, - is_OCRSegmentCategorization, - is_OCRSegmentCategorizationEditing, + is_ConversationTranslation, + is_ConversationVerification, ) + elif dataset_type in ["SpeechConversation", "OCRDocument"]: + is_SpeechConversation = dataset_type == "SpeechConversation" + if is_SpeechConversation: + process_speech_tasks( + curr_task, is_AudioSegmentation, project_type + ) + else: + process_ocr_tasks( + curr_task, + is_OCRSegmentCategorization, + is_OCRSegmentCategorizationEditing, + ) + except Exception as e: + continue tasks_list.append(curr_task) download_resources = True export_stream, content_type, filename = DataExport.generate_export_file( From efac1e1fc66bb308b0429f2f2ca3b5ec9e70a17d Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Wed, 10 Jul 2024 09:48:19 +0000 Subject: [PATCH 06/12] fix_calculate_word_error_rate_between_two_audio_transcription_annotation --- 
backend/dataset/views.py | 2 ++ backend/functions/tasks.py | 8 ++++-- backend/projects/utils.py | 50 +++++++++++++++++++++++++++++-------- backend/projects/views.py | 8 +++--- backend/workspaces/tasks.py | 20 ++++++++------- backend/workspaces/views.py | 2 ++ 6 files changed, 64 insertions(+), 26 deletions(-) diff --git a/backend/dataset/views.py b/backend/dataset/views.py index 5c9e1fa21..7e6b4227c 100644 --- a/backend/dataset/views.py +++ b/backend/dataset/views.py @@ -1038,6 +1038,7 @@ def project_analytics(self, request, pk=None): calculate_word_error_rate_between_two_audio_transcription_annotation( review_annotation.result, review_annotation.parent_annotation.result, + project_type, ) ) except: @@ -1067,6 +1068,7 @@ def project_analytics(self, request, pk=None): calculate_word_error_rate_between_two_audio_transcription_annotation( supercheck_annotation.result, supercheck_annotation.parent_annotation.result, + project_type, ) ) except: diff --git a/backend/functions/tasks.py b/backend/functions/tasks.py index c235ad1ca..f6bf51261 100644 --- a/backend/functions/tasks.py +++ b/backend/functions/tasks.py @@ -1371,6 +1371,7 @@ def get_stats_helper( get_most_recent_annotation( ann_obj.parent_annotation ).result, + project_type, ) ) except Exception as error: @@ -1425,6 +1426,7 @@ def get_stats_helper( get_most_recent_annotation( ann_obj.parent_annotation.parent_annotation ).result, + project_type, ) ) except Exception as error: @@ -1436,6 +1438,7 @@ def get_stats_helper( get_most_recent_annotation( ann_obj.parent_annotation ).result, + project_type, ) ) except Exception as error: @@ -1447,6 +1450,7 @@ def get_stats_helper( get_most_recent_annotation( ann_obj.parent_annotation.parent_annotation ).result, + project_type, ) ) except Exception as error: @@ -1518,10 +1522,10 @@ def calculate_ced_between_two_annotations(annotation1, annotation2): return ced_list -def calculate_wer_between_two_annotations(annotation1, annotation2): +def calculate_wer_between_two_annotations(annotation1, annotation2, project_type): try: return calculate_word_error_rate_between_two_audio_transcription_annotation( - annotation1, annotation2 + annotation1, annotation2, project_type ) except Exception as e: return 0 diff --git a/backend/projects/utils.py b/backend/projects/utils.py index 71c8fa982..ab1342162 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -213,7 +213,7 @@ def audio_word_count(annotation_result): def calculate_word_error_rate_between_two_audio_transcription_annotation( - annotation_result1, annotation_result2 + annotation_result1, annotation_result2, project_type ): if "end" in annotation_result1[0]["value"]: annotation_result1 = sorted( @@ -228,19 +228,47 @@ def calculate_word_error_rate_between_two_audio_transcription_annotation( for result in annotation_result1: if "type" in result and result["type"] == "textarea": - try: - for s in result["value"]["text"]: - annotation_result1_text += s - except: - pass + if project_type in [ + "AcousticNormalisedTranscriptionEditing", + "StandardizedTranscriptionEditing", + ]: + if ( + "from_name" in result + and result["from_name"] == "verbatim_transcribed_json" + ): + try: + for s in result["value"]["text"]: + annotation_result1_text += s + except: + pass + else: + try: + for s in result["value"]["text"]: + annotation_result1_text += s + except: + pass for result in annotation_result2: if "type" in result and result["type"] == "textarea": - try: - for s in result["value"]["text"]: - annotation_result2_text += s - except: - pass + if 
project_type in [ + "AcousticNormalisedTranscriptionEditing", + "StandardizedTranscriptionEditing", + ]: + if ( + "from_name" in result + and result["from_name"] == "verbatim_transcribed_json" + ): + try: + for s in result["value"]["text"]: + annotation_result1_text += s + except: + pass + else: + try: + for s in result["value"]["text"]: + annotation_result1_text += s + except: + pass if len(annotation_result1_text) == 0 or len(annotation_result2_text) == 0: return 0 return wer(annotation_result1_text, annotation_result2_text) diff --git a/backend/projects/views.py b/backend/projects/views.py index 3ea5f5abd..68e48c472 100644 --- a/backend/projects/views.py +++ b/backend/projects/views.py @@ -300,7 +300,7 @@ def get_review_reports(proj_id, userid, start_date, end_date): try: total_word_error_rate_rs_list.append( calculate_word_error_rate_between_two_audio_transcription_annotation( - anno.result, anno.parent_annotation.result + anno.result, anno.parent_annotation.result, proj_type ) ) except: @@ -309,7 +309,7 @@ def get_review_reports(proj_id, userid, start_date, end_date): try: total_word_error_rate_ar_list.append( calculate_word_error_rate_between_two_audio_transcription_annotation( - anno.result, anno.parent_annotation.result + anno.result, anno.parent_annotation.result, proj_type ) ) except: @@ -603,7 +603,7 @@ def get_supercheck_reports(proj_id, userid, start_date, end_date): try: total_word_error_rate_list.append( calculate_word_error_rate_between_two_audio_transcription_annotation( - anno.result, anno.parent_annotation.result + anno.result, anno.parent_annotation.result, proj_type ) ) except: @@ -3370,7 +3370,7 @@ def get_analytics(self, request, pk=None, *args, **kwargs): try: total_word_error_rate_ar_list.append( calculate_word_error_rate_between_two_audio_transcription_annotation( - anno.result, anno.parent_annotation.result + anno.result, anno.parent_annotation.result, project_type ) ) except: diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index 9ffd8afb1..e12c5ff8d 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -95,7 +95,7 @@ def get_all_annotation_reports( if rev_ann and not ar_done: try: ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( - rev_ann.result, ann.result + rev_ann.result, ann.result, project_type ) number_of_tasks_contributed_for_ar_wer += 1 ar_done = True @@ -104,7 +104,7 @@ def get_all_annotation_reports( if sup_ann and not as_done: try: as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( - sup_ann.result, ann.result + sup_ann.result, ann.result, project_type ) number_of_tasks_contributed_for_as_wer += 1 as_done = True @@ -260,7 +260,7 @@ def get_all_review_reports( if sup_ann and not rs_done: try: rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( - sup_ann.result, ann.result + sup_ann.result, ann.result, project_type ) number_of_tasks_contributed_for_rs_wer += 1 rs_done = True @@ -318,11 +318,11 @@ def get_all_review_reports( "Word Count": total_word_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, - "Average Word Error Rate Annotator Vs Superchecker": rs_wer_score + "Average Word Error Rate Reviewer Vs Superchecker": rs_wer_score / number_of_tasks_contributed_for_rs_wer if number_of_tasks_contributed_for_rs_wer else 0, - "Cumulative Word Error Rate Annotator Vs Superchecker": rs_wer_score + "Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score if 
number_of_tasks_contributed_for_rs_wer else 0, } @@ -844,6 +844,7 @@ def send_project_analysis_reports_mail_ws( calculate_word_error_rate_between_two_audio_transcription_annotation( review_annotation.result, review_annotation.parent_annotation.result, + project_type, ) ) except: @@ -878,6 +879,7 @@ def send_project_analysis_reports_mail_ws( calculate_word_error_rate_between_two_audio_transcription_annotation( supercheck_annotation.result, supercheck_annotation.parent_annotation.result, + project_type, ) ) except: @@ -1190,7 +1192,7 @@ def get_supercheck_reports(proj_ids, userid, start_date, end_date, project_type= try: total_word_error_rate_rs_list.append( calculate_word_error_rate_between_two_audio_transcription_annotation( - anno.result, anno.parent_annotation.result + anno.result, anno.parent_annotation.result, project_type ) ) total_raw_audio_duration_list.append( @@ -1202,7 +1204,7 @@ def get_supercheck_reports(proj_ids, userid, start_date, end_date, project_type= try: total_word_error_rate_rs_list.append( calculate_word_error_rate_between_two_audio_transcription_annotation( - anno.result, anno.parent_annotation.result + anno.result, anno.parent_annotation.result, project_type ) ) except: @@ -1469,7 +1471,7 @@ def get_review_reports( ) total_word_error_rate_ar_list.append( calculate_word_error_rate_between_two_audio_transcription_annotation( - anno.result, anno.parent_annotation.result + anno.result, anno.parent_annotation.result, project_type ) ) except: @@ -1478,7 +1480,7 @@ def get_review_reports( try: total_word_error_rate_rs_list.append( calculate_word_error_rate_between_two_audio_transcription_annotation( - anno.result, anno.parent_annotation.result + anno.result, anno.parent_annotation.result, project_type ) ) except: diff --git a/backend/workspaces/views.py b/backend/workspaces/views.py index ccc3fd269..e5cd136d9 100644 --- a/backend/workspaces/views.py +++ b/backend/workspaces/views.py @@ -648,6 +648,7 @@ def project_analytics(self, request, pk=None): calculate_word_error_rate_between_two_audio_transcription_annotation( review_annotation.result, review_annotation.parent_annotation.result, + project_type, ) ) except: @@ -682,6 +683,7 @@ def project_analytics(self, request, pk=None): calculate_word_error_rate_between_two_audio_transcription_annotation( supercheck_annotation.result, supercheck_annotation.parent_annotation.result, + project_type, ) ) except: From f6734d094338a8a3afd25090125b31c4e0997edf Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Thu, 11 Jul 2024 07:03:49 +0000 Subject: [PATCH 07/12] fix wer --- backend/projects/utils.py | 38 +++++++++---------------------------- backend/workspaces/tasks.py | 22 ++++++++++++--------- 2 files changed, 22 insertions(+), 38 deletions(-) diff --git a/backend/projects/utils.py b/backend/projects/utils.py index ab1342162..4987ed878 100644 --- a/backend/projects/utils.py +++ b/backend/projects/utils.py @@ -228,20 +228,10 @@ def calculate_word_error_rate_between_two_audio_transcription_annotation( for result in annotation_result1: if "type" in result and result["type"] == "textarea": - if project_type in [ - "AcousticNormalisedTranscriptionEditing", - "StandardizedTranscriptionEditing", - ]: - if ( - "from_name" in result - and result["from_name"] == "verbatim_transcribed_json" - ): - try: - for s in result["value"]["text"]: - annotation_result1_text += s - except: - pass - else: + if ( + "from_name" in result + and result["from_name"] != "acoustic_normalised_transcribed_json" + ): try: for s in result["value"]["text"]: 
annotation_result1_text += s @@ -250,23 +240,13 @@ def calculate_word_error_rate_between_two_audio_transcription_annotation( for result in annotation_result2: if "type" in result and result["type"] == "textarea": - if project_type in [ - "AcousticNormalisedTranscriptionEditing", - "StandardizedTranscriptionEditing", - ]: - if ( - "from_name" in result - and result["from_name"] == "verbatim_transcribed_json" - ): - try: - for s in result["value"]["text"]: - annotation_result1_text += s - except: - pass - else: + if ( + "from_name" in result + and result["from_name"] != "acoustic_normalised_transcribed_json" + ): try: for s in result["value"]["text"]: - annotation_result1_text += s + annotation_result2_text += s except: pass if len(annotation_result1_text) == 0 or len(annotation_result2_text) == 0: diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index e12c5ff8d..11ad34828 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -78,9 +78,9 @@ def get_all_annotation_reports( ar_wer_score, as_wer_score = 0, 0 for ann in submitted_tasks: all_annotations = Annotation.objects.filter(task_id=ann.task_id) - ar_done, as_done = False, False # for duplicate annotations + ar_done, as_done = False, False + ann_ann, rev_ann, sup_ann = "", "", "" for a in all_annotations: - rev_ann, sup_ann = "", "" if a.annotation_type == REVIEWER_ANNOTATION and a.annotation_status in [ ACCEPTED, ACCEPTED_WITH_MINOR_CHANGES, @@ -92,19 +92,21 @@ def get_all_annotation_reports( and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES] ): sup_ann = a - if rev_ann and not ar_done: + elif a.annotation_type == ANNOTATOR_ANNOTATION: + ann_ann = a + if ann_ann and rev_ann and not ar_done: try: ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( - rev_ann.result, ann.result, project_type + rev_ann.result, ann_ann.result, project_type ) number_of_tasks_contributed_for_ar_wer += 1 ar_done = True except Exception as e: pass - if sup_ann and not as_done: + if ann_ann and sup_ann and not as_done: try: as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( - sup_ann.result, ann.result, project_type + sup_ann.result, ann_ann.result, project_type ) number_of_tasks_contributed_for_as_wer += 1 as_done = True @@ -250,17 +252,19 @@ def get_all_review_reports( for ann in submitted_tasks: all_annotations = Annotation.objects.filter(task_id=ann.task_id) rs_done = False # for duplicate annotations + sup_ann, rev_ann = "", "" for a in all_annotations: - sup_ann = "" if ( a.annotation_type == SUPER_CHECKER_ANNOTATION and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES] ): sup_ann = a - if sup_ann and not rs_done: + elif a.annotation_type == REVIEWER_ANNOTATION: + rev_ann = a + if rev_ann and sup_ann and not rs_done: try: rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( - sup_ann.result, ann.result, project_type + sup_ann.result, rev_ann.result, project_type ) number_of_tasks_contributed_for_rs_wer += 1 rs_done = True From b82a915e6d53d9f61785029c9c1178f6022ddf42 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Fri, 12 Jul 2024 05:17:44 +0000 Subject: [PATCH 08/12] added org level wer scores --- backend/organizations/tasks.py | 93 +++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index a364f876d..1a300178c 100644 --- a/backend/organizations/tasks.py +++ 
b/backend/organizations/tasks.py @@ -12,6 +12,11 @@ ANNOTATOR_ANNOTATION, REVIEWER_ANNOTATION, SUPER_CHECKER_ANNOTATION, + ACCEPTED, + ACCEPTED_WITH_MINOR_CHANGES, + ACCEPTED_WITH_MAJOR_CHANGES, + VALIDATED, + VALIDATED_WITH_CHANGES, ) from .models import Organization from users.models import User @@ -23,6 +28,7 @@ get_audio_transcription_duration, get_audio_segments_count, ocr_word_count, + calculate_word_error_rate_between_two_audio_transcription_annotation, ) from workspaces.tasks import ( un_pack_annotation_tasks, @@ -68,6 +74,47 @@ def get_all_annotation_reports( completed_by=userid, updated_at__range=[start_date, end_date], ) + number_of_tasks_contributed_for_ar_wer, number_of_tasks_contributed_for_as_wer = ( + 0, + 0, + ) + ar_wer_score, as_wer_score = 0, 0 + for ann in submitted_tasks: + all_annotations = Annotation.objects.filter(task_id=ann.task_id) + ar_done, as_done = False, False + ann_ann, rev_ann, sup_ann = "", "", "" + for a in all_annotations: + if a.annotation_type == REVIEWER_ANNOTATION and a.annotation_status in [ + ACCEPTED, + ACCEPTED_WITH_MINOR_CHANGES, + ACCEPTED_WITH_MAJOR_CHANGES, + ]: + rev_ann = a + elif ( + a.annotation_type == SUPER_CHECKER_ANNOTATION + and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES] + ): + sup_ann = a + elif a.annotation_type == ANNOTATOR_ANNOTATION: + ann_ann = a + if ann_ann and rev_ann and not ar_done: + try: + ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( + rev_ann.result, ann_ann.result, project_type + ) + number_of_tasks_contributed_for_ar_wer += 1 + ar_done = True + except Exception as e: + pass + if ann_ann and sup_ann and not as_done: + try: + as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( + sup_ann.result, ann_ann.result, project_type + ) + number_of_tasks_contributed_for_as_wer += 1 + as_done = True + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() @@ -122,6 +169,20 @@ def get_all_annotation_reports( "Word Count": total_word_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, + "Average Word Error Rate Annotator Vs Reviewer": ar_wer_score + / number_of_tasks_contributed_for_ar_wer + if number_of_tasks_contributed_for_ar_wer + else 0, + "Cumulative Word Error Rate Annotator Vs Reviewer": ar_wer_score + if number_of_tasks_contributed_for_ar_wer + else 0, + "Average Word Error Rate Annotator Vs Superchecker": as_wer_score + / number_of_tasks_contributed_for_as_wer + if number_of_tasks_contributed_for_as_wer + else 0, + "Cumulative Word Error Rate Annotator Vs Superchecker": as_wer_score + if number_of_tasks_contributed_for_as_wer + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": @@ -189,7 +250,29 @@ def get_all_review_reports( annotation_type=REVIEWER_ANNOTATION, updated_at__range=[start_date, end_date], ) - + number_of_tasks_contributed_for_rs_wer = 0 + rs_wer_score = 0 + for ann in submitted_tasks: + all_annotations = Annotation.objects.filter(task_id=ann.task_id) + rs_done = False # for duplicate annotations + sup_ann, rev_ann = "", "" + for a in all_annotations: + if ( + a.annotation_type == SUPER_CHECKER_ANNOTATION + and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES] + ): + sup_ann = a + elif a.annotation_type == REVIEWER_ANNOTATION: + rev_ann = a + if rev_ann and sup_ann and not rs_done: + try: + rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( + sup_ann.result, rev_ann.result, project_type + ) 
+ number_of_tasks_contributed_for_rs_wer += 1 + rs_done = True + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() project_type_lower = project_type.lower() @@ -243,6 +326,13 @@ def get_all_review_reports( "Word Count": total_word_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, + "Average Word Error Rate Reviewer Vs Superchecker": rs_wer_score + / number_of_tasks_contributed_for_rs_wer + if number_of_tasks_contributed_for_rs_wer + else 0, + "Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score + if number_of_tasks_contributed_for_rs_wer + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": @@ -512,6 +602,7 @@ def send_user_reports_mail_org( final_reports = sorted(final_reports, key=lambda x: x["Name"], reverse=False) df = pd.DataFrame.from_dict(final_reports) + df = df.fillna("NA") content = df.to_csv(index=False) content_type = "text/csv" From 8aa7b03c114068e1b5882a3e2b214dd02bf1b9a9 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Fri, 12 Jul 2024 12:34:05 +0000 Subject: [PATCH 09/12] added rej score and bleu score --- backend/tasks/views.py | 71 ++++++++++++++--- backend/workspaces/tasks.py | 149 +++++++++++++++++++++++++++++++++++- 2 files changed, 207 insertions(+), 13 deletions(-) diff --git a/backend/tasks/views.py b/backend/tasks/views.py index 50de59c54..43ebf9339 100644 --- a/backend/tasks/views.py +++ b/backend/tasks/views.py @@ -2517,11 +2517,18 @@ def calculate_bleu_score(self, request): sentence1 = request["sentence1"] sentence2 = request["sentence2"] except: - return Response( - {"message": "Invalid parameters in request body!"}, - status=status.HTTP_400_BAD_REQUEST, - ) - + try: + annotation_result1 = request.data.get["annotation_result1"] + annotation_result2 = request.data.get["annotation_result2"] + except: + try: + annotation_result1 = request["annotation_result1"] + annotation_result2 = request["annotation_result2"] + except: + return Response( + {"message": "Invalid parameters in request body!"}, + status=status.HTTP_400_BAD_REQUEST, + ) try: sentence1 = [sentence1] sentence2 = [[sentence2]] @@ -2535,10 +2542,56 @@ def calculate_bleu_score(self, request): status=status.HTTP_200_OK, ) except: - return Response( - {"message": "Invalid parameters in request body!"}, - status=status.HTTP_400_BAD_REQUEST, - ) + try: + if "end" in annotation_result1[0]["value"]: + annotation_result1 = sorted( + annotation_result1, key=lambda i: (i["value"]["end"]) + ) + annotation_result2 = sorted( + annotation_result2, key=lambda i: (i["value"]["end"]) + ) + + annotation_result1_text = "" + annotation_result2_text = "" + + for result in annotation_result1: + if "type" in result and result["type"] == "textarea": + if ( + "from_name" in result + and result["from_name"] + != "acoustic_normalised_transcribed_json" + ): + try: + for s in result["value"]["text"]: + annotation_result1_text += s + except: + pass + + for result in annotation_result2: + if "type" in result and result["type"] == "textarea": + if ( + "from_name" in result + and result["from_name"] + != "acoustic_normalised_transcribed_json" + ): + try: + for s in result["value"]["text"]: + annotation_result2_text += s + except: + pass + bleu = sacrebleu.corpus_bleu( + [annotation_result1_text], [[annotation_result2_text]] + ) + bleu_score = bleu.score + return Response( + {"bleu_score": str(bleu_score)}, + status=status.HTTP_200_OK, + ) + except: + return Response( + {"message": "Invalid parameters in request body!"}, + 
status=status.HTTP_400_BAD_REQUEST, + ) @swagger_auto_schema( diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index 11ad34828..1568121ee 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -31,6 +31,7 @@ get_audio_segments_count, ocr_word_count, ) +from tasks.views import SentenceOperationViewSet def get_all_annotation_reports( @@ -71,13 +72,33 @@ def get_all_annotation_reports( completed_by=userid, updated_at__range=[start_date, end_date], ) - number_of_tasks_contributed_for_ar_wer, number_of_tasks_contributed_for_as_wer = ( + ( + number_of_tasks_contributed_for_ar_wer, + number_of_tasks_contributed_for_as_wer, + number_of_tasks_contributed_for_ar_bleu, + number_of_tasks_contributed_for_as_bleu, + ) = ( + 0, + 0, 0, 0, ) - ar_wer_score, as_wer_score = 0, 0 + ar_wer_score, as_wer_score, ar_bleu_score, as_bleu_score = 0, 0, 0, 0 + tasks_and_rejection_count_map_ar, number_of_tasks_that_has_review_annotations = ( + {}, + 0, + ) for ann in submitted_tasks: all_annotations = Annotation.objects.filter(task_id=ann.task_id) + try: + task = ann.task + revision_loop_count = task.revision_loop_count + r_count = revision_loop_count["review_count"] + tasks_and_rejection_count_map_ar[r_count] = ( + tasks_and_rejection_count_map_ar.get(r_count, 0) + 1 + ) + except Exception as e: + pass ar_done, as_done = False, False ann_ann, rev_ann, sup_ann = "", "", "" for a in all_annotations: @@ -94,6 +115,8 @@ def get_all_annotation_reports( sup_ann = a elif a.annotation_type == ANNOTATOR_ANNOTATION: ann_ann = a + if a.annotation_type == REVIEWER_ANNOTATION: + number_of_tasks_that_has_review_annotations += 1 if ann_ann and rev_ann and not ar_done: try: ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( @@ -103,6 +126,18 @@ def get_all_annotation_reports( ar_done = True except Exception as e: pass + try: + s1 = SentenceOperationViewSet() + sampleRequest = { + "annotation_result1": rev_ann.result, + "annotation_result2": ann_ann.result, + } + ar_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ + "bleu_score" + ] + number_of_tasks_contributed_for_ar_bleu += 1 + except Exception as e: + pass if ann_ann and sup_ann and not as_done: try: as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( @@ -112,6 +147,18 @@ def get_all_annotation_reports( as_done = True except Exception as e: pass + try: + s1 = SentenceOperationViewSet() + sampleRequest = { + "annotation_result1": sup_ann.result, + "annotation_result2": ann_ann.result, + } + as_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ + "bleu_score" + ] + number_of_tasks_contributed_for_as_bleu += 1 + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() @@ -154,6 +201,10 @@ def get_all_annotation_reports( total_raw_audio_duration = convert_seconds_to_hours( sum(total_raw_audio_duration_list) ) + cumulative_rejection_score_ar = 0 + if tasks_and_rejection_count_map_ar: + for task, rc in tasks_and_rejection_count_map_ar.items(): + cumulative_rejection_score_ar += task * rc result = { "Name": userName, @@ -180,6 +231,18 @@ def get_all_annotation_reports( "Cumulative Word Error Rate Annotator Vs Superchecker": as_wer_score if number_of_tasks_contributed_for_as_wer else 0, + "Average Bleu Score Annotator Vs Reviewer": ar_bleu_score + / number_of_tasks_contributed_for_ar_bleu + if number_of_tasks_contributed_for_ar_bleu + else 0, + "Average Bleu Score Annotator Vs Superchecker": as_bleu_score + / 
number_of_tasks_contributed_for_as_bleu + if number_of_tasks_contributed_for_as_bleu + else 0, + "Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar + / number_of_tasks_that_has_review_annotations + if number_of_tasks_that_has_review_annotations + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": @@ -247,10 +310,34 @@ def get_all_review_reports( annotation_type=REVIEWER_ANNOTATION, updated_at__range=[start_date, end_date], ) - number_of_tasks_contributed_for_rs_wer = 0 - rs_wer_score = 0 + number_of_tasks_contributed_for_rs_wer, number_of_tasks_contributed_for_rs_bleu = ( + 0, + 0, + ) + rs_wer_score, rs_bleu_score = 0, 0 + ( + tasks_and_rejection_count_map_ar, + tasks_and_rejection_count_map_rs, + number_of_tasks_that_has_sup_annotations, + ) = ({}, {}, 0) for ann in submitted_tasks: all_annotations = Annotation.objects.filter(task_id=ann.task_id) + task = ann.task + revision_loop_count = task.revision_loop_count + try: + r_count = revision_loop_count["review_count"] + tasks_and_rejection_count_map_ar[r_count] = ( + tasks_and_rejection_count_map_ar.get(r_count, 0) + 1 + ) + except Exception as e: + pass + try: + s_count = revision_loop_count["super_check_count"] + tasks_and_rejection_count_map_rs[s_count] = ( + tasks_and_rejection_count_map_rs.get(s_count, 0) + 1 + ) + except Exception as e: + pass rs_done = False # for duplicate annotations sup_ann, rev_ann = "", "" for a in all_annotations: @@ -261,6 +348,8 @@ def get_all_review_reports( sup_ann = a elif a.annotation_type == REVIEWER_ANNOTATION: rev_ann = a + if a.annotation_type == SUPER_CHECKER_ANNOTATION: + number_of_tasks_that_has_sup_annotations += 1 if rev_ann and sup_ann and not rs_done: try: rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( @@ -270,6 +359,18 @@ def get_all_review_reports( rs_done = True except Exception as e: pass + try: + s1 = SentenceOperationViewSet() + sampleRequest = { + "annotation_result1": sup_ann.result, + "annotation_result2": rev_ann.result, + } + rs_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ + "bleu_score" + ] + number_of_tasks_contributed_for_rs_bleu += 1 + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() project_type_lower = project_type.lower() @@ -310,6 +411,15 @@ def get_all_review_reports( total_raw_audio_duration = convert_seconds_to_hours( sum(total_raw_audio_duration_list) ) + cumulative_rejection_score_ar = 0 + if tasks_and_rejection_count_map_ar: + for task, rc in tasks_and_rejection_count_map_ar.items(): + cumulative_rejection_score_ar += task * rc + + cumulative_rejection_score_rs = 0 + if tasks_and_rejection_count_map_rs: + for task, rc in tasks_and_rejection_count_map_rs.items(): + cumulative_rejection_score_rs += task * rc result = { "Name": userName, @@ -329,6 +439,18 @@ def get_all_review_reports( "Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score if number_of_tasks_contributed_for_rs_wer else 0, + "Average Bleu Score Reviewer Vs Superchecker": rs_bleu_score + / number_of_tasks_contributed_for_rs_bleu + if number_of_tasks_contributed_for_rs_bleu + else 0, + "Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar + / submitted_tasks_count + if submitted_tasks_count + else 0, + "Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs + / number_of_tasks_that_has_sup_annotations + if number_of_tasks_that_has_sup_annotations + else 0, } if project_type in 
get_audio_project_types() or project_type == "AllAudioProjects": @@ -381,6 +503,17 @@ def get_all_supercheck_reports( annotation_type=SUPER_CHECKER_ANNOTATION, updated_at__range=[start_date, end_date], ) + tasks_and_rejection_count_map_rs = {} + for ann in submitted_tasks: + task = ann.task + revision_loop_count = task.revision_loop_count + try: + s_count = revision_loop_count["super_check_count"] + tasks_and_rejection_count_map_rs[s_count] = ( + tasks_and_rejection_count_map_rs.get(s_count, 0) + 1 + ) + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() @@ -427,6 +560,10 @@ def get_all_supercheck_reports( validated_raw_audio_duration = convert_seconds_to_hours( sum(validated_raw_audio_duration_list) ) + cumulative_rejection_score_rs = 0 + if tasks_and_rejection_count_map_rs: + for task, rc in tasks_and_rejection_count_map_rs.items(): + cumulative_rejection_score_rs += task * rc result = { "Name": userName, @@ -439,6 +576,10 @@ def get_all_supercheck_reports( "Word Count": validated_word_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, + "Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs + / submitted_tasks_count + if submitted_tasks_count + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": From 5db327eae38930c12cd0ed44a61205ed4d729b27 Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Mon, 15 Jul 2024 06:12:05 +0000 Subject: [PATCH 10/12] added changes to org level --- backend/organizations/tasks.py | 150 +++++++++++++++++++++++++++++++-- backend/workspaces/tasks.py | 2 +- 2 files changed, 145 insertions(+), 7 deletions(-) diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index 1a300178c..85f329735 100644 --- a/backend/organizations/tasks.py +++ b/backend/organizations/tasks.py @@ -74,13 +74,33 @@ def get_all_annotation_reports( completed_by=userid, updated_at__range=[start_date, end_date], ) - number_of_tasks_contributed_for_ar_wer, number_of_tasks_contributed_for_as_wer = ( + ( + number_of_tasks_contributed_for_ar_wer, + number_of_tasks_contributed_for_as_wer, + number_of_tasks_contributed_for_ar_bleu, + number_of_tasks_contributed_for_as_bleu, + ) = ( + 0, + 0, 0, 0, ) - ar_wer_score, as_wer_score = 0, 0 + ar_wer_score, as_wer_score, ar_bleu_score, as_bleu_score = 0, 0, 0, 0 + tasks_and_rejection_count_map_ar, number_of_tasks_that_has_review_annotations = ( + {}, + 0, + ) for ann in submitted_tasks: all_annotations = Annotation.objects.filter(task_id=ann.task_id) + try: + task = ann.task + revision_loop_count = task.revision_loop_count + r_count = revision_loop_count["review_count"] + tasks_and_rejection_count_map_ar[r_count] = ( + tasks_and_rejection_count_map_ar.get(r_count, 0) + 1 + ) + except Exception as e: + pass ar_done, as_done = False, False ann_ann, rev_ann, sup_ann = "", "", "" for a in all_annotations: @@ -97,6 +117,8 @@ def get_all_annotation_reports( sup_ann = a elif a.annotation_type == ANNOTATOR_ANNOTATION: ann_ann = a + if a.annotation_type == REVIEWER_ANNOTATION: + number_of_tasks_that_has_review_annotations += 1 if ann_ann and rev_ann and not ar_done: try: ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( @@ -106,6 +128,18 @@ def get_all_annotation_reports( ar_done = True except Exception as e: pass + try: + s1 = SentenceOperationViewSet() + sampleRequest = { + "annotation_result1": rev_ann.result, + "annotation_result2": ann_ann.result, + } + ar_bleu_score += 
s1.calculate_bleu_score(sampleRequest).data[ + "bleu_score" + ] + number_of_tasks_contributed_for_ar_bleu += 1 + except Exception as e: + pass if ann_ann and sup_ann and not as_done: try: as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( @@ -115,6 +149,18 @@ def get_all_annotation_reports( as_done = True except Exception as e: pass + try: + s1 = SentenceOperationViewSet() + sampleRequest = { + "annotation_result1": sup_ann.result, + "annotation_result2": ann_ann.result, + } + as_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ + "bleu_score" + ] + number_of_tasks_contributed_for_as_bleu += 1 + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() @@ -157,7 +203,10 @@ def get_all_annotation_reports( total_raw_audio_duration = convert_seconds_to_hours( sum(total_raw_audio_duration_list) ) - + cumulative_rejection_score_ar = 0 + if tasks_and_rejection_count_map_ar: + for task, rc in tasks_and_rejection_count_map_ar.items(): + cumulative_rejection_score_ar += task * rc result = { "Name": userName, "Email": email, @@ -183,6 +232,18 @@ def get_all_annotation_reports( "Cumulative Word Error Rate Annotator Vs Superchecker": as_wer_score if number_of_tasks_contributed_for_as_wer else 0, + "Average Bleu Score Annotator Vs Reviewer": ar_bleu_score + / number_of_tasks_contributed_for_ar_bleu + if number_of_tasks_contributed_for_ar_bleu + else 0, + "Average Bleu Score Annotator Vs Superchecker": as_bleu_score + / number_of_tasks_contributed_for_as_bleu + if number_of_tasks_contributed_for_as_bleu + else 0, + "Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar + / number_of_tasks_that_has_review_annotations + if number_of_tasks_that_has_review_annotations + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": @@ -250,10 +311,34 @@ def get_all_review_reports( annotation_type=REVIEWER_ANNOTATION, updated_at__range=[start_date, end_date], ) - number_of_tasks_contributed_for_rs_wer = 0 - rs_wer_score = 0 + number_of_tasks_contributed_for_rs_wer, number_of_tasks_contributed_for_rs_bleu = ( + 0, + 0, + ) + rs_wer_score, rs_bleu_score = 0, 0 + ( + tasks_and_rejection_count_map_ar, + tasks_and_rejection_count_map_rs, + number_of_tasks_that_has_sup_annotations, + ) = ({}, {}, 0) for ann in submitted_tasks: all_annotations = Annotation.objects.filter(task_id=ann.task_id) + task = ann.task + revision_loop_count = task.revision_loop_count + try: + r_count = revision_loop_count["review_count"] + tasks_and_rejection_count_map_ar[r_count] = ( + tasks_and_rejection_count_map_ar.get(r_count, 0) + 1 + ) + except Exception as e: + pass + try: + s_count = revision_loop_count["super_check_count"] + tasks_and_rejection_count_map_rs[s_count] = ( + tasks_and_rejection_count_map_rs.get(s_count, 0) + 1 + ) + except Exception as e: + pass rs_done = False # for duplicate annotations sup_ann, rev_ann = "", "" for a in all_annotations: @@ -264,6 +349,8 @@ def get_all_review_reports( sup_ann = a elif a.annotation_type == REVIEWER_ANNOTATION: rev_ann = a + if a.annotation_type == SUPER_CHECKER_ANNOTATION: + number_of_tasks_that_has_sup_annotations += 1 if rev_ann and sup_ann and not rs_done: try: rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation( @@ -273,6 +360,18 @@ def get_all_review_reports( rs_done = True except Exception as e: pass + try: + s1 = SentenceOperationViewSet() + sampleRequest = { + "annotation_result1": sup_ann.result, + "annotation_result2": 
rev_ann.result, + } + rs_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ + "bleu_score" + ] + number_of_tasks_contributed_for_rs_bleu += 1 + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() project_type_lower = project_type.lower() @@ -314,6 +413,15 @@ def get_all_review_reports( total_raw_audio_duration = convert_seconds_to_hours( sum(total_raw_audio_duration_list) ) + cumulative_rejection_score_ar = 0 + if tasks_and_rejection_count_map_ar: + for task, rc in tasks_and_rejection_count_map_ar.items(): + cumulative_rejection_score_ar += task * rc + + cumulative_rejection_score_rs = 0 + if tasks_and_rejection_count_map_rs: + for task, rc in tasks_and_rejection_count_map_rs.items(): + cumulative_rejection_score_rs += task * rc result = { "Name": userName, @@ -333,6 +441,18 @@ def get_all_review_reports( "Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score if number_of_tasks_contributed_for_rs_wer else 0, + "Average Bleu Score Reviewer Vs Superchecker": rs_bleu_score + / number_of_tasks_contributed_for_rs_bleu + if number_of_tasks_contributed_for_rs_bleu + else 0, + "Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar + / submitted_tasks_count + if submitted_tasks_count + else 0, + "Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs + / number_of_tasks_that_has_sup_annotations + if number_of_tasks_that_has_sup_annotations + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": @@ -385,7 +505,17 @@ def get_all_supercheck_reports( annotation_type=SUPER_CHECKER_ANNOTATION, updated_at__range=[start_date, end_date], ) - + tasks_and_rejection_count_map_rs = {} + for ann in submitted_tasks: + task = ann.task + revision_loop_count = task.revision_loop_count + try: + s_count = revision_loop_count["super_check_count"] + tasks_and_rejection_count_map_rs[s_count] = ( + tasks_and_rejection_count_map_rs.get(s_count, 0) + 1 + ) + except Exception as e: + pass submitted_tasks_count = submitted_tasks.count() project_type_lower = project_type.lower() @@ -431,6 +561,10 @@ def get_all_supercheck_reports( validated_raw_audio_duration = convert_seconds_to_hours( sum(validated_raw_audio_duration_list) ) + cumulative_rejection_score_rs = 0 + if tasks_and_rejection_count_map_rs: + for task, rc in tasks_and_rejection_count_map_rs.items(): + cumulative_rejection_score_rs += task * rc result = { "Name": userName, @@ -443,6 +577,10 @@ def get_all_supercheck_reports( "Word Count": validated_word_count, "Submitted Tasks": submitted_tasks_count, "Language": user_lang, + "Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs + / submitted_tasks_count + if submitted_tasks_count + else 0, } if project_type in get_audio_project_types() or project_type == "AllAudioProjects": diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index 1568121ee..27e0b7c15 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -439,7 +439,7 @@ def get_all_review_reports( "Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score if number_of_tasks_contributed_for_rs_wer else 0, - "Average Bleu Score Reviewer Vs Superchecker": rs_bleu_score + "Average Bleu Score Reviewer Vs Superchecker": rs_bleu_score / number_of_tasks_contributed_for_rs_bleu if number_of_tasks_contributed_for_rs_bleu else 0, From 644d2d7b8b33eafd210f5ff0040c6741dd4deefa Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Tue, 16 Jul 2024 12:24:17 +0000 Subject: 
[PATCH 11/12] added minor fixes for bleu score --- backend/workspaces/tasks.py | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/backend/workspaces/tasks.py b/backend/workspaces/tasks.py index 27e0b7c15..4720d6a6d 100644 --- a/backend/workspaces/tasks.py +++ b/backend/workspaces/tasks.py @@ -76,14 +76,12 @@ def get_all_annotation_reports( number_of_tasks_contributed_for_ar_wer, number_of_tasks_contributed_for_as_wer, number_of_tasks_contributed_for_ar_bleu, - number_of_tasks_contributed_for_as_bleu, ) = ( 0, 0, 0, - 0, ) - ar_wer_score, as_wer_score, ar_bleu_score, as_bleu_score = 0, 0, 0, 0 + ar_wer_score, as_wer_score, ar_bleu_score = 0, 0, 0 tasks_and_rejection_count_map_ar, number_of_tasks_that_has_review_annotations = ( {}, 0, @@ -132,9 +130,9 @@ def get_all_annotation_reports( "annotation_result1": rev_ann.result, "annotation_result2": ann_ann.result, } - ar_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ - "bleu_score" - ] + ar_bleu_score += float( + s1.calculate_bleu_score(sampleRequest).data["bleu_score"] + ) number_of_tasks_contributed_for_ar_bleu += 1 except Exception as e: pass @@ -147,18 +145,6 @@ def get_all_annotation_reports( as_done = True except Exception as e: pass - try: - s1 = SentenceOperationViewSet() - sampleRequest = { - "annotation_result1": sup_ann.result, - "annotation_result2": ann_ann.result, - } - as_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ - "bleu_score" - ] - number_of_tasks_contributed_for_as_bleu += 1 - except Exception as e: - pass submitted_tasks_count = submitted_tasks.count() @@ -235,10 +221,6 @@ def get_all_annotation_reports( / number_of_tasks_contributed_for_ar_bleu if number_of_tasks_contributed_for_ar_bleu else 0, - "Average Bleu Score Annotator Vs Superchecker": as_bleu_score - / number_of_tasks_contributed_for_as_bleu - if number_of_tasks_contributed_for_as_bleu - else 0, "Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar / number_of_tasks_that_has_review_annotations if number_of_tasks_that_has_review_annotations @@ -365,9 +347,9 @@ def get_all_review_reports( "annotation_result1": sup_ann.result, "annotation_result2": rev_ann.result, } - rs_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ - "bleu_score" - ] + rs_bleu_score += float( + s1.calculate_bleu_score(sampleRequest).data["bleu_score"] + ) number_of_tasks_contributed_for_rs_bleu += 1 except Exception as e: pass From d2b11bd7c5aa752a83737ad56f3b2ad73777fd0c Mon Sep 17 00:00:00 2001 From: Kunal Tiwary Date: Tue, 16 Jul 2024 12:30:05 +0000 Subject: [PATCH 12/12] fix for org reports --- backend/organizations/tasks.py | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/backend/organizations/tasks.py b/backend/organizations/tasks.py index 85f329735..b9141e82b 100644 --- a/backend/organizations/tasks.py +++ b/backend/organizations/tasks.py @@ -78,14 +78,12 @@ def get_all_annotation_reports( number_of_tasks_contributed_for_ar_wer, number_of_tasks_contributed_for_as_wer, number_of_tasks_contributed_for_ar_bleu, - number_of_tasks_contributed_for_as_bleu, ) = ( 0, 0, 0, - 0, ) - ar_wer_score, as_wer_score, ar_bleu_score, as_bleu_score = 0, 0, 0, 0 + ar_wer_score, as_wer_score, ar_bleu_score = 0, 0, 0 tasks_and_rejection_count_map_ar, number_of_tasks_that_has_review_annotations = ( {}, 0, @@ -134,9 +132,9 @@ def get_all_annotation_reports( "annotation_result1": rev_ann.result, "annotation_result2": ann_ann.result, } - ar_bleu_score += 
s1.calculate_bleu_score(sampleRequest).data[ - "bleu_score" - ] + ar_bleu_score += float( + s1.calculate_bleu_score(sampleRequest).data["bleu_score"] + ) number_of_tasks_contributed_for_ar_bleu += 1 except Exception as e: pass @@ -149,18 +147,6 @@ def get_all_annotation_reports( as_done = True except Exception as e: pass - try: - s1 = SentenceOperationViewSet() - sampleRequest = { - "annotation_result1": sup_ann.result, - "annotation_result2": ann_ann.result, - } - as_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ - "bleu_score" - ] - number_of_tasks_contributed_for_as_bleu += 1 - except Exception as e: - pass submitted_tasks_count = submitted_tasks.count() @@ -236,10 +222,6 @@ def get_all_annotation_reports( / number_of_tasks_contributed_for_ar_bleu if number_of_tasks_contributed_for_ar_bleu else 0, - "Average Bleu Score Annotator Vs Superchecker": as_bleu_score - / number_of_tasks_contributed_for_as_bleu - if number_of_tasks_contributed_for_as_bleu - else 0, "Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar / number_of_tasks_that_has_review_annotations if number_of_tasks_that_has_review_annotations @@ -366,9 +348,9 @@ def get_all_review_reports( "annotation_result1": sup_ann.result, "annotation_result2": rev_ann.result, } - rs_bleu_score += s1.calculate_bleu_score(sampleRequest).data[ - "bleu_score" - ] + rs_bleu_score += float( + s1.calculate_bleu_score(sampleRequest).data["bleu_score"] + ) number_of_tasks_contributed_for_rs_bleu += 1 except Exception as e: pass
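
Taken together, PATCH 03 through PATCH 07 converge on a single extraction rule for the WER metrics: collect every "textarea" result whose from_name is not the acoustic-normalised transcript, sort time-aligned segments by their "end" offset, concatenate the text, and hand the two strings to wer(). The sketch below is a minimal standalone restatement of that rule, not the shipped helper: it assumes wer() comes from the jiwer package (the helper calls a bare wer(), so the import is an inference), the result dicts are illustrative Label Studio-style segments, and the project_type argument is omitted because after PATCH 07 it no longer affects the extraction.

    from jiwer import wer

    def transcript_text(annotation_result):
        # Sort time-aligned segments by their end offset, as the helper
        # does when the first result carries an "end" value.
        if annotation_result and "end" in annotation_result[0]["value"]:
            annotation_result = sorted(
                annotation_result, key=lambda i: i["value"]["end"]
            )
        text = ""
        for result in annotation_result:
            # Keep every textarea except the acoustic-normalised transcript;
            # results without a from_name are skipped, as in PATCH 07.
            if (
                result.get("type") == "textarea"
                and "from_name" in result
                and result["from_name"] != "acoustic_normalised_transcribed_json"
            ):
                for s in result["value"].get("text", []):
                    text += s
        return text

    def wer_between(result1, result2):
        t1, t2 = transcript_text(result1), transcript_text(result2)
        if len(t1) == 0 or len(t2) == 0:
            return 0  # the helper returns 0 when either transcript is empty
        return wer(t1, t2)  # reference first, hypothesis second

    # Illustrative reviewer-vs-annotator pair: one of two words substituted.
    rev = [{"type": "textarea", "from_name": "verbatim_transcribed_json",
            "value": {"end": 4.2, "text": ["hello word"]}}]
    ann = [{"type": "textarea", "from_name": "verbatim_transcribed_json",
            "value": {"end": 4.2, "text": ["hello world"]}}]
    print(wer_between(rev, ann))  # 0.5

The report code accumulates this score per task pair (annotator vs. reviewer, annotator vs. superchecker, reviewer vs. superchecker) and divides by the number of contributing tasks, so the "Average" columns are plain per-task means and the "Cumulative" columns are the raw sums.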
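PATCH 09 also introduces "Average Rejection Count" columns derived from task.revision_loop_count: each report pass histograms the review (or super-check) loop counts into tasks_and_rejection_count_map and then takes a frequency-weighted sum divided by a task count. A minimal sketch of that aggregation, with an illustrative list standing in for the queryset values:

    from collections import Counter

    def average_rejection_count(loop_counts):
        # tasks_and_rejection_count_map in the patches: {loop_count: n_tasks}
        histogram = Counter(loop_counts)
        # cumulative_rejection_score: frequency-weighted sum over the map
        cumulative = sum(count * n for count, n in histogram.items())
        return cumulative / len(loop_counts) if loop_counts else 0

    # Four submitted tasks whose review_count values were 0, 1, 1 and 3:
    print(average_rejection_count([0, 1, 1, 3]))  # 1.25

When the denominator used in the report matches the number of tasks that were histogrammed, this weighted sum is just the arithmetic mean of the revision-loop counts. Separately, PATCH 11 and PATCH 12 wrap the calculate_bleu_score response in float(...) — the endpoint serialises the score as str(bleu_score) in PATCH 09, so the earlier += on the raw response value raised inside the try block and the BLEU columns silently stayed at zero — and they drop the annotator-vs-superchecker BLEU column from both the workspace and organization reports.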