Skip to content

Commit

Permalink
Merge pull request #1099 from AI4Bharat/master
Browse files Browse the repository at this point in the history
Update dev to latest master
  • Loading branch information
aparna-aa authored Jul 26, 2024
2 parents 5d51fcb + ec10755 commit c6ed2fe
Show file tree
Hide file tree
Showing 9 changed files with 569 additions and 67 deletions.
2 changes: 2 additions & 0 deletions backend/dataset/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1038,6 +1038,7 @@ def project_analytics(self, request, pk=None):
calculate_word_error_rate_between_two_audio_transcription_annotation(
review_annotation.result,
review_annotation.parent_annotation.result,
project_type,
)
)
except:
Expand Down Expand Up @@ -1067,6 +1068,7 @@ def project_analytics(self, request, pk=None):
calculate_word_error_rate_between_two_audio_transcription_annotation(
supercheck_annotation.result,
supercheck_annotation.parent_annotation.result,
project_type,
)
)
except:
Expand Down
8 changes: 6 additions & 2 deletions backend/functions/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1372,6 +1372,7 @@ def get_stats_helper(
get_most_recent_annotation(
ann_obj.parent_annotation
).result,
project_type,
)
)
except Exception as error:
Expand Down Expand Up @@ -1426,6 +1427,7 @@ def get_stats_helper(
get_most_recent_annotation(
ann_obj.parent_annotation.parent_annotation
).result,
project_type,
)
)
except Exception as error:
Expand All @@ -1437,6 +1439,7 @@ def get_stats_helper(
get_most_recent_annotation(
ann_obj.parent_annotation
).result,
project_type,
)
)
except Exception as error:
Expand All @@ -1448,6 +1451,7 @@ def get_stats_helper(
get_most_recent_annotation(
ann_obj.parent_annotation.parent_annotation
).result,
project_type,
)
)
except Exception as error:
Expand Down Expand Up @@ -1519,10 +1523,10 @@ def calculate_ced_between_two_annotations(annotation1, annotation2):
return ced_list


def calculate_wer_between_two_annotations(annotation1, annotation2):
def calculate_wer_between_two_annotations(annotation1, annotation2, project_type):
    """Return the word error rate (WER) between two audio-transcription annotations.

    Thin best-effort wrapper around
    ``calculate_word_error_rate_between_two_audio_transcription_annotation``.

    Args:
        annotation1: Result payload of the first annotation (e.g. reviewer's).
        annotation2: Result payload of the second annotation (e.g. annotator's).
        project_type: Project type string, forwarded so the WER helper can
            interpret the annotation result format correctly.

    Returns:
        The computed WER, or 0 if the underlying computation fails for any
        reason (malformed results, missing fields, etc.). The broad catch is
        deliberate: callers aggregate WER across many tasks and a single bad
        annotation must not abort the whole report.
    """
    try:
        return calculate_word_error_rate_between_two_audio_transcription_annotation(
            annotation1, annotation2, project_type
        )
    # NOTE(review): intentionally broad — any failure degrades to a 0 score
    # rather than propagating. The unused ``as e`` binding was dropped.
    except Exception:
        return 0
Expand Down
217 changes: 214 additions & 3 deletions backend/organizations/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
ANNOTATOR_ANNOTATION,
REVIEWER_ANNOTATION,
SUPER_CHECKER_ANNOTATION,
ACCEPTED,
ACCEPTED_WITH_MINOR_CHANGES,
ACCEPTED_WITH_MAJOR_CHANGES,
VALIDATED,
VALIDATED_WITH_CHANGES,
)
from .models import Organization
from users.models import User
Expand All @@ -24,6 +29,7 @@
get_audio_transcription_duration,
get_audio_segments_count,
ocr_word_count,
calculate_word_error_rate_between_two_audio_transcription_annotation,
)
from workspaces.tasks import (
un_pack_annotation_tasks,
Expand Down Expand Up @@ -69,6 +75,79 @@ def get_all_annotation_reports(
completed_by=userid,
updated_at__range=[start_date, end_date],
)
(
number_of_tasks_contributed_for_ar_wer,
number_of_tasks_contributed_for_as_wer,
number_of_tasks_contributed_for_ar_bleu,
) = (
0,
0,
0,
)
ar_wer_score, as_wer_score, ar_bleu_score = 0, 0, 0
tasks_and_rejection_count_map_ar, number_of_tasks_that_has_review_annotations = (
{},
0,
)
for ann in submitted_tasks:
all_annotations = Annotation.objects.filter(task_id=ann.task_id)
try:
task = ann.task
revision_loop_count = task.revision_loop_count
r_count = revision_loop_count["review_count"]
tasks_and_rejection_count_map_ar[r_count] = (
tasks_and_rejection_count_map_ar.get(r_count, 0) + 1
)
except Exception as e:
pass
ar_done, as_done = False, False
ann_ann, rev_ann, sup_ann = "", "", ""
for a in all_annotations:
if a.annotation_type == REVIEWER_ANNOTATION and a.annotation_status in [
ACCEPTED,
ACCEPTED_WITH_MINOR_CHANGES,
ACCEPTED_WITH_MAJOR_CHANGES,
]:
rev_ann = a
elif (
a.annotation_type == SUPER_CHECKER_ANNOTATION
and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES]
):
sup_ann = a
elif a.annotation_type == ANNOTATOR_ANNOTATION:
ann_ann = a
if a.annotation_type == REVIEWER_ANNOTATION:
number_of_tasks_that_has_review_annotations += 1
if ann_ann and rev_ann and not ar_done:
try:
ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
rev_ann.result, ann_ann.result, project_type
)
number_of_tasks_contributed_for_ar_wer += 1
ar_done = True
except Exception as e:
pass
try:
s1 = SentenceOperationViewSet()
sampleRequest = {
"annotation_result1": rev_ann.result,
"annotation_result2": ann_ann.result,
}
ar_bleu_score += float(
s1.calculate_bleu_score(sampleRequest).data["bleu_score"]
)
number_of_tasks_contributed_for_ar_bleu += 1
except Exception as e:
pass
if ann_ann and sup_ann and not as_done:
try:
as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
sup_ann.result, ann_ann.result, project_type
)
number_of_tasks_contributed_for_as_wer += 1
as_done = True
except Exception as e:
pass

submitted_tasks_count = submitted_tasks.count()

Expand Down Expand Up @@ -111,7 +190,10 @@ def get_all_annotation_reports(
total_raw_audio_duration = convert_seconds_to_hours(
sum(total_raw_audio_duration_list)
)

cumulative_rejection_score_ar = 0
if tasks_and_rejection_count_map_ar:
for task, rc in tasks_and_rejection_count_map_ar.items():
cumulative_rejection_score_ar += task * rc
result = {
"Name": userName,
"Email": email,
Expand All @@ -123,6 +205,28 @@ def get_all_annotation_reports(
"Word Count": total_word_count,
"Submitted Tasks": submitted_tasks_count,
"Language": user_lang,
"Average Word Error Rate Annotator Vs Reviewer": ar_wer_score
/ number_of_tasks_contributed_for_ar_wer
if number_of_tasks_contributed_for_ar_wer
else 0,
"Cumulative Word Error Rate Annotator Vs Reviewer": ar_wer_score
if number_of_tasks_contributed_for_ar_wer
else 0,
"Average Word Error Rate Annotator Vs Superchecker": as_wer_score
/ number_of_tasks_contributed_for_as_wer
if number_of_tasks_contributed_for_as_wer
else 0,
"Cumulative Word Error Rate Annotator Vs Superchecker": as_wer_score
if number_of_tasks_contributed_for_as_wer
else 0,
"Average Bleu Score Annotator Vs Reviewer": ar_bleu_score
/ number_of_tasks_contributed_for_ar_bleu
if number_of_tasks_contributed_for_ar_bleu
else 0,
"Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar
/ number_of_tasks_that_has_review_annotations
if number_of_tasks_that_has_review_annotations
else 0,
}

if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
Expand Down Expand Up @@ -190,7 +294,67 @@ def get_all_review_reports(
annotation_type=REVIEWER_ANNOTATION,
updated_at__range=[start_date, end_date],
)

number_of_tasks_contributed_for_rs_wer, number_of_tasks_contributed_for_rs_bleu = (
0,
0,
)
rs_wer_score, rs_bleu_score = 0, 0
(
tasks_and_rejection_count_map_ar,
tasks_and_rejection_count_map_rs,
number_of_tasks_that_has_sup_annotations,
) = ({}, {}, 0)
for ann in submitted_tasks:
all_annotations = Annotation.objects.filter(task_id=ann.task_id)
task = ann.task
revision_loop_count = task.revision_loop_count
try:
r_count = revision_loop_count["review_count"]
tasks_and_rejection_count_map_ar[r_count] = (
tasks_and_rejection_count_map_ar.get(r_count, 0) + 1
)
except Exception as e:
pass
try:
s_count = revision_loop_count["super_check_count"]
tasks_and_rejection_count_map_rs[s_count] = (
tasks_and_rejection_count_map_rs.get(s_count, 0) + 1
)
except Exception as e:
pass
rs_done = False # for duplicate annotations
sup_ann, rev_ann = "", ""
for a in all_annotations:
if (
a.annotation_type == SUPER_CHECKER_ANNOTATION
and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES]
):
sup_ann = a
elif a.annotation_type == REVIEWER_ANNOTATION:
rev_ann = a
if a.annotation_type == SUPER_CHECKER_ANNOTATION:
number_of_tasks_that_has_sup_annotations += 1
if rev_ann and sup_ann and not rs_done:
try:
rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
sup_ann.result, rev_ann.result, project_type
)
number_of_tasks_contributed_for_rs_wer += 1
rs_done = True
except Exception as e:
pass
try:
s1 = SentenceOperationViewSet()
sampleRequest = {
"annotation_result1": sup_ann.result,
"annotation_result2": rev_ann.result,
}
rs_bleu_score += float(
s1.calculate_bleu_score(sampleRequest).data["bleu_score"]
)
number_of_tasks_contributed_for_rs_bleu += 1
except Exception as e:
pass
submitted_tasks_count = submitted_tasks.count()

project_type_lower = project_type.lower()
Expand Down Expand Up @@ -232,6 +396,15 @@ def get_all_review_reports(
total_raw_audio_duration = convert_seconds_to_hours(
sum(total_raw_audio_duration_list)
)
cumulative_rejection_score_ar = 0
if tasks_and_rejection_count_map_ar:
for task, rc in tasks_and_rejection_count_map_ar.items():
cumulative_rejection_score_ar += task * rc

cumulative_rejection_score_rs = 0
if tasks_and_rejection_count_map_rs:
for task, rc in tasks_and_rejection_count_map_rs.items():
cumulative_rejection_score_rs += task * rc

result = {
"Name": userName,
Expand All @@ -244,6 +417,25 @@ def get_all_review_reports(
"Word Count": total_word_count,
"Submitted Tasks": submitted_tasks_count,
"Language": user_lang,
"Average Word Error Rate Reviewer Vs Superchecker": rs_wer_score
/ number_of_tasks_contributed_for_rs_wer
if number_of_tasks_contributed_for_rs_wer
else 0,
"Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score
if number_of_tasks_contributed_for_rs_wer
else 0,
"Average Bleu Score Reviewer Vs Superchecker": rs_bleu_score
/ number_of_tasks_contributed_for_rs_bleu
if number_of_tasks_contributed_for_rs_bleu
else 0,
"Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar
/ submitted_tasks_count
if submitted_tasks_count
else 0,
"Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs
/ number_of_tasks_that_has_sup_annotations
if number_of_tasks_that_has_sup_annotations
else 0,
}

if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
Expand Down Expand Up @@ -296,7 +488,17 @@ def get_all_supercheck_reports(
annotation_type=SUPER_CHECKER_ANNOTATION,
updated_at__range=[start_date, end_date],
)

tasks_and_rejection_count_map_rs = {}
for ann in submitted_tasks:
task = ann.task
revision_loop_count = task.revision_loop_count
try:
s_count = revision_loop_count["super_check_count"]
tasks_and_rejection_count_map_rs[s_count] = (
tasks_and_rejection_count_map_rs.get(s_count, 0) + 1
)
except Exception as e:
pass
submitted_tasks_count = submitted_tasks.count()

project_type_lower = project_type.lower()
Expand Down Expand Up @@ -342,6 +544,10 @@ def get_all_supercheck_reports(
validated_raw_audio_duration = convert_seconds_to_hours(
sum(validated_raw_audio_duration_list)
)
cumulative_rejection_score_rs = 0
if tasks_and_rejection_count_map_rs:
for task, rc in tasks_and_rejection_count_map_rs.items():
cumulative_rejection_score_rs += task * rc

result = {
"Name": userName,
Expand All @@ -354,6 +560,10 @@ def get_all_supercheck_reports(
"Word Count": validated_word_count,
"Submitted Tasks": submitted_tasks_count,
"Language": user_lang,
"Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs
/ submitted_tasks_count
if submitted_tasks_count
else 0,
}

if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
Expand Down Expand Up @@ -513,6 +723,7 @@ def send_user_reports_mail_org(
final_reports = sorted(final_reports, key=lambda x: x["Name"], reverse=False)

df = pd.DataFrame.from_dict(final_reports)
df = df.fillna("NA")

content = df.to_csv(index=False)
content_type = "text/csv"
Expand Down
Loading

0 comments on commit c6ed2fe

Please sign in to comment.