Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update dev to latest master #1099

Merged
merged 23 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
72ed6a1
Merge pull request #1072 from AI4Bharat/dev
aparna-aa May 6, 2024
77459a9
Allow org owners to manipulate user details
ishvindersethi22 Jul 8, 2024
062dcee
Update views.py
ishvindersethi22 Jul 8, 2024
64df2db
added wer scores
KunalTiwary Jul 8, 2024
33526af
Merge pull request #1091 from AI4Bharat/admin-changes
aparna-aa Jul 8, 2024
9589f3e
minor fix
KunalTiwary Jul 8, 2024
dfc3b6f
Merge branch 'master' into wer_fix
KunalTiwary Jul 8, 2024
cb187b7
Merge pull request #1093 from AI4Bharat/wer_fix
ishvindersethi22 Jul 8, 2024
5345aed
skipped the blank data tasks
KunalTiwary Jul 10, 2024
c1d27fd
Merge branch 'master' into download_fix
KunalTiwary Jul 10, 2024
627d2fe
Merge pull request #1097 from AI4Bharat/download_fix
ishvindersethi22 Jul 10, 2024
efac1e1
fix_calculate_word_error_rate_between_two_audio_transcription_annotation
KunalTiwary Jul 10, 2024
51c46a1
Merge pull request #1098 from AI4Bharat/minor_fix_wer_audio
ishvindersethi22 Jul 10, 2024
f6734d0
fix wer
KunalTiwary Jul 11, 2024
4764095
Merge pull request #1101 from AI4Bharat/wer_fix_2
ishvindersethi22 Jul 11, 2024
b82a915
added org level wer scores
KunalTiwary Jul 12, 2024
4c228fd
Merge pull request #1102 from AI4Bharat/wer_fix_org
ishvindersethi22 Jul 12, 2024
8aa7b03
added rej score and bleu score
KunalTiwary Jul 12, 2024
5db327e
added changes to org level
KunalTiwary Jul 15, 2024
644d2d7
added minor fixes for bleu score
KunalTiwary Jul 16, 2024
d2b11bd
fix for org reports
KunalTiwary Jul 16, 2024
97af43a
Merge pull request #1103 from AI4Bharat/rej_fix
ishvindersethi22 Jul 24, 2024
ec10755
Merge branch 'dev' into master
KunalTiwary Jul 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions backend/dataset/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1038,6 +1038,7 @@ def project_analytics(self, request, pk=None):
calculate_word_error_rate_between_two_audio_transcription_annotation(
review_annotation.result,
review_annotation.parent_annotation.result,
project_type,
)
)
except:
Expand Down Expand Up @@ -1067,6 +1068,7 @@ def project_analytics(self, request, pk=None):
calculate_word_error_rate_between_two_audio_transcription_annotation(
supercheck_annotation.result,
supercheck_annotation.parent_annotation.result,
project_type,
)
)
except:
Expand Down
8 changes: 6 additions & 2 deletions backend/functions/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1372,6 +1372,7 @@ def get_stats_helper(
get_most_recent_annotation(
ann_obj.parent_annotation
).result,
project_type,
)
)
except Exception as error:
Expand Down Expand Up @@ -1426,6 +1427,7 @@ def get_stats_helper(
get_most_recent_annotation(
ann_obj.parent_annotation.parent_annotation
).result,
project_type,
)
)
except Exception as error:
Expand All @@ -1437,6 +1439,7 @@ def get_stats_helper(
get_most_recent_annotation(
ann_obj.parent_annotation
).result,
project_type,
)
)
except Exception as error:
Expand All @@ -1448,6 +1451,7 @@ def get_stats_helper(
get_most_recent_annotation(
ann_obj.parent_annotation.parent_annotation
).result,
project_type,
)
)
except Exception as error:
Expand Down Expand Up @@ -1519,10 +1523,10 @@ def calculate_ced_between_two_annotations(annotation1, annotation2):
return ced_list


def calculate_wer_between_two_annotations(annotation1, annotation2, project_type):
    """Return the word error rate (WER) between two annotation results.

    Thin, non-raising wrapper around
    ``calculate_word_error_rate_between_two_audio_transcription_annotation``:
    any failure (e.g. malformed annotation payloads) yields 0 so that
    report generation is never aborted by a single bad task.

    Args:
        annotation1: result payload of the first annotation (e.g. reviewer).
        annotation2: result payload of the second annotation (e.g. annotator).
        project_type: project type string forwarded to the WER helper,
            which uses it to decide how to extract transcriptions.

    Returns:
        The computed WER, or 0 if the computation fails for any reason.
    """
    try:
        return calculate_word_error_rate_between_two_audio_transcription_annotation(
            annotation1, annotation2, project_type
        )
    except Exception:
        # Best-effort metric: swallow errors and report 0 rather than
        # failing the caller's whole report. (Dropped the unused `as e`
        # binding from the original.)
        return 0
Expand Down
217 changes: 214 additions & 3 deletions backend/organizations/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
ANNOTATOR_ANNOTATION,
REVIEWER_ANNOTATION,
SUPER_CHECKER_ANNOTATION,
ACCEPTED,
ACCEPTED_WITH_MINOR_CHANGES,
ACCEPTED_WITH_MAJOR_CHANGES,
VALIDATED,
VALIDATED_WITH_CHANGES,
)
from .models import Organization
from users.models import User
Expand All @@ -24,6 +29,7 @@
get_audio_transcription_duration,
get_audio_segments_count,
ocr_word_count,
calculate_word_error_rate_between_two_audio_transcription_annotation,
)
from workspaces.tasks import (
un_pack_annotation_tasks,
Expand Down Expand Up @@ -69,6 +75,79 @@ def get_all_annotation_reports(
completed_by=userid,
updated_at__range=[start_date, end_date],
)
(
number_of_tasks_contributed_for_ar_wer,
number_of_tasks_contributed_for_as_wer,
number_of_tasks_contributed_for_ar_bleu,
) = (
0,
0,
0,
)
ar_wer_score, as_wer_score, ar_bleu_score = 0, 0, 0
tasks_and_rejection_count_map_ar, number_of_tasks_that_has_review_annotations = (
{},
0,
)
for ann in submitted_tasks:
all_annotations = Annotation.objects.filter(task_id=ann.task_id)
try:
task = ann.task
revision_loop_count = task.revision_loop_count
r_count = revision_loop_count["review_count"]
tasks_and_rejection_count_map_ar[r_count] = (
tasks_and_rejection_count_map_ar.get(r_count, 0) + 1
)
except Exception as e:
pass
ar_done, as_done = False, False
ann_ann, rev_ann, sup_ann = "", "", ""
for a in all_annotations:
if a.annotation_type == REVIEWER_ANNOTATION and a.annotation_status in [
ACCEPTED,
ACCEPTED_WITH_MINOR_CHANGES,
ACCEPTED_WITH_MAJOR_CHANGES,
]:
rev_ann = a
elif (
a.annotation_type == SUPER_CHECKER_ANNOTATION
and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES]
):
sup_ann = a
elif a.annotation_type == ANNOTATOR_ANNOTATION:
ann_ann = a
if a.annotation_type == REVIEWER_ANNOTATION:
number_of_tasks_that_has_review_annotations += 1
if ann_ann and rev_ann and not ar_done:
try:
ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
rev_ann.result, ann_ann.result, project_type
)
number_of_tasks_contributed_for_ar_wer += 1
ar_done = True
except Exception as e:
pass
try:
s1 = SentenceOperationViewSet()
sampleRequest = {
"annotation_result1": rev_ann.result,
"annotation_result2": ann_ann.result,
}
ar_bleu_score += float(
s1.calculate_bleu_score(sampleRequest).data["bleu_score"]
)
number_of_tasks_contributed_for_ar_bleu += 1
except Exception as e:
pass
if ann_ann and sup_ann and not as_done:
try:
as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
sup_ann.result, ann_ann.result, project_type
)
number_of_tasks_contributed_for_as_wer += 1
as_done = True
except Exception as e:
pass

submitted_tasks_count = submitted_tasks.count()

Expand Down Expand Up @@ -111,7 +190,10 @@ def get_all_annotation_reports(
total_raw_audio_duration = convert_seconds_to_hours(
sum(total_raw_audio_duration_list)
)

cumulative_rejection_score_ar = 0
if tasks_and_rejection_count_map_ar:
for task, rc in tasks_and_rejection_count_map_ar.items():
cumulative_rejection_score_ar += task * rc
result = {
"Name": userName,
"Email": email,
Expand All @@ -123,6 +205,28 @@ def get_all_annotation_reports(
"Word Count": total_word_count,
"Submitted Tasks": submitted_tasks_count,
"Language": user_lang,
"Average Word Error Rate Annotator Vs Reviewer": ar_wer_score
/ number_of_tasks_contributed_for_ar_wer
if number_of_tasks_contributed_for_ar_wer
else 0,
"Cumulative Word Error Rate Annotator Vs Reviewer": ar_wer_score
if number_of_tasks_contributed_for_ar_wer
else 0,
"Average Word Error Rate Annotator Vs Superchecker": as_wer_score
/ number_of_tasks_contributed_for_as_wer
if number_of_tasks_contributed_for_as_wer
else 0,
"Cumulative Word Error Rate Annotator Vs Superchecker": as_wer_score
if number_of_tasks_contributed_for_as_wer
else 0,
"Average Bleu Score Annotator Vs Reviewer": ar_bleu_score
/ number_of_tasks_contributed_for_ar_bleu
if number_of_tasks_contributed_for_ar_bleu
else 0,
"Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar
/ number_of_tasks_that_has_review_annotations
if number_of_tasks_that_has_review_annotations
else 0,
}

if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
Expand Down Expand Up @@ -190,7 +294,67 @@ def get_all_review_reports(
annotation_type=REVIEWER_ANNOTATION,
updated_at__range=[start_date, end_date],
)

number_of_tasks_contributed_for_rs_wer, number_of_tasks_contributed_for_rs_bleu = (
0,
0,
)
rs_wer_score, rs_bleu_score = 0, 0
(
tasks_and_rejection_count_map_ar,
tasks_and_rejection_count_map_rs,
number_of_tasks_that_has_sup_annotations,
) = ({}, {}, 0)
for ann in submitted_tasks:
all_annotations = Annotation.objects.filter(task_id=ann.task_id)
task = ann.task
revision_loop_count = task.revision_loop_count
try:
r_count = revision_loop_count["review_count"]
tasks_and_rejection_count_map_ar[r_count] = (
tasks_and_rejection_count_map_ar.get(r_count, 0) + 1
)
except Exception as e:
pass
try:
s_count = revision_loop_count["super_check_count"]
tasks_and_rejection_count_map_rs[s_count] = (
tasks_and_rejection_count_map_rs.get(s_count, 0) + 1
)
except Exception as e:
pass
rs_done = False # for duplicate annotations
sup_ann, rev_ann = "", ""
for a in all_annotations:
if (
a.annotation_type == SUPER_CHECKER_ANNOTATION
and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES]
):
sup_ann = a
elif a.annotation_type == REVIEWER_ANNOTATION:
rev_ann = a
if a.annotation_type == SUPER_CHECKER_ANNOTATION:
number_of_tasks_that_has_sup_annotations += 1
if rev_ann and sup_ann and not rs_done:
try:
rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
sup_ann.result, rev_ann.result, project_type
)
number_of_tasks_contributed_for_rs_wer += 1
rs_done = True
except Exception as e:
pass
try:
s1 = SentenceOperationViewSet()
sampleRequest = {
"annotation_result1": sup_ann.result,
"annotation_result2": rev_ann.result,
}
rs_bleu_score += float(
s1.calculate_bleu_score(sampleRequest).data["bleu_score"]
)
number_of_tasks_contributed_for_rs_bleu += 1
except Exception as e:
pass
submitted_tasks_count = submitted_tasks.count()

project_type_lower = project_type.lower()
Expand Down Expand Up @@ -232,6 +396,15 @@ def get_all_review_reports(
total_raw_audio_duration = convert_seconds_to_hours(
sum(total_raw_audio_duration_list)
)
cumulative_rejection_score_ar = 0
if tasks_and_rejection_count_map_ar:
for task, rc in tasks_and_rejection_count_map_ar.items():
cumulative_rejection_score_ar += task * rc

cumulative_rejection_score_rs = 0
if tasks_and_rejection_count_map_rs:
for task, rc in tasks_and_rejection_count_map_rs.items():
cumulative_rejection_score_rs += task * rc

result = {
"Name": userName,
Expand All @@ -244,6 +417,25 @@ def get_all_review_reports(
"Word Count": total_word_count,
"Submitted Tasks": submitted_tasks_count,
"Language": user_lang,
"Average Word Error Rate Reviewer Vs Superchecker": rs_wer_score
/ number_of_tasks_contributed_for_rs_wer
if number_of_tasks_contributed_for_rs_wer
else 0,
"Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score
if number_of_tasks_contributed_for_rs_wer
else 0,
"Average Bleu Score Reviewer Vs Superchecker": rs_bleu_score
/ number_of_tasks_contributed_for_rs_bleu
if number_of_tasks_contributed_for_rs_bleu
else 0,
"Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar
/ submitted_tasks_count
if submitted_tasks_count
else 0,
"Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs
/ number_of_tasks_that_has_sup_annotations
if number_of_tasks_that_has_sup_annotations
else 0,
}

if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
Expand Down Expand Up @@ -296,7 +488,17 @@ def get_all_supercheck_reports(
annotation_type=SUPER_CHECKER_ANNOTATION,
updated_at__range=[start_date, end_date],
)

tasks_and_rejection_count_map_rs = {}
for ann in submitted_tasks:
task = ann.task
revision_loop_count = task.revision_loop_count
try:
s_count = revision_loop_count["super_check_count"]
tasks_and_rejection_count_map_rs[s_count] = (
tasks_and_rejection_count_map_rs.get(s_count, 0) + 1
)
except Exception as e:
pass
submitted_tasks_count = submitted_tasks.count()

project_type_lower = project_type.lower()
Expand Down Expand Up @@ -342,6 +544,10 @@ def get_all_supercheck_reports(
validated_raw_audio_duration = convert_seconds_to_hours(
sum(validated_raw_audio_duration_list)
)
cumulative_rejection_score_rs = 0
if tasks_and_rejection_count_map_rs:
for task, rc in tasks_and_rejection_count_map_rs.items():
cumulative_rejection_score_rs += task * rc

result = {
"Name": userName,
Expand All @@ -354,6 +560,10 @@ def get_all_supercheck_reports(
"Word Count": validated_word_count,
"Submitted Tasks": submitted_tasks_count,
"Language": user_lang,
"Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs
/ submitted_tasks_count
if submitted_tasks_count
else 0,
}

if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
Expand Down Expand Up @@ -513,6 +723,7 @@ def send_user_reports_mail_org(
final_reports = sorted(final_reports, key=lambda x: x["Name"], reverse=False)

df = pd.DataFrame.from_dict(final_reports)
df = df.fillna("NA")

content = df.to_csv(index=False)
content_type = "text/csv"
Expand Down
Loading
Loading