Skip to content

Commit c6ed2fe

Browse files
authored
Merge pull request #1099 from AI4Bharat/master
Update dev to latest master
2 parents: 5d51fcb + ec10755 — commit c6ed2fe

File tree

9 files changed: +569 −67 lines changed

backend/dataset/views.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1038,6 +1038,7 @@ def project_analytics(self, request, pk=None):
10381038
calculate_word_error_rate_between_two_audio_transcription_annotation(
10391039
review_annotation.result,
10401040
review_annotation.parent_annotation.result,
1041+
project_type,
10411042
)
10421043
)
10431044
except:
@@ -1067,6 +1068,7 @@ def project_analytics(self, request, pk=None):
10671068
calculate_word_error_rate_between_two_audio_transcription_annotation(
10681069
supercheck_annotation.result,
10691070
supercheck_annotation.parent_annotation.result,
1071+
project_type,
10701072
)
10711073
)
10721074
except:

backend/functions/tasks.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1372,6 +1372,7 @@ def get_stats_helper(
13721372
get_most_recent_annotation(
13731373
ann_obj.parent_annotation
13741374
).result,
1375+
project_type,
13751376
)
13761377
)
13771378
except Exception as error:
@@ -1426,6 +1427,7 @@ def get_stats_helper(
14261427
get_most_recent_annotation(
14271428
ann_obj.parent_annotation.parent_annotation
14281429
).result,
1430+
project_type,
14291431
)
14301432
)
14311433
except Exception as error:
@@ -1437,6 +1439,7 @@ def get_stats_helper(
14371439
get_most_recent_annotation(
14381440
ann_obj.parent_annotation
14391441
).result,
1442+
project_type,
14401443
)
14411444
)
14421445
except Exception as error:
@@ -1448,6 +1451,7 @@ def get_stats_helper(
14481451
get_most_recent_annotation(
14491452
ann_obj.parent_annotation.parent_annotation
14501453
).result,
1454+
project_type,
14511455
)
14521456
)
14531457
except Exception as error:
@@ -1519,10 +1523,10 @@ def calculate_ced_between_two_annotations(annotation1, annotation2):
15191523
return ced_list
15201524

15211525

1522-
def calculate_wer_between_two_annotations(annotation1, annotation2):
1526+
def calculate_wer_between_two_annotations(annotation1, annotation2, project_type):
15231527
try:
15241528
return calculate_word_error_rate_between_two_audio_transcription_annotation(
1525-
annotation1, annotation2
1529+
annotation1, annotation2, project_type
15261530
)
15271531
except Exception as e:
15281532
return 0

backend/organizations/tasks.py

Lines changed: 214 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,11 @@
1313
ANNOTATOR_ANNOTATION,
1414
REVIEWER_ANNOTATION,
1515
SUPER_CHECKER_ANNOTATION,
16+
ACCEPTED,
17+
ACCEPTED_WITH_MINOR_CHANGES,
18+
ACCEPTED_WITH_MAJOR_CHANGES,
19+
VALIDATED,
20+
VALIDATED_WITH_CHANGES,
1621
)
1722
from .models import Organization
1823
from users.models import User
@@ -24,6 +29,7 @@
2429
get_audio_transcription_duration,
2530
get_audio_segments_count,
2631
ocr_word_count,
32+
calculate_word_error_rate_between_two_audio_transcription_annotation,
2733
)
2834
from workspaces.tasks import (
2935
un_pack_annotation_tasks,
@@ -69,6 +75,79 @@ def get_all_annotation_reports(
6975
completed_by=userid,
7076
updated_at__range=[start_date, end_date],
7177
)
78+
(
79+
number_of_tasks_contributed_for_ar_wer,
80+
number_of_tasks_contributed_for_as_wer,
81+
number_of_tasks_contributed_for_ar_bleu,
82+
) = (
83+
0,
84+
0,
85+
0,
86+
)
87+
ar_wer_score, as_wer_score, ar_bleu_score = 0, 0, 0
88+
tasks_and_rejection_count_map_ar, number_of_tasks_that_has_review_annotations = (
89+
{},
90+
0,
91+
)
92+
for ann in submitted_tasks:
93+
all_annotations = Annotation.objects.filter(task_id=ann.task_id)
94+
try:
95+
task = ann.task
96+
revision_loop_count = task.revision_loop_count
97+
r_count = revision_loop_count["review_count"]
98+
tasks_and_rejection_count_map_ar[r_count] = (
99+
tasks_and_rejection_count_map_ar.get(r_count, 0) + 1
100+
)
101+
except Exception as e:
102+
pass
103+
ar_done, as_done = False, False
104+
ann_ann, rev_ann, sup_ann = "", "", ""
105+
for a in all_annotations:
106+
if a.annotation_type == REVIEWER_ANNOTATION and a.annotation_status in [
107+
ACCEPTED,
108+
ACCEPTED_WITH_MINOR_CHANGES,
109+
ACCEPTED_WITH_MAJOR_CHANGES,
110+
]:
111+
rev_ann = a
112+
elif (
113+
a.annotation_type == SUPER_CHECKER_ANNOTATION
114+
and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES]
115+
):
116+
sup_ann = a
117+
elif a.annotation_type == ANNOTATOR_ANNOTATION:
118+
ann_ann = a
119+
if a.annotation_type == REVIEWER_ANNOTATION:
120+
number_of_tasks_that_has_review_annotations += 1
121+
if ann_ann and rev_ann and not ar_done:
122+
try:
123+
ar_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
124+
rev_ann.result, ann_ann.result, project_type
125+
)
126+
number_of_tasks_contributed_for_ar_wer += 1
127+
ar_done = True
128+
except Exception as e:
129+
pass
130+
try:
131+
s1 = SentenceOperationViewSet()
132+
sampleRequest = {
133+
"annotation_result1": rev_ann.result,
134+
"annotation_result2": ann_ann.result,
135+
}
136+
ar_bleu_score += float(
137+
s1.calculate_bleu_score(sampleRequest).data["bleu_score"]
138+
)
139+
number_of_tasks_contributed_for_ar_bleu += 1
140+
except Exception as e:
141+
pass
142+
if ann_ann and sup_ann and not as_done:
143+
try:
144+
as_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
145+
sup_ann.result, ann_ann.result, project_type
146+
)
147+
number_of_tasks_contributed_for_as_wer += 1
148+
as_done = True
149+
except Exception as e:
150+
pass
72151

73152
submitted_tasks_count = submitted_tasks.count()
74153

@@ -111,7 +190,10 @@ def get_all_annotation_reports(
111190
total_raw_audio_duration = convert_seconds_to_hours(
112191
sum(total_raw_audio_duration_list)
113192
)
114-
193+
cumulative_rejection_score_ar = 0
194+
if tasks_and_rejection_count_map_ar:
195+
for task, rc in tasks_and_rejection_count_map_ar.items():
196+
cumulative_rejection_score_ar += task * rc
115197
result = {
116198
"Name": userName,
117199
"Email": email,
@@ -123,6 +205,28 @@ def get_all_annotation_reports(
123205
"Word Count": total_word_count,
124206
"Submitted Tasks": submitted_tasks_count,
125207
"Language": user_lang,
208+
"Average Word Error Rate Annotator Vs Reviewer": ar_wer_score
209+
/ number_of_tasks_contributed_for_ar_wer
210+
if number_of_tasks_contributed_for_ar_wer
211+
else 0,
212+
"Cumulative Word Error Rate Annotator Vs Reviewer": ar_wer_score
213+
if number_of_tasks_contributed_for_ar_wer
214+
else 0,
215+
"Average Word Error Rate Annotator Vs Superchecker": as_wer_score
216+
/ number_of_tasks_contributed_for_as_wer
217+
if number_of_tasks_contributed_for_as_wer
218+
else 0,
219+
"Cumulative Word Error Rate Annotator Vs Superchecker": as_wer_score
220+
if number_of_tasks_contributed_for_as_wer
221+
else 0,
222+
"Average Bleu Score Annotator Vs Reviewer": ar_bleu_score
223+
/ number_of_tasks_contributed_for_ar_bleu
224+
if number_of_tasks_contributed_for_ar_bleu
225+
else 0,
226+
"Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar
227+
/ number_of_tasks_that_has_review_annotations
228+
if number_of_tasks_that_has_review_annotations
229+
else 0,
126230
}
127231

128232
if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
@@ -190,7 +294,67 @@ def get_all_review_reports(
190294
annotation_type=REVIEWER_ANNOTATION,
191295
updated_at__range=[start_date, end_date],
192296
)
193-
297+
number_of_tasks_contributed_for_rs_wer, number_of_tasks_contributed_for_rs_bleu = (
298+
0,
299+
0,
300+
)
301+
rs_wer_score, rs_bleu_score = 0, 0
302+
(
303+
tasks_and_rejection_count_map_ar,
304+
tasks_and_rejection_count_map_rs,
305+
number_of_tasks_that_has_sup_annotations,
306+
) = ({}, {}, 0)
307+
for ann in submitted_tasks:
308+
all_annotations = Annotation.objects.filter(task_id=ann.task_id)
309+
task = ann.task
310+
revision_loop_count = task.revision_loop_count
311+
try:
312+
r_count = revision_loop_count["review_count"]
313+
tasks_and_rejection_count_map_ar[r_count] = (
314+
tasks_and_rejection_count_map_ar.get(r_count, 0) + 1
315+
)
316+
except Exception as e:
317+
pass
318+
try:
319+
s_count = revision_loop_count["super_check_count"]
320+
tasks_and_rejection_count_map_rs[s_count] = (
321+
tasks_and_rejection_count_map_rs.get(s_count, 0) + 1
322+
)
323+
except Exception as e:
324+
pass
325+
rs_done = False # for duplicate annotations
326+
sup_ann, rev_ann = "", ""
327+
for a in all_annotations:
328+
if (
329+
a.annotation_type == SUPER_CHECKER_ANNOTATION
330+
and a.annotation_status in [VALIDATED, VALIDATED_WITH_CHANGES]
331+
):
332+
sup_ann = a
333+
elif a.annotation_type == REVIEWER_ANNOTATION:
334+
rev_ann = a
335+
if a.annotation_type == SUPER_CHECKER_ANNOTATION:
336+
number_of_tasks_that_has_sup_annotations += 1
337+
if rev_ann and sup_ann and not rs_done:
338+
try:
339+
rs_wer_score += calculate_word_error_rate_between_two_audio_transcription_annotation(
340+
sup_ann.result, rev_ann.result, project_type
341+
)
342+
number_of_tasks_contributed_for_rs_wer += 1
343+
rs_done = True
344+
except Exception as e:
345+
pass
346+
try:
347+
s1 = SentenceOperationViewSet()
348+
sampleRequest = {
349+
"annotation_result1": sup_ann.result,
350+
"annotation_result2": rev_ann.result,
351+
}
352+
rs_bleu_score += float(
353+
s1.calculate_bleu_score(sampleRequest).data["bleu_score"]
354+
)
355+
number_of_tasks_contributed_for_rs_bleu += 1
356+
except Exception as e:
357+
pass
194358
submitted_tasks_count = submitted_tasks.count()
195359

196360
project_type_lower = project_type.lower()
@@ -232,6 +396,15 @@ def get_all_review_reports(
232396
total_raw_audio_duration = convert_seconds_to_hours(
233397
sum(total_raw_audio_duration_list)
234398
)
399+
cumulative_rejection_score_ar = 0
400+
if tasks_and_rejection_count_map_ar:
401+
for task, rc in tasks_and_rejection_count_map_ar.items():
402+
cumulative_rejection_score_ar += task * rc
403+
404+
cumulative_rejection_score_rs = 0
405+
if tasks_and_rejection_count_map_rs:
406+
for task, rc in tasks_and_rejection_count_map_rs.items():
407+
cumulative_rejection_score_rs += task * rc
235408

236409
result = {
237410
"Name": userName,
@@ -244,6 +417,25 @@ def get_all_review_reports(
244417
"Word Count": total_word_count,
245418
"Submitted Tasks": submitted_tasks_count,
246419
"Language": user_lang,
420+
"Average Word Error Rate Reviewer Vs Superchecker": rs_wer_score
421+
/ number_of_tasks_contributed_for_rs_wer
422+
if number_of_tasks_contributed_for_rs_wer
423+
else 0,
424+
"Cumulative Word Error Rate Reviewer Vs Superchecker": rs_wer_score
425+
if number_of_tasks_contributed_for_rs_wer
426+
else 0,
427+
"Average Bleu Score Reviewer Vs Superchecker": rs_bleu_score
428+
/ number_of_tasks_contributed_for_rs_bleu
429+
if number_of_tasks_contributed_for_rs_bleu
430+
else 0,
431+
"Average Rejection Count Annotator Vs Reviewer": cumulative_rejection_score_ar
432+
/ submitted_tasks_count
433+
if submitted_tasks_count
434+
else 0,
435+
"Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs
436+
/ number_of_tasks_that_has_sup_annotations
437+
if number_of_tasks_that_has_sup_annotations
438+
else 0,
247439
}
248440

249441
if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
@@ -296,7 +488,17 @@ def get_all_supercheck_reports(
296488
annotation_type=SUPER_CHECKER_ANNOTATION,
297489
updated_at__range=[start_date, end_date],
298490
)
299-
491+
tasks_and_rejection_count_map_rs = {}
492+
for ann in submitted_tasks:
493+
task = ann.task
494+
revision_loop_count = task.revision_loop_count
495+
try:
496+
s_count = revision_loop_count["super_check_count"]
497+
tasks_and_rejection_count_map_rs[s_count] = (
498+
tasks_and_rejection_count_map_rs.get(s_count, 0) + 1
499+
)
500+
except Exception as e:
501+
pass
300502
submitted_tasks_count = submitted_tasks.count()
301503

302504
project_type_lower = project_type.lower()
@@ -342,6 +544,10 @@ def get_all_supercheck_reports(
342544
validated_raw_audio_duration = convert_seconds_to_hours(
343545
sum(validated_raw_audio_duration_list)
344546
)
547+
cumulative_rejection_score_rs = 0
548+
if tasks_and_rejection_count_map_rs:
549+
for task, rc in tasks_and_rejection_count_map_rs.items():
550+
cumulative_rejection_score_rs += task * rc
345551

346552
result = {
347553
"Name": userName,
@@ -354,6 +560,10 @@ def get_all_supercheck_reports(
354560
"Word Count": validated_word_count,
355561
"Submitted Tasks": submitted_tasks_count,
356562
"Language": user_lang,
563+
"Average Rejection Count Reviewer Vs Superchecker": cumulative_rejection_score_rs
564+
/ submitted_tasks_count
565+
if submitted_tasks_count
566+
else 0,
357567
}
358568

359569
if project_type in get_audio_project_types() or project_type == "AllAudioProjects":
@@ -513,6 +723,7 @@ def send_user_reports_mail_org(
513723
final_reports = sorted(final_reports, key=lambda x: x["Name"], reverse=False)
514724

515725
df = pd.DataFrame.from_dict(final_reports)
726+
df = df.fillna("NA")
516727

517728
content = df.to_csv(index=False)
518729
content_type = "text/csv"

0 commit comments

Comments
 (0)