@@ -29,27 +29,6 @@ def deduplicate_annotation_dicts(
2929D = TypeVar ("D" , bound = Document )
3030
3131
32- def deduplicate_annotations (document : D ) -> D :
33- """Remove duplicate annotations from a document.
34-
35- Args:
36- document: The document to remove duplicate annotations from.
37-
38- Returns:
39- The document with duplicate annotations removed.
40- """
41- annotation_field_names = [field .name for field in document .annotation_fields ()]
42- doc_dict = document .asdict ()
43- for annotation_field_name in annotation_field_names :
44- doc_dict [annotation_field_name ]["annotations" ] = deduplicate_annotation_dicts (
45- doc_dict [annotation_field_name ]["annotations" ]
46- )
47- doc_dict [annotation_field_name ]["predictions" ] = deduplicate_annotation_dicts (
48- doc_dict [annotation_field_name ]["predictions" ]
49- )
50- return type (document ).fromdict (doc_dict )
51-
52-
5332def save_annotation_sources_to_metadata (
5433 document : D ,
5534 annotation_id2source : Dict [int , List [str ]],
@@ -124,8 +103,6 @@ def merge_annotations_from_documents(
124103 f"Document IDs do not match: { document .id } and { merged_document .id } "
125104 )
126105
127- # TODO: add_all_annotations_from_other needs to be fixed! it should return a mapping from
128- # original annotation *IDs* to new annotations!
129106 # Note: this does not check for duplicates!
130107 added_annotations = merged_document .add_all_annotations_from_other (
131108 other = document , strict = True
@@ -135,7 +112,8 @@ def merge_annotations_from_documents(
135112 for orig_id , new_annotation in orig_id2new_annotation .items ():
136113 added_annotation_id2source_names [new_annotation ._id ].append (source_name )
137114
138- merged_document = deduplicate_annotations (merged_document )
115+ # Remove duplicates. If duplicates have different scores, the one with the highest score is kept.
116+ merged_document = merged_document .deduplicate_annotations ()
139117
140118 # save source names in metadata (at key metadata_key_source_annotations / metadata_key_source_predictions
141119 # for each layer in the order of the annotations / predictions)
0 commit comments