@@ -34,9 +34,7 @@ def _get_label(tag: str) -> str:
3434 return tag .split ("-" )[1 ]
3535
3636
37- def _handle_unit_tag (
38- tag : str , tokens : List [List [int ]], cur_idx : int , tok_num : int
39- ) -> str :
37+ def _handle_unit_tag (tag : str , tokens : List [List [int ]], cur_idx : int , tok_num : int ) -> str :
4038 """Process a Unit tag
4139
4240 If a Unit tagged token is broken into multiple sub-tokens, we want the first
@@ -54,9 +52,7 @@ def _handle_unit_tag(
5452 return clean_tag
5553
5654
57- def _handle_begin_tag (
58- tag : str , tokens : List [List [int ]], cur_idx : int , tok_num : int
59- ) -> str :
55+ def _handle_begin_tag (tag : str , tokens : List [List [int ]], cur_idx : int , tok_num : int ) -> str :
6056 """Process a Begin tag
6157
6258 For Begin tagged tokens that are broken into sub-tokens, we know that there will be
@@ -69,9 +65,7 @@ def _handle_begin_tag(
6965 return clean_tag
7066
7167
72- def _handle_last_tag (
73- tag : str , tokens : List [List [int ]], cur_idx : int , tok_num : int
74- ) -> str :
68+ def _handle_last_tag (tag : str , tokens : List [List [int ]], cur_idx : int , tok_num : int ) -> str :
7569 """Process a Last tag
7670
7771 For Last tagged tokens that are broken into sub-tokens, we know that there will be
@@ -84,9 +78,7 @@ def _handle_last_tag(
8478 return clean_tag
8579
8680
87- def map_spacy_to_hf_tags (
88- hf_to_spacy : List [List [int ]], spacy_tags : List [str ]
89- ) -> List [str ]:
81+ def map_spacy_to_hf_tags (hf_to_spacy : List [List [int ]], spacy_tags : List [str ]) -> List [str ]:
9082 """Maps the spacy_tags to the required huggingface tags
9183
9284 Leverages the hf_to_spacy map, showing how each huggingface token maps
@@ -155,9 +147,7 @@ def dict_to_dataset(hf_data: Dict[str, List[str]]) -> Dataset:
155147 class_label = Sequence (feature = ClassLabel (num_classes = len (labels ), names = labels ))
156148 # First need to string index the ner_tags
157149 label_to_idx = dict (zip (labels , range (len (labels ))))
158- ds = ds .map (
159- lambda row : {"ner_tags" : [label_to_idx [tag ] for tag in row ["ner_tags" ]]}
160- )
150+ ds = ds .map (lambda row : {"ner_tags" : [label_to_idx [tag ] for tag in row ["ner_tags" ]]})
161151 # Then we can create the ClassLabel
162152 ds = ds .cast_column ("ner_tags" , class_label )
163153 return ds
0 commit comments