
Commit f8aff87

Merge pull request #42 from xiangking/develop
Bug fixes
2 parents 165d35c + f146fe1 commit f8aff87

File tree

3 files changed: +12 −7 lines changed


ark_nlp/dataset/global_pointer_named_entity_recognition_dataset.py

Lines changed: 1 addition & 1 deletion
@@ -66,7 +66,7 @@ def _convert_to_transfomer_ids(self, bert_tokenizer):
                     continue
                 global_label[self.cat2id[info_['type']], start_idx+1, end_idx+1] = 1

-            global_label = torch.tensor(global_label).to_sparse()
+            global_label = global_label.to_sparse()

             features.append({
                 'input_ids': input_ids,
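This change converts global_label to a sparse tensor directly rather than passing it back through torch.tensor() first. A minimal sketch of the difference, assuming global_label is built with torch.zeros(...) as a dense (num_categories, seq_len, seq_len) label tensor; the shapes below are illustrative, not taken from this diff:

import torch

# Illustrative shapes only; the real label is (num_categories, seq_len, seq_len).
num_categories, seq_len = 4, 16
global_label = torch.zeros((num_categories, seq_len, seq_len))
global_label[0, 3, 5] = 1

# Old code: re-wrapping an existing tensor makes an extra copy and, on recent
# PyTorch versions, emits a "copy construct from a tensor" UserWarning.
sparse_old = torch.tensor(global_label).to_sparse()

# New code: convert the existing tensor directly, no redundant wrap.
sparse_new = global_label.to_sparse()

print(sparse_new.indices())  # indices of the single labeled span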

ark_nlp/model/ner/crf_bert/__init__.py

Lines changed: 4 additions & 4 deletions
@@ -13,8 +13,8 @@
 from ark_nlp.factory.optimizer import get_default_crf_bert_optimizer as get_default_model_optimizer
 from ark_nlp.factory.optimizer import get_default_crf_bert_optimizer as get_default_crf_bert_optimizer

-from ark_nlp.factory.task import BIONERTask as Task
-from ark_nlp.factory.task import BIONERTask as CrfBertNERTask
+from ark_nlp.factory.task import CRFNERTask as Task
+from ark_nlp.factory.task import CRFNERTask as CrfBertNERTask

-from ark_nlp.factory.predictor import BIONERPredictor as Predictor
-from ark_nlp.factory.predictor import BIONERPredictor as CrfBertNERPredictor
+from ark_nlp.factory.predictor import CRFNERPredictor as Predictor
+from ark_nlp.factory.predictor import CRFNERPredictor as CrfBertNERPredictor
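This file only re-points the package-level aliases, so the public import path stays the same while the classes behind it switch from the BIO variants to the CRF variants. A small sanity sketch, assuming the package is installed at this commit (it uses nothing beyond the imports shown in the diff):

from ark_nlp.model.ner.crf_bert import Task, Predictor, CrfBertNERTask, CrfBertNERPredictor
from ark_nlp.factory.task import CRFNERTask
from ark_nlp.factory.predictor import CRFNERPredictor

# After this commit the crf_bert aliases resolve to the CRF-specific
# task and predictor instead of the plain BIO ones.
assert Task is CRFNERTask and CrfBertNERTask is CRFNERTask
assert Predictor is CRFNERPredictor and CrfBertNERPredictor is CRFNERPredictor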

ark_nlp/processor/tokenizer/transfomer.py

Lines changed: 7 additions & 2 deletions
@@ -229,8 +229,13 @@ class TokenTokenizer(TransfomerTokenizer):

     def tokenize(self, text, **kwargs):
         tokens = []
-        text = ' '.join([token_ for token_ in text])
-        tokens = self.vocab.tokenize(text)
+        for token_ in text:
+            tokenized_token_ = self.vocab.tokenize(token_)
+            if tokenized_token_ == []:
+                tokens.extend([token_])
+            else:
+                tokens.extend(tokenized_token_)
+
         return tokens

     def sequence_to_ids(self, sequence, **kwargs):
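The new tokenize loop walks the text character by character and keeps the raw character whenever self.vocab.tokenize() returns an empty list, so the number of output tokens stays aligned with the number of input characters. A standalone sketch of the same fallback logic, with a stub in place of self.vocab (the real vocab is assumed to be a transformers tokenizer whose tokenize() can return [] for characters it cannot map):

class StubVocab:
    # Stand-in for self.vocab; drops a character it cannot map, as some
    # wordpiece vocabularies do for unusual whitespace or symbols.
    def tokenize(self, text):
        return [] if text == '\u2003' else [text.lower()]

def tokenize_per_char(text, vocab):
    tokens = []
    for token_ in text:
        tokenized_token_ = vocab.tokenize(token_)
        if tokenized_token_ == []:
            # Fallback: keep the raw character so token count == character count.
            tokens.extend([token_])
        else:
            tokens.extend(tokenized_token_)
    return tokens

print(tokenize_per_char('Ab\u2003C', StubVocab()))  # ['a', 'b', '\u2003', 'c']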
