@@ -14,26 +14,26 @@ def __init__(self):
1414 self .model_name = "airesearch/wangchanberta-base-att-spm-uncased"
1515 self .target_tokenizer = CamembertTokenizer
1616 self .tokenizer = CamembertTokenizer .from_pretrained (
17- self .model_name ,
18- revision = 'main' )
17+ self .model_name , revision = "main"
18+ )
1919 self .tokenizer .additional_special_tokens = [
20- ' <s>NOTUSED' ,
21- ' </s>NOTUSED' ,
22- ' <_>'
20+ " <s>NOTUSED" ,
21+ " </s>NOTUSED" ,
22+ " <_>" ,
2323 ]
2424 self .fill_mask = pipeline (
25- task = ' fill-mask' ,
25+ task = " fill-mask" ,
2626 tokenizer = self .tokenizer ,
27- model = f' { self .model_name } ' ,
28- revision = ' main'
27+ model = f" { self .model_name } " ,
28+ revision = " main" ,
2929 )
3030 self .MASK_TOKEN = self .tokenizer .mask_token
3131
3232 def generate (self , sentence : str , num_replace_tokens : int = 3 ):
3333 self .sent2 = []
3434 self .input_text = sentence
3535 sent = [
36- i for i in self .tokenizer .tokenize (self .input_text ) if i != '▁'
36+ i for i in self .tokenizer .tokenize (self .input_text ) if i != "▁"
3737 ]
3838 if len (sent ) < num_replace_tokens :
3939 num_replace_tokens = len (sent )
@@ -42,18 +42,16 @@ def generate(self, sentence: str, num_replace_tokens: int = 3):
4242 replace_token = [
4343 sent .pop (random .randrange (len (sent ))) for _ in range (1 )
4444 ][0 ]
45- masked_text = masked_text + self .MASK_TOKEN
45+ masked_text = masked_text + self .MASK_TOKEN
4646 self .sent2 += [
47- str (j [' sequence' ]).replace (' <s> ' , '' ).replace (' </s>' , '' )
47+ str (j [" sequence" ]).replace (" <s> " , "" ).replace (" </s>" , "" )
4848 for j in self .fill_mask (masked_text )
49- if j [' sequence' ] not in self .sent2
49+ if j [" sequence" ] not in self .sent2
5050 ]
5151 masked_text = self .input_text
5252 return self .sent2
5353
54- def augment (
55- self , sentence : str , num_replace_tokens : int = 3
56- ) -> List [str ]:
54+ def augment (self , sentence : str , num_replace_tokens : int = 3 ) -> List [str ]:
5755 """
5856 Text Augment from wangchanberta
5957
0 commit comments