Update tokenization_indictrans.py (#2)
Browse files
- Update tokenization_indictrans.py (37b9abc369441c1e2f09b49a7818fcc8761c2c68)
Co-authored-by: Varun Gumma <VarunGumma@users.noreply.huggingface.co>
tokenization_indictrans.py
CHANGED
|
@@ -87,10 +87,10 @@ class IndicTransTokenizer(PreTrainedTokenizer):
|
|
| 87 |
self.src_spm_fp = src_spm_fp
|
| 88 |
self.tgt_spm_fp = tgt_spm_fp
|
| 89 |
|
| 90 |
-
self.unk_token = unk_token
|
| 91 |
-
self.pad_token = pad_token
|
| 92 |
-
self.eos_token = eos_token
|
| 93 |
-
self.bos_token = bos_token
|
| 94 |
|
| 95 |
self.encoder = self._load_json(self.src_vocab_fp)
|
| 96 |
if self.unk_token not in self.encoder:
|
|
|
|
| 87 |
self.src_spm_fp = src_spm_fp
|
| 88 |
self.tgt_spm_fp = tgt_spm_fp
|
| 89 |
|
| 90 |
+
self.unk_token = unk_token.content
|
| 91 |
+
self.pad_token = pad_token.content
|
| 92 |
+
self.eos_token = eos_token.content
|
| 93 |
+
self.bos_token = bos_token.content
|
| 94 |
|
| 95 |
self.encoder = self._load_json(self.src_vocab_fp)
|
| 96 |
if self.unk_token not in self.encoder:
|