Skip to content

Commit

Permalink
Fix compatibility with higher versions of the transformers library
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed Jan 8, 2025
1 parent c66c961 commit 77872d7
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 10 deletions.
10 changes: 8 additions & 2 deletions delft/sequenceLabelling/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,8 +321,14 @@ def convert_single_text(self, text_tokens, chars_tokens, features_tokens, label_
chars_tokens.append(self.empty_char_vector)

# sub-tokenization
encoded_result = self.tokenizer(text_tokens, add_special_tokens=True, is_split_into_words=True,
max_length=max_seq_length, truncation=True, return_offsets_mapping=True)
encoded_result = self.tokenizer(
text_tokens,
add_special_tokens=True,
is_split_into_words=True,
max_length=max_seq_length,
truncation=True,
return_offsets_mapping=True
)

input_ids = encoded_result.input_ids
offsets = encoded_result.offset_mapping
Expand Down
4 changes: 2 additions & 2 deletions delft/sequenceLabelling/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ def train_model(self, local_model, x_train, y_train, f_train=None,

# multiple workers should work with transformer layers, but not with ELMo due to GPU memory limit (with GTX 1080Ti 11GB)
if self.model_config.transformer_name is not None or (self.embeddings and self.embeddings.use_ELMo):
# worker at 0 means the training will be executed in the main thread
nb_workers = 0
# worker at 1 means the training will be executed in the main thread
nb_workers = 1
multiprocessing = False

local_model.fit(training_generator,
Expand Down
7 changes: 1 addition & 6 deletions delft/utilities/Transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,35 +128,30 @@ def init_preprocessor(self, max_sequence_length: int,
do_lower_case = False

if do_lower_case is not None:
if self.auth_token != None:
if self.auth_token is not None:
self.tokenizer = AutoTokenizer.from_pretrained(self.name,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space,
do_lower_case=do_lower_case,
use_auth_token=self.auth_token)
else:
self.tokenizer = AutoTokenizer.from_pretrained(self.name,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space,
do_lower_case=do_lower_case)
else:
if self.auth_token != None:
self.tokenizer = AutoTokenizer.from_pretrained(self.name,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space,
use_auth_token=self.auth_token)
else:
self.tokenizer = AutoTokenizer.from_pretrained(self.name,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space)

elif self.loading_method == LOADING_METHOD_LOCAL_MODEL_DIR:
self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir_path,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space)
elif self.loading_method == LOADING_METHOD_PLAIN_MODEL:
Expand Down

0 comments on commit 77872d7

Please sign in to comment.