From c329a8855fe397740a9775e42171a0c903a256c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Winkel=20Holm?= Date: Wed, 20 Sep 2023 14:54:17 +0200 Subject: [PATCH] fix: Change w2v2 word delimiter token to space --- src/coral_models/wav2vec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coral_models/wav2vec2.py b/src/coral_models/wav2vec2.py index 71bd95cd..79d19233 100644 --- a/src/coral_models/wav2vec2.py +++ b/src/coral_models/wav2vec2.py @@ -115,7 +115,7 @@ def load_processor(self) -> Wav2Vec2Processor: pad_token="", bos_token="", eos_token="", - word_delimiter_token="|", + word_delimiter_token=" ", ) # Set the `model_max_length` attribute of the tokenizer, if it hasn't been set,