From 8aeced16fce77e20918906cc5623e7673895e41e Mon Sep 17 00:00:00 2001 From: David Martin Rius Date: Wed, 28 Feb 2024 19:58:25 +0100 Subject: [PATCH] import the spacy language class dynamically with an English fallback on import error --- TTS/tts/layers/xtts/tokenizer.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/TTS/tts/layers/xtts/tokenizer.py b/TTS/tts/layers/xtts/tokenizer.py index 1a3cc47a..fb941d70 100644 --- a/TTS/tts/layers/xtts/tokenizer.py +++ b/TTS/tts/layers/xtts/tokenizer.py @@ -8,29 +8,21 @@ import torch from hangul_romanize import Transliter from hangul_romanize.rule import academic from num2words import num2words -from spacy.lang.ar import Arabic from spacy.lang.en import English -from spacy.lang.es import Spanish -from spacy.lang.ja import Japanese -from spacy.lang.zh import Chinese +from spacy.util import get_lang_class + from tokenizers import Tokenizer from TTS.tts.layers.xtts.zh_num2words import TextNorm as zh_num2words def get_spacy_lang(lang): - if lang == "zh": - return Chinese() - elif lang == "ja": - return Japanese() - elif lang == "ar": - return Arabic() - elif lang == "es": - return Spanish() - else: - # For most languages, Enlish does the job - return English() - + try: + lang_model = get_lang_class(lang)() + except ImportError: + # Fallback to English if the language model is not available + lang_model = English() + return lang_model def split_sentence(text, lang, text_split_length=250): """Preprocess the input text"""