Add 2 functions to verify that any spaCy language can be instantiated. For now, the only one that needs extra packages is Korean, so every language works except Korean.

This commit is contained in:
David Martin Rius 2024-02-28 20:23:53 +01:00
parent 30a2d8d787
commit 3db0dec08a
1 changed files with 17 additions and 1 deletions

View File

@ -15,11 +15,27 @@ from tokenizers import Tokenizer
from TTS.tts.layers.xtts.zh_num2words import TextNorm as zh_num2words
import spacy
# These 2 functions are to verify that any language can be instantiated
def get_spacy_available_langs():
    """Return the names of the language packages bundled with spaCy.

    Every sub-directory of the installed ``spacy/lang`` directory, except
    ``__pycache__``, is reported by its directory name. The list is also
    printed to stdout.

    Returns:
        list: Directory names found under ``spacy/lang``, in the order
        produced by ``Path.iterdir()`` (which is arbitrary).
    """
    from pathlib import Path

    # Path.parent / Path.name are separator-agnostic; splitting the string
    # form on "/" returns the whole path on Windows.
    lang_dir = Path(spacy.__file__).parent / "lang"
    SPACY_LANGS = [
        entry.name
        for entry in lang_dir.iterdir()
        if entry.is_dir() and entry.name != "__pycache__"
    ]
    print("Available languages in Spacy:", SPACY_LANGS)
    return SPACY_LANGS
def get_all_spacy_langs():
    """Instantiate every language reported by ``get_spacy_available_langs()``.

    Returns:
        list: One object per reported language, in the same order, each
        obtained from ``get_spacy_lang``.
    """
    # Return the instances instead of discarding them, so callers can
    # actually inspect what was created.
    return [get_spacy_lang(lang) for lang in get_spacy_available_langs()]
def get_spacy_lang(lang):
    """Instantiate the spaCy language class for ``lang``.

    Args:
        lang: Language code passed to ``get_lang_class``.

    Returns:
        An instance of the class returned by ``get_lang_class(lang)``, or an
        ``English()`` instance when looking up or instantiating that class
        raises ``ImportError``.
    """
    try:
        lang_model = get_lang_class(lang)()
    except ImportError as e:
        print("Error", e)
        # Fallback to English if the language model is not available
        lang_model = English()
    return lang_model