mirror of https://github.com/coqui-ai/TTS.git
Add phonemizer for Belarusian language (#2856)
This commit is contained in:
parent
b79b6f0762
commit
fead04f779
|
@ -0,0 +1,34 @@
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Module-level cache for the Java GrammarFinder; populated by init().
finder = None


def init():
    """Prepare the Java backend and store a ``GrammarFinder`` in the module-level ``finder``.

    Steps performed:
      1. Import ``jpype`` and ``jpype.imports``.
      2. Read the path to ``fanetyka.jar`` from the ``BEL_FANETYKA_JAR`` environment variable.
      3. Start a JVM with that jar on the classpath, or, if a JVM is already running in this
         process, add the jar to the running JVM's classpath instead of starting a second one.
      4. Build ``GrammarFinder(GrammarDB2.initializeFromJar())`` and assign it to ``finder``.

    Raises:
        ModuleNotFoundError: If ``jpype`` cannot be imported.
        KeyError: If ``BEL_FANETYKA_JAR`` is not defined.
    """
    global finder

    try:
        import jpype
        import jpype.imports
    except ModuleNotFoundError as err:
        # Keep the original import failure attached as the cause.
        raise ModuleNotFoundError(
            "Belarusian phonemizer requires to install module 'jpype1' manually. Try `pip install jpype1`."
        ) from err

    try:
        jar_path = os.environ["BEL_FANETYKA_JAR"]
    except KeyError as err:
        raise KeyError(
            "You need to define 'BEL_FANETYKA_JAR' environment variable as path to the fanetyka.jar file"
        ) from err

    if jpype.isJVMStarted():
        # startJVM() fails when a JVM is already running (e.g. a previous init() attempt
        # failed after this point, or another component started it), so reuse that JVM
        # and only make the jar visible to it.
        jpype.addClassPath(jar_path)
    else:
        jpype.startJVM(classpath=[jar_path])

    # import the Java modules
    from org.alex73.korpus.base import GrammarDB2, GrammarFinder

    grammar_db = GrammarDB2.initializeFromJar()
    finder = GrammarFinder(grammar_db)
|
||||||
|
|
||||||
|
|
||||||
|
def belarusian_text_to_phonemes(text: str) -> str:
    """Return the IPA transcription of ``text`` produced by the Java ``FanetykaText`` class.

    The Java backend is set up lazily: ``init()`` is called only while the module-level
    ``finder`` is still ``None``.

    Args:
        text (str): Text to transcribe.

    Returns:
        str: ``str()`` of the ``ipa`` attribute of ``FanetykaText(finder, text)``.
    """
    if finder is None:
        # First call in this process: bring up the backend and fill in `finder`.
        init()

    # Java class import; only resolvable once init() has run.
    from org.alex73.fanetyka.impl import FanetykaText

    transcription = FanetykaText(finder, text)
    return str(transcription.ipa)
|
|
@ -1,4 +1,5 @@
|
||||||
from TTS.tts.utils.text.phonemizers.bangla_phonemizer import BN_Phonemizer
|
from TTS.tts.utils.text.phonemizers.bangla_phonemizer import BN_Phonemizer
|
||||||
|
from TTS.tts.utils.text.phonemizers.belarusian_phonemizer import BEL_Phonemizer
|
||||||
from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
|
from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
|
||||||
from TTS.tts.utils.text.phonemizers.espeak_wrapper import ESpeak
|
from TTS.tts.utils.text.phonemizers.espeak_wrapper import ESpeak
|
||||||
from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut
|
from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut
|
||||||
|
@ -35,6 +36,7 @@ DEF_LANG_TO_PHONEMIZER["en"] = DEF_LANG_TO_PHONEMIZER["en-us"]
|
||||||
DEF_LANG_TO_PHONEMIZER["zh-cn"] = ZH_CN_Phonemizer.name()
|
DEF_LANG_TO_PHONEMIZER["zh-cn"] = ZH_CN_Phonemizer.name()
|
||||||
DEF_LANG_TO_PHONEMIZER["ko-kr"] = KO_KR_Phonemizer.name()
|
DEF_LANG_TO_PHONEMIZER["ko-kr"] = KO_KR_Phonemizer.name()
|
||||||
DEF_LANG_TO_PHONEMIZER["bn"] = BN_Phonemizer.name()
|
DEF_LANG_TO_PHONEMIZER["bn"] = BN_Phonemizer.name()
|
||||||
|
DEF_LANG_TO_PHONEMIZER["be"] = BEL_Phonemizer.name()
|
||||||
|
|
||||||
|
|
||||||
# JA phonemizer has deal breaking dependencies like MeCab for some systems.
|
# JA phonemizer has deal breaking dependencies like MeCab for some systems.
|
||||||
|
@ -68,6 +70,8 @@ def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer:
|
||||||
return KO_KR_Phonemizer(**kwargs)
|
return KO_KR_Phonemizer(**kwargs)
|
||||||
if name == "bn_phonemizer":
|
if name == "bn_phonemizer":
|
||||||
return BN_Phonemizer(**kwargs)
|
return BN_Phonemizer(**kwargs)
|
||||||
|
if name == "be_phonemizer":
|
||||||
|
return BEL_Phonemizer(**kwargs)
|
||||||
raise ValueError(f"Phonemizer {name} not found")
|
raise ValueError(f"Phonemizer {name} not found")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,55 @@
|
||||||
|
from typing import Dict
|
||||||
|
|
||||||
|
from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
|
||||||
|
from TTS.tts.utils.text.belarusian.phonemizer import belarusian_text_to_phonemes
|
||||||
|
|
||||||
|
_DEF_BE_PUNCS = ",!."  # TODO


class BEL_Phonemizer(BasePhonemizer):
    """🐸TTS be phonemizer using functions in `TTS.tts.utils.text.belarusian.phonemizer`

    Args:
        punctuations (str):
            Set of characters to be treated as punctuation. Defaults to `_DEF_BE_PUNCS`.

        keep_puncs (bool):
            If True, keep the punctuations after phonemization. Defaults to True.
    """

    language = "be"

    def __init__(self, punctuations=_DEF_BE_PUNCS, keep_puncs=True, **kwargs):  # pylint: disable=unused-argument
        # Extra keyword arguments are accepted but not forwarded to the base class.
        super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs)

    @staticmethod
    def name():
        """Return the identifier of this phonemizer."""
        return "be_phonemizer"

    @staticmethod
    def supported_languages() -> Dict:
        """Return the mapping of supported language codes to language names."""
        return {"be": "Belarusian"}

    def version(self) -> str:
        """Return the version string of this phonemizer."""
        return "0.0.1"

    def is_available(self) -> bool:
        """Always report the phonemizer as available; no dependency check is done here."""
        return True

    @staticmethod
    def phonemize_be(text: str, separator: str = "|") -> str:  # pylint: disable=unused-argument
        """Phonemize ``text`` with ``belarusian_text_to_phonemes``; ``separator`` is not used."""
        phonemes = belarusian_text_to_phonemes(text)
        return phonemes

    def _phonemize(self, text, separator):
        """Delegate to ``phonemize_be`` with the same arguments."""
        return self.phonemize_be(text, separator)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke run: print the phonemizer's metadata, then phonemize a sample word.
    phonemizer = BEL_Phonemizer()
    sample = "тэст"

    print(phonemizer.supported_languages())
    print(phonemizer.version())
    print(phonemizer.language)
    print(phonemizer.name())
    print(phonemizer.is_available())

    # Backticks make leading/trailing whitespace in the result visible.
    phonemized = phonemizer.phonemize(sample)
    print("`" + phonemized + "`")
|
|
@ -60,7 +60,7 @@ config = GlowTTSConfig(
|
||||||
output_path=output_path,
|
output_path=output_path,
|
||||||
add_blank=True,
|
add_blank=True,
|
||||||
datasets=[dataset_config],
|
datasets=[dataset_config],
|
||||||
characters=characters,
|
# characters=characters,
|
||||||
enable_eos_bos_chars=True,
|
enable_eos_bos_chars=True,
|
||||||
mixed_precision=False,
|
mixed_precision=False,
|
||||||
save_step=10000,
|
save_step=10000,
|
||||||
|
@ -69,6 +69,8 @@ config = GlowTTSConfig(
|
||||||
text_cleaner="no_cleaners",
|
text_cleaner="no_cleaners",
|
||||||
audio=audio_config,
|
audio=audio_config,
|
||||||
test_sentences=[],
|
test_sentences=[],
|
||||||
|
use_phonemes=True,
|
||||||
|
phoneme_language="be",
|
||||||
)
|
)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
import os
|
||||||
|
import warnings
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from TTS.tts.utils.text.belarusian.phonemizer import belarusian_text_to_phonemes
|
||||||
|
|
||||||
|
_TEST_CASES = """
|
||||||
|
Фанетычны канвертар/fanʲɛˈtɨt͡ʂnɨ kanˈvʲɛrtar
|
||||||
|
Гэтак мы працавалі/ˈɣɛtak ˈmɨ prat͡saˈvalʲi
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class TestText(unittest.TestCase):
    """Checks ``belarusian_text_to_phonemes`` against the reference pairs in ``_TEST_CASES``."""

    def test_belarusian_text_to_phonemes(self):
        """Compare the phonemizer output with the expected IPA for every reference line.

        Each line of ``_TEST_CASES`` has the form ``<text>/<expected phonemes>``.
        The test is reported as skipped (not silently passed) when the
        ``BEL_FANETYKA_JAR`` environment variable is not defined.
        """
        if "BEL_FANETYKA_JAR" not in os.environ:
            self.skipTest(
                "You need to define 'BEL_FANETYKA_JAR' environment variable as path to the fanetyka.jar file to test Belarusian phonemizer"
            )

        for line in _TEST_CASES.strip().split("\n"):
            text, phonemes = line.split("/")
            # subTest keeps checking the remaining pairs after one of them fails.
            with self.subTest(text=text):
                self.assertEqual(belarusian_text_to_phonemes(text), phonemes)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
Loading…
Reference in New Issue