diff --git a/TTS/.models.json b/TTS/.models.json index 02b95cf2..40cf83ec 100644 --- a/TTS/.models.json +++ b/TTS/.models.json @@ -650,6 +650,26 @@ "license": "CC-BY-4.0" } } + }, + "bn":{ + "custom":{ + "vits-male":{ + "github_rls_url":"https://coqui.gateway.scarf.sh/v0.13.3_models/tts_models--bn--custom--vits_male.zip", + "default_vocoder": null, + "commit": null, + "description": "Single speaker Bangla male model. For more information -> https://github.com/mobassir94/comprehensive-bangla-tts", + "author": "@mobassir94", + "license": "Apache 2.0" + }, + "vits-female":{ + "github_rls_url":"https://coqui.gateway.scarf.sh/v0.13.3_models/tts_models--bn--custom--vits_female.zip", + "default_vocoder": null, + "commit": null, + "description": "Single speaker Bangla female model. For more information -> https://github.com/mobassir94/comprehensive-bangla-tts", + "author": "@mobassir94", + "license": "Apache 2.0" + } + } } }, "vocoder_models": { diff --git a/TTS/VERSION b/TTS/VERSION index 9beb74d4..288adf53 100644 --- a/TTS/VERSION +++ b/TTS/VERSION @@ -1 +1 @@ -0.13.2 +0.13.3 diff --git a/TTS/api.py b/TTS/api.py index 4e0731de..8124d3ab 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -303,6 +303,8 @@ class TTS: @property def is_coqui_studio(self): + if self.model_name is None: + return False return "coqui_studio" in self.model_name @property diff --git a/TTS/tts/utils/text/bangla/__init__.py b/TTS/tts/utils/text/bangla/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/TTS/tts/utils/text/bangla/phonemizer.py b/TTS/tts/utils/text/bangla/phonemizer.py new file mode 100644 index 00000000..e15830fe --- /dev/null +++ b/TTS/tts/utils/text/bangla/phonemizer.py @@ -0,0 +1,121 @@ +import re + +import bangla +from bnnumerizer import numerize +from bnunicodenormalizer import Normalizer + +# initialize +bnorm = Normalizer() + + +attribution_dict = { + "সাঃ": "সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম", + "আঃ": "আলাইহিস সালাম", + "রাঃ": "রাদিআল্লাহু আনহু", + "রহঃ": "রহমাতুল্লাহি আলাইহি", + "রহিঃ": "রহিমাহুল্লাহ", + "হাফিঃ": "হাফিযাহুল্লাহ", + "বায়ান": "বাইআন", + "দাঃবাঃ": "দামাত বারাকাতুহুম,দামাত বারাকাতুল্লাহ", + # "আয়াত" : "আইআত",#আইআত + # "ওয়া" : "ওআ", + # "ওয়াসাল্লাম" : "ওআসাল্লাম", + # "কেন" : "কেনো", + # "কোন" : "কোনো", + # "বল" : "বলো", + # "চল" : "চলো", + # "কর" : "করো", + # "রাখ" : "রাখো", + "’": "", + "‘": "", + # "য়" : "অ", + # "সম্প্রদায়" : "সম্প্রদাই", + # "রয়েছে" : "রইছে", + # "রয়েছ" : "রইছ", + "/": " বাই ", +} + + +def tag_text(text: str): + # remove multiple spaces + text = re.sub(" +", " ", text) + # create start and end + text = "start" + text + "end" + # tag text + parts = re.split("[\u0600-\u06FF]+", text) + # remove non chars + parts = [p for p in parts if p.strip()] + # unique parts + parts = set(parts) + # tag the text + for m in parts: + if len(m.strip()) > 1: + text = text.replace(m, f"{m}") + # clean-tags + text = text.replace("start", "") + text = text.replace("end", "") + return text + + +def normalize(sen): + global bnorm # pylint: disable=global-statement + _words = [bnorm(word)["normalized"] for word in sen.split()] + return " ".join([word for word in _words if word is not None]) + + +def expand_full_attribution(text): + for word, attr in attribution_dict.items(): + if word in text: + text = text.replace(word, normalize(attr)) + return text + + +def collapse_whitespace(text): + # Regular expression matching whitespace: + _whitespace_re = re.compile(r"\s+") + return re.sub(_whitespace_re, " ", text) + + +def bangla_text_to_phonemes(text: str) -> str: + # english numbers to bangla conversion + res = re.search("[0-9]", text) + if res is not None: + text = bangla.convert_english_digit_to_bangla_digit(text) + + # replace ':' in between two bangla numbers with ' এর ' + pattern = r"[০, ১, ২, ৩, ৪, ৫, ৬, ৭, ৮, ৯]:[০, ১, ২, ৩, ৪, ৫, ৬, ৭, ৮, ৯]" + matches = re.findall(pattern, text) + for m in matches: + r = m.replace(":", " এর ") + text = text.replace(m, r) + + # numerize text + text = numerize(text) + + # tag sections + text = tag_text(text) + + # text blocks + # blocks = text.split("") + # blocks = [b for b in blocks if b.strip()] + + # create tuple of (lang,text) + if "" in text: + text = text.replace("", "").replace("", "") + # Split based on sentence ending Characters + bn_text = text.strip() + + sentenceEnders = re.compile("[।!?]") + sentences = sentenceEnders.split(str(bn_text)) + + data = "" + for sent in sentences: + res = re.sub("\n", "", sent) + res = normalize(res) + # expand attributes + res = expand_full_attribution(res) + + res = collapse_whitespace(res) + res += "।" + data += res + return data diff --git a/TTS/tts/utils/text/phonemizers/__init__.py b/TTS/tts/utils/text/phonemizers/__init__.py index a5a341e9..d443fb24 100644 --- a/TTS/tts/utils/text/phonemizers/__init__.py +++ b/TTS/tts/utils/text/phonemizers/__init__.py @@ -1,3 +1,4 @@ +from TTS.tts.utils.text.phonemizers.bangla_phonemizer import BN_Phonemizer from TTS.tts.utils.text.phonemizers.base import BasePhonemizer from TTS.tts.utils.text.phonemizers.espeak_wrapper import ESpeak from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut @@ -28,6 +29,7 @@ DEF_LANG_TO_PHONEMIZER["en"] = DEF_LANG_TO_PHONEMIZER["en-us"] DEF_LANG_TO_PHONEMIZER["ja-jp"] = JA_JP_Phonemizer.name() DEF_LANG_TO_PHONEMIZER["zh-cn"] = ZH_CN_Phonemizer.name() DEF_LANG_TO_PHONEMIZER["ko-kr"] = KO_KR_Phonemizer.name() +DEF_LANG_TO_PHONEMIZER["bn"] = BN_Phonemizer.name() def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer: @@ -50,6 +52,8 @@ def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer: return JA_JP_Phonemizer(**kwargs) if name == "ko_kr_phonemizer": return KO_KR_Phonemizer(**kwargs) + if name == "bn_phonemizer": + return BN_Phonemizer(**kwargs) raise ValueError(f"Phonemizer {name} not found") diff --git a/TTS/tts/utils/text/phonemizers/bangla_phonemizer.py b/TTS/tts/utils/text/phonemizers/bangla_phonemizer.py new file mode 100644 index 00000000..3c4a35bb --- /dev/null +++ b/TTS/tts/utils/text/phonemizers/bangla_phonemizer.py @@ -0,0 +1,62 @@ +from typing import Dict + +from TTS.tts.utils.text.bangla.phonemizer import bangla_text_to_phonemes +from TTS.tts.utils.text.phonemizers.base import BasePhonemizer + +_DEF_ZH_PUNCS = "、.,[]()?!〽~『』「」【】" + + +class BN_Phonemizer(BasePhonemizer): + """🐸TTS bn phonemizer using functions in `TTS.tts.utils.text.bangla.phonemizer` + + Args: + punctuations (str): + Set of characters to be treated as punctuation. Defaults to `_DEF_ZH_PUNCS`. + + keep_puncs (bool): + If True, keep the punctuations after phonemization. Defaults to False. + + Example :: + + "这是,样本中文。" -> `d|ʒ|ø|4| |ʂ|ʏ|4| |,| |i|ɑ|ŋ|4|b|œ|n|3| |d|ʒ|o|ŋ|1|w|œ|n|2| |。` + + TODO: someone with Bangla knowledge should check this implementation + """ + + language = "bn" + + def __init__(self, punctuations=_DEF_ZH_PUNCS, keep_puncs=False, **kwargs): # pylint: disable=unused-argument + super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs) + + @staticmethod + def name(): + return "bn_phonemizer" + + @staticmethod + def phonemize_bn(text: str, separator: str = "|") -> str: # pylint: disable=unused-argument + ph = bangla_text_to_phonemes(text) + return ph + + def _phonemize(self, text, separator): + return self.phonemize_bn(text, separator) + + @staticmethod + def supported_languages() -> Dict: + return {"bn": "Bangla"} + + def version(self) -> str: + return "0.0.1" + + def is_available(self) -> bool: + return True + + +if __name__ == "__main__": + txt = "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে, কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয়, তখনও যেন বলে." + e = BN_Phonemizer() + print(e.supported_languages()) + print(e.version()) + print(e.language) + print(e.name()) + print(e.is_available()) + print("`" + e.phonemize(txt) + "`") diff --git a/docs/source/implementing_a_new_language_frontend.md b/docs/source/implementing_a_new_language_frontend.md new file mode 100644 index 00000000..f4f6a04a --- /dev/null +++ b/docs/source/implementing_a_new_language_frontend.md @@ -0,0 +1,12 @@ +# Implementing a New Language Frontend + +- Language frontends are located under `TTS.tts.utils.text` +- Each special language has a separate folder. +- Each folder containst all the utilities for processing the text input. +- `TTS.tts.utils.text.phonemizers` contains the main phonemizer for a language. This is the class that uses the utilities +from the previous step and used to convert the text to phonemes or graphemes for the model. +- After you implement your phonemizer, you need to add it to the `TTS/tts/utils/text/phonemizers/__init__.py` to be able to +map the language code in the model config - `config.phoneme_language` - to the phonemizer class and initiate the phonemizer automatically. +- You should also add tests to `tests/text_tests` if you want to make a PR. + +We suggest you to check the available implementations as reference. Good luck! \ No newline at end of file diff --git a/docs/source/index.md b/docs/source/index.md index 51735928..ccc7f66d 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -22,6 +22,7 @@ inference docker_images implementing_a_new_model + implementing_a_new_language_frontend training_a_model finetuning configuration diff --git a/requirements.txt b/requirements.txt index 89ea7889..a881f4d0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -41,3 +41,7 @@ gruut[de]==2.2.3 jamo nltk g2pkk>=0.1.1 +# deps for bangla +bangla==0.0.2 +bnnumerizer +bnunicodenormalizer==0.1.1 diff --git a/tests/text_tests/test_phonemizer.py b/tests/text_tests/test_phonemizer.py index a96a3a2f..88105544 100644 --- a/tests/text_tests/test_phonemizer.py +++ b/tests/text_tests/test_phonemizer.py @@ -3,6 +3,7 @@ import unittest from packaging.version import Version from TTS.tts.utils.text.phonemizers import ESpeak, Gruut, JA_JP_Phonemizer, ZH_CN_Phonemizer +from TTS.tts.utils.text.phonemizers.bangla_phonemizer import BN_Phonemizer from TTS.tts.utils.text.phonemizers.multi_phonemizer import MultiPhonemizer EXAMPLE_TEXTs = [ @@ -230,6 +231,28 @@ class TestZH_CN_Phonemizer(unittest.TestCase): self.assertTrue(self.phonemizer.is_available()) +class TestBN_Phonemizer(unittest.TestCase): + def setUp(self): + self.phonemizer = BN_Phonemizer() + self._TEST_CASES = "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে, কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয়, তখনও যেন" + self._EXPECTED = "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয় তখনও যেন।" + + def test_phonemize(self): + self.assertEqual(self.phonemizer.phonemize(self._TEST_CASES, separator=""), self._EXPECTED) + + def test_name(self): + self.assertEqual(self.phonemizer.name(), "bn_phonemizer") + + def test_get_supported_languages(self): + self.assertIsInstance(self.phonemizer.supported_languages(), dict) + + def test_get_version(self): + self.assertIsInstance(self.phonemizer.version(), str) + + def test_is_available(self): + self.assertTrue(self.phonemizer.is_available()) + + class TestMultiPhonemizer(unittest.TestCase): def setUp(self): self.phonemizer = MultiPhonemizer({"tr": "espeak", "en-us": "", "de": "gruut", "zh-cn": ""})