mirror of https://github.com/coqui-ai/TTS.git
Merge branch 'coqui-ai:main' into main
This commit is contained in:
commit
26212b2af2
|
@ -650,6 +650,26 @@
|
||||||
"license": "CC-BY-4.0"
|
"license": "CC-BY-4.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"bn":{
|
||||||
|
"custom":{
|
||||||
|
"vits-male":{
|
||||||
|
"github_rls_url":"https://coqui.gateway.scarf.sh/v0.13.3_models/tts_models--bn--custom--vits_male.zip",
|
||||||
|
"default_vocoder": null,
|
||||||
|
"commit": null,
|
||||||
|
"description": "Single speaker Bangla male model. For more information -> https://github.com/mobassir94/comprehensive-bangla-tts",
|
||||||
|
"author": "@mobassir94",
|
||||||
|
"license": "Apache 2.0"
|
||||||
|
},
|
||||||
|
"vits-female":{
|
||||||
|
"github_rls_url":"https://coqui.gateway.scarf.sh/v0.13.3_models/tts_models--bn--custom--vits_female.zip",
|
||||||
|
"default_vocoder": null,
|
||||||
|
"commit": null,
|
||||||
|
"description": "Single speaker Bangla female model. For more information -> https://github.com/mobassir94/comprehensive-bangla-tts",
|
||||||
|
"author": "@mobassir94",
|
||||||
|
"license": "Apache 2.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"vocoder_models": {
|
"vocoder_models": {
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
0.13.2
|
0.13.3
|
||||||
|
|
|
@ -303,6 +303,8 @@ class TTS:
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_coqui_studio(self):
|
def is_coqui_studio(self):
|
||||||
|
if self.model_name is None:
|
||||||
|
return False
|
||||||
return "coqui_studio" in self.model_name
|
return "coqui_studio" in self.model_name
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
|
@ -0,0 +1,121 @@
|
||||||
|
import re
|
||||||
|
|
||||||
|
import bangla
|
||||||
|
from bnnumerizer import numerize
|
||||||
|
from bnunicodenormalizer import Normalizer
|
||||||
|
|
||||||
|
# Shared Unicode normalizer instance; `normalize` calls it once per word.
bnorm = Normalizer()


# Literal substring -> replacement table consumed by `expand_full_attribution`.
# Every key found in the text is replaced by its value (after the value has been
# passed through `normalize`). Entries are applied in insertion order.
# NOTE(review): the "...ঃ" keys look like abbreviated honorifics mapped to their
# spoken-out forms — confirm with a Bangla speaker.
attribution_dict = {
    "সাঃ": "সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম",
    "আঃ": "আলাইহিস সালাম",
    "রাঃ": "রাদিআল্লাহু আনহু",
    "রহঃ": "রহমাতুল্লাহি আলাইহি",
    "রহিঃ": "রহিমাহুল্লাহ",
    "হাফিঃ": "হাফিযাহুল্লাহ",
    "বায়ান": "বাইআন",
    "দাঃবাঃ": "দামাত বারাকাতুহুম,দামাত বারাকাতুল্লাহ",
    # The commented-out entries below are inactive and have no effect at runtime.
    # "আয়াত" : "আইআত",#আইআত
    # "ওয়া" : "ওআ",
    # "ওয়াসাল্লাম" : "ওআসাল্লাম",
    # "কেন" : "কেনো",
    # "কোন" : "কোনো",
    # "বল" : "বলো",
    # "চল" : "চলো",
    # "কর" : "করো",
    # "রাখ" : "রাখো",
    # Curly single quotes are removed from the text.
    "’": "",
    "‘": "",
    # "য়" : "অ",
    # "সম্প্রদায়" : "সম্প্রদাই",
    # "রয়েছে" : "রইছে",
    # "রয়েছ" : "রইছ",
    # A slash is replaced by the word " বাই " (padded with spaces).
    "/": " বাই ",
}
|
||||||
|
|
||||||
|
|
||||||
|
def tag_text(text: str) -> str:
    """Collapse every run of consecutive spaces in ``text`` into one space.

    Only the space character is affected; tabs and newlines are left alone.

    Args:
        text (str): Input text.

    Returns:
        str: ``text`` with runs of spaces reduced to a single space.
    """
    # No marker strings are inserted around the text: wrapping it in literal
    # "start"/"end" sentinels and stripping them afterwards with str.replace
    # also deleted those substrings from the caller's own text (for example
    # "the end" lost its last word). Splitting on the Arabic Unicode range and
    # replacing each part by itself changed nothing, so it is not done either.
    return re.sub(" +", " ", text)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize(sen):
    """Normalize each whitespace-separated word of ``sen`` with ``bnorm``.

    Args:
        sen (str): Text to normalize; it is split on whitespace.

    Returns:
        str: The normalized words joined by single spaces. Words for which
        the normalizer reports ``None`` as their normalized form are dropped.
    """
    kept = []
    for token in sen.split():
        cleaned = bnorm(token)["normalized"]
        if cleaned is not None:
            kept.append(cleaned)
    return " ".join(kept)
|
||||||
|
|
||||||
|
|
||||||
|
def expand_full_attribution(text):
    """Replace every ``attribution_dict`` key found in ``text`` by its value.

    The replacement value is passed through ``normalize`` before it is
    substituted. Keys are tried in the dictionary's iteration order, so a
    later key can match text produced by an earlier replacement.

    Args:
        text (str): Text to expand.

    Returns:
        str: ``text`` with all matching keys replaced.
    """
    for short_form in attribution_dict:
        if short_form not in text:
            continue
        expanded = normalize(attribution_dict[short_form])
        text = text.replace(short_form, expanded)
    return text
|
||||||
|
|
||||||
|
|
||||||
|
# Matches any run of whitespace. Compiled once at import time because
# `collapse_whitespace` is called for every sentence fragment.
_WHITESPACE_RE = re.compile(r"\s+")


def collapse_whitespace(text):
    """Replace every run of whitespace in ``text`` with a single space.

    Args:
        text (str): Input text.

    Returns:
        str: ``text`` with each whitespace run (spaces, tabs, newlines, ...)
        reduced to one space. Leading and trailing runs become a single
        space; they are not stripped.
    """
    return _WHITESPACE_RE.sub(" ", text)
|
||||||
|
|
||||||
|
|
||||||
|
def bangla_text_to_phonemes(text: str) -> str:
    """Run the Bangla text front-end over ``text``.

    Processing order:

    1. If the text contains ASCII digits, convert them to Bangla digits.
    2. Replace a ``:`` that sits directly between two Bangla digits with
       ``" এর "``.
    3. Pass the text through ``numerize`` (from ``bnnumerizer``).
    4. Pass the text through ``tag_text``.
    5. Split on the sentence enders ``।``, ``!`` and ``?``. For every
       fragment: drop newlines, ``normalize``, ``expand_full_attribution``,
       ``collapse_whitespace`` and append ``।``.

    Args:
        text (str): Raw input text.

    Returns:
        str: The processed fragments concatenated, each terminated by ``।``.
    """
    # english numbers to bangla conversion
    if re.search("[0-9]", text) is not None:
        text = bangla.convert_english_digit_to_bangla_digit(text)

    # Replace ':' between two Bangla digits with ' এর '. The class is the
    # range ০-৯ only, so a colon next to a comma or a space is left untouched.
    # The lookarounds keep the digits out of the match, so every colon in a
    # chain such as ১:২:৩ is rewritten.
    text = re.sub(r"(?<=[০-৯]):(?=[০-৯])", " এর ", text)

    # numerize text
    text = numerize(text)

    # tag sections
    text = tag_text(text)

    # Split based on sentence ending characters
    fragments = re.split("[।!?]", text.strip())

    processed = []
    for fragment in fragments:
        res = fragment.replace("\n", "")
        res = normalize(res)
        # expand attributes
        res = expand_full_attribution(res)
        res = collapse_whitespace(res)
        # NOTE(review): empty fragments (e.g. the one after a trailing sentence
        # ender) also receive a "।". Kept as is, since changing it would alter
        # the text the released models are fed — confirm before changing.
        processed.append(res + "।")
    return "".join(processed)
|
|
@ -1,3 +1,4 @@
|
||||||
|
from TTS.tts.utils.text.phonemizers.bangla_phonemizer import BN_Phonemizer
|
||||||
from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
|
from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
|
||||||
from TTS.tts.utils.text.phonemizers.espeak_wrapper import ESpeak
|
from TTS.tts.utils.text.phonemizers.espeak_wrapper import ESpeak
|
||||||
from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut
|
from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut
|
||||||
|
@ -28,6 +29,7 @@ DEF_LANG_TO_PHONEMIZER["en"] = DEF_LANG_TO_PHONEMIZER["en-us"]
|
||||||
DEF_LANG_TO_PHONEMIZER["ja-jp"] = JA_JP_Phonemizer.name()
|
DEF_LANG_TO_PHONEMIZER["ja-jp"] = JA_JP_Phonemizer.name()
|
||||||
DEF_LANG_TO_PHONEMIZER["zh-cn"] = ZH_CN_Phonemizer.name()
|
DEF_LANG_TO_PHONEMIZER["zh-cn"] = ZH_CN_Phonemizer.name()
|
||||||
DEF_LANG_TO_PHONEMIZER["ko-kr"] = KO_KR_Phonemizer.name()
|
DEF_LANG_TO_PHONEMIZER["ko-kr"] = KO_KR_Phonemizer.name()
|
||||||
|
DEF_LANG_TO_PHONEMIZER["bn"] = BN_Phonemizer.name()
|
||||||
|
|
||||||
|
|
||||||
def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer:
|
def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer:
|
||||||
|
@ -50,6 +52,8 @@ def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer:
|
||||||
return JA_JP_Phonemizer(**kwargs)
|
return JA_JP_Phonemizer(**kwargs)
|
||||||
if name == "ko_kr_phonemizer":
|
if name == "ko_kr_phonemizer":
|
||||||
return KO_KR_Phonemizer(**kwargs)
|
return KO_KR_Phonemizer(**kwargs)
|
||||||
|
if name == "bn_phonemizer":
|
||||||
|
return BN_Phonemizer(**kwargs)
|
||||||
raise ValueError(f"Phonemizer {name} not found")
|
raise ValueError(f"Phonemizer {name} not found")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
from typing import Dict
|
||||||
|
|
||||||
|
from TTS.tts.utils.text.bangla.phonemizer import bangla_text_to_phonemes
|
||||||
|
from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
|
||||||
|
|
||||||
|
_DEF_ZH_PUNCS = "、.,[]()?!〽~『』「」【】"
|
||||||
|
|
||||||
|
|
||||||
|
class BN_Phonemizer(BasePhonemizer):
    """🐸TTS bn phonemizer using functions in `TTS.tts.utils.text.bangla.phonemizer`

    Args:
        punctuations (str):
            Set of characters to be treated as punctuation. Defaults to `_DEF_ZH_PUNCS`.
            That default set does not contain the Bangla danda ``।``.

        keep_puncs (bool):
            If True, keep the punctuations after phonemization. Defaults to False.

    Note:
        The text returned by this phonemizer is whatever
        ``bangla_text_to_phonemes`` produces for the input. The ``separator``
        argument is accepted for interface compatibility but is not used.

    TODO: someone with Bangla knowledge should check this implementation
    """

    language = "bn"

    def __init__(self, punctuations=_DEF_ZH_PUNCS, keep_puncs=False, **kwargs):  # pylint: disable=unused-argument
        # Extra keyword arguments are accepted but not forwarded to the base class.
        super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs)

    @staticmethod
    def name():
        """Return the identifier string of this phonemizer."""
        return "bn_phonemizer"

    @staticmethod
    def phonemize_bn(text: str, separator: str = "|") -> str:  # pylint: disable=unused-argument
        """Return ``bangla_text_to_phonemes(text)``; ``separator`` is ignored."""
        return bangla_text_to_phonemes(text)

    def _phonemize(self, text, separator):
        """Delegate to ``phonemize_bn`` with the same arguments."""
        return self.phonemize_bn(text, separator)

    @staticmethod
    def supported_languages() -> Dict:
        """Return the mapping of supported language codes to language names."""
        return {"bn": "Bangla"}

    def version(self) -> str:
        """Return the version string of this phonemizer."""
        return "0.0.1"

    def is_available(self) -> bool:
        """Return True unconditionally; no availability check is performed here."""
        return True
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke run: print the phonemizer metadata, then the processed sample text.
    sample_text = "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে, কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয়, তখনও যেন বলে."
    phonemizer = BN_Phonemizer()

    languages = phonemizer.supported_languages()
    print(languages)

    version = phonemizer.version()
    print(version)

    print(phonemizer.language)

    identifier = phonemizer.name()
    print(identifier)

    available = phonemizer.is_available()
    print(available)

    phonemized = phonemizer.phonemize(sample_text)
    print("`" + phonemized + "`")
|
|
@ -0,0 +1,12 @@
|
||||||
|
# Implementing a New Language Frontend
|
||||||
|
|
||||||
|
- Language frontends are located under `TTS.tts.utils.text`
|
||||||
|
- Each special language has a separate folder.
|
||||||
|
- Each folder contains all the utilities for processing the text input.
|
||||||
|
- `TTS.tts.utils.text.phonemizers` contains the main phonemizer for a language. This is the class that uses the utilities
|
||||||
|
from the previous step and is used to convert the text to phonemes or graphemes for the model.
|
||||||
|
- After you implement your phonemizer, you need to add it to the `TTS/tts/utils/text/phonemizers/__init__.py` to be able to
|
||||||
|
map the language code in the model config - `config.phoneme_language` - to the phonemizer class and initiate the phonemizer automatically.
|
||||||
|
- You should also add tests to `tests/text_tests` if you want to make a PR.
|
||||||
|
|
||||||
|
We suggest checking the available implementations for reference. Good luck!
|
|
@ -22,6 +22,7 @@
|
||||||
inference
|
inference
|
||||||
docker_images
|
docker_images
|
||||||
implementing_a_new_model
|
implementing_a_new_model
|
||||||
|
implementing_a_new_language_frontend
|
||||||
training_a_model
|
training_a_model
|
||||||
finetuning
|
finetuning
|
||||||
configuration
|
configuration
|
||||||
|
|
|
@ -41,3 +41,7 @@ gruut[de]==2.2.3
|
||||||
jamo
|
jamo
|
||||||
nltk
|
nltk
|
||||||
g2pkk>=0.1.1
|
g2pkk>=0.1.1
|
||||||
|
# deps for bangla
|
||||||
|
bangla==0.0.2
|
||||||
|
bnnumerizer
|
||||||
|
bnunicodenormalizer==0.1.1
|
||||||
|
|
|
@ -3,6 +3,7 @@ import unittest
|
||||||
from packaging.version import Version
|
from packaging.version import Version
|
||||||
|
|
||||||
from TTS.tts.utils.text.phonemizers import ESpeak, Gruut, JA_JP_Phonemizer, ZH_CN_Phonemizer
|
from TTS.tts.utils.text.phonemizers import ESpeak, Gruut, JA_JP_Phonemizer, ZH_CN_Phonemizer
|
||||||
|
from TTS.tts.utils.text.phonemizers.bangla_phonemizer import BN_Phonemizer
|
||||||
from TTS.tts.utils.text.phonemizers.multi_phonemizer import MultiPhonemizer
|
from TTS.tts.utils.text.phonemizers.multi_phonemizer import MultiPhonemizer
|
||||||
|
|
||||||
EXAMPLE_TEXTs = [
|
EXAMPLE_TEXTs = [
|
||||||
|
@ -230,6 +231,28 @@ class TestZH_CN_Phonemizer(unittest.TestCase):
|
||||||
self.assertTrue(self.phonemizer.is_available())
|
self.assertTrue(self.phonemizer.is_available())
|
||||||
|
|
||||||
|
|
||||||
|
class TestBN_Phonemizer(unittest.TestCase):
    """Checks for the Bangla phonemizer wrapper ``BN_Phonemizer``."""

    def setUp(self):
        """Create a default phonemizer plus one input sentence and its expected output."""
        self.phonemizer = BN_Phonemizer()
        self._TEST_CASES = "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে, কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয়, তখনও যেন"
        self._EXPECTED = "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয় তখনও যেন।"

    def test_phonemize(self):
        """The sample sentence is processed into the expected text."""
        produced = self.phonemizer.phonemize(self._TEST_CASES, separator="")
        self.assertEqual(produced, self._EXPECTED)

    def test_name(self):
        """The phonemizer reports the identifier ``bn_phonemizer``."""
        identifier = self.phonemizer.name()
        self.assertEqual(identifier, "bn_phonemizer")

    def test_get_supported_languages(self):
        """``supported_languages`` returns a dict."""
        languages = self.phonemizer.supported_languages()
        self.assertIsInstance(languages, dict)

    def test_get_version(self):
        """``version`` returns a string."""
        reported = self.phonemizer.version()
        self.assertIsInstance(reported, str)

    def test_is_available(self):
        """``is_available`` reports a truthy value."""
        available = self.phonemizer.is_available()
        self.assertTrue(available)
|
||||||
|
|
||||||
|
|
||||||
class TestMultiPhonemizer(unittest.TestCase):
|
class TestMultiPhonemizer(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.phonemizer = MultiPhonemizer({"tr": "espeak", "en-us": "", "de": "gruut", "zh-cn": ""})
|
self.phonemizer = MultiPhonemizer({"tr": "espeak", "en-us": "", "de": "gruut", "zh-cn": ""})
|
||||||
|
|
Loading…
Reference in New Issue