Implement BasePhonemizer

2021-11-16 13:25:30 +01:00 · 2021-11-16 13:25:30 +01:00 · c1119bc291
parent dcd01356e0
commit c1119bc291
2 changed files with 187 additions and 0 deletions
--- a/TTS/tts/utils/text/phonemizers/init.py
+++ b/TTS/tts/utils/text/phonemizers/init.py
@ -0,0 +1,51 @@
+from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
+from TTS.tts.utils.text.phonemizers.espeak_wrapper import ESpeak
+from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut
+from TTS.tts.utils.text.phonemizers.ja_jp_phonemizer import JA_JP_Phonemizer
+from TTS.tts.utils.text.phonemizers.zh_cn_phonemizer import ZH_CN_Phonemizer
+
+PHONEMIZERS = {b.name(): b for b in (ESpeak, Gruut, JA_JP_Phonemizer)}
+
+
+ESPEAK_LANGS = list(ESpeak.supported_languages().keys())
+GRUUT_LANGS = list(Gruut.supported_languages())
+
+
+# Dict setting default phonemizers for each language
+DEF_LANG_TO_PHONEMIZER = {
+    "ja-jp": JA_JP_Phonemizer.name(),
+    "zh-cn": ZH_CN_Phonemizer.name(),
+}
+
+
+# Add Gruut languages
+_ = [Gruut.name()] * len(GRUUT_LANGS)
+_new_dict = dict(list(zip(GRUUT_LANGS, _)))
+DEF_LANG_TO_PHONEMIZER.update(_new_dict)
+
+
+# Add ESpeak languages and override any existing ones
+_ = [ESpeak.name()] * len(ESPEAK_LANGS)
+_new_dict = dict(list(zip(list(ESPEAK_LANGS), _)))
+DEF_LANG_TO_PHONEMIZER.update(_new_dict)
+
+
+def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer:
+    """Initiate a phonemizer by name
+
+    Args:
+        name (str):
+            Name of the phonemizer that should match `phonemizer.name()`.
+
+        kwargs (dict):
+            Extra keyword arguments that should be passed to the phonemizer.
+    """
+    if name == "espeak":
+        return ESpeak(**kwargs)
+    if name == "gruut":
+        return Gruut(**kwargs)
+    if name == "zh_cn_phonemizer":
+        return ZH_CN_Phonemizer(**kwargs)
+    if name == "ja_jp_phonemizer":
+        return JA_JP_Phonemizer(**kwargs)
+    raise ValueError(f"Phonemizer {name} not found")
--- a/TTS/tts/utils/text/phonemizers/base.py
+++ b/TTS/tts/utils/text/phonemizers/base.py
@ -0,0 +1,136 @@
+import abc
+import itertools
+from typing import List, Tuple, Union
+
+from TTS.tts.utils.text.punctuation import Punctuation
+
+
+class BasePhonemizer(abc.ABC):
+    """Base phonemizer class
+
+    Args:
+        language (str):
+            Language used by the phonemizer.
+
+        punctuations (List[str]):
+            List of punctuation marks to be preserved.
+
+        keep_puncs (bool):
+            Whether to preserve punctuation marks or not.
+    """
+
+    def __init__(self, language, punctuations=Punctuation.default_puncs(), keep_puncs=False):
+
+        # ensure the backend is installed on the system
+        if not self.is_available():
+            raise RuntimeError("{} not installed on your system".format(self.name()))  # pragma: nocover
+
+        # ensure the backend support the requested language
+        self._language = self._init_language(language)
+
+        # setup punctuation processing
+        self._keep_puncs = keep_puncs
+        self._punctuator = Punctuation(punctuations)
+
+
+    def _init_language(self, language):
+        """Language initialization
+
+        This method may be overloaded in child classes (see Segments backend)
+
+        """
+        if not self.is_supported_language(language):
+            raise RuntimeError(f'language "{language}" is not supported by the ' f"{self.name()} backend")
+        return language
+
+    @property
+    def language(self):
+        """The language code configured to be used for phonemization"""
+        return self._language
+
+    @staticmethod
+    @abc.abstractmethod
+    def name():
+        """The name of the backend"""
+
+    @classmethod
+    @abc.abstractmethod
+    def is_available(cls):
+        """Returns True if the backend is installed, False otherwise"""
+
+    @classmethod
+    @abc.abstractmethod
+    def version(cls):
+        """Return the backend version as a tuple (major, minor, patch)"""
+
+    @abc.abstractmethod
+    def supported_languages():
+        """Return a dict of language codes -> name supported by the backend"""
+
+    def is_supported_language(self, language):
+        """Returns True if `language` is supported by the backend"""
+        return language in self.supported_languages()
+
+    fr"""
+        Phonemization follows the following steps:
+            1. Preprocessing:
+                - remove empty lines
+                - remove punctuation
+                - keep track of punctuation marks
+
+            2. Phonemization:
+                - convert text to phonemes
+
+            3. Postprocessing:
+                - join phonemes
+                - restore punctuation marks
+    """
+
+    @abc.abstractmethod
+    def _phonemize(self, text, separator):
+        """The main phonemization method"""
+
+    def _phonemize_preprocess(self, text) -> Tuple[List[str], List]:
+        """Preprocess the text before phonemization
+
+        Override this if you need a different behaviour
+        """
+        if self._keep_puncs:
+            # a tuple (text, punctuation marks)
+            return self._punctuator.strip_to_restore(text)
+        return [self._punctuator.strip(text)], []
+
+    def _phonemize_postprocess(self, phonemized, punctuations) -> str:
+        """Postprocess the raw phonemized output
+
+        Override this if you need a different behaviour
+        """
+        if self._keep_puncs:
+            return self._punctuator.restore(phonemized, punctuations)[0]
+        return phonemized[0]
+
+    def phonemize(self, text: str, separator="|") -> str:
+        """Returns the `text` phonemized for the given language
+
+        Args:
+            text (str):
+                Text to be phonemized.
+
+            separator (str):
+                string separator used between phonemes. Default to '_'.
+
+        Returns:
+            (str): Phonemized text
+        """
+        text, punctuations = self._phonemize_preprocess(text)
+        phonemized = []
+        for t in text:
+            p = self._phonemize(t, separator)
+            phonemized.append(p)
+        phonemized = self._phonemize_postprocess(phonemized, punctuations)
+        return phonemized
+
+    def print_logs(self, level: int=0):
+        indent = "\t" * level
+        print(f"{indent}| > phoneme language: {self.language}")
+        print(f"{indent}| > phoneme backend: {self.name()}")