mirror of https://github.com/coqui-ai/TTS.git
254 lines
8.0 KiB
Python
254 lines
8.0 KiB
Python
"""Wrapper to call the espeak/espeak-ng phonemizer."""
|
||
|
||
import logging
|
||
import re
|
||
import subprocess
|
||
from typing import Optional
|
||
|
||
from packaging.version import Version
|
||
|
||
from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
|
||
from TTS.tts.utils.text.punctuation import Punctuation
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
def _is_tool(name) -> bool:
|
||
from shutil import which
|
||
|
||
return which(name) is not None
|
||
|
||
|
||
# Use a regex pattern to match the espeak version, because `espeak` may be
# symlinked to espeak-ng, which moves the version bits to another spot.
espeak_version_pattern = re.compile(r"text-to-speech:\s(?P<version>\d+\.\d+(\.\d+)?)")


def get_espeak_version() -> str:
    """Return version of the `espeak` binary.

    Runs `espeak --version` through the shell and extracts the version number
    with `espeak_version_pattern`.

    Returns:
        str: The matched version, e.g. "1.48.15" or "1.50".

    Raises:
        RuntimeError: If the command output contains no recognizable version
            (for example because `espeak` is not installed).
    """
    output = subprocess.getoutput("espeak --version")
    match = espeak_version_pattern.search(output)
    if match is None:
        # Without this guard the failure surfaces as an opaque AttributeError
        # on `None.group`, hiding what the command actually printed.
        msg = f"Could not determine espeak version from output: {output!r}"
        raise RuntimeError(msg)
    return match.group("version")
|
||
|
||
|
||
def get_espeakng_version() -> str:
    """Return version of the `espeak-ng` binary.

    The version is taken to be the fourth whitespace-separated token printed
    by `espeak-ng --version`.

    Returns:
        str: The fourth token of the command output.

    Raises:
        IndexError: If the output has fewer than four tokens.
    """
    banner = subprocess.getoutput("espeak-ng --version")
    tokens = banner.split()
    return tokens[3]
|
||
|
||
|
||
# Default backend, resolved once at import time.
# priority: espeakng > espeak; both stay None when neither binary is found.
_DEF_ESPEAK_LIB = None
_DEF_ESPEAK_VER = None
if _is_tool("espeak-ng"):
    _DEF_ESPEAK_LIB = "espeak-ng"
    _DEF_ESPEAK_VER = get_espeakng_version()
elif _is_tool("espeak"):
    _DEF_ESPEAK_LIB = "espeak"
    _DEF_ESPEAK_VER = get_espeak_version()
|
||
|
||
|
||
def _espeak_exe(espeak_lib: str, args: list) -> list[str]:
    """Run espeak with the given arguments and collect its output lines.

    Args:
        espeak_lib (str): Executable to invoke.
        args (list): Extra command-line arguments, appended after the fixed
            `-q -b 1` prefix.

    Returns:
        list[str]: The stripped, non-empty lines written to stdout. Non-empty
        stderr lines are logged as warnings and are not returned.

    Raises:
        subprocess.CalledProcessError: If the process exits with a non-zero
            status (the call uses `check=True`).
    """
    # "-b 1" selects UTF8 text encoding
    cmd = [espeak_lib, "-q", "-b", "1", *args]
    logger.debug("Executing: %s", repr(cmd))

    proc = subprocess.run(cmd, capture_output=True, encoding="utf8", check=True)

    for raw in proc.stderr.strip().split("\n"):
        message = raw.strip()
        if message:
            logger.warning("%s: %s", espeak_lib, message)

    res = []
    for raw in proc.stdout.strip().split("\n"):
        entry = raw.strip()
        if not entry:
            continue
        logger.debug("%s: %s", espeak_lib, entry)
        res.append(entry)
    return res
|
||
|
||
|
||
class ESpeak(BasePhonemizer):
    """Wrapper calling `espeak` or `espeak-ng` from the command-line to perform G2P.

    Args:
        language (str):
            Valid language code for the used backend.

        backend (str):
            Name of the backend library to use. `espeak` or `espeak-ng`. If None, set automatically
            preferring `espeak-ng` over `espeak`. Defaults to None.

        punctuations (str):
            Characters to be treated as punctuation. Defaults to Punctuation.default_puncs().

        keep_puncs (bool):
            If True, keep the punctuations after phonemization. Defaults to True.

    Raises:
        FileNotFoundError: If neither `espeak-ng` nor `espeak` was found when the module was imported.
        ValueError: If `backend` is given and is not `espeak` or `espeak-ng`.

    Example:

        >>> from TTS.tts.utils.text.phonemizers import ESpeak
        >>> phonemizer = ESpeak("tr")
        >>> phonemizer.phonemize("Bu Türkçe, bir örnektir.", separator="|")
        'b|ʊ t|ˈø|r|k|tʃ|ɛ, b|ɪ|r œ|r|n|ˈɛ|c|t|ɪ|r.'

    """

    def __init__(
        self,
        language: str,
        backend: Optional[str] = None,
        punctuations: str = Punctuation.default_puncs(),
        keep_puncs: bool = True,
    ):
        if _DEF_ESPEAK_LIB is None:
            msg = "[!] No espeak backend found. Install espeak-ng or espeak to your system."
            raise FileNotFoundError(msg)
        # Start from the backend detected at import time; goes through the
        # `backend` setter, which also records the backend version.
        self.backend = _DEF_ESPEAK_LIB

        # band-aid for backwards compatibility
        if language == "en":
            language = "en-us"
        if language == "zh-cn":
            language = "cmn"

        super().__init__(language, punctuations=punctuations, keep_puncs=keep_puncs)
        # An explicit backend overrides the auto-detected default.
        if backend is not None:
            self.backend = backend

    @property
    def backend(self) -> str:
        """Name of the backend executable in use (`espeak` or `espeak-ng`)."""
        return self._ESPEAK_LIB

    @property
    def backend_version(self) -> str:
        """Version string recorded when the backend was last set."""
        return self._ESPEAK_VER

    @backend.setter
    def backend(self, backend: str) -> None:
        """Select the backend and refresh the stored version.

        Raises:
            ValueError: If `backend` is not `espeak` or `espeak-ng`.
        """
        if backend not in ["espeak", "espeak-ng"]:
            msg = f"Unknown backend: {backend}"
            raise ValueError(msg)
        self._ESPEAK_LIB = backend
        self._ESPEAK_VER = get_espeakng_version() if backend == "espeak-ng" else get_espeak_version()

    def auto_set_espeak_lib(self) -> None:
        """Pick the backend from what is on the PATH, preferring `espeak-ng`.

        Raises:
            FileNotFoundError: If neither executable is found.
        """
        if _is_tool("espeak-ng"):
            self._ESPEAK_LIB = "espeak-ng"
            self._ESPEAK_VER = get_espeakng_version()
        elif _is_tool("espeak"):
            self._ESPEAK_LIB = "espeak"
            self._ESPEAK_VER = get_espeak_version()
        else:
            msg = "Cannot set backend automatically. espeak-ng or espeak not found"
            raise FileNotFoundError(msg)

    @staticmethod
    def name() -> str:
        """Return the identifier of this phonemizer."""
        return "espeak"

    def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False) -> str:
        """Convert input text to phonemes.

        Args:
            text (str):
                Text to be converted to phonemes.

            separator (str):
                String substituted for every '_' in the backend output. Defaults to "|".

            tie (bool, optional) : When True use a '͡' character between
                consecutive characters of a single phoneme. Else separate phoneme
                with '_'. This option requires espeak>=1.49. Default to False.

        Returns:
            str: The phonemes of all output lines, concatenated.
        """
        # set arguments
        args = ["-v", f"{self._language}"]
        # espeak and espeak-ng parses `ipa` differently
        if self.backend == "espeak":
            # the version only matters for the '_'-separated (non-tie) output
            if tie or Version(self.backend_version) >= Version("1.48.15"):
                ipa_flag = "--ipa=1"
            else:
                ipa_flag = "--ipa=3"
        else:
            ipa_flag = "--ipa=3" if tie else "--ipa=1"
        args.append(ipa_flag)
        if tie:
            # `--tie` takes the joining character itself. Formatting the bool
            # produced "--tie=True", so pass the documented '͡' (U+0361) instead.
            tie_char = "\u0361"
            args.append(f"--tie={tie_char}")

        args.append(text)
        # compute phonemes
        # espeak:
        # version 1.48.15: " p_ɹ_ˈaɪ_ɚ t_ə n_oʊ_v_ˈɛ_m_b_ɚ t_w_ˈɛ_n_t_i t_ˈuː\n"
        # espeak-ng:
        # "p_ɹ_ˈaɪ_ɚ t_ə n_oʊ_v_ˈɛ_m_b_ɚ t_w_ˈɛ_n_t_i t_ˈuː\n"

        # espeak-ng backend can add language flags that need to be removed:
        # "sɛʁtˈɛ̃ mˈo kɔm (en)fˈʊtbɔːl(fr) ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ."
        # phonemize needs to remove the language flags of the returned text:
        # "sɛʁtˈɛ̃ mˈo kɔm fˈʊtbɔːl ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ."
        phonemes = "".join(re.sub(r"\(.+?\)", "", line).strip() for line in _espeak_exe(self.backend, args))
        return phonemes.replace("_", separator)

    def _phonemize(self, text: str, separator: str = "") -> str:
        """Phonemize `text` without ties, delegating to `phonemize_espeak`."""
        return self.phonemize_espeak(text, separator, tie=False)

    @staticmethod
    def supported_languages() -> dict[str, str]:
        """Get a dictionary of supported languages.

        Parses the output of `--voices` from the default backend detected at
        import time.

        Returns:
            Dict: Dictionary of language codes, mapping the second column of
            each row to the fourth. Empty when no backend was detected.
        """
        if _DEF_ESPEAK_LIB is None:
            return {}
        langs = {}
        # The first output line is skipped (presumably the column header).
        for line in _espeak_exe(_DEF_ESPEAK_LIB, ["--voices"])[1:]:
            cols = line.split()
            if len(cols) < 4:
                # Not a complete voice row; skip it instead of raising IndexError.
                logger.debug("Skipping unparsable voice line: %s", line)
                continue
            langs[cols[1]] = cols[3]
        return langs

    def version(self) -> str:
        """Get the version of the used backend.

        Returns:
            str: Version of the used backend.
        """
        return self.backend_version

    @classmethod
    def is_available(cls) -> bool:
        """Return true if ESpeak is available else false."""
        return _is_tool("espeak") or _is_tool("espeak-ng")
|
||
|
||
|
||
if __name__ == "__main__":
    # Manual smoke test: print backend information, then phonemize a sample
    # sentence without and with punctuation kept.
    e = ESpeak(language="en-us")
    # Callables are evaluated one at a time so each value is printed before
    # the next one is computed.
    for probe in (e.supported_languages, e.version, lambda: e.language, e.name, e.is_available):
        print(probe())

    for keep in (False, True):
        e = ESpeak(language="en-us", keep_puncs=keep)
        print("`" + e.phonemize("hello how are you today?") + "`")
|