mirror of https://github.com/coqui-ai/TTS.git
Handle espeak 1.48.15 (#2203)
This commit is contained in:
parent
24620743ca
commit
fdeefcc612
|
@ -1,6 +1,7 @@
|
|||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from distutils.version import LooseVersion
|
||||
from typing import Dict, List
|
||||
|
||||
from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
|
||||
|
@ -13,13 +14,26 @@ def is_tool(name):
|
|||
return which(name) is not None
|
||||
|
||||
|
||||
def get_espeak_version():
    """Return the version string reported by ``espeak --version``.

    ``subprocess.getoutput`` captures stderr together with stdout, so any
    warning printed before the version banner shifts the whitespace-separated
    tokens. The version is therefore looked up relative to the
    ``text-to-speech:`` label (assumed banner format:
    ``eSpeak text-to-speech: <version> ...``) rather than by position alone.

    Returns:
        str: The version token, e.g. ``"1.48.15"``.

    Raises:
        IndexError: If the label is absent and the output has fewer than
            three whitespace-separated tokens.
    """
    output = subprocess.getoutput("espeak --version")
    banner = re.search(r"text-to-speech:\s*(\S+)", output)
    if banner:
        return banner.group(1)
    # Unrecognised banner: keep the historical positional lookup so callers
    # see the same result (or the same IndexError) as before.
    return output.split()[2]
|
||||
|
||||
|
||||
def get_espeakng_version():
    """Return the version string reported by ``espeak-ng --version``.

    ``subprocess.getoutput`` captures stderr together with stdout, so any
    warning printed before the version banner shifts the whitespace-separated
    tokens. The version is therefore looked up relative to the
    ``text-to-speech:`` label (assumed banner format:
    ``eSpeak NG text-to-speech: <version> ...``) rather than by position alone.

    Returns:
        str: The version token, e.g. ``"1.50"``.

    Raises:
        IndexError: If the label is absent and the output has fewer than
            four whitespace-separated tokens.
    """
    output = subprocess.getoutput("espeak-ng --version")
    banner = re.search(r"text-to-speech:\s*(\S+)", output)
    if banner:
        return banner.group(1)
    # Unrecognised banner: keep the historical positional lookup so callers
    # see the same result (or the same IndexError) as before.
    return output.split()[3]
|
||||
|
||||
|
||||
# priority: espeakng > espeak
# Start from "no backend available" and overwrite the defaults with the first
# tool that is_tool() reports as present, probing espeak-ng before espeak.
_DEF_ESPEAK_LIB = None
_DEF_ESPEAK_VER = None
if is_tool("espeak-ng"):
    _DEF_ESPEAK_LIB = "espeak-ng"
    _DEF_ESPEAK_VER = get_espeakng_version()
elif is_tool("espeak"):
    _DEF_ESPEAK_LIB = "espeak"
    _DEF_ESPEAK_VER = get_espeak_version()
|
||||
|
||||
|
||||
def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]:
|
||||
|
@ -85,6 +99,7 @@ class ESpeak(BasePhonemizer):
|
|||
"""
|
||||
|
||||
_ESPEAK_LIB = _DEF_ESPEAK_LIB
|
||||
_ESPEAK_VER = _DEF_ESPEAK_VER
|
||||
|
||||
def __init__(self, language: str, backend=None, punctuations=Punctuation.default_puncs(), keep_puncs=True):
|
||||
if self._ESPEAK_LIB is None:
|
||||
|
@ -105,17 +120,24 @@ class ESpeak(BasePhonemizer):
|
|||
def backend(self):
|
||||
return self._ESPEAK_LIB
|
||||
|
||||
@property
def backend_version(self):
    """Version string stored in ``_ESPEAK_VER`` for the active backend."""
    return self._ESPEAK_VER
|
||||
|
||||
@backend.setter
def backend(self, backend):
    """Switch to the given espeak executable and refresh its version.

    Args:
        backend: Either ``"espeak"`` or ``"espeak-ng"``.

    Raises:
        Exception: If ``backend`` is not one of the two supported names.
    """
    supported = ("espeak", "espeak-ng")
    if backend not in supported:
        raise Exception("Unknown backend: %s" % backend)
    # The library name is stored before the version query, as in the
    # original statement order.
    self._ESPEAK_LIB = backend
    if backend == "espeak-ng":
        self._ESPEAK_VER = get_espeakng_version()
    else:
        self._ESPEAK_VER = get_espeak_version()
|
||||
|
||||
def auto_set_espeak_lib(self) -> None:
    """Pick the first available backend, preferring espeak-ng over espeak.

    Sets ``_ESPEAK_LIB`` and ``_ESPEAK_VER`` on the instance for the tool
    that ``is_tool`` reports as present.

    Raises:
        Exception: If neither ``espeak-ng`` nor ``espeak`` is found.
    """
    if is_tool("espeak-ng"):
        self._ESPEAK_LIB = "espeak-ng"
        self._ESPEAK_VER = get_espeakng_version()
        return
    if is_tool("espeak"):
        self._ESPEAK_LIB = "espeak"
        self._ESPEAK_VER = get_espeak_version()
        return
    raise Exception("Cannot set backend automatically. espeak-ng or espeak not found")
|
||||
|
||||
|
@ -146,6 +168,9 @@ class ESpeak(BasePhonemizer):
|
|||
else:
|
||||
# split with '_'
|
||||
if self.backend == "espeak":
|
||||
if LooseVersion(self.backend_version) >= LooseVersion("1.48.15"):
|
||||
args.append("--ipa=1")
|
||||
else:
|
||||
args.append("--ipa=3")
|
||||
else:
|
||||
args.append("--ipa=1")
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import unittest
|
||||
from distutils.version import LooseVersion
|
||||
|
||||
from TTS.tts.utils.text.phonemizers import ESpeak, Gruut, JA_JP_Phonemizer, ZH_CN_Phonemizer
|
||||
|
||||
|
@ -18,6 +19,14 @@ EXPECTED_ESPEAK_PHONEMES = [
|
|||
]
|
||||
|
||||
|
||||
EXPECTED_ESPEAK_v1_48_15_PHONEMES = [
|
||||
"ɹ|ˈiː|s|ə|n|t ɹ|ɪ|s|ˈɜː|tʃ æ|t h|ˈɑːɹ|v|ɚ|d h|ɐ|z ʃ|ˈoʊ|n m|ˈɛ|d|ᵻ|t|ˌeɪ|ɾ|ɪ|ŋ",
|
||||
"f|ɔː|ɹ æ|z l|ˈɪ|ɾ|əl æ|z ˈeɪ|t w|ˈiː|k|s k|æ|n ˈæ|k|tʃ|uː|əl|i| ˈɪ|n|k|ɹ|iː|s, ð|ə ɡ|ɹ|ˈeɪ m|ˈæ|ɾ|ɚ",
|
||||
"ɪ|n|ð|ə p|ˈɑːɹ|t|s ʌ|v|ð|ə b|ɹ|ˈeɪ|n ɹ|ɪ|s|p|ˈɑː|n|s|ə|b|əl",
|
||||
"f|ɔː|ɹ ɪ|m|ˈoʊ|ʃ|ə|n|əl ɹ|ˌɛ|ɡ|j|uː|l|ˈeɪ|ʃ|ə|n|| æ|n|d l|ˈɜː|n|ɪ|ŋ!",
|
||||
]
|
||||
|
||||
|
||||
EXPECTED_ESPEAKNG_PHONEMES = [
|
||||
"ɹ|ˈiː|s|ə|n|t ɹ|ᵻ|s|ˈɜː|tʃ æ|t h|ˈɑːɹ|v|ɚ|d h|ɐ|z ʃ|ˈoʊ|n m|ˈɛ|d|ᵻ|t|ˌeɪ|ɾ|ɪ|ŋ",
|
||||
"f|ɔː|ɹ æ|z l|ˈɪ|ɾ|əl æ|z ˈeɪ|t w|ˈiː|k|s k|æ|n ˈæ|k|tʃ|uː|əl|i| ˈɪ|ŋ|k|ɹ|iː|s, ð|ə ɡ|ɹ|ˈeɪ m|ˈæ|ɾ|ɚ",
|
||||
|
@ -30,13 +39,20 @@ class TestEspeakPhonemizer(unittest.TestCase):
|
|||
def setUp(self):
|
||||
self.phonemizer = ESpeak(language="en-us", backend="espeak")
|
||||
|
||||
for text, ph in zip(EXAMPLE_TEXTs, EXPECTED_ESPEAK_PHONEMES):
|
||||
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
|
||||
target_phonemes = EXPECTED_ESPEAK_v1_48_15_PHONEMES
|
||||
else:
|
||||
target_phonemes = EXPECTED_ESPEAK_PHONEMES
|
||||
|
||||
for text, ph in zip(EXAMPLE_TEXTs, target_phonemes):
|
||||
phonemes = self.phonemizer.phonemize(text)
|
||||
self.assertEqual(phonemes, ph)
|
||||
|
||||
# multiple punctuations
|
||||
text = "Be a voice, not an! echo?"
|
||||
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ?"
|
||||
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
|
||||
gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ?"
|
||||
output = self.phonemizer.phonemize(text, separator="|")
|
||||
output = output.replace("|", "")
|
||||
self.assertEqual(output, gt)
|
||||
|
@ -44,12 +60,16 @@ class TestEspeakPhonemizer(unittest.TestCase):
|
|||
# not ending with punctuation
|
||||
text = "Be a voice, not an! echo"
|
||||
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ"
|
||||
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
|
||||
gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ"
|
||||
output = self.phonemizer.phonemize(text, separator="")
|
||||
self.assertEqual(output, gt)
|
||||
|
||||
# extra space after the sentence
|
||||
text = "Be a voice, not an! echo. "
|
||||
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ."
|
||||
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
|
||||
gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ."
|
||||
output = self.phonemizer.phonemize(text, separator="")
|
||||
self.assertEqual(output, gt)
|
||||
|
||||
|
|
Loading…
Reference in New Issue