mirror of https://github.com/coqui-ai/TTS.git
Handle espeak 1.48.15 (#2203)
This commit is contained in:
parent
24620743ca
commit
fdeefcc612
|
@ -1,6 +1,7 @@
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
from distutils.version import LooseVersion
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
|
|
||||||
from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
|
from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
|
||||||
|
@ -13,13 +14,26 @@ def is_tool(name):
|
||||||
return which(name) is not None
|
return which(name) is not None
|
||||||
|
|
||||||
|
|
||||||
|
def get_espeak_version():
|
||||||
|
output = subprocess.getoutput("espeak --version")
|
||||||
|
return output.split()[2]
|
||||||
|
|
||||||
|
|
||||||
|
def get_espeakng_version():
|
||||||
|
output = subprocess.getoutput("espeak-ng --version")
|
||||||
|
return output.split()[3]
|
||||||
|
|
||||||
|
|
||||||
# priority: espeakng > espeak
|
# priority: espeakng > espeak
|
||||||
if is_tool("espeak-ng"):
|
if is_tool("espeak-ng"):
|
||||||
_DEF_ESPEAK_LIB = "espeak-ng"
|
_DEF_ESPEAK_LIB = "espeak-ng"
|
||||||
|
_DEF_ESPEAK_VER = get_espeakng_version()
|
||||||
elif is_tool("espeak"):
|
elif is_tool("espeak"):
|
||||||
_DEF_ESPEAK_LIB = "espeak"
|
_DEF_ESPEAK_LIB = "espeak"
|
||||||
|
_DEF_ESPEAK_VER = get_espeak_version()
|
||||||
else:
|
else:
|
||||||
_DEF_ESPEAK_LIB = None
|
_DEF_ESPEAK_LIB = None
|
||||||
|
_DEF_ESPEAK_VER = None
|
||||||
|
|
||||||
|
|
||||||
def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]:
|
def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]:
|
||||||
|
@ -85,6 +99,7 @@ class ESpeak(BasePhonemizer):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_ESPEAK_LIB = _DEF_ESPEAK_LIB
|
_ESPEAK_LIB = _DEF_ESPEAK_LIB
|
||||||
|
_ESPEAK_VER = _DEF_ESPEAK_VER
|
||||||
|
|
||||||
def __init__(self, language: str, backend=None, punctuations=Punctuation.default_puncs(), keep_puncs=True):
|
def __init__(self, language: str, backend=None, punctuations=Punctuation.default_puncs(), keep_puncs=True):
|
||||||
if self._ESPEAK_LIB is None:
|
if self._ESPEAK_LIB is None:
|
||||||
|
@ -105,17 +120,24 @@ class ESpeak(BasePhonemizer):
|
||||||
def backend(self):
|
def backend(self):
|
||||||
return self._ESPEAK_LIB
|
return self._ESPEAK_LIB
|
||||||
|
|
||||||
|
@property
|
||||||
|
def backend_version(self):
|
||||||
|
return self._ESPEAK_VER
|
||||||
|
|
||||||
@backend.setter
|
@backend.setter
|
||||||
def backend(self, backend):
|
def backend(self, backend):
|
||||||
if backend not in ["espeak", "espeak-ng"]:
|
if backend not in ["espeak", "espeak-ng"]:
|
||||||
raise Exception("Unknown backend: %s" % backend)
|
raise Exception("Unknown backend: %s" % backend)
|
||||||
self._ESPEAK_LIB = backend
|
self._ESPEAK_LIB = backend
|
||||||
|
self._ESPEAK_VER = get_espeakng_version() if backend == "espeak-ng" else get_espeak_version()
|
||||||
|
|
||||||
def auto_set_espeak_lib(self) -> None:
|
def auto_set_espeak_lib(self) -> None:
|
||||||
if is_tool("espeak-ng"):
|
if is_tool("espeak-ng"):
|
||||||
self._ESPEAK_LIB = "espeak-ng"
|
self._ESPEAK_LIB = "espeak-ng"
|
||||||
|
self._ESPEAK_VER = get_espeakng_version()
|
||||||
elif is_tool("espeak"):
|
elif is_tool("espeak"):
|
||||||
self._ESPEAK_LIB = "espeak"
|
self._ESPEAK_LIB = "espeak"
|
||||||
|
self._ESPEAK_VER = get_espeak_version()
|
||||||
else:
|
else:
|
||||||
raise Exception("Cannot set backend automatically. espeak-ng or espeak not found")
|
raise Exception("Cannot set backend automatically. espeak-ng or espeak not found")
|
||||||
|
|
||||||
|
@ -146,7 +168,10 @@ class ESpeak(BasePhonemizer):
|
||||||
else:
|
else:
|
||||||
# split with '_'
|
# split with '_'
|
||||||
if self.backend == "espeak":
|
if self.backend == "espeak":
|
||||||
args.append("--ipa=3")
|
if LooseVersion(self.backend_version) >= LooseVersion("1.48.15"):
|
||||||
|
args.append("--ipa=1")
|
||||||
|
else:
|
||||||
|
args.append("--ipa=3")
|
||||||
else:
|
else:
|
||||||
args.append("--ipa=1")
|
args.append("--ipa=1")
|
||||||
if tie:
|
if tie:
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import unittest
|
import unittest
|
||||||
|
from distutils.version import LooseVersion
|
||||||
|
|
||||||
from TTS.tts.utils.text.phonemizers import ESpeak, Gruut, JA_JP_Phonemizer, ZH_CN_Phonemizer
|
from TTS.tts.utils.text.phonemizers import ESpeak, Gruut, JA_JP_Phonemizer, ZH_CN_Phonemizer
|
||||||
|
|
||||||
|
@ -18,6 +19,14 @@ EXPECTED_ESPEAK_PHONEMES = [
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
EXPECTED_ESPEAK_v1_48_15_PHONEMES = [
|
||||||
|
"ɹ|ˈiː|s|ə|n|t ɹ|ɪ|s|ˈɜː|tʃ æ|t h|ˈɑːɹ|v|ɚ|d h|ɐ|z ʃ|ˈoʊ|n m|ˈɛ|d|ᵻ|t|ˌeɪ|ɾ|ɪ|ŋ",
|
||||||
|
"f|ɔː|ɹ æ|z l|ˈɪ|ɾ|əl æ|z ˈeɪ|t w|ˈiː|k|s k|æ|n ˈæ|k|tʃ|uː|əl|i| ˈɪ|n|k|ɹ|iː|s, ð|ə ɡ|ɹ|ˈeɪ m|ˈæ|ɾ|ɚ",
|
||||||
|
"ɪ|n|ð|ə p|ˈɑːɹ|t|s ʌ|v|ð|ə b|ɹ|ˈeɪ|n ɹ|ɪ|s|p|ˈɑː|n|s|ə|b|əl",
|
||||||
|
"f|ɔː|ɹ ɪ|m|ˈoʊ|ʃ|ə|n|əl ɹ|ˌɛ|ɡ|j|uː|l|ˈeɪ|ʃ|ə|n|| æ|n|d l|ˈɜː|n|ɪ|ŋ!",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
EXPECTED_ESPEAKNG_PHONEMES = [
|
EXPECTED_ESPEAKNG_PHONEMES = [
|
||||||
"ɹ|ˈiː|s|ə|n|t ɹ|ᵻ|s|ˈɜː|tʃ æ|t h|ˈɑːɹ|v|ɚ|d h|ɐ|z ʃ|ˈoʊ|n m|ˈɛ|d|ᵻ|t|ˌeɪ|ɾ|ɪ|ŋ",
|
"ɹ|ˈiː|s|ə|n|t ɹ|ᵻ|s|ˈɜː|tʃ æ|t h|ˈɑːɹ|v|ɚ|d h|ɐ|z ʃ|ˈoʊ|n m|ˈɛ|d|ᵻ|t|ˌeɪ|ɾ|ɪ|ŋ",
|
||||||
"f|ɔː|ɹ æ|z l|ˈɪ|ɾ|əl æ|z ˈeɪ|t w|ˈiː|k|s k|æ|n ˈæ|k|tʃ|uː|əl|i| ˈɪ|ŋ|k|ɹ|iː|s, ð|ə ɡ|ɹ|ˈeɪ m|ˈæ|ɾ|ɚ",
|
"f|ɔː|ɹ æ|z l|ˈɪ|ɾ|əl æ|z ˈeɪ|t w|ˈiː|k|s k|æ|n ˈæ|k|tʃ|uː|əl|i| ˈɪ|ŋ|k|ɹ|iː|s, ð|ə ɡ|ɹ|ˈeɪ m|ˈæ|ɾ|ɚ",
|
||||||
|
@ -30,13 +39,20 @@ class TestEspeakPhonemizer(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.phonemizer = ESpeak(language="en-us", backend="espeak")
|
self.phonemizer = ESpeak(language="en-us", backend="espeak")
|
||||||
|
|
||||||
for text, ph in zip(EXAMPLE_TEXTs, EXPECTED_ESPEAK_PHONEMES):
|
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
|
||||||
|
target_phonemes = EXPECTED_ESPEAK_v1_48_15_PHONEMES
|
||||||
|
else:
|
||||||
|
target_phonemes = EXPECTED_ESPEAK_PHONEMES
|
||||||
|
|
||||||
|
for text, ph in zip(EXAMPLE_TEXTs, target_phonemes):
|
||||||
phonemes = self.phonemizer.phonemize(text)
|
phonemes = self.phonemizer.phonemize(text)
|
||||||
self.assertEqual(phonemes, ph)
|
self.assertEqual(phonemes, ph)
|
||||||
|
|
||||||
# multiple punctuations
|
# multiple punctuations
|
||||||
text = "Be a voice, not an! echo?"
|
text = "Be a voice, not an! echo?"
|
||||||
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ?"
|
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ?"
|
||||||
|
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
|
||||||
|
gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ?"
|
||||||
output = self.phonemizer.phonemize(text, separator="|")
|
output = self.phonemizer.phonemize(text, separator="|")
|
||||||
output = output.replace("|", "")
|
output = output.replace("|", "")
|
||||||
self.assertEqual(output, gt)
|
self.assertEqual(output, gt)
|
||||||
|
@ -44,12 +60,16 @@ class TestEspeakPhonemizer(unittest.TestCase):
|
||||||
# not ending with punctuation
|
# not ending with punctuation
|
||||||
text = "Be a voice, not an! echo"
|
text = "Be a voice, not an! echo"
|
||||||
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ"
|
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ"
|
||||||
|
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
|
||||||
|
gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ"
|
||||||
output = self.phonemizer.phonemize(text, separator="")
|
output = self.phonemizer.phonemize(text, separator="")
|
||||||
self.assertEqual(output, gt)
|
self.assertEqual(output, gt)
|
||||||
|
|
||||||
# extra space after the sentence
|
# extra space after the sentence
|
||||||
text = "Be a voice, not an! echo. "
|
text = "Be a voice, not an! echo. "
|
||||||
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ."
|
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ."
|
||||||
|
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
|
||||||
|
gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ."
|
||||||
output = self.phonemizer.phonemize(text, separator="")
|
output = self.phonemizer.phonemize(text, separator="")
|
||||||
self.assertEqual(output, gt)
|
self.assertEqual(output, gt)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue