Handle espeak 1.48.15 (#2203)

This commit is contained in:
Eren Gölge 2022-12-12 11:23:45 +01:00 committed by GitHub
parent 24620743ca
commit fdeefcc612
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 47 additions and 2 deletions

View File

@ -1,6 +1,7 @@
import logging import logging
import re import re
import subprocess import subprocess
from distutils.version import LooseVersion
from typing import Dict, List from typing import Dict, List
from TTS.tts.utils.text.phonemizers.base import BasePhonemizer from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
@ -13,13 +14,26 @@ def is_tool(name):
return which(name) is not None return which(name) is not None
def get_espeak_version():
    """Return the version string reported by ``espeak --version``.

    The version is found by searching for the ``text-to-speech:`` banner
    instead of taking a fixed word position. ``subprocess.getoutput`` merges
    stderr into the returned text, and a banner with a different number of
    leading words (for example an ``eSpeak NG`` build answering to the
    ``espeak`` command) would shift a positional index onto a non-version
    token.

    Returns:
        str: The first whitespace-delimited token after ``text-to-speech:``
        that starts with a digit, e.g. ``"1.48.15"``.

    Raises:
        RuntimeError: If no version token can be found in the command output.
    """
    output = subprocess.getoutput("espeak --version")
    # Anchor on the banner label; require a leading digit so that shell error
    # text is never mistaken for a version.
    match = re.search(r"text-to-speech:\s*(\d\S*)", output)
    if match is None:
        raise RuntimeError("Could not parse espeak version from output: %r" % (output,))
    return match.group(1)
def get_espeakng_version():
    """Return the version string reported by ``espeak-ng --version``.

    The version is found by searching for the ``text-to-speech:`` banner
    instead of taking a fixed word position. ``subprocess.getoutput`` merges
    stderr into the returned text, so any extra leading output would shift a
    positional index onto a non-version token.

    Returns:
        str: The first whitespace-delimited token after ``text-to-speech:``
        that starts with a digit, e.g. ``"1.50"``.

    Raises:
        RuntimeError: If no version token can be found in the command output.
    """
    output = subprocess.getoutput("espeak-ng --version")
    # Anchor on the banner label; require a leading digit so that shell error
    # text is never mistaken for a version.
    match = re.search(r"text-to-speech:\s*(\d\S*)", output)
    if match is None:
        raise RuntimeError("Could not parse espeak-ng version from output: %r" % (output,))
    return match.group(1)
# priority: espeakng > espeak # priority: espeakng > espeak
if is_tool("espeak-ng"): if is_tool("espeak-ng"):
_DEF_ESPEAK_LIB = "espeak-ng" _DEF_ESPEAK_LIB = "espeak-ng"
_DEF_ESPEAK_VER = get_espeakng_version()
elif is_tool("espeak"): elif is_tool("espeak"):
_DEF_ESPEAK_LIB = "espeak" _DEF_ESPEAK_LIB = "espeak"
_DEF_ESPEAK_VER = get_espeak_version()
else: else:
_DEF_ESPEAK_LIB = None _DEF_ESPEAK_LIB = None
_DEF_ESPEAK_VER = None
def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]: def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]:
@ -85,6 +99,7 @@ class ESpeak(BasePhonemizer):
""" """
_ESPEAK_LIB = _DEF_ESPEAK_LIB _ESPEAK_LIB = _DEF_ESPEAK_LIB
_ESPEAK_VER = _DEF_ESPEAK_VER
def __init__(self, language: str, backend=None, punctuations=Punctuation.default_puncs(), keep_puncs=True): def __init__(self, language: str, backend=None, punctuations=Punctuation.default_puncs(), keep_puncs=True):
if self._ESPEAK_LIB is None: if self._ESPEAK_LIB is None:
@ -105,17 +120,24 @@ class ESpeak(BasePhonemizer):
def backend(self): def backend(self):
return self._ESPEAK_LIB return self._ESPEAK_LIB
@property
def backend_version(self):
    """Version string stored in ``_ESPEAK_VER`` for the selected backend."""
    version = self._ESPEAK_VER
    return version
@backend.setter @backend.setter
def backend(self, backend): def backend(self, backend):
if backend not in ["espeak", "espeak-ng"]: if backend not in ["espeak", "espeak-ng"]:
raise Exception("Unknown backend: %s" % backend) raise Exception("Unknown backend: %s" % backend)
self._ESPEAK_LIB = backend self._ESPEAK_LIB = backend
self._ESPEAK_VER = get_espeakng_version() if backend == "espeak-ng" else get_espeak_version()
def auto_set_espeak_lib(self) -> None: def auto_set_espeak_lib(self) -> None:
if is_tool("espeak-ng"): if is_tool("espeak-ng"):
self._ESPEAK_LIB = "espeak-ng" self._ESPEAK_LIB = "espeak-ng"
self._ESPEAK_VER = get_espeakng_version()
elif is_tool("espeak"): elif is_tool("espeak"):
self._ESPEAK_LIB = "espeak" self._ESPEAK_LIB = "espeak"
self._ESPEAK_VER = get_espeak_version()
else: else:
raise Exception("Cannot set backend automatically. espeak-ng or espeak not found") raise Exception("Cannot set backend automatically. espeak-ng or espeak not found")
@ -146,7 +168,10 @@ class ESpeak(BasePhonemizer):
else: else:
# split with '_' # split with '_'
if self.backend == "espeak": if self.backend == "espeak":
args.append("--ipa=3") if LooseVersion(self.backend_version) >= LooseVersion("1.48.15"):
args.append("--ipa=1")
else:
args.append("--ipa=3")
else: else:
args.append("--ipa=1") args.append("--ipa=1")
if tie: if tie:

View File

@ -1,4 +1,5 @@
import unittest import unittest
from distutils.version import LooseVersion
from TTS.tts.utils.text.phonemizers import ESpeak, Gruut, JA_JP_Phonemizer, ZH_CN_Phonemizer from TTS.tts.utils.text.phonemizers import ESpeak, Gruut, JA_JP_Phonemizer, ZH_CN_Phonemizer
@ -18,6 +19,14 @@ EXPECTED_ESPEAK_PHONEMES = [
] ]
EXPECTED_ESPEAK_v1_48_15_PHONEMES = [
"ɹ|ˈiː|s|ə|n|t ɹ|ɪ|s|ˈɜː|tʃ æ|t h|ˈɑːɹ|v|ɚ|d h|ɐ|z ʃ|ˈoʊ|n m|ˈɛ|d|ᵻ|t|ˌeɪ|ɾ|ɪ",
"f|ɔː|ɹ æ|z l|ˈɪ|ɾ|əl æ|z ˈeɪ|t w|ˈiː|k|s k|æ|n ˈæ|k|tʃ|uː|əl|i| ˈɪ|n|k|ɹ|iː|s, ð|ə ɡ|ɹ|ˈeɪ m|ˈæ|ɾ|ɚ",
"ɪ|n|ð|ə p|ˈɑːɹ|t|s ʌ|v|ð|ə b|ɹ|ˈeɪ|n ɹ|ɪ|s|p|ˈɑː|n|s|ə|b|əl",
"f|ɔː|ɹ ɪ|m|ˈoʊ|ʃ|ə|n|əl ɹ|ˌɛ|ɡ|j|uː|l|ˈeɪ|ʃ|ə|n|| æ|n|d l|ˈɜː|n|ɪ|ŋ!",
]
EXPECTED_ESPEAKNG_PHONEMES = [ EXPECTED_ESPEAKNG_PHONEMES = [
"ɹ|ˈiː|s|ə|n|t ɹ|ᵻ|s|ˈɜː|tʃ æ|t h|ˈɑːɹ|v|ɚ|d h|ɐ|z ʃ|ˈoʊ|n m|ˈɛ|d|ᵻ|t|ˌeɪ|ɾ|ɪ", "ɹ|ˈiː|s|ə|n|t ɹ|ᵻ|s|ˈɜː|tʃ æ|t h|ˈɑːɹ|v|ɚ|d h|ɐ|z ʃ|ˈoʊ|n m|ˈɛ|d|ᵻ|t|ˌeɪ|ɾ|ɪ",
"f|ɔː|ɹ æ|z l|ˈɪ|ɾ|əl æ|z ˈeɪ|t w|ˈiː|k|s k|æ|n ˈæ|k|tʃ|uː|əl|i| ˈɪ|ŋ|k|ɹ|iː|s, ð|ə ɡ|ɹ|ˈeɪ m|ˈæ|ɾ|ɚ", "f|ɔː|ɹ æ|z l|ˈɪ|ɾ|əl æ|z ˈeɪ|t w|ˈiː|k|s k|æ|n ˈæ|k|tʃ|uː|əl|i| ˈɪ|ŋ|k|ɹ|iː|s, ð|ə ɡ|ɹ|ˈeɪ m|ˈæ|ɾ|ɚ",
@ -30,13 +39,20 @@ class TestEspeakPhonemizer(unittest.TestCase):
def setUp(self): def setUp(self):
self.phonemizer = ESpeak(language="en-us", backend="espeak") self.phonemizer = ESpeak(language="en-us", backend="espeak")
for text, ph in zip(EXAMPLE_TEXTs, EXPECTED_ESPEAK_PHONEMES): if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
target_phonemes = EXPECTED_ESPEAK_v1_48_15_PHONEMES
else:
target_phonemes = EXPECTED_ESPEAK_PHONEMES
for text, ph in zip(EXAMPLE_TEXTs, target_phonemes):
phonemes = self.phonemizer.phonemize(text) phonemes = self.phonemizer.phonemize(text)
self.assertEqual(phonemes, ph) self.assertEqual(phonemes, ph)
# multiple punctuations # multiple punctuations
text = "Be a voice, not an! echo?" text = "Be a voice, not an! echo?"
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ?" gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ?"
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ?"
output = self.phonemizer.phonemize(text, separator="|") output = self.phonemizer.phonemize(text, separator="|")
output = output.replace("|", "") output = output.replace("|", "")
self.assertEqual(output, gt) self.assertEqual(output, gt)
@ -44,12 +60,16 @@ class TestEspeakPhonemizer(unittest.TestCase):
# not ending with punctuation # not ending with punctuation
text = "Be a voice, not an! echo" text = "Be a voice, not an! echo"
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ" gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ"
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ"
output = self.phonemizer.phonemize(text, separator="") output = self.phonemizer.phonemize(text, separator="")
self.assertEqual(output, gt) self.assertEqual(output, gt)
# extra space after the sentence # extra space after the sentence
text = "Be a voice, not an! echo. " text = "Be a voice, not an! echo. "
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ." gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ."
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ."
output = self.phonemizer.phonemize(text, separator="") output = self.phonemizer.phonemize(text, separator="")
self.assertEqual(output, gt) self.assertEqual(output, gt)