Handle espeak 1.48.15 (#2203)

This commit is contained in:
Eren Gölge 2022-12-12 11:23:45 +01:00 committed by GitHub
parent 24620743ca
commit fdeefcc612
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 47 additions and 2 deletions

View File

@ -1,6 +1,7 @@
import logging
import re
import subprocess
from distutils.version import LooseVersion
from typing import Dict, List
from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
@ -13,13 +14,26 @@ def is_tool(name):
return which(name) is not None
def get_espeak_version():
    """Return the version string reported by the ``espeak`` executable.

    Runs ``espeak --version`` and returns the whitespace-delimited token that
    follows the ``text-to-speech:`` marker in the captured output.
    ``subprocess.getoutput`` captures stderr together with stdout, so any
    extra text printed ahead of the banner would shift a purely positional
    lookup onto the wrong token; anchoring on the marker avoids that.

    Returns:
        str: The version token. If the marker is not present, the third
        whitespace-separated token of the output is returned instead (the
        legacy behaviour).

    Raises:
        IndexError: If the marker is absent and the output contains fewer
            than three tokens.
    """
    output = subprocess.getoutput("espeak --version")
    match = re.search(r"text-to-speech:\s*(\S+)", output)
    if match is not None:
        return match.group(1)
    # Unrecognised banner: keep the legacy positional lookup so existing
    # callers observe exactly what they did before.
    return output.split()[2]
def get_espeakng_version():
    """Return the version string reported by the ``espeak-ng`` executable.

    Runs ``espeak-ng --version`` and returns the whitespace-delimited token
    that follows the ``text-to-speech:`` marker in the captured output.
    ``subprocess.getoutput`` captures stderr together with stdout, so any
    extra text printed ahead of the banner would shift a purely positional
    lookup onto the wrong token; anchoring on the marker avoids that.

    Returns:
        str: The version token. If the marker is not present, the fourth
        whitespace-separated token of the output is returned instead (the
        legacy behaviour).

    Raises:
        IndexError: If the marker is absent and the output contains fewer
            than four tokens.
    """
    output = subprocess.getoutput("espeak-ng --version")
    match = re.search(r"text-to-speech:\s*(\S+)", output)
    if match is not None:
        return match.group(1)
    # Unrecognised banner: keep the legacy positional lookup so existing
    # callers observe exactly what they did before.
    return output.split()[3]
# Pick the default backend once at import time.
# Priority: espeak-ng first, then espeak; both stay None when neither is found.
_DEF_ESPEAK_LIB = None
_DEF_ESPEAK_VER = None
if is_tool("espeak-ng"):
    _DEF_ESPEAK_LIB = "espeak-ng"
    _DEF_ESPEAK_VER = get_espeakng_version()
elif is_tool("espeak"):
    _DEF_ESPEAK_LIB = "espeak"
    _DEF_ESPEAK_VER = get_espeak_version()
def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]:
@ -85,6 +99,7 @@ class ESpeak(BasePhonemizer):
"""
_ESPEAK_LIB = _DEF_ESPEAK_LIB
_ESPEAK_VER = _DEF_ESPEAK_VER
def __init__(self, language: str, backend=None, punctuations=Punctuation.default_puncs(), keep_puncs=True):
if self._ESPEAK_LIB is None:
@ -105,17 +120,24 @@ class ESpeak(BasePhonemizer):
def backend(self):
return self._ESPEAK_LIB
@property
def backend_version(self):
    """Version string stored for the active backend (the ``_ESPEAK_VER`` attribute)."""
    version = self._ESPEAK_VER
    return version
@backend.setter
def backend(self, backend):
    """Select the espeak backend and refresh the stored version.

    Args:
        backend: Either ``"espeak"`` or ``"espeak-ng"``.

    Raises:
        Exception: If ``backend`` is neither of the two supported names.
    """
    supported = ("espeak", "espeak-ng")
    if backend not in supported:
        raise Exception("Unknown backend: %s" % backend)
    # Store the name first, then query the matching executable for its version.
    self._ESPEAK_LIB = backend
    if backend == "espeak-ng":
        self._ESPEAK_VER = get_espeakng_version()
    else:
        self._ESPEAK_VER = get_espeak_version()
def auto_set_espeak_lib(self) -> None:
    """Detect an installed espeak executable and record its name and version.

    ``espeak-ng`` is tried first, then ``espeak``. The first one that
    ``is_tool`` reports as available is stored in ``_ESPEAK_LIB`` and its
    version in ``_ESPEAK_VER``.

    Raises:
        Exception: If neither executable is found.
    """
    candidates = (
        ("espeak-ng", get_espeakng_version),
        ("espeak", get_espeak_version),
    )
    for lib_name, read_version in candidates:
        if is_tool(lib_name):
            self._ESPEAK_LIB = lib_name
            self._ESPEAK_VER = read_version()
            return
    raise Exception("Cannot set backend automatically. espeak-ng or espeak not found")
@ -146,7 +168,10 @@ class ESpeak(BasePhonemizer):
else:
# split with '_'
if self.backend == "espeak":
args.append("--ipa=3")
if LooseVersion(self.backend_version) >= LooseVersion("1.48.15"):
args.append("--ipa=1")
else:
args.append("--ipa=3")
else:
args.append("--ipa=1")
if tie:

View File

@ -1,4 +1,5 @@
import unittest
from distutils.version import LooseVersion
from TTS.tts.utils.text.phonemizers import ESpeak, Gruut, JA_JP_Phonemizer, ZH_CN_Phonemizer
@ -18,6 +19,14 @@ EXPECTED_ESPEAK_PHONEMES = [
]
# Expected phonemizer output for EXAMPLE_TEXTs, used by the espeak test when
# the backend reports a version >= 1.48.15.
EXPECTED_ESPEAK_v1_48_15_PHONEMES = [
"ɹ|ˈiː|s|ə|n|t ɹ|ɪ|s|ˈɜː|tʃ æ|t h|ˈɑːɹ|v|ɚ|d h|ɐ|z ʃ|ˈoʊ|n m|ˈɛ|d|ᵻ|t|ˌeɪ|ɾ|ɪ",
"f|ɔː|ɹ æ|z l|ˈɪ|ɾ|əl æ|z ˈeɪ|t w|ˈiː|k|s k|æ|n ˈæ|k|tʃ|uː|əl|i| ˈɪ|n|k|ɹ|iː|s, ð|ə ɡ|ɹ|ˈeɪ m|ˈæ|ɾ|ɚ",
"ɪ|n|ð|ə p|ˈɑːɹ|t|s ʌ|v|ð|ə b|ɹ|ˈeɪ|n ɹ|ɪ|s|p|ˈɑː|n|s|ə|b|əl",
"f|ɔː|ɹ ɪ|m|ˈoʊ|ʃ|ə|n|əl ɹ|ˌɛ|ɡ|j|uː|l|ˈeɪ|ʃ|ə|n|| æ|n|d l|ˈɜː|n|ɪ|ŋ!",
]
EXPECTED_ESPEAKNG_PHONEMES = [
"ɹ|ˈiː|s|ə|n|t ɹ|ᵻ|s|ˈɜː|tʃ æ|t h|ˈɑːɹ|v|ɚ|d h|ɐ|z ʃ|ˈoʊ|n m|ˈɛ|d|ᵻ|t|ˌeɪ|ɾ|ɪ",
"f|ɔː|ɹ æ|z l|ˈɪ|ɾ|əl æ|z ˈeɪ|t w|ˈiː|k|s k|æ|n ˈæ|k|tʃ|uː|əl|i| ˈɪ|ŋ|k|ɹ|iː|s, ð|ə ɡ|ɹ|ˈeɪ m|ˈæ|ɾ|ɚ",
@ -30,13 +39,20 @@ class TestEspeakPhonemizer(unittest.TestCase):
def setUp(self):
self.phonemizer = ESpeak(language="en-us", backend="espeak")
for text, ph in zip(EXAMPLE_TEXTs, EXPECTED_ESPEAK_PHONEMES):
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
target_phonemes = EXPECTED_ESPEAK_v1_48_15_PHONEMES
else:
target_phonemes = EXPECTED_ESPEAK_PHONEMES
for text, ph in zip(EXAMPLE_TEXTs, target_phonemes):
phonemes = self.phonemizer.phonemize(text)
self.assertEqual(phonemes, ph)
# multiple punctuations
text = "Be a voice, not an! echo?"
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ?"
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ?"
output = self.phonemizer.phonemize(text, separator="|")
output = output.replace("|", "")
self.assertEqual(output, gt)
@ -44,12 +60,16 @@ class TestEspeakPhonemizer(unittest.TestCase):
# not ending with punctuation
text = "Be a voice, not an! echo"
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ"
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ"
output = self.phonemizer.phonemize(text, separator="")
self.assertEqual(output, gt)
# extra space after the sentence
text = "Be a voice, not an! echo. "
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ."
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ."
output = self.phonemizer.phonemize(text, separator="")
self.assertEqual(output, gt)