From fdeefcc6126dfe1382696d9105992295883be0a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Mon, 12 Dec 2022 11:23:45 +0100
Subject: [PATCH] Handle espeak 1.48.15 (#2203)

---
 .../utils/text/phonemizers/espeak_wrapper.py  | 27 ++++++++++++++++++-
 tests/text_tests/test_phonemizer.py           | 22 ++++++++++++++-
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py
index 89d4abae..5c0865bc 100644
--- a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py
+++ b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py
@@ -1,6 +1,7 @@
 import logging
 import re
 import subprocess
+from distutils.version import LooseVersion
 from typing import Dict, List
 
 from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
@@ -13,13 +14,26 @@ def is_tool(name):
     return which(name) is not None
 
 
+def get_espeak_version():
+    output = subprocess.getoutput("espeak --version")
+    return output.split()[2]
+
+
+def get_espeakng_version():
+    output = subprocess.getoutput("espeak-ng --version")
+    return output.split()[3]
+
+
 # priority: espeakng > espeak
 if is_tool("espeak-ng"):
     _DEF_ESPEAK_LIB = "espeak-ng"
+    _DEF_ESPEAK_VER = get_espeakng_version()
 elif is_tool("espeak"):
     _DEF_ESPEAK_LIB = "espeak"
+    _DEF_ESPEAK_VER = get_espeak_version()
 else:
     _DEF_ESPEAK_LIB = None
+    _DEF_ESPEAK_VER = None
 
 
 def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]:
@@ -85,6 +99,7 @@ class ESpeak(BasePhonemizer):
     """
 
     _ESPEAK_LIB = _DEF_ESPEAK_LIB
+    _ESPEAK_VER = _DEF_ESPEAK_VER
 
     def __init__(self, language: str, backend=None, punctuations=Punctuation.default_puncs(), keep_puncs=True):
         if self._ESPEAK_LIB is None:
@@ -105,17 +120,24 @@ class ESpeak(BasePhonemizer):
     def backend(self):
         return self._ESPEAK_LIB
 
+    @property
+    def backend_version(self):
+        return self._ESPEAK_VER
+
     @backend.setter
     def backend(self, backend):
         if backend not in ["espeak", "espeak-ng"]:
             raise Exception("Unknown backend: %s" % backend)
         self._ESPEAK_LIB = backend
+        self._ESPEAK_VER = get_espeakng_version() if backend == "espeak-ng" else get_espeak_version()
 
     def auto_set_espeak_lib(self) -> None:
         if is_tool("espeak-ng"):
             self._ESPEAK_LIB = "espeak-ng"
+            self._ESPEAK_VER = get_espeakng_version()
         elif is_tool("espeak"):
             self._ESPEAK_LIB = "espeak"
+            self._ESPEAK_VER = get_espeak_version()
         else:
             raise Exception("Cannot set backend automatically. espeak-ng or espeak not found")
 
@@ -146,7 +168,10 @@ class ESpeak(BasePhonemizer):
         else:
             # split with '_'
             if self.backend == "espeak":
-                args.append("--ipa=3")
+                if LooseVersion(self.backend_version) >= LooseVersion("1.48.15"):
+                    args.append("--ipa=1")
+                else:
+                    args.append("--ipa=3")
             else:
                 args.append("--ipa=1")
         if tie:
diff --git a/tests/text_tests/test_phonemizer.py b/tests/text_tests/test_phonemizer.py
index 9b619f6e..8261f2bb 100644
--- a/tests/text_tests/test_phonemizer.py
+++ b/tests/text_tests/test_phonemizer.py
@@ -1,4 +1,5 @@
 import unittest
+from distutils.version import LooseVersion
 
 from TTS.tts.utils.text.phonemizers import ESpeak, Gruut, JA_JP_Phonemizer, ZH_CN_Phonemizer
 
@@ -18,6 +19,14 @@ EXPECTED_ESPEAK_PHONEMES = [
 ]
 
 
+EXPECTED_ESPEAK_v1_48_15_PHONEMES = [
+    "ɹ|ˈiː|s|ə|n|t ɹ|ɪ|s|ˈɜː|tʃ æ|t h|ˈɑːɹ|v|ɚ|d h|ɐ|z ʃ|ˈoʊ|n m|ˈɛ|d|ᵻ|t|ˌeɪ|ɾ|ɪ|ŋ",
+    "f|ɔː|ɹ æ|z l|ˈɪ|ɾ|əl æ|z ˈeɪ|t w|ˈiː|k|s k|æ|n ˈæ|k|tʃ|uː|əl|i| ˈɪ|n|k|ɹ|iː|s, ð|ə ɡ|ɹ|ˈeɪ m|ˈæ|ɾ|ɚ",
+    "ɪ|n|ð|ə p|ˈɑːɹ|t|s ʌ|v|ð|ə b|ɹ|ˈeɪ|n ɹ|ɪ|s|p|ˈɑː|n|s|ə|b|əl",
+    "f|ɔː|ɹ ɪ|m|ˈoʊ|ʃ|ə|n|əl ɹ|ˌɛ|ɡ|j|uː|l|ˈeɪ|ʃ|ə|n|| æ|n|d l|ˈɜː|n|ɪ|ŋ!",
+]
+
+
 EXPECTED_ESPEAKNG_PHONEMES = [
     "ɹ|ˈiː|s|ə|n|t ɹ|ᵻ|s|ˈɜː|tʃ æ|t h|ˈɑːɹ|v|ɚ|d h|ɐ|z ʃ|ˈoʊ|n m|ˈɛ|d|ᵻ|t|ˌeɪ|ɾ|ɪ|ŋ",
     "f|ɔː|ɹ æ|z l|ˈɪ|ɾ|əl æ|z ˈeɪ|t w|ˈiː|k|s k|æ|n ˈæ|k|tʃ|uː|əl|i| ˈɪ|ŋ|k|ɹ|iː|s, ð|ə ɡ|ɹ|ˈeɪ m|ˈæ|ɾ|ɚ",
@@ -30,13 +39,20 @@ class TestEspeakPhonemizer(unittest.TestCase):
     def setUp(self):
         self.phonemizer = ESpeak(language="en-us", backend="espeak")
 
-        for text, ph in zip(EXAMPLE_TEXTs, EXPECTED_ESPEAK_PHONEMES):
+        if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
+            target_phonemes = EXPECTED_ESPEAK_v1_48_15_PHONEMES
+        else:
+            target_phonemes = EXPECTED_ESPEAK_PHONEMES
+
+        for text, ph in zip(EXAMPLE_TEXTs, target_phonemes):
             phonemes = self.phonemizer.phonemize(text)
             self.assertEqual(phonemes, ph)
 
         # multiple punctuations
         text = "Be a voice, not an! echo?"
         gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ?"
+        if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
+            gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ?"
         output = self.phonemizer.phonemize(text, separator="|")
         output = output.replace("|", "")
         self.assertEqual(output, gt)
@@ -44,12 +60,16 @@ class TestEspeakPhonemizer(unittest.TestCase):
         # not ending with punctuation
         text = "Be a voice, not an! echo"
         gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ"
+        if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
+            gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ"
         output = self.phonemizer.phonemize(text, separator="")
         self.assertEqual(output, gt)
 
         # extra space after the sentence
         text = "Be a voice, not an! echo. "
         gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ."
+        if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"):
+            gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ."
         output = self.phonemizer.phonemize(text, separator="")
         self.assertEqual(output, gt)
 