Merge pull request #34 from idiap/espeak

Fix Espeak issues on Windows
This commit is contained in:
Enno Hermann 2024-05-29 14:16:10 +01:00 committed by GitHub
commit c5f3d63c67
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 30 additions and 34 deletions

View File

@@ -3,6 +3,8 @@
import logging import logging
import re import re
import subprocess import subprocess
import tempfile
from pathlib import Path
from typing import Optional from typing import Optional
from packaging.version import Version from packaging.version import Version
@@ -50,7 +52,7 @@ else:
_DEF_ESPEAK_VER = None _DEF_ESPEAK_VER = None
def _espeak_exe(espeak_lib: str, args: list, *, sync: bool = False) -> list[bytes]: def _espeak_exe(espeak_lib: str, args: list) -> list[str]:
"""Run espeak with the given arguments.""" """Run espeak with the given arguments."""
cmd = [ cmd = [
espeak_lib, espeak_lib,
@@ -59,32 +61,18 @@ def _espeak_exe(espeak_lib: str, args: list, *, sync: bool = False) -> list[byte
"1", # UTF8 text encoding "1", # UTF8 text encoding
] ]
cmd.extend(args) cmd.extend(args)
logger.debug("espeakng: executing %s", repr(cmd)) logger.debug("Executing: %s", repr(cmd))
with subprocess.Popen( p = subprocess.run(cmd, capture_output=True, encoding="utf8", check=True)
cmd, for line in p.stderr.strip().split("\n"):
stdout=subprocess.PIPE, if line.strip() != "":
stderr=subprocess.PIPE, logger.warning("%s: %s", espeak_lib, line.strip())
) as p: res = []
res = iter(p.stdout.readline, b"") for line in p.stdout.strip().split("\n"):
err = iter(p.stderr.readline, b"") if line.strip() != "":
for line in err: logger.debug("%s: %s", espeak_lib, line.strip())
logger.warning("espeakng: %s", line.decode("utf-8").strip()) res.append(line.strip())
if not sync: return res
p.stdout.close()
if p.stderr:
p.stderr.close()
if p.stdin:
p.stdin.close()
return res
res2 = list(res)
p.stdout.close()
if p.stderr:
p.stderr.close()
if p.stdin:
p.stdin.close()
p.wait()
return res2
class ESpeak(BasePhonemizer): class ESpeak(BasePhonemizer):
@@ -198,12 +186,15 @@ class ESpeak(BasePhonemizer):
if tie: if tie:
args.append("--tie=%s" % tie) args.append("--tie=%s" % tie)
args.append(text) tmp = tempfile.NamedTemporaryFile(mode="w+t", delete=False, encoding="utf8")
tmp.write(text)
tmp.close()
args.append("-f")
args.append(tmp.name)
# compute phonemes # compute phonemes
phonemes = "" phonemes = ""
for line in _espeak_exe(self.backend, args, sync=True): for line in _espeak_exe(self.backend, args):
logger.debug("line: %s", repr(line))
ph_decoded = line.decode("utf8").strip()
# espeak: # espeak:
# version 1.48.15: " p_ɹ_ˈaɪ_ɚ t_ə n_oʊ_v_ˈɛ_m_b_ɚ t_w_ˈɛ_n_t_i t_ˈuː\n" # version 1.48.15: " p_ɹ_ˈaɪ_ɚ t_ə n_oʊ_v_ˈɛ_m_b_ɚ t_w_ˈɛ_n_t_i t_ˈuː\n"
# espeak-ng: # espeak-ng:
@@ -213,9 +204,10 @@ class ESpeak(BasePhonemizer):
# "sɛʁtˈɛ̃ mˈo kɔm (en)fˈʊtbɔːl(fr) ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ." # "sɛʁtˈɛ̃ mˈo kɔm (en)fˈʊtbɔːl(fr) ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ."
# phonemize needs to remove the language flags of the returned text: # phonemize needs to remove the language flags of the returned text:
# "sɛʁtˈɛ̃ mˈo kɔm fˈʊtbɔːl ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ." # "sɛʁtˈɛ̃ mˈo kɔm fˈʊtbɔːl ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ."
ph_decoded = re.sub(r"\(.+?\)", "", ph_decoded) ph_decoded = re.sub(r"\(.+?\)", "", line)
phonemes += ph_decoded.strip() phonemes += ph_decoded.strip()
Path(tmp.name).unlink()
return phonemes.replace("_", separator) return phonemes.replace("_", separator)
def _phonemize(self, text: str, separator: str = "") -> str: def _phonemize(self, text: str, separator: str = "") -> str:
@@ -232,14 +224,12 @@ class ESpeak(BasePhonemizer):
return {} return {}
args = ["--voices"] args = ["--voices"]
langs = {} langs = {}
for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args, sync=True)): for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args)):
line = line.decode("utf8").strip()
if count > 0: if count > 0:
cols = line.split() cols = line.split()
lang_code = cols[1] lang_code = cols[1]
lang_name = cols[3] lang_name = cols[3]
langs[lang_code] = lang_name langs[lang_code] = lang_name
logger.debug("line: %s", repr(line))
return langs return langs
def version(self) -> str: def version(self) -> str:

View File

@@ -116,6 +116,12 @@ class TestEspeakNgPhonemizer(unittest.TestCase):
output = self.phonemizer.phonemize(text, separator="") output = self.phonemizer.phonemize(text, separator="")
self.assertEqual(output, gt) self.assertEqual(output, gt)
# UTF8 characters
text = "źrebię"
gt = "ʑrˈɛbjɛ"
output = ESpeak("pl").phonemize(text, separator="")
self.assertEqual(output, gt)
def test_name(self): def test_name(self):
self.assertEqual(self.phonemizer.name(), "espeak") self.assertEqual(self.phonemizer.name(), "espeak")