fix(espeak_wrapper): read phonemize() input from file

Avoids UTF-8 encoding issues on Windows that occur when the text is passed directly as a command-line argument.
Fixes https://github.com/coqui-ai/TTS/discussions/3761
This commit is contained in:
Enno Hermann 2024-05-29 09:52:18 +02:00
parent 49fcbd908b
commit 07cbcf825c
2 changed files with 15 additions and 1 deletions

View File

@ -3,6 +3,8 @@
import logging
import re
import subprocess
import tempfile
from pathlib import Path
from typing import Optional
from packaging.version import Version
@ -184,7 +186,12 @@ class ESpeak(BasePhonemizer):
if tie:
args.append("--tie=%s" % tie)
args.append(text)
tmp = tempfile.NamedTemporaryFile(mode="w+t", delete=False, encoding="utf8")
tmp.write(text)
tmp.close()
args.append("-f")
args.append(tmp.name)
# compute phonemes
phonemes = ""
for line in _espeak_exe(self.backend, args):
@ -200,6 +207,7 @@ class ESpeak(BasePhonemizer):
ph_decoded = re.sub(r"\(.+?\)", "", line)
phonemes += ph_decoded.strip()
Path(tmp.name).unlink()
return phonemes.replace("_", separator)
def _phonemize(self, text: str, separator: str = "") -> str:

View File

@ -116,6 +116,12 @@ class TestEspeakNgPhonemizer(unittest.TestCase):
output = self.phonemizer.phonemize(text, separator="")
self.assertEqual(output, gt)
# UTF8 characters
text = "źrebię"
gt = "ʑrˈɛbjɛ"
output = ESpeak("pl").phonemize(text, separator="")
self.assertEqual(output, gt)
def test_name(self):
self.assertEqual(self.phonemizer.name(), "espeak")