fix(espeak_wrapper): read phonemize() input from file

Avoids UTF-8 encoding issues on Windows that occur when the text is passed directly as a command-line argument.
Fixes https://github.com/coqui-ai/TTS/discussions/3761
This commit is contained in:
Enno Hermann 2024-05-29 09:52:18 +02:00
parent 49fcbd908b
commit 07cbcf825c
2 changed files with 15 additions and 1 deletions

View File

@ -3,6 +3,8 @@
import logging import logging
import re import re
import subprocess import subprocess
import tempfile
from pathlib import Path
from typing import Optional from typing import Optional
from packaging.version import Version from packaging.version import Version
@ -184,7 +186,12 @@ class ESpeak(BasePhonemizer):
if tie: if tie:
args.append("--tie=%s" % tie) args.append("--tie=%s" % tie)
args.append(text) tmp = tempfile.NamedTemporaryFile(mode="w+t", delete=False, encoding="utf8")
tmp.write(text)
tmp.close()
args.append("-f")
args.append(tmp.name)
# compute phonemes # compute phonemes
phonemes = "" phonemes = ""
for line in _espeak_exe(self.backend, args): for line in _espeak_exe(self.backend, args):
@ -200,6 +207,7 @@ class ESpeak(BasePhonemizer):
ph_decoded = re.sub(r"\(.+?\)", "", line) ph_decoded = re.sub(r"\(.+?\)", "", line)
phonemes += ph_decoded.strip() phonemes += ph_decoded.strip()
Path(tmp.name).unlink()
return phonemes.replace("_", separator) return phonemes.replace("_", separator)
def _phonemize(self, text: str, separator: str = "") -> str: def _phonemize(self, text: str, separator: str = "") -> str:

View File

@ -116,6 +116,12 @@ class TestEspeakNgPhonemizer(unittest.TestCase):
output = self.phonemizer.phonemize(text, separator="") output = self.phonemizer.phonemize(text, separator="")
self.assertEqual(output, gt) self.assertEqual(output, gt)
# UTF8 characters
text = "źrebię"
gt = "ʑrˈɛbjɛ"
output = ESpeak("pl").phonemize(text, separator="")
self.assertEqual(output, gt)
def test_name(self): def test_name(self):
self.assertEqual(self.phonemizer.name(), "espeak") self.assertEqual(self.phonemizer.name(), "espeak")