mirror of https://github.com/coqui-ai/TTS.git
fix(espeak_wrapper): read phonemize() input from file
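Avoids UTF-8 encoding issues on Windows that occur when the text is passed directly as a command-line argument; the input is now written to a temporary UTF-8 file that espeak reads via `-f`. Fixes https://github.com/coqui-ai/TTS/discussions/3761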
This commit is contained in:
parent
49fcbd908b
commit
07cbcf825c
|
@ -3,6 +3,8 @@
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from packaging.version import Version
|
from packaging.version import Version
|
||||||
|
@ -184,7 +186,12 @@ class ESpeak(BasePhonemizer):
|
||||||
if tie:
|
if tie:
|
||||||
args.append("--tie=%s" % tie)
|
args.append("--tie=%s" % tie)
|
||||||
|
|
||||||
args.append(text)
|
tmp = tempfile.NamedTemporaryFile(mode="w+t", delete=False, encoding="utf8")
|
||||||
|
tmp.write(text)
|
||||||
|
tmp.close()
|
||||||
|
args.append("-f")
|
||||||
|
args.append(tmp.name)
|
||||||
|
|
||||||
# compute phonemes
|
# compute phonemes
|
||||||
phonemes = ""
|
phonemes = ""
|
||||||
for line in _espeak_exe(self.backend, args):
|
for line in _espeak_exe(self.backend, args):
|
||||||
|
@ -200,6 +207,7 @@ class ESpeak(BasePhonemizer):
|
||||||
ph_decoded = re.sub(r"\(.+?\)", "", line)
|
ph_decoded = re.sub(r"\(.+?\)", "", line)
|
||||||
|
|
||||||
phonemes += ph_decoded.strip()
|
phonemes += ph_decoded.strip()
|
||||||
|
Path(tmp.name).unlink()
|
||||||
return phonemes.replace("_", separator)
|
return phonemes.replace("_", separator)
|
||||||
|
|
||||||
def _phonemize(self, text: str, separator: str = "") -> str:
|
def _phonemize(self, text: str, separator: str = "") -> str:
|
||||||
|
|
|
@ -116,6 +116,12 @@ class TestEspeakNgPhonemizer(unittest.TestCase):
|
||||||
output = self.phonemizer.phonemize(text, separator="")
|
output = self.phonemizer.phonemize(text, separator="")
|
||||||
self.assertEqual(output, gt)
|
self.assertEqual(output, gt)
|
||||||
|
|
||||||
|
# UTF8 characters
|
||||||
|
text = "źrebię"
|
||||||
|
gt = "ʑrˈɛbjɛ"
|
||||||
|
output = ESpeak("pl").phonemize(text, separator="")
|
||||||
|
self.assertEqual(output, gt)
|
||||||
|
|
||||||
def test_name(self):
|
def test_name(self):
|
||||||
self.assertEqual(self.phonemizer.name(), "espeak")
|
self.assertEqual(self.phonemizer.name(), "espeak")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue