diff --git a/TTS/server/server.py b/TTS/server/server.py index 345e4d50..c276a142 100644 --- a/TTS/server/server.py +++ b/TTS/server/server.py @@ -115,8 +115,13 @@ synthesizer = Synthesizer( use_multi_speaker = hasattr(synthesizer.tts_model, "num_speakers") and ( synthesizer.tts_model.num_speakers > 1 or synthesizer.tts_speakers_file is not None ) - speaker_manager = getattr(synthesizer.tts_model, "speaker_manager", None) + +use_multi_language = hasattr(synthesizer.tts_model, "num_languages") and ( + synthesizer.tts_model.num_languages > 1 or synthesizer.tts_languages_file is not None +) +language_manager = getattr(synthesizer.tts_model, "language_manager", None) + # TODO: set this from SpeakerManager use_gst = synthesizer.tts_config.get("use_gst", False) app = Flask(__name__) @@ -147,7 +152,9 @@ def index(): "index.html", show_details=args.show_details, use_multi_speaker=use_multi_speaker, + use_multi_language=use_multi_language, speaker_ids=speaker_manager.name_to_id if speaker_manager is not None else None, + language_ids=language_manager.name_to_id if language_manager is not None else None, use_gst=use_gst, ) @@ -177,11 +184,13 @@ def tts(): with lock: text = request.args.get("text") speaker_idx = request.args.get("speaker_id", "") + language_idx = request.args.get("language_id", "") style_wav = request.args.get("style_wav", "") style_wav = style_wav_uri_to_dict(style_wav) print(" > Model input: {}".format(text)) print(" > Speaker Idx: {}".format(speaker_idx)) - wavs = synthesizer.tts(text, speaker_name=speaker_idx, style_wav=style_wav) + print(" > Language Idx: {}".format(language_idx)) + wavs = synthesizer.tts(text, speaker_name=speaker_idx, language_name=language_idx, style_wav=style_wav) out = io.BytesIO() synthesizer.save_wav(wavs, out) return send_file(out, mimetype="audio/wav") diff --git a/TTS/server/templates/index.html b/TTS/server/templates/index.html index b0eab291..6354d391 100644 --- a/TTS/server/templates/index.html +++ b/TTS/server/templates/index.html @@ -65,7 +65,7 @@ {%if use_gst%} - {%endif%} @@ -81,6 +81,16 @@

{%endif%} + {%if use_multi_language%} + Choose a language: +

+ {%endif%} + + {%if show_details%}

@@ -106,11 +116,12 @@ const text = q('#text').value const speaker_id = getTextValue('#speaker_id') const style_wav = getTextValue('#style_wav') + const language_id = getTextValue('#language_id') if (text) { q('#message').textContent = 'Synthesizing...' q('#speak-button').disabled = true q('#audio').hidden = true - synthesize(text, speaker_id, style_wav) + synthesize(text, speaker_id, style_wav, language_id) } e.preventDefault() return false @@ -121,8 +132,8 @@ do_tts(e) } }) - function synthesize(text, speaker_id = "", style_wav = "") { - fetch(`/api/tts?text=${encodeURIComponent(text)}&speaker_id=${encodeURIComponent(speaker_id)}&style_wav=${encodeURIComponent(style_wav)}`, { cache: 'no-cache' }) + function synthesize(text, speaker_id = "", style_wav = "", language_id = "") { + fetch(`/api/tts?text=${encodeURIComponent(text)}&speaker_id=${encodeURIComponent(speaker_id)}&style_wav=${encodeURIComponent(style_wav)}&language_id=${encodeURIComponent(language_id)}`, { cache: 'no-cache' }) .then(function (res) { if (!res.ok) throw Error(res.statusText) return res.blob() diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index 4a0ab038..2cef8d70 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -242,7 +242,7 @@ class Synthesizer(object): "Define path for speaker.json if it is a multi-speaker model or remove defined speaker idx. " ) - # handle multi-lingaul + # handle multi-lingual language_id = None if self.tts_languages_file or ( hasattr(self.tts_model, "language_manager") and self.tts_model.language_manager is not None