Basic Mary-TTS API compatibility (#2352)

* added basic Mary-TTS API endpoints to server

- imported `parse_qs` from `urllib.parse` to parse HTTP POST parameters
- imported `render_template_string` from `flask` to return text as endpoint result
- added new routes:
  - `/locales` - returns list of locales (currently locale of active model)
  - `/voices` - returns list of voices (currently locale and name of active model)
  - `/process` - accepts synthesis requests (GET and POST) with the parameter `INPUT_TEXT` (other parameters are ignored since there is only one active model)

* better log messages for Mary-TTS API

- smaller tweaks to log output

* use f-string in log print to please linter

* updated server.py to match 'make style' result
This commit is contained in:
Florian Quirin 2023-03-06 10:08:21 +01:00 committed by GitHub
parent d39bc74f57
commit 478c8178b8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 49 additions and 4 deletions

View File

@ -7,8 +7,9 @@ import sys
from pathlib import Path from pathlib import Path
from threading import Lock from threading import Lock
from typing import Union from typing import Union
from urllib.parse import parse_qs
from flask import Flask, render_template, request, send_file from flask import Flask, render_template, render_template_string, request, send_file
from TTS.config import load_config from TTS.config import load_config
from TTS.utils.manage import ModelManager from TTS.utils.manage import ModelManager
@ -187,15 +188,59 @@ def tts():
language_idx = request.args.get("language_id", "") language_idx = request.args.get("language_id", "")
style_wav = request.args.get("style_wav", "") style_wav = request.args.get("style_wav", "")
style_wav = style_wav_uri_to_dict(style_wav) style_wav = style_wav_uri_to_dict(style_wav)
print(" > Model input: {}".format(text)) print(f" > Model input: {text}")
print(" > Speaker Idx: {}".format(speaker_idx)) print(f" > Speaker Idx: {speaker_idx}")
print(" > Language Idx: {}".format(language_idx)) print(f" > Language Idx: {language_idx}")
wavs = synthesizer.tts(text, speaker_name=speaker_idx, language_name=language_idx, style_wav=style_wav) wavs = synthesizer.tts(text, speaker_name=speaker_idx, language_name=language_idx, style_wav=style_wav)
out = io.BytesIO() out = io.BytesIO()
synthesizer.save_wav(wavs, out) synthesizer.save_wav(wavs, out)
return send_file(out, mimetype="audio/wav") return send_file(out, mimetype="audio/wav")
# Basic MaryTTS compatibility layer
@app.route("/locales", methods=["GET"])
def mary_tts_api_locales():
    """MaryTTS-compatible /locales endpoint.

    Returns:
        A single text line with the locale of the active model. The locale is
        taken from the second "/"-separated segment of ``args.model_name``;
        "en" is used when no model name is set or that segment is missing or
        empty.
    """
    # NOTE: We currently assume there is only one model active at the same time
    locale = "en"
    if args.model_name is not None:
        # Presumably the name looks like "<type>/<language>/<dataset>/<model>"
        # -- TODO confirm against the model manager.
        segments = args.model_name.split("/")
        # Guard the lookup: a name with fewer than two segments would
        # otherwise raise IndexError and surface as an HTTP 500.
        if len(segments) > 1 and segments[1]:
            locale = segments[1]
    return render_template_string("{{ locale }}\n", locale=locale)
@app.route("/voices", methods=["GET"])
def mary_tts_api_voices():
    """MaryTTS-compatible /voices endpoint.

    Returns:
        A single text line of the form "<name> <locale> <gender>" describing
        the active model. ``name`` and ``locale`` are the fourth and second
        "/"-separated segments of ``args.model_name``; "default" and "en" are
        used when no model name is set or the segment is missing or empty.
        The gender is always reported as "u".
    """
    # NOTE: We currently assume there is only one model active at the same time
    name, locale = "default", "en"
    if args.model_name is not None:
        # Presumably the name looks like "<type>/<language>/<dataset>/<model>"
        # -- TODO confirm against the model manager.
        segments = args.model_name.split("/")
        # Guard each lookup: a name with too few segments would otherwise
        # raise IndexError and surface as an HTTP 500.
        if len(segments) > 1 and segments[1]:
            locale = segments[1]
        if len(segments) > 3 and segments[3]:
            name = segments[3]
    return render_template_string(
        "{{ name }} {{ locale }} {{ gender }}\n", name=name, locale=locale, gender="u"
    )
@app.route("/process", methods=["GET", "POST"])
def mary_tts_api_process():
    """MaryTTS-compatible /process endpoint.

    Reads the text to synthesize from the ``INPUT_TEXT`` parameter (query
    string for GET, URL-encoded request body for POST), synthesizes it with
    the active model and returns the audio as a WAV response. A missing
    ``INPUT_TEXT`` is treated as an empty string.
    """
    with lock:
        if request.method != "POST":
            text = request.args.get("INPUT_TEXT", "")
        else:
            # The raw body is parsed as a query string; parse_qs maps each
            # key to a list of values, so only the first one is used.
            # NOTE: we ignore param. LOCALE and VOICE for now since we have only one active model
            form_fields = parse_qs(request.get_data(as_text=True))
            text = form_fields.get("INPUT_TEXT", [""])[0]
        print(f" > Model input: {text}")
        wav_buffer = io.BytesIO()
        synthesizer.save_wav(synthesizer.tts(text), wav_buffer)
    return send_file(wav_buffer, mimetype="audio/wav")
def main(): def main():
app.run(debug=args.debug, host="::", port=args.port) app.run(debug=args.debug, host="::", port=args.port)