mirror of https://github.com/coqui-ai/TTS.git
commit
e07c6f54fd
11
TTS/api.py
11
TTS/api.py
|
@ -4,7 +4,7 @@ import os
|
||||||
import tempfile
|
import tempfile
|
||||||
import urllib.request
|
import urllib.request
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Tuple
|
from typing import Tuple, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import requests
|
import requests
|
||||||
|
@ -86,7 +86,6 @@ class CS_API:
|
||||||
return ["Neutral", "Happy", "Sad", "Angry", "Dull"]
|
return ["Neutral", "Happy", "Sad", "Angry", "Dull"]
|
||||||
|
|
||||||
def _check_token(self):
|
def _check_token(self):
|
||||||
self.ping_api()
|
|
||||||
if self.api_token is None:
|
if self.api_token is None:
|
||||||
self.api_token = os.environ.get("COQUI_STUDIO_TOKEN")
|
self.api_token = os.environ.get("COQUI_STUDIO_TOKEN")
|
||||||
self.headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_token}"}
|
self.headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_token}"}
|
||||||
|
@ -183,6 +182,7 @@ class CS_API:
|
||||||
language (str): Language of the text. If None, the default language of the speaker is used.
|
language (str): Language of the text. If None, the default language of the speaker is used.
|
||||||
"""
|
"""
|
||||||
self._check_token()
|
self._check_token()
|
||||||
|
self.ping_api()
|
||||||
if speaker_name is None and speaker_id is None:
|
if speaker_name is None and speaker_id is None:
|
||||||
raise ValueError(" [!] Please provide either a `speaker_name` or a `speaker_id`.")
|
raise ValueError(" [!] Please provide either a `speaker_name` or a `speaker_id`.")
|
||||||
if speaker_id is None:
|
if speaker_id is None:
|
||||||
|
@ -457,7 +457,7 @@ class TTS:
|
||||||
emotion: str = "Neutral",
|
emotion: str = "Neutral",
|
||||||
speed: float = 1.0,
|
speed: float = 1.0,
|
||||||
file_path: str = None,
|
file_path: str = None,
|
||||||
):
|
) -> Union[np.ndarray, str]:
|
||||||
"""Convert text to speech using Coqui Studio models. Use `CS_API` class if you are only interested in the API.
|
"""Convert text to speech using Coqui Studio models. Use `CS_API` class if you are only interested in the API.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -473,9 +473,12 @@ class TTS:
|
||||||
Speed of the speech. Defaults to 1.0.
|
Speed of the speech. Defaults to 1.0.
|
||||||
file_path (str, optional):
|
file_path (str, optional):
|
||||||
Path to save the output file. When None it returns the `np.ndarray` of waveform. Defaults to None.
|
Path to save the output file. When None it returns the `np.ndarray` of waveform. Defaults to None.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Union[np.ndarray, str]: Waveform of the synthesized speech or path to the output file.
|
||||||
"""
|
"""
|
||||||
speaker_name = self.model_name.split("/")[2]
|
speaker_name = self.model_name.split("/")[2]
|
||||||
if file_path is None:
|
if file_path is not None:
|
||||||
return self.csapi.tts_to_file(
|
return self.csapi.tts_to_file(
|
||||||
text=text,
|
text=text,
|
||||||
speaker_name=speaker_name,
|
speaker_name=speaker_name,
|
||||||
|
|
|
@ -8,6 +8,7 @@ from argparse import RawTextHelpFormatter
|
||||||
# pylint: disable=redefined-outer-name, unused-argument
|
# pylint: disable=redefined-outer-name, unused-argument
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from TTS.api import TTS
|
||||||
from TTS.utils.manage import ModelManager
|
from TTS.utils.manage import ModelManager
|
||||||
from TTS.utils.synthesizer import Synthesizer
|
from TTS.utils.synthesizer import Synthesizer
|
||||||
|
|
||||||
|
@ -183,6 +184,14 @@ If you don't specify any models, then it uses LJSpeech based English model.
|
||||||
)
|
)
|
||||||
parser.add_argument("--encoder_config_path", type=str, help="Path to speaker encoder config file.", default=None)
|
parser.add_argument("--encoder_config_path", type=str, help="Path to speaker encoder config file.", default=None)
|
||||||
|
|
||||||
|
# args for coqui studio
|
||||||
|
parser.add_argument(
|
||||||
|
"--emotion",
|
||||||
|
type=str,
|
||||||
|
help="Emotion to condition the model with. Only available for 🐸Coqui Studio models.",
|
||||||
|
default="Neutral",
|
||||||
|
)
|
||||||
|
|
||||||
# args for multi-speaker synthesis
|
# args for multi-speaker synthesis
|
||||||
parser.add_argument("--speakers_file_path", type=str, help="JSON file for multi-speaker model.", default=None)
|
parser.add_argument("--speakers_file_path", type=str, help="JSON file for multi-speaker model.", default=None)
|
||||||
parser.add_argument("--language_ids_file_path", type=str, help="JSON file for multi-lingual model.", default=None)
|
parser.add_argument("--language_ids_file_path", type=str, help="JSON file for multi-lingual model.", default=None)
|
||||||
|
@ -285,6 +294,7 @@ If you don't specify any models, then it uses LJSpeech based English model.
|
||||||
# load model manager
|
# load model manager
|
||||||
path = Path(__file__).parent / "../.models.json"
|
path = Path(__file__).parent / "../.models.json"
|
||||||
manager = ModelManager(path, progress_bar=args.progress_bar)
|
manager = ModelManager(path, progress_bar=args.progress_bar)
|
||||||
|
api = TTS()
|
||||||
|
|
||||||
tts_path = None
|
tts_path = None
|
||||||
tts_config_path = None
|
tts_config_path = None
|
||||||
|
@ -299,6 +309,7 @@ If you don't specify any models, then it uses LJSpeech based English model.
|
||||||
|
|
||||||
# CASE1 #list : list pre-trained TTS models
|
# CASE1 #list : list pre-trained TTS models
|
||||||
if args.list_models:
|
if args.list_models:
|
||||||
|
manager.add_cs_api_models(api.list_models())
|
||||||
manager.list_models()
|
manager.list_models()
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
|
@ -313,7 +324,15 @@ If you don't specify any models, then it uses LJSpeech based English model.
|
||||||
manager.model_info_by_full_name(model_query_full_name)
|
manager.model_info_by_full_name(model_query_full_name)
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
# CASE3: load pre-trained model paths
|
# CASE3: TTS with coqui studio models
|
||||||
|
if "coqui_studio" in args.model_name:
|
||||||
|
print(" > Using 🐸Coqui Studio model: ", args.model_name)
|
||||||
|
api = TTS(model_name=args.model_name)
|
||||||
|
api.tts_to_file(text=args.text, emotion=args.emotion, file_path=args.out_path)
|
||||||
|
print(" > Saving output to ", args.out_path)
|
||||||
|
return
|
||||||
|
|
||||||
|
# CASE4: load pre-trained model paths
|
||||||
if args.model_name is not None and not args.model_path:
|
if args.model_name is not None and not args.model_path:
|
||||||
model_path, config_path, model_item = manager.download_model(args.model_name)
|
model_path, config_path, model_item = manager.download_model(args.model_name)
|
||||||
|
|
||||||
|
@ -333,7 +352,7 @@ If you don't specify any models, then it uses LJSpeech based English model.
|
||||||
if args.vocoder_name is not None and not args.vocoder_path:
|
if args.vocoder_name is not None and not args.vocoder_path:
|
||||||
vocoder_path, vocoder_config_path, _ = manager.download_model(args.vocoder_name)
|
vocoder_path, vocoder_config_path, _ = manager.download_model(args.vocoder_name)
|
||||||
|
|
||||||
# CASE4: set custom model paths
|
# CASE5: set custom model paths
|
||||||
if args.model_path is not None:
|
if args.model_path is not None:
|
||||||
tts_path = args.model_path
|
tts_path = args.model_path
|
||||||
tts_config_path = args.config_path
|
tts_config_path = args.config_path
|
||||||
|
|
|
@ -3,7 +3,7 @@ import os
|
||||||
import zipfile
|
import zipfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from shutil import copyfile, rmtree
|
from shutil import copyfile, rmtree
|
||||||
from typing import Dict, Tuple
|
from typing import Dict, List, Tuple
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
@ -63,6 +63,28 @@ class ModelManager(object):
|
||||||
with open(file_path, "r", encoding="utf-8") as json_file:
|
with open(file_path, "r", encoding="utf-8") as json_file:
|
||||||
self.models_dict = json.load(json_file)
|
self.models_dict = json.load(json_file)
|
||||||
|
|
||||||
|
def add_cs_api_models(self, model_list: List[str]):
|
||||||
|
"""Add list of Coqui Studio model names that are returned from the api
|
||||||
|
|
||||||
|
Each has the following format `<coqui_studio_model>/en/<speaker_name>/<coqui_studio_model>`
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _add_model(model_name: str):
|
||||||
|
if not "coqui_studio" in model_name:
|
||||||
|
return
|
||||||
|
model_type, lang, dataset, model = model_name.split("/")
|
||||||
|
if model_type not in self.models_dict:
|
||||||
|
self.models_dict[model_type] = {}
|
||||||
|
if lang not in self.models_dict[model_type]:
|
||||||
|
self.models_dict[model_type][lang] = {}
|
||||||
|
if dataset not in self.models_dict[model_type][lang]:
|
||||||
|
self.models_dict[model_type][lang][dataset] = {}
|
||||||
|
if model not in self.models_dict[model_type][lang][dataset]:
|
||||||
|
self.models_dict[model_type][lang][dataset][model] = {}
|
||||||
|
|
||||||
|
for model_name in model_list:
|
||||||
|
_add_model(model_name)
|
||||||
|
|
||||||
def _list_models(self, model_type, model_count=0):
|
def _list_models(self, model_type, model_count=0):
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
print(" Name format: type/language/dataset/model")
|
print(" Name format: type/language/dataset/model")
|
||||||
|
|
|
@ -19,9 +19,9 @@ def test_synthesize():
|
||||||
f'--text "This is an example." --out_path "{output_path}"'
|
f'--text "This is an example." --out_path "{output_path}"'
|
||||||
)
|
)
|
||||||
|
|
||||||
# multi-speaker SC-Glow model
|
# 🐸 Coqui studio model
|
||||||
# run_cli("tts --model_name tts_models/en/vctk/sc-glow-tts --list_speaker_idxs")
|
run_cli(
|
||||||
# run_cli(
|
'tts --model_name "coqui_studio/en/Torcull Diarmuid/coqui_studio" '
|
||||||
# f'tts --model_name tts_models/en/vctk/sc-glow-tts --speaker_idx "p304" '
|
'--text "This is it" '
|
||||||
# f'--text "This is an example." --out_path "{output_path}"'
|
f'--out_path "{output_path}"'
|
||||||
# )
|
)
|
||||||
|
|
Loading…
Reference in New Issue