mirror of https://github.com/coqui-ai/TTS.git
Update model file extension (#1422)
* Update model file ext to `.pth`
* Update docs
* Rename more
* Find model files
parent ccdc2300dc
commit 72d85e53c9
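The commit is a pure rename of the checkpoint extension; PyTorch's `torch.save`/`torch.load` do not depend on the file suffix, so checkpoints saved under the old name keep working once renamed. A minimal migration sketch for downstream users (this helper is illustrative, not part of the commit, and assumes checkpoints live under a local `models/` directory):

```python
# Illustrative migration helper (not part of this commit): rename
# checkpoints saved with the old "*.pth.tar" extension to "*.pth".
import glob
import os

for old_path in glob.glob(os.path.join("models", "**", "*.pth.tar"), recursive=True):
    new_path = old_path[: -len(".tar")]  # "model.pth.tar" -> "model.pth"
    os.rename(old_path, new_path)
    print(f"renamed {old_path} -> {new_path}")
```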
@@ -115,6 +115,7 @@ venv.bak/
 *.swo
 
 # pytorch models
+*.pth
 *.pth.tar
 result/
 
@@ -159,13 +159,13 @@ If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](ht
 - Run your own TTS model (Using Griffin-Lim Vocoder):
 
 ```
-$ tts --text "Text for TTS" --model_path path/to/model.pth.tar --config_path path/to/config.json --out_path output/path/speech.wav
+$ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
 ```
 
 - Run your own TTS and Vocoder models:
 ```
-$ tts --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth.tar --out_path output/path/speech.wav
-    --vocoder_path path/to/vocoder.pth.tar --vocoder_config_path path/to/vocoder_config.json
+$ tts --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth --out_path output/path/speech.wav
+    --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json
 ```
 
 ### Multi-speaker Models
@@ -185,7 +185,7 @@ If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](ht
 - Run your own multi-speaker TTS model:
 
 ```
-$ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/config.json --config_path path/to/model.pth.tar --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id>
+$ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/config.json --config_path path/to/model.pth --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id>
 ```
 
 ## Directory Structure
@@ -25,7 +25,7 @@ These masks can be used for different purposes including training a TTS model wi
 """
 Example run:
 CUDA_VISIBLE_DEVICE="0" python TTS/bin/compute_attention_masks.py
-  --model_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/checkpoint_200000.pth.tar
+  --model_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/checkpoint_200000.pth
   --config_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/config.json
   --dataset_metafile metadata.csv
   --data_path /root/LJSpeech-1.1/
@@ -12,7 +12,7 @@ parser = argparse.ArgumentParser(
     description="""Compute embedding vectors for each wav file in a dataset.\n\n"""
     """
     Example runs:
-    python TTS/bin/compute_embeddings.py speaker_encoder_model.pth.tar speaker_encoder_config.json dataset_config.json embeddings_output_path/
+    python TTS/bin/compute_embeddings.py speaker_encoder_model.pth speaker_encoder_config.json dataset_config.json embeddings_output_path/
     """,
     formatter_class=RawTextHelpFormatter,
 )
@@ -1,55 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import os
-import pathlib
-import subprocess
-import time
-
-import torch
-from trainer import TrainerArgs
-
-
-def main():
-    """
-    Call train.py as a new process and pass command arguments
-    """
-    parser = TrainerArgs().init_argparse(arg_prefix="")
-    parser.add_argument("--script", type=str, help="Target training script to distribute.")
-    args, unargs = parser.parse_known_args()
-
-    num_gpus = torch.cuda.device_count()
-    group_id = time.strftime("%Y_%m_%d-%H%M%S")
-
-    # set arguments for train.py
-    folder_path = pathlib.Path(__file__).parent.absolute()
-    if os.path.exists(os.path.join(folder_path, args.script)):
-        command = [os.path.join(folder_path, args.script)]
-    else:
-        command = [args.script]
-    command.append("--continue_path={}".format(args.continue_path))
-    command.append("--restore_path={}".format(args.restore_path))
-    command.append("--config_path={}".format(args.config_path))
-    command.append("--group_id=group_{}".format(group_id))
-    command.append("--use_ddp=true")
-    command += unargs
-    command.append("")
-
-    # run one process per GPU
-    processes = []
-    for i in range(num_gpus):
-        my_env = os.environ.copy()
-        my_env["PYTHON_EGG_CACHE"] = "/tmp/tmp{}".format(i)
-        command[-1] = "--rank={}".format(i)
-        # prevent stdout for processes with rank != 0
-        stdout = None
-        p = subprocess.Popen(["python3"] + command, stdout=stdout, env=my_env)  # pylint: disable=consider-using-with
-        processes.append(p)
-        print(command)
-
-    for p in processes:
-        p.wait()
-
-
-if __name__ == "__main__":
-    main()
@@ -56,7 +56,7 @@ if __name__ == "__main__":
         description="""Compute the accuracy of the encoder.\n\n"""
         """
         Example runs:
-        python TTS/bin/eval_encoder.py emotion_encoder_model.pth.tar emotion_encoder_config.json dataset_config.json
+        python TTS/bin/eval_encoder.py emotion_encoder_model.pth emotion_encoder_config.json dataset_config.json
         """,
         formatter_class=RawTextHelpFormatter,
     )
@@ -60,13 +60,13 @@ If you don't specify any models, then it uses LJSpeech based English model.
 - Run your own TTS model (Using Griffin-Lim Vocoder):
 
 ```
-$ tts --text "Text for TTS" --model_path path/to/model.pth.tar --config_path path/to/config.json --out_path output/path/speech.wav
+$ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
 ```
 
 - Run your own TTS and Vocoder models:
 ```
-$ tts --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth.tar --out_path output/path/speech.wav
-    --vocoder_path path/to/vocoder.pth.tar --vocoder_config_path path/to/vocoder_config.json
+$ tts --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth --out_path output/path/speech.wav
+    --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json
 ```
 
 ### Multi-speaker Models
@@ -86,7 +86,7 @@ If you don't specify any models, then it uses LJSpeech based English model.
 - Run your own multi-speaker TTS model:
 
 ```
-$ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/config.json --config_path path/to/model.pth.tar --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id>
+$ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/config.json --config_path path/to/model.pth --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id>
 ```
 """
 # We remove Markdown code formatting programmatically here to allow us to copy-and-paste from main README to keep
@@ -14,5 +14,5 @@ To run the code, you need to follow the same flow as in TTS.
 
 - Define 'config.json' for your needs. Note that audio parameters should match your TTS model.
 - Example training call ```python speaker_encoder/train.py --config_path speaker_encoder/config.json --data_path ~/Data/Libri-TTS/train-clean-360```
-- Generate embedding vectors ```python speaker_encoder/compute_embeddings.py --use_cuda true /model/path/best_model.pth.tar model/config/path/config.json dataset/path/ output_path```. This code parses all .wav files at the given dataset path and generates the same folder structure under the output path with the generated embedding files.
+- Generate embedding vectors ```python speaker_encoder/compute_embeddings.py --use_cuda true /model/path/best_model.pth model/config/path/config.json dataset/path/ output_path```. This code parses all .wav files at the given dataset path and generates the same folder structure under the output path with the generated embedding files.
 - Watch training on Tensorboard as in TTS
@@ -147,7 +147,7 @@ def setup_speaker_encoder_model(config: "Coqpit"):
 
 
 def save_checkpoint(model, optimizer, criterion, model_loss, out_path, current_step, epoch):
-    checkpoint_path = "checkpoint_{}.pth.tar".format(current_step)
+    checkpoint_path = "checkpoint_{}.pth".format(current_step)
     checkpoint_path = os.path.join(out_path, checkpoint_path)
     print(" | | > Checkpoint saving : {}".format(checkpoint_path))
 
@@ -177,7 +177,7 @@ def save_best_model(model, optimizer, criterion, model_loss, best_loss, out_path
             "date": datetime.date.today().strftime("%B %d, %Y"),
         }
         best_loss = model_loss
-        bestmodel_path = "best_model.pth.tar"
+        bestmodel_path = "best_model.pth"
         bestmodel_path = os.path.join(out_path, bestmodel_path)
         print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path))
         save_fsspec(state, bestmodel_path)
@@ -5,7 +5,7 @@ from TTS.utils.io import save_fsspec
 
 
 def save_checkpoint(model, optimizer, model_loss, out_path, current_step):
-    checkpoint_path = "checkpoint_{}.pth.tar".format(current_step)
+    checkpoint_path = "checkpoint_{}.pth".format(current_step)
     checkpoint_path = os.path.join(out_path, checkpoint_path)
     print(" | | > Checkpoint saving : {}".format(checkpoint_path))
 
@@ -31,7 +31,7 @@ def save_best_model(model, optimizer, model_loss, best_loss, out_path, current_s
             "date": datetime.date.today().strftime("%B %d, %Y"),
         }
         best_loss = model_loss
-        bestmodel_path = "best_model.pth.tar"
+        bestmodel_path = "best_model.pth"
         bestmodel_path = os.path.join(out_path, bestmodel_path)
         print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path))
         save_fsspec(state, bestmodel_path)
@@ -21,4 +21,4 @@ Run the server with the official models on a GPU.
 ```CUDA_VISIBLE_DEVICES="0" python TTS/server/server.py --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/multiband-melgan --use_cuda True```
 
 Run the server with custom models.
-```python TTS/server/server.py --tts_checkpoint /path/to/tts/model.pth.tar --tts_config /path/to/tts/config.json --vocoder_checkpoint /path/to/vocoder/model.pth.tar --vocoder_config /path/to/vocoder/config.json```
+```python TTS/server/server.py --tts_checkpoint /path/to/tts/model.pth --tts_config /path/to/tts/config.json --vocoder_checkpoint /path/to/vocoder/model.pth --vocoder_config /path/to/vocoder/config.json```
@@ -1,6 +1,6 @@
 {
     "tts_path":"/media/erogol/data_ssd/Models/libri_tts/5049/", // tts model root folder
-    "tts_file":"best_model.pth.tar", // tts checkpoint file
+    "tts_file":"best_model.pth", // tts checkpoint file
     "tts_config":"config.json", // tts config.json file
     "tts_speakers": null, // json file listing speaker ids. null if no speaker embedding.
     "vocoder_config":null,
@@ -371,7 +371,7 @@ class AudioProcessor(object):
         self.hop_length = hop_length
         self.win_length = win_length
         assert min_level_db != 0.0, " [!] min_level_db is 0"
-        assert self.win_length <= self.fft_size, " [!] win_length cannot be larger than fft_size"
+        assert self.win_length <= self.fft_size, f" [!] win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}"
         members = vars(self)
         if verbose:
             print(" > Setting up Audio Processor...")
@@ -67,7 +67,7 @@ def get_experiment_folder_path(root_path, model_name):
 def remove_experiment_folder(experiment_path):
     """Check folder if there is a checkpoint, otherwise remove the folder"""
     fs = fsspec.get_mapper(experiment_path).fs
-    checkpoint_files = fs.glob(experiment_path + "/*.pth.tar")
+    checkpoint_files = fs.glob(experiment_path + "/*.pth")
     if not checkpoint_files:
         if fs.exists(experiment_path):
             fs.rm(experiment_path, recursive=True)
@@ -140,7 +140,7 @@ def save_checkpoint(
     output_folder,
     **kwargs,
 ):
-    file_name = "checkpoint_{}.pth.tar".format(current_step)
+    file_name = "checkpoint_{}.pth".format(current_step)
     checkpoint_path = os.path.join(output_folder, file_name)
     print("\n > CHECKPOINT : {}".format(checkpoint_path))
     save_model(
@@ -170,7 +170,7 @@ def save_best_model(
     **kwargs,
 ):
     if current_loss < best_loss:
-        best_model_name = f"best_model_{current_step}.pth.tar"
+        best_model_name = f"best_model_{current_step}.pth"
         checkpoint_path = os.path.join(out_path, best_model_name)
         print(" > BEST MODEL : {}".format(checkpoint_path))
         save_model(
@@ -187,12 +187,12 @@ def save_best_model(
         fs = fsspec.get_mapper(out_path).fs
         # only delete previous if current is saved successfully
         if not keep_all_best or (current_step < keep_after):
-            model_names = fs.glob(os.path.join(out_path, "best_model*.pth.tar"))
+            model_names = fs.glob(os.path.join(out_path, "best_model*.pth"))
             for model_name in model_names:
                 if os.path.basename(model_name) != best_model_name:
                     fs.rm(model_name)
         # create a shortcut which always points to the currently best model
-        shortcut_name = "best_model.pth.tar"
+        shortcut_name = "best_model.pth"
         shortcut_path = os.path.join(out_path, shortcut_name)
         fs.copy(checkpoint_path, shortcut_path)
         best_loss = current_loss
@@ -3,6 +3,7 @@ import json
 import os
 import zipfile
 from pathlib import Path
+from typing import Tuple
 from shutil import copyfile, rmtree
 
 import requests
@@ -114,7 +115,7 @@ class ModelManager(object):
         e.g. 'tts_model/en/ljspeech/tacotron'
 
         Every model must have the following files:
-            - *.pth.tar : pytorch model checkpoint file.
+            - *.pth : pytorch model checkpoint file.
             - config.json : model config file.
             - scale_stats.npy (if exist): scale values for preprocessing.
 
@@ -127,7 +128,7 @@ class ModelManager(object):
         model_item = self.models_dict[model_type][lang][dataset][model]
         # set the model specific output path
         output_path = os.path.join(self.output_prefix, model_full_name)
-        output_model_path = os.path.join(output_path, "model_file.pth.tar")
+        output_model_path = os.path.join(output_path, "model_file.pth")
         output_config_path = os.path.join(output_path, "config.json")
 
         if os.path.exists(output_path):
@@ -139,8 +140,32 @@ class ModelManager(object):
             self._download_zip_file(model_item["github_rls_url"], output_path)
             # update paths in the config.json
             self._update_paths(output_path, output_config_path)
+        # find downloaded files
+        output_model_path, output_config_path = self._find_files(output_path)
         return output_model_path, output_config_path, model_item
 
+    def _find_files(self, output_path: str) -> Tuple[str, str]:
+        """Find the model and config files in the output path
+
+        Args:
+            output_path (str): path to the model files
+
+        Returns:
+            Tuple[str, str]: path to the model file and config file
+        """
+        model_file = None
+        config_file = None
+        for file_name in os.listdir(output_path):
+            if file_name in ["model_file.pth", "model_file.pth.tar", "model.pth"]:
+                model_file = os.path.join(output_path, file_name)
+            elif file_name == "config.json":
+                config_file = os.path.join(output_path, file_name)
+        if model_file is None:
+            raise ValueError(" [!] Model file not found in the output path")
+        if config_file is None:
+            raise ValueError(" [!] Config file not found in the output path")
+        return model_file, config_file
+
     def _update_paths(self, output_path: str, config_path: str) -> None:
         """Update paths for certain files in config.json after download.
 
@@ -152,7 +177,7 @@ class ModelManager(object):
         output_d_vector_file_path = os.path.join(output_path, "speakers.json")
         output_speaker_ids_file_path = os.path.join(output_path, "speaker_ids.json")
         speaker_encoder_config_path = os.path.join(output_path, "config_se.json")
-        speaker_encoder_model_path = os.path.join(output_path, "model_se.pth.tar")
+        speaker_encoder_model_path = os.path.join(output_path, "model_se.pth")
 
         # update the scale_path.npy file path in the model config.json
         self._update_path("audio.stats_path", output_stats_path, config_path)
@@ -29,7 +29,7 @@ You can continue a previous training run by the following command.
 
 You can fine-tune a pre-trained model by the following command.
 
-```CUDA_VISIBLE_DEVICES='0' python tts/bin/train_vocoder.py --restore_path path/to/your/model.pth.tar```
+```CUDA_VISIBLE_DEVICES='0' python tts/bin/train_vocoder.py --restore_path path/to/your/model.pth```
 
 Restoring a model starts a new training in a different folder. It only restores model weights with the given checkpoint file. However, continuing a training starts from the same directory where the previous training run left off.
 
@@ -93,13 +93,13 @@ them and fine-tune it for your own dataset. This will help you in two main ways:
 
 ```bash
 CUDA_VISIBLE_DEVICES="0" python recipes/ljspeech/glow_tts/train_glowtts.py \
-    --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth.tar
+    --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth
 ```
 
 ```bash
 CUDA_VISIBLE_DEVICES="0" python TTS/bin/train_tts.py \
     --config_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/config.json \
-    --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth.tar
+    --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth
 ```
 
 As stated above, you can also use command-line arguments to change the model configuration.
@@ -107,7 +107,7 @@ them and fine-tune it for your own dataset. This will help you in two main ways:
 
 ```bash
 CUDA_VISIBLE_DEVICES="0" python recipes/ljspeech/glow_tts/train_glowtts.py \
-    --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth.tar \
+    --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth \
     --coqpit.run_name "glow-tts-finetune" \
     --coqpit.lr 0.00001
 ```
@@ -44,7 +44,7 @@ Run your own TTS model (Using Griffin-Lim Vocoder)
 
 ```bash
 tts --text "Text for TTS" \
-    --model_path path/to/model.pth.tar \
+    --model_path path/to/model.pth \
     --config_path path/to/config.json \
     --out_path folder/to/save/output.wav
 ```
@@ -54,9 +54,9 @@ Run your own TTS and Vocoder models
 ```bash
 tts --text "Text for TTS" \
     --config_path path/to/config.json \
-    --model_path path/to/model.pth.tar \
+    --model_path path/to/model.pth \
     --out_path folder/to/save/output.wav \
-    --vocoder_path path/to/vocoder.pth.tar \
+    --vocoder_path path/to/vocoder.pth \
     --vocoder_config_path path/to/vocoder_config.json
 ```
 
@@ -33,7 +33,7 @@
 If you'd like to run a multi-gpu training using DDP back-end,
 
 ```bash
-$ CUDA_VISIBLE_DEVICES="0, 1, 2" python TTS/bin/distribute.py --script <path_to_your_script>/train_glowtts.py
+$ CUDA_VISIBLE_DEVICES="0, 1, 2" python -m trainer.distribute --script <path_to_your_script>/train_glowtts.py
 ```
 
 The example above runs a multi-gpu training using GPUs `0, 1, 2`.
@@ -122,7 +122,7 @@
 
 ```bash
 $ tts --text "Text for TTS" \
-    --model_path path/to/checkpoint_x.pth.tar \
+    --model_path path/to/checkpoint_x.pth \
     --config_path path/to/config.json \
     --out_path folder/to/save/output.wav
 ```
@@ -50,13 +50,13 @@ A breakdown of a simple script that trains a GlowTTS model on the LJspeech datas
 - Fine-tune a model.
 
 ```bash
-CUDA_VISIBLE_DEVICES=0 python train.py --restore_path path/to/model/checkpoint.pth.tar
+CUDA_VISIBLE_DEVICES=0 python train.py --restore_path path/to/model/checkpoint.pth
 ```
 
 - Run multi-gpu training.
 
 ```bash
-CUDA_VISIBLE_DEVICES=0,1,2 python TTS/bin/distribute.py --script train.py
+CUDA_VISIBLE_DEVICES=0,1,2 python -m trainer.distribute --script train.py
 ```
 
 ### CLI Way
@@ -66,7 +66,7 @@
     "DATASET = \"ljspeech\"\n",
     "METADATA_FILE = \"metadata.csv\"\n",
     "CONFIG_PATH = \"/home/ubuntu/.local/share/tts/tts_models--en--ljspeech--tacotron2-DDC_ph/config.json\"\n",
-    "MODEL_FILE = \"/home/ubuntu/.local/share/tts/tts_models--en--ljspeech--tacotron2-DDC_ph/model_file.pth.tar\"\n",
+    "MODEL_FILE = \"/home/ubuntu/.local/share/tts/tts_models--en--ljspeech--tacotron2-DDC_ph/model_file.pth\"\n",
     "BATCH_SIZE = 32\n",
     "\n",
     "QUANTIZED_WAV = False\n",
@@ -66,7 +66,7 @@
    "outputs": [],
    "source": [
     "MODEL_RUN_PATH = \"/media/erogol/data_ssd/Models/libri_tts/speaker_encoder/libritts_360-half-October-31-2019_04+54PM-19d2f5f/\"\n",
-    "MODEL_PATH = MODEL_RUN_PATH + \"best_model.pth.tar\"\n",
+    "MODEL_PATH = MODEL_RUN_PATH + \"best_model.pth\"\n",
     "CONFIG_PATH = MODEL_RUN_PATH + \"config.json\"\n",
     "\n",
     "# My single speaker locations\n",
@@ -73,7 +73,7 @@
     "\n",
     "# Set constants\n",
     "ROOT_PATH = '/home/erogol/Models/LJSpeech/ljspeech-May-20-2020_12+29PM-1835628/'\n",
-    "MODEL_PATH = ROOT_PATH + '/best_model.pth.tar'\n",
+    "MODEL_PATH = ROOT_PATH + '/best_model.pth'\n",
     "CONFIG_PATH = ROOT_PATH + '/config.json'\n",
     "OUT_FOLDER = './hard_sentences/'\n",
     "CONFIG = load_config(CONFIG_PATH)\n",
@@ -416,7 +416,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.1"
+   "version": "3.9.5"
   }
  },
 "nbformat": 4,
@@ -15,7 +15,7 @@ class TestExtractTTSSpectrograms(unittest.TestCase):
     def test_GlowTTS():
         # set paths
         config_path = os.path.join(get_tests_input_path(), "test_glow_tts.json")
-        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth.tar")
+        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth")
         output_path = os.path.join(get_tests_output_path(), "output_extract_tts_spectrograms/")
         # load config
         c = load_config(config_path)
@@ -33,7 +33,7 @@ class TestExtractTTSSpectrograms(unittest.TestCase):
     def test_Tacotron2():
         # set paths
         config_path = os.path.join(get_tests_input_path(), "test_tacotron2_config.json")
-        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth.tar")
+        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth")
        output_path = os.path.join(get_tests_output_path(), "output_extract_tts_spectrograms/")
         # load config
         c = load_config(config_path)
@@ -51,7 +51,7 @@ class TestExtractTTSSpectrograms(unittest.TestCase):
     def test_Tacotron():
         # set paths
         config_path = os.path.join(get_tests_input_path(), "test_tacotron_config.json")
-        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth.tar")
+        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth")
         output_path = os.path.join(get_tests_output_path(), "output_extract_tts_spectrograms/")
         # load config
         c = load_config(config_path)
@@ -12,7 +12,7 @@ from TTS.tts.utils.speakers import SpeakerManager
 from TTS.utils.audio import AudioProcessor
 
 encoder_config_path = os.path.join(get_tests_input_path(), "test_speaker_encoder_config.json")
-encoder_model_path = os.path.join(get_tests_input_path(), "checkpoint_0.pth.tar")
+encoder_model_path = os.path.join(get_tests_input_path(), "checkpoint_0.pth")
 sample_wav_path = os.path.join(get_tests_input_path(), "../data/ljspeech/wavs/LJ001-0001.wav")
 sample_wav_path2 = os.path.join(get_tests_input_path(), "../data/ljspeech/wavs/LJ001-0002.wav")
 d_vectors_file_path = os.path.join(get_tests_input_path(), "../data/dummy_speakers.json")
@@ -20,7 +20,7 @@ class SynthesizerTest(unittest.TestCase):
     def test_in_out(self):
         self._create_random_model()
         tts_root_path = get_tests_output_path()
-        tts_checkpoint = os.path.join(tts_root_path, "checkpoint_10.pth.tar")
+        tts_checkpoint = os.path.join(tts_root_path, "checkpoint_10.pth")
         tts_config = os.path.join(tts_root_path, "dummy_model_config.json")
         synthesizer = Synthesizer(tts_checkpoint, tts_config, None, None)
         synthesizer.tts("Better this test works!!")
@@ -1,5 +1,5 @@
 {
-    "tts_checkpoint":"checkpoint_10.pth.tar", // tts checkpoint file
+    "tts_checkpoint":"checkpoint_10.pth", // tts checkpoint file
     "tts_config":"dummy_model_config.json", // tts config.json file
     "tts_speakers": null, // json file listing speaker ids. null if no speaker embedding.
     "wavernn_lib_path": null, // Rootpath to wavernn project folder to be imported. If this is null, model uses GL for speech synthesis.
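With the `ModelManager` change above, the downloader no longer assumes a fixed checkpoint name: the new `_find_files` lookup accepts whichever of `model_file.pth`, `model_file.pth.tar`, or `model.pth` a release ships, and the surrounding method returns `output_model_path, output_config_path, model_item`. A hedged usage sketch; the model name, constructor argument, and the method name `download_model` are assumptions not confirmed by this diff:

```python
# Sketch based on the diff above. download_model is assumed to be the
# ModelManager method that returns (model_path, config_path, model_item).
from TTS.utils.manage import ModelManager

# Assumes the manager is pointed at the package's bundled model index.
manager = ModelManager("TTS/.models.json")
model_path, config_path, model_item = manager.download_model("tts_models/en/ljspeech/glow-tts")
# Thanks to _find_files(), model_path is whichever checkpoint name was shipped,
# e.g. ~/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth
print(model_path, config_path)
```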