Update model file extension (#1422)

* Update model file extension to ```.pth```

* Update docs

* Rename more

* Find model files
Eren Gölge 2022-03-22 17:55:00 +01:00 committed by GitHub
parent ccdc2300dc
commit 72d85e53c9
29 changed files with 74 additions and 103 deletions

.gitignore (vendored)

@@ -115,6 +115,7 @@ venv.bak/
 *.swo
 # pytorch models
+*.pth
 *.pth.tar
 result/

@@ -159,13 +159,13 @@ If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](ht
 - Run your own TTS model (Using Griffin-Lim Vocoder):
     ```
-    $ tts --text "Text for TTS" --model_path path/to/model.pth.tar --config_path path/to/config.json --out_path output/path/speech.wav
+    $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
     ```
 - Run your own TTS and Vocoder models:
     ```
-    $ tts --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth.tar --out_path output/path/speech.wav
-      --vocoder_path path/to/vocoder.pth.tar --vocoder_config_path path/to/vocoder_config.json
+    $ tts --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth --out_path output/path/speech.wav
+      --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json
     ```

 ### Multi-speaker Models
@@ -185,7 +185,7 @@ If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](ht
 - Run your own multi-speaker TTS model:
     ```
-    $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/config.json --config_path path/to/model.pth.tar --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id>
+    $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/config.json --config_path path/to/model.pth --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id>
     ```

 ## Directory Structure

@@ -25,7 +25,7 @@ These masks can be used for different purposes including training a TTS model wi
 """
 Example run:
 CUDA_VISIBLE_DEVICE="0" python TTS/bin/compute_attention_masks.py
-    --model_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/checkpoint_200000.pth.tar
+    --model_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/checkpoint_200000.pth
     --config_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/config.json
     --dataset_metafile metadata.csv
     --data_path /root/LJSpeech-1.1/

@@ -12,7 +12,7 @@ parser = argparse.ArgumentParser(
     description="""Compute embedding vectors for each wav file in a dataset.\n\n"""
     """
     Example runs:
-    python TTS/bin/compute_embeddings.py speaker_encoder_model.pth.tar speaker_encoder_config.json dataset_config.json embeddings_output_path/
+    python TTS/bin/compute_embeddings.py speaker_encoder_model.pth speaker_encoder_config.json dataset_config.json embeddings_output_path/
     """,
     formatter_class=RawTextHelpFormatter,
 )

@@ -1,55 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import os
-import pathlib
-import subprocess
-import time
-
-import torch
-from trainer import TrainerArgs
-
-
-def main():
-    """
-    Call train.py as a new process and pass command arguments
-    """
-    parser = TrainerArgs().init_argparse(arg_prefix="")
-    parser.add_argument("--script", type=str, help="Target training script to distibute.")
-    args, unargs = parser.parse_known_args()
-
-    num_gpus = torch.cuda.device_count()
-    group_id = time.strftime("%Y_%m_%d-%H%M%S")
-
-    # set arguments for train.py
-    folder_path = pathlib.Path(__file__).parent.absolute()
-    if os.path.exists(os.path.join(folder_path, args.script)):
-        command = [os.path.join(folder_path, args.script)]
-    else:
-        command = [args.script]
-    command.append("--continue_path={}".format(args.continue_path))
-    command.append("--restore_path={}".format(args.restore_path))
-    command.append("--config_path={}".format(args.config_path))
-    command.append("--group_id=group_{}".format(group_id))
-    command.append("--use_ddp=true")
-    command += unargs
-    command.append("")
-
-    # run a processes per GPU
-    processes = []
-    for i in range(num_gpus):
-        my_env = os.environ.copy()
-        my_env["PYTHON_EGG_CACHE"] = "/tmp/tmp{}".format(i)
-        command[-1] = "--rank={}".format(i)
-        # prevent stdout for processes with rank != 0
-        stdout = None
-        p = subprocess.Popen(["python3"] + command, stdout=stdout, env=my_env)  # pylint: disable=consider-using-with
-        processes.append(p)
-        print(command)
-
-    for p in processes:
-        p.wait()
-
-
-if __name__ == "__main__":
-    main()
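The launcher deleted above now lives in the 🐸Trainer package; the documentation hunks further down switch the examples to `python -m trainer.distribute`. For reference, a minimal sketch of the equivalent launch driven from Python (the script name and GPU ids are illustrative, not from this commit):

```python
import os
import subprocess

# Equivalent of the new documented command:
#   CUDA_VISIBLE_DEVICES="0,1,2" python -m trainer.distribute --script train_glowtts.py
# Script path and GPU list below are placeholder values.
env = dict(os.environ, CUDA_VISIBLE_DEVICES="0,1,2")
subprocess.run(
    ["python", "-m", "trainer.distribute", "--script", "train_glowtts.py"],
    env=env,
    check=True,
)
```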

@@ -56,7 +56,7 @@ if __name__ == "__main__":
     description="""Compute the accuracy of the encoder.\n\n"""
     """
     Example runs:
-    python TTS/bin/eval_encoder.py emotion_encoder_model.pth.tar emotion_encoder_config.json dataset_config.json
+    python TTS/bin/eval_encoder.py emotion_encoder_model.pth emotion_encoder_config.json dataset_config.json
     """,
     formatter_class=RawTextHelpFormatter,
 )

@@ -60,13 +60,13 @@ If you don't specify any models, then it uses LJSpeech based English model.
 - Run your own TTS model (Using Griffin-Lim Vocoder):
     ```
-    $ tts --text "Text for TTS" --model_path path/to/model.pth.tar --config_path path/to/config.json --out_path output/path/speech.wav
+    $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
     ```
 - Run your own TTS and Vocoder models:
     ```
-    $ tts --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth.tar --out_path output/path/speech.wav
-      --vocoder_path path/to/vocoder.pth.tar --vocoder_config_path path/to/vocoder_config.json
+    $ tts --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth --out_path output/path/speech.wav
+      --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json
     ```

 ### Multi-speaker Models
@@ -86,7 +86,7 @@ If you don't specify any models, then it uses LJSpeech based English model.
 - Run your own multi-speaker TTS model:
     ```
-    $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/config.json --config_path path/to/model.pth.tar --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id>
+    $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/config.json --config_path path/to/model.pth --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id>
     ```
 """
 # We remove Markdown code formatting programmatically here to allow us to copy-and-paste from main README to keep

@@ -14,5 +14,5 @@ To run the code, you need to follow the same flow as in TTS.
 - Define 'config.json' for your needs. Note that, audio parameters should match your TTS model.
 - Example training call ```python speaker_encoder/train.py --config_path speaker_encoder/config.json --data_path ~/Data/Libri-TTS/train-clean-360```
-- Generate embedding vectors ```python speaker_encoder/compute_embeddings.py --use_cuda true /model/path/best_model.pth.tar model/config/path/config.json dataset/path/ output_path``` . This code parses all .wav files at the given dataset path and generates the same folder structure under the output path with the generated embedding files.
+- Generate embedding vectors ```python speaker_encoder/compute_embeddings.py --use_cuda true /model/path/best_model.pth model/config/path/config.json dataset/path/ output_path``` . This code parses all .wav files at the given dataset path and generates the same folder structure under the output path with the generated embedding files.
 - Watch training on Tensorboard as in TTS

@@ -147,7 +147,7 @@ def setup_speaker_encoder_model(config: "Coqpit"):
 def save_checkpoint(model, optimizer, criterion, model_loss, out_path, current_step, epoch):
-    checkpoint_path = "checkpoint_{}.pth.tar".format(current_step)
+    checkpoint_path = "checkpoint_{}.pth".format(current_step)
     checkpoint_path = os.path.join(out_path, checkpoint_path)
     print(" | | > Checkpoint saving : {}".format(checkpoint_path))
@@ -177,7 +177,7 @@ def save_best_model(model, optimizer, criterion, model_loss, best_loss, out_path
         "date": datetime.date.today().strftime("%B %d, %Y"),
     }
     best_loss = model_loss
-    bestmodel_path = "best_model.pth.tar"
+    bestmodel_path = "best_model.pth"
     bestmodel_path = os.path.join(out_path, bestmodel_path)
     print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path))
     save_fsspec(state, bestmodel_path)

@@ -5,7 +5,7 @@ from TTS.utils.io import save_fsspec
 def save_checkpoint(model, optimizer, model_loss, out_path, current_step):
-    checkpoint_path = "checkpoint_{}.pth.tar".format(current_step)
+    checkpoint_path = "checkpoint_{}.pth".format(current_step)
     checkpoint_path = os.path.join(out_path, checkpoint_path)
     print(" | | > Checkpoint saving : {}".format(checkpoint_path))
@@ -31,7 +31,7 @@ def save_best_model(model, optimizer, model_loss, best_loss, out_path, current_s
         "date": datetime.date.today().strftime("%B %d, %Y"),
     }
     best_loss = model_loss
-    bestmodel_path = "best_model.pth.tar"
+    bestmodel_path = "best_model.pth"
     bestmodel_path = os.path.join(out_path, bestmodel_path)
     print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path))
     save_fsspec(state, bestmodel_path)

@@ -21,4 +21,4 @@ Run the server with the official models on a GPU.
 ```CUDA_VISIBLE_DEVICES="0" python TTS/server/server.py --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/multiband-melgan --use_cuda True```
 Run the server with a custom models.
-```python TTS/server/server.py --tts_checkpoint /path/to/tts/model.pth.tar --tts_config /path/to/tts/config.json --vocoder_checkpoint /path/to/vocoder/model.pth.tar --vocoder_config /path/to/vocoder/config.json```
+```python TTS/server/server.py --tts_checkpoint /path/to/tts/model.pth --tts_config /path/to/tts/config.json --vocoder_checkpoint /path/to/vocoder/model.pth --vocoder_config /path/to/vocoder/config.json```

@@ -1,6 +1,6 @@
 {
     "tts_path":"/media/erogol/data_ssd/Models/libri_tts/5049/", // tts model root folder
-    "tts_file":"best_model.pth.tar", // tts checkpoint file
+    "tts_file":"best_model.pth", // tts checkpoint file
     "tts_config":"config.json", // tts config.json file
     "tts_speakers": null, // json file listing speaker ids. null if no speaker embedding.
     "vocoder_config":null,

@@ -371,7 +371,7 @@ class AudioProcessor(object):
         self.hop_length = hop_length
         self.win_length = win_length
         assert min_level_db != 0.0, " [!] min_level_db is 0"
-        assert self.win_length <= self.fft_size, " [!] win_length cannot be larger than fft_size"
+        assert self.win_length <= self.fft_size, f" [!] win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}"
         members = vars(self)
         if verbose:
             print(" > Setting up Audio Processor...")

@@ -67,7 +67,7 @@ def get_experiment_folder_path(root_path, model_name):
 def remove_experiment_folder(experiment_path):
     """Check folder if there is a checkpoint, otherwise remove the folder"""
     fs = fsspec.get_mapper(experiment_path).fs
-    checkpoint_files = fs.glob(experiment_path + "/*.pth.tar")
+    checkpoint_files = fs.glob(experiment_path + "/*.pth")
     if not checkpoint_files:
         if fs.exists(experiment_path):
             fs.rm(experiment_path, recursive=True)

@@ -140,7 +140,7 @@ def save_checkpoint(
     output_folder,
     **kwargs,
 ):
-    file_name = "checkpoint_{}.pth.tar".format(current_step)
+    file_name = "checkpoint_{}.pth".format(current_step)
     checkpoint_path = os.path.join(output_folder, file_name)
     print("\n > CHECKPOINT : {}".format(checkpoint_path))
     save_model(
@@ -170,7 +170,7 @@ def save_best_model(
     **kwargs,
 ):
     if current_loss < best_loss:
-        best_model_name = f"best_model_{current_step}.pth.tar"
+        best_model_name = f"best_model_{current_step}.pth"
         checkpoint_path = os.path.join(out_path, best_model_name)
         print(" > BEST MODEL : {}".format(checkpoint_path))
         save_model(
@@ -187,12 +187,12 @@ def save_best_model(
         fs = fsspec.get_mapper(out_path).fs
         # only delete previous if current is saved successfully
        if not keep_all_best or (current_step < keep_after):
-            model_names = fs.glob(os.path.join(out_path, "best_model*.pth.tar"))
+            model_names = fs.glob(os.path.join(out_path, "best_model*.pth"))
             for model_name in model_names:
                 if os.path.basename(model_name) != best_model_name:
                     fs.rm(model_name)
             # create a shortcut which always points to the currently best model
-            shortcut_name = "best_model.pth.tar"
+            shortcut_name = "best_model.pth"
             shortcut_path = os.path.join(out_path, shortcut_name)
             fs.copy(checkpoint_path, shortcut_path)
             best_loss = current_loss
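Note that the new pattern `best_model*.pth` also matches the `best_model.pth` shortcut, so the basename check above is what spares the freshly saved file while stale step-stamped copies (and the old shortcut, which is re-copied right after) are removed. A small self-contained simulation of that pruning logic, with assumed file names:

```python
import fnmatch
import os

# Assumed folder contents after saving the best model at step 20000.
files = ["checkpoint_20000.pth", "best_model_10000.pth", "best_model_20000.pth", "best_model.pth"]
best_model_name = "best_model_20000.pth"

# Mirrors the glob-plus-basename filter above: everything matching
# best_model*.pth except the freshly saved file is slated for removal.
to_remove = [f for f in fnmatch.filter(files, "best_model*.pth") if os.path.basename(f) != best_model_name]
print(to_remove)  # ['best_model_10000.pth', 'best_model.pth']
```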

@@ -3,6 +3,7 @@ import json
 import os
 import zipfile
 from pathlib import Path
+from typing import Tuple
 from shutil import copyfile, rmtree

 import requests
@@ -114,7 +115,7 @@ class ModelManager(object):
         e.g. 'tts_model/en/ljspeech/tacotron'
         Every model must have the following files:
-            - *.pth.tar : pytorch model checkpoint file.
+            - *.pth : pytorch model checkpoint file.
             - config.json : model config file.
             - scale_stats.npy (if exist): scale values for preprocessing.
@@ -127,7 +128,7 @@ class ModelManager(object):
         model_item = self.models_dict[model_type][lang][dataset][model]
         # set the model specific output path
         output_path = os.path.join(self.output_prefix, model_full_name)
-        output_model_path = os.path.join(output_path, "model_file.pth.tar")
+        output_model_path = os.path.join(output_path, "model_file.pth")
         output_config_path = os.path.join(output_path, "config.json")
         if os.path.exists(output_path):
@@ -139,8 +140,32 @@ class ModelManager(object):
             self._download_zip_file(model_item["github_rls_url"], output_path)
         # update paths in the config.json
         self._update_paths(output_path, output_config_path)
+        # find downloaded files
+        output_model_path, output_config_path = self._find_files(output_path)
         return output_model_path, output_config_path, model_item

+    def _find_files(self, output_path: str) -> Tuple[str, str]:
+        """Find the model and config files in the output path
+
+        Args:
+            output_path (str): path to the model files
+
+        Returns:
+            Tuple[str, str]: path to the model file and config file
+        """
+        model_file = None
+        config_file = None
+        for file_name in os.listdir(output_path):
+            if file_name in ["model_file.pth", "model_file.pth.tar", "model.pth"]:
+                model_file = os.path.join(output_path, file_name)
+            elif file_name == "config.json":
+                config_file = os.path.join(output_path, file_name)
+        if model_file is None:
+            raise ValueError(" [!] Model file not found in the output path")
+        if config_file is None:
+            raise ValueError(" [!] Config file not found in the output path")
+        return model_file, config_file
+
     def _update_paths(self, output_path: str, config_path: str) -> None:
         """Update paths for certain files in config.json after download.
@@ -152,7 +177,7 @@ class ModelManager(object):
         output_d_vector_file_path = os.path.join(output_path, "speakers.json")
         output_speaker_ids_file_path = os.path.join(output_path, "speaker_ids.json")
         speaker_encoder_config_path = os.path.join(output_path, "config_se.json")
-        speaker_encoder_model_path = os.path.join(output_path, "model_se.pth.tar")
+        speaker_encoder_model_path = os.path.join(output_path, "model_se.pth")
         # update the scale_path.npy file path in the model config.json
         self._update_path("audio.stats_path", output_stats_path, config_path)

@@ -29,7 +29,7 @@ You can continue a previous training run by the following command.

 You can fine-tune a pre-trained model by the following command.

-```CUDA_VISIBLE_DEVICES='0' python tts/bin/train_vocoder.py --restore_path path/to/your/model.pth.tar```
+```CUDA_VISIBLE_DEVICES='0' python tts/bin/train_vocoder.py --restore_path path/to/your/model.pth```

 Restoring a model starts a new training in a different folder. It only restores model weights with the given checkpoint file. However, continuing a training starts from the same directory where the previous training run left off.

@@ -93,13 +93,13 @@ them and fine-tune it for your own dataset. This will help you in two main ways:
     ```bash
     CUDA_VISIBLE_DEVICES="0" python recipes/ljspeech/glow_tts/train_glowtts.py \
-        --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth.tar
+        --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth
     ```

     ```bash
     CUDA_VISIBLE_DEVICES="0" python TTS/bin/train_tts.py \
         --config_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/config.json \
-        --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth.tar
+        --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth
     ```

     As stated above, you can also use command-line arguments to change the model configuration.
@@ -107,7 +107,7 @@ them and fine-tune it for your own dataset. This will help you in two main ways:
     ```bash
     CUDA_VISIBLE_DEVICES="0" python recipes/ljspeech/glow_tts/train_glowtts.py \
-        --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth.tar
+        --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth
         --coqpit.run_name "glow-tts-finetune" \
         --coqpit.lr 0.00001
     ```

@@ -44,7 +44,7 @@ Run your own TTS model (Using Griffin-Lim Vocoder)

 ```bash
 tts --text "Text for TTS" \
-    --model_path path/to/model.pth.tar \
+    --model_path path/to/model.pth \
     --config_path path/to/config.json \
     --out_path folder/to/save/output.wav
 ```
@@ -54,9 +54,9 @@ Run your own TTS and Vocoder models

 ```bash
 tts --text "Text for TTS" \
     --config_path path/to/config.json \
-    --model_path path/to/model.pth.tar \
+    --model_path path/to/model.pth \
     --out_path folder/to/save/output.wav \
-    --vocoder_path path/to/vocoder.pth.tar \
+    --vocoder_path path/to/vocoder.pth \
     --vocoder_config_path path/to/vocoder_config.json
 ```

@@ -33,7 +33,7 @@
 If you like to run a multi-gpu training using DDP back-end,

 ```bash
-$ CUDA_VISIBLE_DEVICES="0, 1, 2" python TTS/bin/distribute.py --script <path_to_your_script>/train_glowtts.py
+$ CUDA_VISIBLE_DEVICES="0, 1, 2" python -m trainer.distribute --script <path_to_your_script>/train_glowtts.py
 ```

 The example above runs a multi-gpu training using GPUs `0, 1, 2`.
@@ -122,7 +122,7 @@
 ```bash
 $ tts --text "Text for TTS" \
-    --model_path path/to/checkpoint_x.pth.tar \
+    --model_path path/to/checkpoint_x.pth \
     --config_path path/to/config.json \
     --out_path folder/to/save/output.wav
 ```

@@ -50,13 +50,13 @@ A breakdown of a simple script that trains a GlowTTS model on the LJspeech datas
 - Fine-tune a model.

     ```bash
-    CUDA_VISIBLE_DEVICES=0 python train.py --restore_path path/to/model/checkpoint.pth.tar
+    CUDA_VISIBLE_DEVICES=0 python train.py --restore_path path/to/model/checkpoint.pth
     ```

 - Run multi-gpu training.

     ```bash
-    CUDA_VISIBLE_DEVICES=0,1,2 python TTS/bin/distribute.py --script train.py
+    CUDA_VISIBLE_DEVICES=0,1,2 python -m trainer.distribute --script train.py
     ```

 ### CLI Way

@@ -66,7 +66,7 @@
     "DATASET = \"ljspeech\"\n",
     "METADATA_FILE = \"metadata.csv\"\n",
     "CONFIG_PATH = \"/home/ubuntu/.local/share/tts/tts_models--en--ljspeech--tacotron2-DDC_ph/config.json\"\n",
-    "MODEL_FILE = \"/home/ubuntu/.local/share/tts/tts_models--en--ljspeech--tacotron2-DDC_ph/model_file.pth.tar\"\n",
+    "MODEL_FILE = \"/home/ubuntu/.local/share/tts/tts_models--en--ljspeech--tacotron2-DDC_ph/model_file.pth\"\n",
     "BATCH_SIZE = 32\n",
     "\n",
     "QUANTIZED_WAV = False\n",

@@ -66,7 +66,7 @@
    "outputs": [],
    "source": [
     "MODEL_RUN_PATH = \"/media/erogol/data_ssd/Models/libri_tts/speaker_encoder/libritts_360-half-October-31-2019_04+54PM-19d2f5f/\"\n",
-    "MODEL_PATH = MODEL_RUN_PATH + \"best_model.pth.tar\"\n",
+    "MODEL_PATH = MODEL_RUN_PATH + \"best_model.pth\"\n",
     "CONFIG_PATH = MODEL_RUN_PATH + \"config.json\"\n",
     "\n",
     "# My single speaker locations\n",

@@ -73,7 +73,7 @@
     "\n",
     "# Set constants\n",
     "ROOT_PATH = '/home/erogol/Models/LJSpeech/ljspeech-May-20-2020_12+29PM-1835628/'\n",
-    "MODEL_PATH = ROOT_PATH + '/best_model.pth.tar'\n",
+    "MODEL_PATH = ROOT_PATH + '/best_model.pth'\n",
     "CONFIG_PATH = ROOT_PATH + '/config.json'\n",
     "OUT_FOLDER = './hard_sentences/'\n",
     "CONFIG = load_config(CONFIG_PATH)\n",

@@ -416,7 +416,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.1"
+   "version": "3.9.5"
   }
  },
 "nbformat": 4,

@@ -15,7 +15,7 @@ class TestExtractTTSSpectrograms(unittest.TestCase):
     def test_GlowTTS():
         # set paths
         config_path = os.path.join(get_tests_input_path(), "test_glow_tts.json")
-        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth.tar")
+        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth")
         output_path = os.path.join(get_tests_output_path(), "output_extract_tts_spectrograms/")
         # load config
         c = load_config(config_path)
@@ -33,7 +33,7 @@ class TestExtractTTSSpectrograms(unittest.TestCase):
     def test_Tacotron2():
         # set paths
         config_path = os.path.join(get_tests_input_path(), "test_tacotron2_config.json")
-        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth.tar")
+        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth")
         output_path = os.path.join(get_tests_output_path(), "output_extract_tts_spectrograms/")
         # load config
         c = load_config(config_path)
@@ -51,7 +51,7 @@ class TestExtractTTSSpectrograms(unittest.TestCase):
     def test_Tacotron():
         # set paths
         config_path = os.path.join(get_tests_input_path(), "test_tacotron_config.json")
-        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth.tar")
+        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth")
         output_path = os.path.join(get_tests_output_path(), "output_extract_tts_spectrograms/")
         # load config
         c = load_config(config_path)

@@ -12,7 +12,7 @@ from TTS.tts.utils.speakers import SpeakerManager
 from TTS.utils.audio import AudioProcessor

 encoder_config_path = os.path.join(get_tests_input_path(), "test_speaker_encoder_config.json")
-encoder_model_path = os.path.join(get_tests_input_path(), "checkpoint_0.pth.tar")
+encoder_model_path = os.path.join(get_tests_input_path(), "checkpoint_0.pth")
 sample_wav_path = os.path.join(get_tests_input_path(), "../data/ljspeech/wavs/LJ001-0001.wav")
 sample_wav_path2 = os.path.join(get_tests_input_path(), "../data/ljspeech/wavs/LJ001-0002.wav")
 d_vectors_file_path = os.path.join(get_tests_input_path(), "../data/dummy_speakers.json")

@@ -20,7 +20,7 @@ class SynthesizerTest(unittest.TestCase):
     def test_in_out(self):
         self._create_random_model()
         tts_root_path = get_tests_output_path()
-        tts_checkpoint = os.path.join(tts_root_path, "checkpoint_10.pth.tar")
+        tts_checkpoint = os.path.join(tts_root_path, "checkpoint_10.pth")
         tts_config = os.path.join(tts_root_path, "dummy_model_config.json")
         synthesizer = Synthesizer(tts_checkpoint, tts_config, None, None)
         synthesizer.tts("Better this test works!!")

@@ -1,5 +1,5 @@
 {
-    "tts_checkpoint":"checkpoint_10.pth.tar", // tts checkpoint file
+    "tts_checkpoint":"checkpoint_10.pth", // tts checkpoint file
     "tts_config":"dummy_model_config.json", // tts config.json file
     "tts_speakers": null, // json file listing speaker ids. null if no speaker embedding.
     "wavernn_lib_path": null, // Rootpath to wavernn project folder to be imported. If this is null, model uses GL for speech synthesis.