diff --git a/.gitignore b/.gitignore
index f8d6e644..2a3cbad4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -115,6 +115,7 @@ venv.bak/
 *.swo

 # pytorch models
+*.pth
 *.pth.tar
 result/
diff --git a/README.md b/README.md
index 80fa5dea..97a7cc66 100644
--- a/README.md
+++ b/README.md
@@ -159,13 +159,13 @@ If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](ht
 - Run your own TTS model (Using Griffin-Lim Vocoder):

   ```
-  $ tts --text "Text for TTS" --model_path path/to/model.pth.tar --config_path path/to/config.json --out_path output/path/speech.wav
+  $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
   ```

 - Run your own TTS and Vocoder models:
   ```
-  $ tts --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth.tar --out_path output/path/speech.wav
-      --vocoder_path path/to/vocoder.pth.tar --vocoder_config_path path/to/vocoder_config.json
+  $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
+      --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json
   ```

 ### Multi-speaker Models
@@ -185,7 +185,7 @@ If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](ht
 - Run your own multi-speaker TTS model:

   ```
-  $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/config.json --config_path path/to/model.pth.tar --speakers_file_path path/to/speaker.json --speaker_idx
+  $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/model.pth --config_path path/to/config.json --speakers_file_path path/to/speaker.json --speaker_idx
   ```

 ## Directory Structure
diff --git a/TTS/bin/compute_attention_masks.py b/TTS/bin/compute_attention_masks.py
index e58259a6..9ab520be 100644
--- a/TTS/bin/compute_attention_masks.py
+++ b/TTS/bin/compute_attention_masks.py
@@ -25,7 +25,7 @@ These masks can be used for different purposes including training a TTS model wi
 """
 Example run:
     CUDA_VISIBLE_DEVICE="0" python TTS/bin/compute_attention_masks.py
-        --model_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/checkpoint_200000.pth.tar
+        --model_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/checkpoint_200000.pth
         --config_path /data/rw/home/Models/ljspeech-dcattn-December-14-2020_11+10AM-9d0e8c7/config.json
         --dataset_metafile metadata.csv
         --data_path /root/LJSpeech-1.1/
diff --git a/TTS/bin/compute_embeddings.py b/TTS/bin/compute_embeddings.py
index 68571fb4..d7a2c5f6 100644
--- a/TTS/bin/compute_embeddings.py
+++ b/TTS/bin/compute_embeddings.py
@@ -12,7 +12,7 @@ parser = argparse.ArgumentParser(
     description="""Compute embedding vectors for each wav file in a dataset.\n\n"""
     """
     Example runs:
-    python TTS/bin/compute_embeddings.py speaker_encoder_model.pth.tar speaker_encoder_config.json dataset_config.json embeddings_output_path/
+    python TTS/bin/compute_embeddings.py speaker_encoder_model.pth speaker_encoder_config.json dataset_config.json embeddings_output_path/
     """,
     formatter_class=RawTextHelpFormatter,
 )
diff --git a/TTS/bin/distribute.py b/TTS/bin/distribute.py
deleted file mode 100644
index b5552e32..00000000
--- a/TTS/bin/distribute.py
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import os
-import pathlib
-import subprocess
-import time
-
-import torch
-from trainer import TrainerArgs
-
-
-def main():
-    """
-    Call train.py as a new process and pass command arguments
-    """
-    parser = TrainerArgs().init_argparse(arg_prefix="")
-    parser.add_argument("--script", type=str, help="Target training script to distibute.")
-    args, unargs = parser.parse_known_args()
-
-    num_gpus = torch.cuda.device_count()
-    group_id = time.strftime("%Y_%m_%d-%H%M%S")
-
-    # set arguments for train.py
-    folder_path = pathlib.Path(__file__).parent.absolute()
-    if os.path.exists(os.path.join(folder_path, args.script)):
-        command = [os.path.join(folder_path, args.script)]
-    else:
-        command = [args.script]
-    command.append("--continue_path={}".format(args.continue_path))
-    command.append("--restore_path={}".format(args.restore_path))
-    command.append("--config_path={}".format(args.config_path))
-    command.append("--group_id=group_{}".format(group_id))
-    command.append("--use_ddp=true")
-    command += unargs
-    command.append("")
-
-    # run a processes per GPU
-    processes = []
-    for i in range(num_gpus):
-        my_env = os.environ.copy()
-        my_env["PYTHON_EGG_CACHE"] = "/tmp/tmp{}".format(i)
-        command[-1] = "--rank={}".format(i)
-        # prevent stdout for processes with rank != 0
-        stdout = None
-        p = subprocess.Popen(["python3"] + command, stdout=stdout, env=my_env)  # pylint: disable=consider-using-with
-        processes.append(p)
-        print(command)
-
-    for p in processes:
-        p.wait()
-
-
-if __name__ == "__main__":
-    main()
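Note on the deletion above: multi-GPU launching now lives in the 👟 Trainer package, invoked as `python -m trainer.distribute` (see the documentation updates further down in this patch). For tooling that still shells out to the removed script, a minimal migration sketch — `launch_ddp` and its defaults are illustrative, not part of this patch:

```python
# Minimal sketch: start the same one-process-per-GPU DDP run that the
# deleted TTS/bin/distribute.py used to launch, via the Trainer package's
# `trainer.distribute` entry point documented in this patch.
import os
import subprocess
import sys


def launch_ddp(script: str, gpus: str = "0,1,2") -> int:
    """Run `python -m trainer.distribute --script <script>` on the given GPUs."""
    env = dict(os.environ, CUDA_VISIBLE_DEVICES=gpus)
    cmd = [sys.executable, "-m", "trainer.distribute", "--script", script]
    return subprocess.run(cmd, env=env, check=False).returncode


if __name__ == "__main__":
    raise SystemExit(launch_ddp("recipes/ljspeech/glow_tts/train_glowtts.py"))
```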
diff --git a/TTS/bin/eval_encoder.py b/TTS/bin/eval_encoder.py
index de9e5865..089f3645 100644
--- a/TTS/bin/eval_encoder.py
+++ b/TTS/bin/eval_encoder.py
@@ -56,7 +56,7 @@ if __name__ == "__main__":
         description="""Compute the accuracy of the encoder.\n\n"""
         """
         Example runs:
-        python TTS/bin/eval_encoder.py emotion_encoder_model.pth.tar emotion_encoder_config.json dataset_config.json
+        python TTS/bin/eval_encoder.py emotion_encoder_model.pth emotion_encoder_config.json dataset_config.json
         """,
         formatter_class=RawTextHelpFormatter,
     )
diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py
index 8b3f53db..eb166bc8 100755
--- a/TTS/bin/synthesize.py
+++ b/TTS/bin/synthesize.py
@@ -60,13 +60,13 @@ If you don't specify any models, then it uses LJSpeech based English model.
 - Run your own TTS model (Using Griffin-Lim Vocoder):

   ```
-  $ tts --text "Text for TTS" --model_path path/to/model.pth.tar --config_path path/to/config.json --out_path output/path/speech.wav
+  $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
   ```

 - Run your own TTS and Vocoder models:
   ```
-  $ tts --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth.tar --out_path output/path/speech.wav
-      --vocoder_path path/to/vocoder.pth.tar --vocoder_config_path path/to/vocoder_config.json
+  $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
+      --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json
   ```

 ### Multi-speaker Models
@@ -86,7 +86,7 @@ If you don't specify any models, then it uses LJSpeech based English model.
 - Run your own multi-speaker TTS model:

   ```
-  $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/config.json --config_path path/to/model.pth.tar --speakers_file_path path/to/speaker.json --speaker_idx
+  $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/model.pth --config_path path/to/config.json --speakers_file_path path/to/speaker.json --speaker_idx
   ```
 """
 # We remove Markdown code formatting programmatically here to allow us to copy-and-paste from main README to keep
diff --git a/TTS/encoder/README.md b/TTS/encoder/README.md
index b6f541f8..b38b2005 100644
--- a/TTS/encoder/README.md
+++ b/TTS/encoder/README.md
@@ -14,5 +14,5 @@ To run the code, you need to follow the same flow as in TTS.
 - Define 'config.json' for your needs. Note that, audio parameters should match your TTS model.
 - Example training call ```python speaker_encoder/train.py --config_path speaker_encoder/config.json --data_path ~/Data/Libri-TTS/train-clean-360```
-- Generate embedding vectors ```python speaker_encoder/compute_embeddings.py --use_cuda true /model/path/best_model.pth.tar model/config/path/config.json dataset/path/ output_path``` . This code parses all .wav files at the given dataset path and generates the same folder structure under the output path with the generated embedding files.
+- Generate embedding vectors ```python speaker_encoder/compute_embeddings.py --use_cuda true /model/path/best_model.pth model/config/path/config.json dataset/path/ output_path``` . This code parses all .wav files at the given dataset path and generates the same folder structure under the output path with the generated embedding files.
 - Watch training on Tensorboard as in TTS
diff --git a/TTS/encoder/utils/generic_utils.py b/TTS/encoder/utils/generic_utils.py
index 17f1c3d9..19c00582 100644
--- a/TTS/encoder/utils/generic_utils.py
+++ b/TTS/encoder/utils/generic_utils.py
@@ -147,7 +147,7 @@ def setup_speaker_encoder_model(config: "Coqpit"):

 def save_checkpoint(model, optimizer, criterion, model_loss, out_path, current_step, epoch):
-    checkpoint_path = "checkpoint_{}.pth.tar".format(current_step)
+    checkpoint_path = "checkpoint_{}.pth".format(current_step)
     checkpoint_path = os.path.join(out_path, checkpoint_path)
     print(" | | > Checkpoint saving : {}".format(checkpoint_path))
@@ -177,7 +177,7 @@ def save_best_model(model, optimizer, criterion, model_loss, best_loss, out_path
         "date": datetime.date.today().strftime("%B %d, %Y"),
     }
     best_loss = model_loss
-    bestmodel_path = "best_model.pth.tar"
+    bestmodel_path = "best_model.pth"
     bestmodel_path = os.path.join(out_path, bestmodel_path)
     print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path))
     save_fsspec(state, bestmodel_path)
diff --git a/TTS/encoder/utils/io.py b/TTS/encoder/utils/io.py
index 7a3aadc9..d1dad3e2 100644
--- a/TTS/encoder/utils/io.py
+++ b/TTS/encoder/utils/io.py
@@ -5,7 +5,7 @@ from TTS.utils.io import save_fsspec

 def save_checkpoint(model, optimizer, model_loss, out_path, current_step):
-    checkpoint_path = "checkpoint_{}.pth.tar".format(current_step)
+    checkpoint_path = "checkpoint_{}.pth".format(current_step)
     checkpoint_path = os.path.join(out_path, checkpoint_path)
     print(" | | > Checkpoint saving : {}".format(checkpoint_path))
@@ -31,7 +31,7 @@ def save_best_model(model, optimizer, model_loss, best_loss, out_path, current_s
         "date": datetime.date.today().strftime("%B %d, %Y"),
     }
     best_loss = model_loss
-    bestmodel_path = "best_model.pth.tar"
+    bestmodel_path = "best_model.pth"
     bestmodel_path = os.path.join(out_path, bestmodel_path)
     print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path))
     save_fsspec(state, bestmodel_path)
diff --git a/TTS/server/README.md b/TTS/server/README.md
index 89ee21eb..5458e398 100644
--- a/TTS/server/README.md
+++ b/TTS/server/README.md
@@ -21,4 +21,4 @@ Run the server with the official models on a GPU.
 ```CUDA_VISIBLE_DEVICES="0" python TTS/server/server.py --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/multiband-melgan --use_cuda True```

 Run the server with a custom models.
-```python TTS/server/server.py --tts_checkpoint /path/to/tts/model.pth.tar --tts_config /path/to/tts/config.json --vocoder_checkpoint /path/to/vocoder/model.pth.tar --vocoder_config /path/to/vocoder/config.json```
+```python TTS/server/server.py --tts_checkpoint /path/to/tts/model.pth --tts_config /path/to/tts/config.json --vocoder_checkpoint /path/to/vocoder/model.pth --vocoder_config /path/to/vocoder/config.json```
diff --git a/TTS/server/conf.json b/TTS/server/conf.json
index 32e475cf..49b6c09c 100644
--- a/TTS/server/conf.json
+++ b/TTS/server/conf.json
@@ -1,6 +1,6 @@
 {
     "tts_path":"/media/erogol/data_ssd/Models/libri_tts/5049/", // tts model root folder
-    "tts_file":"best_model.pth.tar", // tts checkpoint file
+    "tts_file":"best_model.pth", // tts checkpoint file
     "tts_config":"config.json", // tts config.json file
     "tts_speakers": null, // json file listing speaker ids. null if no speaker embedding.
     "vocoder_config":null,
diff --git a/TTS/utils/audio.py b/TTS/utils/audio.py
index d0777c11..3ed0a76a 100644
--- a/TTS/utils/audio.py
+++ b/TTS/utils/audio.py
@@ -371,7 +371,7 @@ class AudioProcessor(object):
         self.hop_length = hop_length
         self.win_length = win_length
         assert min_level_db != 0.0, " [!] min_level_db is 0"
-        assert self.win_length <= self.fft_size, " [!] win_length cannot be larger than fft_size"
+        assert self.win_length <= self.fft_size, f" [!] win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}"
         members = vars(self)
         if verbose:
             print(" > Setting up Audio Processor...")
diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py
index 69609bcb..b685210c 100644
--- a/TTS/utils/generic_utils.py
+++ b/TTS/utils/generic_utils.py
@@ -67,7 +67,7 @@ def get_experiment_folder_path(root_path, model_name):
 def remove_experiment_folder(experiment_path):
     """Check folder if there is a checkpoint, otherwise remove the folder"""
     fs = fsspec.get_mapper(experiment_path).fs
-    checkpoint_files = fs.glob(experiment_path + "/*.pth.tar")
+    checkpoint_files = fs.glob(experiment_path + "/*.pth")
     if not checkpoint_files:
         if fs.exists(experiment_path):
             fs.rm(experiment_path, recursive=True)
diff --git a/TTS/utils/io.py b/TTS/utils/io.py
index 54818ce9..304df5ed 100644
--- a/TTS/utils/io.py
+++ b/TTS/utils/io.py
@@ -140,7 +140,7 @@ def save_checkpoint(
     output_folder,
     **kwargs,
 ):
-    file_name = "checkpoint_{}.pth.tar".format(current_step)
+    file_name = "checkpoint_{}.pth".format(current_step)
     checkpoint_path = os.path.join(output_folder, file_name)
     print("\n > CHECKPOINT : {}".format(checkpoint_path))
     save_model(
@@ -170,7 +170,7 @@ def save_best_model(
     **kwargs,
 ):
     if current_loss < best_loss:
-        best_model_name = f"best_model_{current_step}.pth.tar"
+        best_model_name = f"best_model_{current_step}.pth"
         checkpoint_path = os.path.join(out_path, best_model_name)
         print(" > BEST MODEL : {}".format(checkpoint_path))
         save_model(
@@ -187,12 +187,12 @@ def save_best_model(
         fs = fsspec.get_mapper(out_path).fs
         # only delete previous if current is saved successfully
         if not keep_all_best or (current_step < keep_after):
-            model_names = fs.glob(os.path.join(out_path, "best_model*.pth.tar"))
+            model_names = fs.glob(os.path.join(out_path, "best_model*.pth"))
             for model_name in model_names:
                 if os.path.basename(model_name) != best_model_name:
                     fs.rm(model_name)
         # create a shortcut which always points to the currently best model
-        shortcut_name = "best_model.pth.tar"
+        shortcut_name = "best_model.pth"
         shortcut_path = os.path.join(out_path, shortcut_name)
         fs.copy(checkpoint_path, shortcut_path)
         best_loss = current_loss
diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py
index 01d54ad6..dd397687 100644
--- a/TTS/utils/manage.py
+++ b/TTS/utils/manage.py
@@ -3,6 +3,7 @@ import json
 import os
 import zipfile
 from pathlib import Path
 from shutil import copyfile, rmtree
+from typing import Tuple

 import requests
@@ -114,7 +115,7 @@ class ModelManager(object):
             e.g. 'tts_model/en/ljspeech/tacotron'

         Every model must have the following files:
-            - *.pth.tar : pytorch model checkpoint file.
+            - *.pth : pytorch model checkpoint file.
             - config.json : model config file.
             - scale_stats.npy (if exist): scale values for preprocessing.
@@ -127,7 +128,7 @@
         model_item = self.models_dict[model_type][lang][dataset][model]
         # set the model specific output path
         output_path = os.path.join(self.output_prefix, model_full_name)
-        output_model_path = os.path.join(output_path, "model_file.pth.tar")
+        output_model_path = os.path.join(output_path, "model_file.pth")
         output_config_path = os.path.join(output_path, "config.json")

         if os.path.exists(output_path):
@@ -139,8 +140,32 @@
             self._download_zip_file(model_item["github_rls_url"], output_path)
             # update paths in the config.json
             self._update_paths(output_path, output_config_path)
+        # find downloaded files
+        output_model_path, output_config_path = self._find_files(output_path)
         return output_model_path, output_config_path, model_item

+    def _find_files(self, output_path: str) -> Tuple[str, str]:
+        """Find the model and config files in the output path.
+
+        Args:
+            output_path (str): path to the model files
+
+        Returns:
+            Tuple[str, str]: path to the model file and config file
+        """
+        model_file = None
+        config_file = None
+        for file_name in os.listdir(output_path):
+            if file_name in ["model_file.pth", "model_file.pth.tar", "model.pth"]:
+                model_file = os.path.join(output_path, file_name)
+            elif file_name == "config.json":
+                config_file = os.path.join(output_path, file_name)
+        if model_file is None:
+            raise ValueError(" [!] Model file not found in the output path")
+        if config_file is None:
+            raise ValueError(" [!] Config file not found in the output path")
+        return model_file, config_file
+
     def _update_paths(self, output_path: str, config_path: str) -> None:
         """Update paths for certain files in config.json after download.

@@ -152,7 +177,7 @@
         output_d_vector_file_path = os.path.join(output_path, "speakers.json")
         output_speaker_ids_file_path = os.path.join(output_path, "speaker_ids.json")
         speaker_encoder_config_path = os.path.join(output_path, "config_se.json")
-        speaker_encoder_model_path = os.path.join(output_path, "model_se.pth.tar")
+        speaker_encoder_model_path = os.path.join(output_path, "model_se.pth")

         # update the scale_path.npy file path in the model config.json
         self._update_path("audio.stats_path", output_stats_path, config_path)
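The `_find_files` helper above keeps `download_model` working whether a release ships `model_file.pth`, the legacy `model_file.pth.tar`, or `model.pth`. If you also want an existing download cache to match the new naming on disk, a small optional sketch — the default cache root is an assumption based on the `~/.local/share/tts` paths used in the docs below:

```python
# Optional sketch: rename previously downloaded checkpoints from the legacy
# `.pth.tar` suffix to `.pth`. Not required for correctness, since
# _find_files above already accepts both names; this only normalizes a cache.
from pathlib import Path


def migrate_cache(root: str = "~/.local/share/tts") -> None:
    for old in Path(root).expanduser().rglob("*.pth.tar"):
        new = old.with_suffix("")  # "model_file.pth.tar" -> "model_file.pth"
        if not new.exists():
            old.rename(new)
            print(f"renamed {old} -> {new}")


if __name__ == "__main__":
    migrate_cache()
```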
diff --git a/TTS/vocoder/README.md b/TTS/vocoder/README.md
index e0ae8f21..b9fb17c8 100644
--- a/TTS/vocoder/README.md
+++ b/TTS/vocoder/README.md
@@ -29,7 +29,7 @@ You can continue a previous training run by the following command.

 You can fine-tune a pre-trained model by the following command.

-```CUDA_VISIBLE_DEVICES='0' python tts/bin/train_vocoder.py --restore_path path/to/your/model.pth.tar```
+```CUDA_VISIBLE_DEVICES='0' python tts/bin/train_vocoder.py --restore_path path/to/your/model.pth```

 Restoring a model starts a new training in a different folder. It only restores model weights with the given checkpoint file. However, continuing a training starts from the same directory where the previous training run left off.
diff --git a/docs/source/finetuning.md b/docs/source/finetuning.md
index 7d7ef1cb..fd97daa5 100644
--- a/docs/source/finetuning.md
+++ b/docs/source/finetuning.md
@@ -93,13 +93,13 @@ them and fine-tune it for your own dataset. This will help you in two main ways:

    ```bash
    CUDA_VISIBLE_DEVICES="0" python recipes/ljspeech/glow_tts/train_glowtts.py \
-       --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth.tar
+       --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth
    ```

    ```bash
    CUDA_VISIBLE_DEVICES="0" python TTS/bin/train_tts.py \
        --config_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/config.json \
-       --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth.tar
+       --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth
    ```

    As stated above, you can also use command-line arguments to change the model configuration.

@@ -107,7 +107,7 @@ them and fine-tune it for your own dataset. This will help you in two main ways:

    ```bash
    CUDA_VISIBLE_DEVICES="0" python recipes/ljspeech/glow_tts/train_glowtts.py \
-       --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth.tar
+       --restore_path /home/ubuntu/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth \
        --coqpit.run_name "glow-tts-finetune" \
        --coqpit.lr 0.00001
    ```
diff --git a/docs/source/inference.md b/docs/source/inference.md
index 544473bf..1057d04d 100644
--- a/docs/source/inference.md
+++ b/docs/source/inference.md
@@ -44,7 +44,7 @@ Run your own TTS model (Using Griffin-Lim Vocoder)

 ```bash
 tts --text "Text for TTS" \
-    --model_path path/to/model.pth.tar \
+    --model_path path/to/model.pth \
     --config_path path/to/config.json \
     --out_path folder/to/save/output.wav
 ```
@@ -54,9 +54,9 @@ Run your own TTS and Vocoder models
 ```bash
 tts --text "Text for TTS" \
     --config_path path/to/config.json \
-    --model_path path/to/model.pth.tar \
+    --model_path path/to/model.pth \
     --out_path folder/to/save/output.wav \
-    --vocoder_path path/to/vocoder.pth.tar \
+    --vocoder_path path/to/vocoder.pth \
     --vocoder_config_path path/to/vocoder_config.json
 ```
diff --git a/docs/source/training_a_model.md b/docs/source/training_a_model.md
index a28710d0..22090f6e 100644
--- a/docs/source/training_a_model.md
+++ b/docs/source/training_a_model.md
@@ -33,7 +33,7 @@
     If you like to run a multi-gpu training using DDP back-end,

     ```bash
-    $ CUDA_VISIBLE_DEVICES="0, 1, 2" python TTS/bin/distribute.py --script /train_glowtts.py
+    $ CUDA_VISIBLE_DEVICES="0, 1, 2" python -m trainer.distribute --script /train_glowtts.py
     ```

     The example above runs a multi-gpu training using GPUs `0, 1, 2`.
@@ -122,7 +122,7 @@

     ```bash
     $ tts --text "Text for TTS" \
-          --model_path path/to/checkpoint_x.pth.tar \
+          --model_path path/to/checkpoint_x.pth \
           --config_path path/to/config.json \
           --out_path folder/to/save/output.wav
     ```
diff --git a/docs/source/tutorial_for_nervous_beginners.md b/docs/source/tutorial_for_nervous_beginners.md
index fa09cb7d..d2d3c4bb 100644
--- a/docs/source/tutorial_for_nervous_beginners.md
+++ b/docs/source/tutorial_for_nervous_beginners.md
@@ -50,13 +50,13 @@ A breakdown of a simple script that trains a GlowTTS model on the LJspeech datas
 - Fine-tune a model.

     ```bash
-    CUDA_VISIBLE_DEVICES=0 python train.py --restore_path path/to/model/checkpoint.pth.tar
+    CUDA_VISIBLE_DEVICES=0 python train.py --restore_path path/to/model/checkpoint.pth
     ```

 - Run multi-gpu training.
     ```bash
-    CUDA_VISIBLE_DEVICES=0,1,2 python TTS/bin/distribute.py --script train.py
+    CUDA_VISIBLE_DEVICES=0,1,2 python -m trainer.distribute --script train.py
     ```

 ### CLI Way
diff --git a/notebooks/ExtractTTSpectrogram.ipynb b/notebooks/ExtractTTSpectrogram.ipynb
index 50b60ff0..a257b6bf 100644
--- a/notebooks/ExtractTTSpectrogram.ipynb
+++ b/notebooks/ExtractTTSpectrogram.ipynb
@@ -66,7 +66,7 @@
     "DATASET = \"ljspeech\"\n",
     "METADATA_FILE = \"metadata.csv\"\n",
     "CONFIG_PATH = \"/home/ubuntu/.local/share/tts/tts_models--en--ljspeech--tacotron2-DDC_ph/config.json\"\n",
-    "MODEL_FILE = \"/home/ubuntu/.local/share/tts/tts_models--en--ljspeech--tacotron2-DDC_ph/model_file.pth.tar\"\n",
+    "MODEL_FILE = \"/home/ubuntu/.local/share/tts/tts_models--en--ljspeech--tacotron2-DDC_ph/model_file.pth\"\n",
     "BATCH_SIZE = 32\n",
     "\n",
     "QUANTIZED_WAV = False\n",
diff --git a/notebooks/PlotUmapLibriTTS.ipynb b/notebooks/PlotUmapLibriTTS.ipynb
index c809a5c4..1e29790b 100644
--- a/notebooks/PlotUmapLibriTTS.ipynb
+++ b/notebooks/PlotUmapLibriTTS.ipynb
@@ -66,7 +66,7 @@
    "outputs": [],
    "source": [
     "MODEL_RUN_PATH = \"/media/erogol/data_ssd/Models/libri_tts/speaker_encoder/libritts_360-half-October-31-2019_04+54PM-19d2f5f/\"\n",
-    "MODEL_PATH = MODEL_RUN_PATH + \"best_model.pth.tar\"\n",
+    "MODEL_PATH = MODEL_RUN_PATH + \"best_model.pth\"\n",
     "CONFIG_PATH = MODEL_RUN_PATH + \"config.json\"\n",
     "\n",
     "# My single speaker locations\n",
diff --git a/notebooks/TestAttention.ipynb b/notebooks/TestAttention.ipynb
index 5d8eed85..b257ff70 100644
--- a/notebooks/TestAttention.ipynb
+++ b/notebooks/TestAttention.ipynb
@@ -73,7 +73,7 @@
     "\n",
     "# Set constants\n",
     "ROOT_PATH = '/home/erogol/Models/LJSpeech/ljspeech-May-20-2020_12+29PM-1835628/'\n",
-    "MODEL_PATH = ROOT_PATH + '/best_model.pth.tar'\n",
+    "MODEL_PATH = ROOT_PATH + '/best_model.pth'\n",
     "CONFIG_PATH = ROOT_PATH + '/config.json'\n",
     "OUT_FOLDER = './hard_sentences/'\n",
     "CONFIG = load_config(CONFIG_PATH)\n",
diff --git a/notebooks/dataset_analysis/AnalyzeDataset.ipynb b/notebooks/dataset_analysis/AnalyzeDataset.ipynb
index e08f3ab3..51963847 100644
--- a/notebooks/dataset_analysis/AnalyzeDataset.ipynb
+++ b/notebooks/dataset_analysis/AnalyzeDataset.ipynb
@@ -416,7 +416,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.1"
+   "version": "3.9.5"
   }
  },
 "nbformat": 4,
diff --git a/tests/aux_tests/test_extract_tts_spectrograms.py b/tests/aux_tests/test_extract_tts_spectrograms.py
index 8c795d58..ef751846 100644
--- a/tests/aux_tests/test_extract_tts_spectrograms.py
+++ b/tests/aux_tests/test_extract_tts_spectrograms.py
@@ -15,7 +15,7 @@ class TestExtractTTSSpectrograms(unittest.TestCase):
     def test_GlowTTS():
         # set paths
         config_path = os.path.join(get_tests_input_path(), "test_glow_tts.json")
-        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth.tar")
+        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth")
         output_path = os.path.join(get_tests_output_path(), "output_extract_tts_spectrograms/")
         # load config
         c = load_config(config_path)
@@ -33,7 +33,7 @@ class TestExtractTTSSpectrograms(unittest.TestCase):
     def test_Tacotron2():
         # set paths
         config_path = os.path.join(get_tests_input_path(), "test_tacotron2_config.json")
-        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth.tar")
+        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth")
         output_path = os.path.join(get_tests_output_path(), "output_extract_tts_spectrograms/")
         # load config
         c = load_config(config_path)
@@ -51,7 +51,7 @@ class TestExtractTTSSpectrograms(unittest.TestCase):
     def test_Tacotron():
         # set paths
         config_path = os.path.join(get_tests_input_path(), "test_tacotron_config.json")
-        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth.tar")
+        checkpoint_path = os.path.join(get_tests_output_path(), "checkpoint_test.pth")
         output_path = os.path.join(get_tests_output_path(), "output_extract_tts_spectrograms/")
         # load config
         c = load_config(config_path)
diff --git a/tests/aux_tests/test_speaker_manager.py b/tests/aux_tests/test_speaker_manager.py
index 5fafb56a..57ff6c50 100644
--- a/tests/aux_tests/test_speaker_manager.py
+++ b/tests/aux_tests/test_speaker_manager.py
@@ -12,7 +12,7 @@ from TTS.tts.utils.speakers import SpeakerManager
 from TTS.utils.audio import AudioProcessor

 encoder_config_path = os.path.join(get_tests_input_path(), "test_speaker_encoder_config.json")
-encoder_model_path = os.path.join(get_tests_input_path(), "checkpoint_0.pth.tar")
+encoder_model_path = os.path.join(get_tests_input_path(), "checkpoint_0.pth")
 sample_wav_path = os.path.join(get_tests_input_path(), "../data/ljspeech/wavs/LJ001-0001.wav")
 sample_wav_path2 = os.path.join(get_tests_input_path(), "../data/ljspeech/wavs/LJ001-0002.wav")
 d_vectors_file_path = os.path.join(get_tests_input_path(), "../data/dummy_speakers.json")
diff --git a/tests/inference_tests/test_synthesizer.py b/tests/inference_tests/test_synthesizer.py
index d643cb81..b5350b0f 100644
--- a/tests/inference_tests/test_synthesizer.py
+++ b/tests/inference_tests/test_synthesizer.py
@@ -20,7 +20,7 @@ class SynthesizerTest(unittest.TestCase):
     def test_in_out(self):
         self._create_random_model()
         tts_root_path = get_tests_output_path()
-        tts_checkpoint = os.path.join(tts_root_path, "checkpoint_10.pth.tar")
+        tts_checkpoint = os.path.join(tts_root_path, "checkpoint_10.pth")
         tts_config = os.path.join(tts_root_path, "dummy_model_config.json")
         synthesizer = Synthesizer(tts_checkpoint, tts_config, None, None)
         synthesizer.tts("Better this test works!!")
diff --git a/tests/inputs/server_config.json b/tests/inputs/server_config.json
index 0cb9b948..f0a92283 100644
--- a/tests/inputs/server_config.json
+++ b/tests/inputs/server_config.json
@@ -1,5 +1,5 @@
 {
-    "tts_checkpoint":"checkpoint_10.pth.tar", // tts checkpoint file
+    "tts_checkpoint":"checkpoint_10.pth", // tts checkpoint file
     "tts_config":"dummy_model_config.json", // tts config.json file
     "tts_speakers": null, // json file listing speaker ids. null if no speaker embedding.
     "wavernn_lib_path": null, // Rootpath to wavernn project folder to be imported. If this is null, model uses GL for speech synthesis.
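The updated tests above only touch checkpoint paths; none exercise the new `_find_files` fallback directly. A sketch of such a check, in the style of the existing unittest suites — the unbound call is a shortcut that works only because `_find_files` never reads `self`:

```python
# Sketch: verify that _find_files resolves both the new and the legacy
# checkpoint names added in TTS/utils/manage.py above.
import os
import tempfile
import unittest

from TTS.utils.manage import ModelManager


class ModelManagerFindFilesTest(unittest.TestCase):
    def test_find_files_accepts_legacy_suffix(self):
        # _find_files does not use `self`, so it can be exercised without
        # constructing a full ModelManager.
        with tempfile.TemporaryDirectory() as tmp:
            open(os.path.join(tmp, "model_file.pth.tar"), "wb").close()
            open(os.path.join(tmp, "config.json"), "w").close()
            model_file, config_file = ModelManager._find_files(None, tmp)
            self.assertTrue(model_file.endswith("model_file.pth.tar"))
            self.assertTrue(config_file.endswith("config.json"))
```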