diff --git a/TTS/bin/eval_encoder.py b/TTS/bin/eval_encoder.py
index de9e5865..089f3645 100644
--- a/TTS/bin/eval_encoder.py
+++ b/TTS/bin/eval_encoder.py
@@ -56,7 +56,7 @@ if __name__ == "__main__":
         description="""Compute the accuracy of the encoder.\n\n"""
         """
         Example runs:
-        python TTS/bin/eval_encoder.py emotion_encoder_model.pth.tar emotion_encoder_config.json dataset_config.json
+        python TTS/bin/eval_encoder.py emotion_encoder_model.pth emotion_encoder_config.json dataset_config.json
         """,
         formatter_class=RawTextHelpFormatter,
     )
diff --git a/TTS/encoder/README.md b/TTS/encoder/README.md
index b6f541f8..b38b2005 100644
--- a/TTS/encoder/README.md
+++ b/TTS/encoder/README.md
@@ -14,5 +14,5 @@ To run the code, you need to follow the same flow as in TTS.
 
 - Define 'config.json' for your needs. Note that, audio parameters should match your TTS model.
 - Example training call ```python speaker_encoder/train.py --config_path speaker_encoder/config.json --data_path ~/Data/Libri-TTS/train-clean-360```
-- Generate embedding vectors ```python speaker_encoder/compute_embeddings.py --use_cuda true /model/path/best_model.pth.tar model/config/path/config.json dataset/path/ output_path``` . This code parses all .wav files at the given dataset path and generates the same folder structure under the output path with the generated embedding files.
+- Generate embedding vectors ```python speaker_encoder/compute_embeddings.py --use_cuda true /model/path/best_model.pth model/config/path/config.json dataset/path/ output_path``` . This code parses all .wav files at the given dataset path and generates the same folder structure under the output path with the generated embedding files.
 - Watch training on Tensorboard as in TTS
diff --git a/TTS/encoder/utils/generic_utils.py b/TTS/encoder/utils/generic_utils.py
index 17f1c3d9..19c00582 100644
--- a/TTS/encoder/utils/generic_utils.py
+++ b/TTS/encoder/utils/generic_utils.py
@@ -147,7 +147,7 @@ def setup_speaker_encoder_model(config: "Coqpit"):
 
 
 def save_checkpoint(model, optimizer, criterion, model_loss, out_path, current_step, epoch):
-    checkpoint_path = "checkpoint_{}.pth.tar".format(current_step)
+    checkpoint_path = "checkpoint_{}.pth".format(current_step)
     checkpoint_path = os.path.join(out_path, checkpoint_path)
     print(" | | > Checkpoint saving : {}".format(checkpoint_path))
@@ -177,7 +177,7 @@ def save_best_model(model, optimizer, criterion, model_loss, best_loss, out_path
             "date": datetime.date.today().strftime("%B %d, %Y"),
         }
         best_loss = model_loss
-        bestmodel_path = "best_model.pth.tar"
+        bestmodel_path = "best_model.pth"
         bestmodel_path = os.path.join(out_path, bestmodel_path)
         print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path))
         save_fsspec(state, bestmodel_path)
diff --git a/TTS/encoder/utils/io.py b/TTS/encoder/utils/io.py
index 7a3aadc9..d1dad3e2 100644
--- a/TTS/encoder/utils/io.py
+++ b/TTS/encoder/utils/io.py
@@ -5,7 +5,7 @@ from TTS.utils.io import save_fsspec
 
 
 def save_checkpoint(model, optimizer, model_loss, out_path, current_step):
-    checkpoint_path = "checkpoint_{}.pth.tar".format(current_step)
+    checkpoint_path = "checkpoint_{}.pth".format(current_step)
     checkpoint_path = os.path.join(out_path, checkpoint_path)
     print(" | | > Checkpoint saving : {}".format(checkpoint_path))
@@ -31,7 +31,7 @@ def save_best_model(model, optimizer, model_loss, best_loss, out_path, current_s
             "date": datetime.date.today().strftime("%B %d, %Y"),
         }
         best_loss = model_loss
-        bestmodel_path = "best_model.pth.tar"
+        bestmodel_path = "best_model.pth"
         bestmodel_path = os.path.join(out_path, bestmodel_path)
         print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path))
         save_fsspec(state, bestmodel_path)
diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py
index 01d54ad6..3566cf2f 100644
--- a/TTS/utils/manage.py
+++ b/TTS/utils/manage.py
@@ -114,7 +114,7 @@ class ModelManager(object):
             e.g. 'tts_model/en/ljspeech/tacotron'
 
         Every model must have the following files:
-            - *.pth.tar : pytorch model checkpoint file.
+            - *.pth : pytorch model checkpoint file.
             - config.json : model config file.
             - scale_stats.npy (if exist): scale values for preprocessing.
@@ -127,7 +127,7 @@ class ModelManager(object):
         model_item = self.models_dict[model_type][lang][dataset][model]
         # set the model specific output path
        output_path = os.path.join(self.output_prefix, model_full_name)
-        output_model_path = os.path.join(output_path, "model_file.pth.tar")
+        output_model_path = os.path.join(output_path, "model_file.pth")
         output_config_path = os.path.join(output_path, "config.json")
 
         if os.path.exists(output_path):
@@ -152,7 +152,7 @@
         output_d_vector_file_path = os.path.join(output_path, "speakers.json")
         output_speaker_ids_file_path = os.path.join(output_path, "speaker_ids.json")
         speaker_encoder_config_path = os.path.join(output_path, "config_se.json")
-        speaker_encoder_model_path = os.path.join(output_path, "model_se.pth.tar")
+        speaker_encoder_model_path = os.path.join(output_path, "model_se.pth")
 
         # update the scale_path.npy file path in the model config.json
         self._update_path("audio.stats_path", output_stats_path, config_path)
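
Note on compatibility: the rename from `*.pth.tar` to `*.pth` is purely cosmetic. These checkpoints are plain dictionaries serialized with `torch.save`, and `torch.load` does not depend on the file extension, so files written under either name load identically. A minimal sketch of loading a renamed checkpoint (the path is hypothetical, and the `"model"` key is an assumption based on the state dicts these savers build; only the `"date"` key is visible in the diff above):

```python
import torch

# Sketch: load a checkpoint saved under the new *.pth naming.
# torch.load is extension-agnostic, so this works the same for old
# *.pth.tar files. "best_model.pth" is a hypothetical local path.
checkpoint = torch.load("best_model.pth", map_location="cpu")

# Assumed layout: {"model": state_dict, ..., "date": "..."} as built
# by save_checkpoint / save_best_model.
model_state = checkpoint["model"]
print("checkpoint saved on:", checkpoint["date"])
```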