Fix model manager (#1436)

* Fix manager

* Make style
Eren Gölge authored on 2022-03-23 12:57:14 +01:00, committed by GitHub
parent 72d85e53c9
commit 1c3623af33
20 changed files with 129 additions and 27 deletions

View File

@@ -27,6 +27,7 @@ DEF_LANG_TO_PHONEMIZER["en"] = DEF_LANG_TO_PHONEMIZER["en-us"]
 DEF_LANG_TO_PHONEMIZER["ja-jp"] = JA_JP_Phonemizer.name()
 DEF_LANG_TO_PHONEMIZER["zh-cn"] = ZH_CN_Phonemizer.name()
+
 def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer:
     """Initiate a phonemizer by name

View File

@@ -371,7 +371,9 @@ class AudioProcessor(object):
         self.hop_length = hop_length
         self.win_length = win_length
         assert min_level_db != 0.0, " [!] min_level_db is 0"
-        assert self.win_length <= self.fft_size, f" [!] win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}"
+        assert (
+            self.win_length <= self.fft_size
+        ), f" [!] win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}"
         members = vars(self)
         if verbose:
             print(" > Setting up Audio Processor...")

View File

@@ -3,8 +3,8 @@ import json
 import os
 import zipfile
 from pathlib import Path
-from typing import Tuple
 from shutil import copyfile, rmtree
+from typing import Tuple
 
 import requests
@@ -128,9 +128,6 @@ class ModelManager(object):
         model_item = self.models_dict[model_type][lang][dataset][model]
         # set the model specific output path
         output_path = os.path.join(self.output_prefix, model_full_name)
-        output_model_path = os.path.join(output_path, "model_file.pth")
-        output_config_path = os.path.join(output_path, "config.json")
         if os.path.exists(output_path):
             print(f" > {model_name} is already downloaded.")
         else:
@@ -138,13 +135,14 @@ class ModelManager(object):
             print(f" > Downloading model to {output_path}")
             # download from github release
             self._download_zip_file(model_item["github_rls_url"], output_path)
-            # update paths in the config.json
-            self._update_paths(output_path, output_config_path)
         # find downloaded files
         output_model_path, output_config_path = self._find_files(output_path)
+        # update paths in the config.json
+        self._update_paths(output_path, output_config_path)
         return output_model_path, output_config_path, model_item
 
-    def _find_files(self, output_path:str) -> Tuple[str, str]:
+    @staticmethod
+    def _find_files(output_path: str) -> Tuple[str, str]:
         """Find the model and config files in the output path
 
         Args:
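This hunk carries the actual bug fix: config.json is now patched only after _find_files has located the checkpoint and config files the downloaded archive really contains, instead of relying on the hard-coded model_file.pth / config.json paths removed above. A hedged usage sketch of the public entry point that exercises this path (the model name is only an example):

from TTS.utils.manage import ModelManager

manager = ModelManager()
# download_model() now: (1) downloads and unzips the release if needed,
# (2) discovers the real file names via _find_files(), and only then
# (3) rewrites the paths inside the downloaded config.json.
model_path, config_path, model_item = manager.download_model(
    "tts_models/en/ljspeech/tacotron2-DDC"
)
print(model_path, config_path)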
@@ -166,6 +164,22 @@ class ModelManager(object):
             raise ValueError(" [!] Config file not found in the output path")
         return model_file, config_file
 
+    @staticmethod
+    def _find_speaker_encoder(output_path: str) -> str:
+        """Find the speaker encoder file in the output path
+
+        Args:
+            output_path (str): path to the model files
+
+        Returns:
+            str: path to the speaker encoder file
+        """
+        speaker_encoder_file = None
+        for file_name in os.listdir(output_path):
+            if file_name in ["model_se.pth", "model_se.pth.tar"]:
+                speaker_encoder_file = os.path.join(output_path, file_name)
+        return speaker_encoder_file
+
     def _update_paths(self, output_path: str, config_path: str) -> None:
         """Update paths for certain files in config.json after download.
@@ -177,7 +191,7 @@ class ModelManager(object):
         output_d_vector_file_path = os.path.join(output_path, "speakers.json")
         output_speaker_ids_file_path = os.path.join(output_path, "speaker_ids.json")
         speaker_encoder_config_path = os.path.join(output_path, "config_se.json")
-        speaker_encoder_model_path = os.path.join(output_path, "model_se.pth")
+        speaker_encoder_model_path = self._find_speaker_encoder(output_path)
 
         # update the scale_path.npy file path in the model config.json
         self._update_path("audio.stats_path", output_stats_path, config_path)
@@ -199,7 +213,7 @@ class ModelManager(object):
     @staticmethod
     def _update_path(field_name, new_path, config_path):
         """Update the path in the model config.json for the current environment after download"""
-        if os.path.exists(new_path):
+        if new_path and os.path.exists(new_path):
            config = load_config(config_path)
            field_names = field_name.split(".")
            if len(field_names) > 1:
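The last two hunks work as a pair: _find_speaker_encoder returns None when the release ships neither model_se.pth nor model_se.pth.tar, and the added `new_path and` guard keeps _update_path from calling os.path.exists(None), which would raise a TypeError. A small self-contained sketch of that combined behaviour, with a plain dict standing in for the loaded config (the helpers mirror the diff but are simplified):

import os
from typing import Optional


def find_speaker_encoder(output_path: str) -> Optional[str]:
    # Same search as the new ModelManager._find_speaker_encoder: None if the
    # downloaded release contains no speaker-encoder checkpoint.
    speaker_encoder_file = None
    for file_name in os.listdir(output_path):
        if file_name in ["model_se.pth", "model_se.pth.tar"]:
            speaker_encoder_file = os.path.join(output_path, file_name)
    return speaker_encoder_file


def update_path(field_name: str, new_path: Optional[str], config: dict) -> None:
    # Simplified stand-in for ModelManager._update_path: skip the update when
    # no file was found instead of crashing on os.path.exists(None).
    if new_path and os.path.exists(new_path):
        config[field_name] = new_path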

View File

@@ -49,7 +49,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config)
 # You can define your custom sample loader returning the list of samples.
 # Or define your custom formatter and pass it to the `load_tts_samples`.
 # Check `TTS.tts.datasets.load_tts_samples` for more details.
-train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size)
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
 
 # init model
 model = AlignTTS(config, ap, tokenizer)

View File

@@ -84,7 +84,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config)
 # You can define your custom sample loader returning the list of samples.
 # Or define your custom formatter and pass it to the `load_tts_samples`.
 # Check `TTS.tts.datasets.load_tts_samples` for more details.
-train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size)
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
 
 # init the model
 model = ForwardTTS(config, ap, tokenizer, speaker_manager=None)

View File

@@ -83,7 +83,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config)
 # You can define your custom sample loader returning the list of samples.
 # Or define your custom formatter and pass it to the `load_tts_samples`.
 # Check `TTS.tts.datasets.load_tts_samples` for more details.
-train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size)
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
 
 # init the model
 model = ForwardTTS(config, ap, tokenizer)

View File

@@ -60,7 +60,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config)
 # You can define your custom sample loader returning the list of samples.
 # Or define your custom formatter and pass it to the `load_tts_samples`.
 # Check `TTS.tts.datasets.load_tts_samples` for more details.
-train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size)
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
 
 # INITIALIZE THE MODEL
 # Models take a config object and a speaker manager as input

View File

@@ -67,7 +67,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config)
 # You can define your custom sample loader returning the list of samples.
 # Or define your custom formatter and pass it to the `load_tts_samples`.
 # Check `TTS.tts.datasets.load_tts_samples` for more details.
-train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size)
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
 
 # init model
 model = ForwardTTS(config, ap, tokenizer)

View File

@@ -77,7 +77,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config)
 # You can define your custom sample loader returning the list of samples.
 # Or define your custom formatter and pass it to the `load_tts_samples`.
 # Check `TTS.tts.datasets.load_tts_samples` for more details.
-train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size)
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
 
 # INITIALIZE THE MODEL
 # Models take a config object and a speaker manager as input

View File

@@ -74,7 +74,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config)
 # You can define your custom sample loader returning the list of samples.
 # Or define your custom formatter and pass it to the `load_tts_samples`.
 # Check `TTS.tts.datasets.load_tts_samples` for more details.
-train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size)
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
 
 # INITIALIZE THE MODEL
 # Models take a config object and a speaker manager as input

View File

@@ -69,7 +69,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config)
 # You can define your custom sample loader returning the list of samples.
 # Or define your custom formatter and pass it to the `load_tts_samples`.
 # Check `TTS.tts.datasets.load_tts_samples` for more details.
-train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size)
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
 
 # init model
 model = Vits(config, ap, tokenizer, speaker_manager=None)

View File

@@ -109,7 +109,12 @@ config.from_dict(config.to_dict())
 ap = AudioProcessor(**config.audio.to_dict())
 
 # load training samples
-train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size)
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
 
 # init speaker manager for multi-speaker training
 # it maps speaker-id to speaker-name in the model and data-loader

View File

@@ -71,7 +71,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config)
 # You can define your custom sample loader returning the list of samples.
 # Or define your custom formatter and pass it to the `load_tts_samples`.
 # Check `TTS.tts.datasets.load_tts_samples` for more details.
-train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size)
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
 
 # init speaker manager for multi-speaker training
 # it maps speaker-id to speaker-name in the model and data-loader

View File

@@ -69,7 +69,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config)
 # You can define your custom sample loader returning the list of samples.
 # Or define your custom formatter and pass it to the `load_tts_samples`.
 # Check `TTS.tts.datasets.load_tts_samples` for more details.
-train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size)
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
 
 # init speaker manager for multi-speaker training
 # it maps speaker-id to speaker-name in the model and data-loader

View File

@@ -69,7 +69,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config)
 # You can define your custom sample loader returning the list of samples.
 # Or define your custom formatter and pass it to the `load_tts_samples`.
 # Check `TTS.tts.datasets.load_tts_samples` for more details.
-train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size)
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
 
 # init speaker manager for multi-speaker training
 # it maps speaker-id to speaker-name in the model and data-loader

View File

@@ -69,7 +69,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config)
 # You can define your custom sample loader returning the list of samples.
 # Or define your custom formatter and pass it to the `load_tts_samples`.
 # Check `TTS.tts.datasets.load_tts_samples` for more details.
-train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size)
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
 
 # init speaker manager for multi-speaker training
 # it maps speaker-id to speaker-name in the model and data-loader

View File

@@ -72,7 +72,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config)
 # You can define your custom sample loader returning the list of samples.
 # Or define your custom formatter and pass it to the `load_tts_samples`.
 # Check `TTS.tts.datasets.load_tts_samples` for more details.
-train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size)
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
 
 # init speaker manager for multi-speaker training
 # it mainly handles speaker-id to speaker-name for the model and the data-loader

View File

@@ -78,7 +78,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config)
 # You can define your custom sample loader returning the list of samples.
 # Or define your custom formatter and pass it to the `load_tts_samples`.
 # Check `TTS.tts.datasets.load_tts_samples` for more details.
-train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size)
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
 
 # init speaker manager for multi-speaker training
 # it mainly handles speaker-id to speaker-name for the model and the data-loader

View File

@@ -78,7 +78,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config)
 # You can define your custom sample loader returning the list of samples.
 # Or define your custom formatter and pass it to the `load_tts_samples`.
 # Check `TTS.tts.datasets.load_tts_samples` for more details.
-train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size)
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
 
 # init speaker manager for multi-speaker training
 # it mainly handles speaker-id to speaker-name for the model and the data-loader

View File

@@ -79,7 +79,12 @@ tokenizer, config = TTSTokenizer.init_from_config(config)
 # You can define your custom sample loader returning the list of samples.
 # Or define your custom formatter and pass it to the `load_tts_samples`.
 # Check `TTS.tts.datasets.load_tts_samples` for more details.
-train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True, eval_split_max_size=config.eval_split_max_size, eval_split_size=config.eval_split_size)
+train_samples, eval_samples = load_tts_samples(
+    dataset_config,
+    eval_split=True,
+    eval_split_max_size=config.eval_split_max_size,
+    eval_split_size=config.eval_split_size,
+)
 
 # init speaker manager for multi-speaker training
 # it maps speaker-id to speaker-name in the model and data-loader