style update

Eren Gölge 2021-05-10 23:03:21 +02:00
parent 5aee30443f
commit 19fb1d743d
35 changed files with 160 additions and 152 deletions

View File

@@ -16,16 +16,11 @@ from TTS.speaker_encoder.model import SpeakerEncoder
 from TTS.speaker_encoder.utils.io import save_best_model, save_checkpoint
 from TTS.speaker_encoder.utils.visual import plot_embeddings
 from TTS.tts.datasets.preprocess import load_meta_data
+from TTS.utils.arguments import init_training
 from TTS.utils.audio import AudioProcessor
-from TTS.utils.generic_utils import (
-    count_parameters,
-    remove_experiment_folder,
-    set_init_dict,
-)
+from TTS.utils.generic_utils import count_parameters, remove_experiment_folder, set_init_dict
 from TTS.utils.radam import RAdam
 from TTS.utils.training import NoamLR, check_update
-from TTS.utils.arguments import init_training
 
 torch.backends.cudnn.enabled = True
 torch.backends.cudnn.benchmark = True

View File

@@ -1,6 +1,7 @@
 import json
 import os
 import re
+
 import yaml
 
 from TTS.config.shared_configs import *
@@ -34,7 +35,7 @@ def _search_configs(model_name):
 def _process_model_name(config_dict):
     model_name = config_dict["model"] if "model" in config_dict else config_dict["generator_model"]
-    model_name = model_name.replace('_generator', '').replace('_discriminator', '')
+    model_name = model_name.replace("_generator", "").replace("_discriminator", "")
     return model_name
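For reference, a small hypothetical example of what _process_model_name above does; the config dict and its values are made up, only the key names and the suffix-stripping logic come from the function body shown in the diff:

# Hypothetical illustration of _process_model_name(); values are invented for the example.
config_dict = {"generator_model": "hifigan_generator"}
model_name = config_dict["model"] if "model" in config_dict else config_dict["generator_model"]
model_name = model_name.replace("_generator", "").replace("_discriminator", "")
assert model_name == "hifigan"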

View File

@@ -1,7 +1,9 @@
-from coqpit import MISSING
-from dataclasses import dataclass, field, asdict
+from dataclasses import asdict, dataclass, field
 from typing import List
-from TTS.config.shared_configs import BaseTrainingConfig, BaseAudioConfig, BaseDatasetConfig
+
+from coqpit import MISSING
+
+from TTS.config.shared_configs import BaseAudioConfig, BaseDatasetConfig, BaseTrainingConfig
 
 
 @dataclass
@@ -13,23 +15,27 @@ class SpeakerEncoderConfig(BaseTrainingConfig):
     datasets: List[BaseDatasetConfig] = field(default_factory=lambda: [BaseDatasetConfig()])
     # model params
-    model_params: dict = field(default_factory=lambda: {
-        "input_dim": 40,
-        "proj_dim": 256,
-        "lstm_dim": 768,
-        "num_lstm_layers": 3,
-        "use_lstm_with_projection": True
-    })
+    model_params: dict = field(
+        default_factory=lambda: {
+            "input_dim": 40,
+            "proj_dim": 256,
+            "lstm_dim": 768,
+            "num_lstm_layers": 3,
+            "use_lstm_with_projection": True,
+        }
+    )
 
-    storage: dict = field(default_factory=lambda:{
-        "sample_from_storage_p": 0.66,  # the probability with which we'll sample from the DataSet in-memory storage
-        "storage_size": 15,  # the size of the in-memory storage with respect to a single batch
-        "additive_noise": 1e-5  # add very small gaussian noise to the data in order to increase robustness
-    })
+    storage: dict = field(
+        default_factory=lambda: {
+            "sample_from_storage_p": 0.66,  # the probability with which we'll sample from the DataSet in-memory storage
+            "storage_size": 15,  # the size of the in-memory storage with respect to a single batch
+            "additive_noise": 1e-5,  # add very small gaussian noise to the data in order to increase robustness
+        }
+    )
 
     # training params
     max_train_step: int = 1000  # end training when number of training steps reaches this value.
-    loss: str = 'angleproto'
+    loss: str = "angleproto"
     grad_clip: float = 3.0
     lr: float = 0.0001
     lr_decay: bool = False
@@ -51,4 +57,6 @@ class SpeakerEncoderConfig(BaseTrainingConfig):
     def check_values(self):
         super().check_values()
         c = asdict(self)
-        assert c['model_params']['input_dim'] == self.audio.num_mels, " [!] model input dimendion must be equal to melspectrogram dimension."
+        assert (
+            c["model_params"]["input_dim"] == self.audio.num_mels
+        ), " [!] model input dimendion must be equal to melspectrogram dimension."

View File

@@ -9,6 +9,10 @@ def to_camel(text):
 def setup_model(c):
-    model = SpeakerEncoder(c.model_params["input_dim"], c.model_params["proj_dim"],
-                           c.model_params["lstm_dim"], c.model_params["num_lstm_layers"])
+    model = SpeakerEncoder(
+        c.model_params["input_dim"],
+        c.model_params["proj_dim"],
+        c.model_params["lstm_dim"],
+        c.model_params["num_lstm_layers"],
+    )
     return model
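A brief hypothetical call showing how setup_model above consumes the model_params dict from SpeakerEncoderConfig; the import paths come from other files in this commit, the call itself is illustrative only:

# Hypothetical call; not part of the diff.
from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
from TTS.speaker_encoder.utils.generic_utils import setup_model

c = SpeakerEncoderConfig()
model = setup_model(c)  # builds SpeakerEncoder with input_dim=40, proj_dim=256, lstm_dim=768, num_lstm_layers=3 per the defaults above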

View File

@@ -1,5 +1,6 @@
-import os
 import datetime
+import os
+
 import torch
@@ -19,8 +20,7 @@ def save_checkpoint(model, optimizer, model_loss, out_path, current_step):
     torch.save(state, checkpoint_path)
 
 
-def save_best_model(model, optimizer, model_loss, best_loss, out_path,
-                    current_step):
+def save_best_model(model, optimizer, model_loss, best_loss, out_path, current_step):
     if model_loss < best_loss:
         new_state_dict = model.state_dict()
         state = {
@@ -33,7 +33,6 @@ def save_best_model(model, optimizer, model_loss, best_loss, out_path,
         best_loss = model_loss
         bestmodel_path = "best_model.pth.tar"
         bestmodel_path = os.path.join(out_path, bestmodel_path)
-        print("\n > BEST MODEL ({0:.5f}) : {1:}".format(
-            model_loss, bestmodel_path))
+        print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path))
         torch.save(state, bestmodel_path)
     return best_loss
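A short hypothetical training-loop sketch for the reflowed save_best_model; the stand-in model, optimizer, output folder, and dummy losses are assumptions, only the function's signature, import path, and return value come from this commit:

# Hypothetical usage; model, optimizer, out_path, and losses are stand-ins.
import os
import torch

from TTS.speaker_encoder.utils.io import save_best_model

model = torch.nn.Linear(4, 4)
optimizer = torch.optim.Adam(model.parameters())
out_path = "output/encoder_ckpts"  # assumed output folder
os.makedirs(out_path, exist_ok=True)

best_loss = float("inf")
for step, loss in enumerate([0.9, 0.7, 0.8]):  # dummy loss values
    # writes best_model.pth.tar when the loss improves and returns the updated best loss
    best_loss = save_best_model(model, optimizer, loss, best_loss, out_path, step)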

View File

@@ -6,9 +6,9 @@ from typing import Union
 import numpy as np
 import torch
 
+from TTS.config import load_config
 from TTS.speaker_encoder.utils.generic_utils import setup_model
 from TTS.utils.audio import AudioProcessor
-from TTS.config import load_config
 
 
 def make_speakers_json_path(out_path):

View File

@@ -7,10 +7,11 @@ import re
 import shutil
 import subprocess
 import sys
-import torch
 from pathlib import Path
 from typing import List
 
+import torch
+
 
 def get_cuda():
     use_cuda = torch.cuda.is_available()

View File

@@ -8,8 +8,8 @@ from shutil import copyfile
 import gdown
 import requests
 
-from TTS.utils.generic_utils import get_user_data_dir
 from TTS.config import load_config
+from TTS.utils.generic_utils import get_user_data_dir
 
 
 class ModelManager(object):

View File

@@ -5,6 +5,7 @@ import numpy as np
 import pysbd
 import torch
 
+from TTS.config import load_config
 from TTS.tts.utils.generic_utils import setup_model
 from TTS.tts.utils.speakers import SpeakerManager
@@ -13,7 +14,6 @@ from TTS.tts.utils.speakers import SpeakerManager
 from TTS.tts.utils.synthesis import synthesis, trim_silence
 from TTS.tts.utils.text import make_symbols, phonemes, symbols
 from TTS.utils.audio import AudioProcessor
-from TTS.config import load_config
 from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input, setup_generator
@@ -117,7 +117,7 @@ class Synthesizer(object):
         self.use_phonemes = self.tts_config.use_phonemes
         self.ap = AudioProcessor(verbose=False, **self.tts_config.audio)
-        if self.tts_config.has('characters') and self.tts_config.characters:
+        if self.tts_config.has("characters") and self.tts_config.characters:
             symbols, phonemes = make_symbols(**self.tts_config.characters)
         if self.use_phonemes:

View File

@@ -2,8 +2,8 @@ import os
 import unittest
 
 from tests import get_tests_input_path, get_tests_output_path, get_tests_path
-from TTS.utils.audio import AudioProcessor
 from TTS.config import BaseAudioConfig
+from TTS.utils.audio import AudioProcessor
 
 TESTS_PATH = get_tests_path()
 OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests")

View File

@@ -6,10 +6,10 @@ import torch
 from torch import optim
 
 from tests import get_tests_input_path
+from TTS.tts.configs import GlowTTSConfig
 from TTS.tts.layers.losses import GlowTTSLoss
 from TTS.tts.models.glow_tts import GlowTTS
 from TTS.utils.audio import AudioProcessor
-from TTS.tts.configs import GlowTTSConfig
 
 # pylint: disable=unused-variable

View File

@@ -7,10 +7,10 @@ import torch
 from torch.utils.data import DataLoader
 
 from tests import get_tests_input_path, get_tests_output_path
+from TTS.tts.configs import BaseTTSConfig
 from TTS.tts.datasets import TTSDataset
 from TTS.tts.datasets.preprocess import ljspeech
 from TTS.utils.audio import AudioProcessor
-from TTS.tts.configs import BaseTTSConfig
 
 # pylint: disable=unused-variable
@@ -18,7 +18,7 @@ OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/")
 os.makedirs(OUTPATH, exist_ok=True)
 
 # create a dummy config for testing data loaders.
-c = BaseTTSConfig(text_cleaner='english_cleaners', num_loader_workers=0, batch_size=2)
+c = BaseTTSConfig(text_cleaner="english_cleaners", num_loader_workers=0, batch_size=2)
 c.r = 5
 c.data_path = "tests/data/ljspeech/"
 ok_ljspeech = os.path.exists(c.data_path)

View File

@@ -2,10 +2,9 @@ import glob
 import os
 import shutil
 
-from tests import get_tests_output_path, run_cli, get_device_id
-from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.config.shared_configs import BaseAudioConfig
+from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
 
 config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
 output_path = os.path.join(get_tests_output_path(), "train_outputs")
@@ -20,7 +19,7 @@ config = SpeakerEncoderConfig(
     print_step=1,
     save_step=1,
     print_eval=True,
-    audio=BaseAudioConfig(num_mels=40)
+    audio=BaseAudioConfig(num_mels=40),
 )
 config.audio.do_trim_silence = True
 config.audio.trim_db = 60
@@ -42,6 +41,8 @@ run_cli(command_train)
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
 
 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_encoder.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_encoder.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)
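As an aside, the parenthesized f-string pattern introduced above (and repeated in the training tests below) is simply how black keeps one long string literal under the line-length limit; a minimal standalone sketch with made-up values for illustration:

# Illustrative only; gpu_id and continue_path are invented values.
gpu_id = 0
continue_path = "train_outputs/run-1/"
command_train = (
    f"CUDA_VISIBLE_DEVICES='{gpu_id}' python TTS/bin/train_encoder.py --continue_path {continue_path} "
)
assert command_train.startswith("CUDA_VISIBLE_DEVICES='0'")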

View File

@@ -5,10 +5,10 @@ import numpy as np
 import torch
 
 from tests import get_tests_input_path
-from TTS.speaker_encoder.model import SpeakerEncoder
-from TTS.speaker_encoder.utils.io import save_checkpoint
-from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
 from TTS.config import load_config
+from TTS.speaker_encoder.model import SpeakerEncoder
+from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
+from TTS.speaker_encoder.utils.io import save_checkpoint
 from TTS.tts.utils.speakers import SpeakerManager
 from TTS.utils.audio import AudioProcessor

View File

@@ -1,7 +1,6 @@
 import os
 
-from tests import get_tests_output_path, run_cli, get_device_id
+from tests import get_device_id, get_tests_output_path, run_cli
 
 
 def test_synthesize():

View File

@@ -2,10 +2,10 @@ import os
 import unittest
 
 from tests import get_tests_input_path, get_tests_output_path
+from TTS.config import load_config
 from TTS.tts.utils.generic_utils import setup_model
 from TTS.tts.utils.io import save_checkpoint
 from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
-from TTS.config import load_config
 from TTS.utils.synthesizer import Synthesizer
@@ -15,7 +15,7 @@ class SynthesizerTest(unittest.TestCase):
         # pylint: disable=global-statement
         global symbols, phonemes
         config = load_config(os.path.join(get_tests_output_path(), "dummy_model_config.json"))
-        if config.has('characters') and config.characters:
+        if config.has("characters") and config.characters:
             symbols, phonemes = make_symbols(**config.characters.to_dict())
         num_chars = len(phonemes) if config.use_phonemes else len(symbols)
@@ -26,8 +26,8 @@ class SynthesizerTest(unittest.TestCase):
     def test_in_out(self):
         self._create_random_model()
         tts_root_path = get_tests_output_path()
-        tts_checkpoint = os.path.join(tts_root_path, 'checkpoint_10.pth.tar')
-        tts_config = os.path.join(tts_root_path, 'dummy_model_config.json')
+        tts_checkpoint = os.path.join(tts_root_path, "checkpoint_10.pth.tar")
+        tts_config = os.path.join(tts_root_path, "dummy_model_config.json")
         synthesizer = Synthesizer(tts_checkpoint, tts_config, None, None)
         synthesizer.tts("Better this test works!!")

View File

@@ -6,10 +6,10 @@ import torch
 from torch import nn, optim
 
 from tests import get_tests_input_path
+from TTS.tts.configs import Tacotron2Config
 from TTS.tts.layers.losses import MSELossMasked
 from TTS.tts.models.tacotron2 import Tacotron2
 from TTS.utils.audio import AudioProcessor
-from TTS.tts.configs import Tacotron2Config
 
 # pylint: disable=unused-variable
@@ -148,13 +148,7 @@ class TacotronGSTTrainTest(unittest.TestCase):
         criterion = MSELossMasked(seq_len_norm=False).to(device)
         criterion_st = nn.BCEWithLogitsLoss().to(device)
-        model = Tacotron2(
-            num_chars=24,
-            r=c.r,
-            num_speakers=5,
-            use_gst=True,
-            gst=c.gst
-        ).to(device)
+        model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, use_gst=True, gst=c.gst).to(device)
         model.train()
         model_ref = copy.deepcopy(model)
         count = 0
@@ -210,13 +204,7 @@ class TacotronGSTTrainTest(unittest.TestCase):
         criterion = MSELossMasked(seq_len_norm=False).to(device)
         criterion_st = nn.BCEWithLogitsLoss().to(device)
-        model = Tacotron2(
-            num_chars=24,
-            r=c.r,
-            num_speakers=5,
-            use_gst=True,
-            gst =c.gst
-        ).to(device)
+        model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, use_gst=True, gst=c.gst).to(device)
         model.train()
         model_ref = copy.deepcopy(model)
         count = 0
@@ -271,14 +259,9 @@ class SCGSTMultiSpeakeTacotronTrainTest(unittest.TestCase):
         stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze()
         criterion = MSELossMasked(seq_len_norm=False).to(device)
         criterion_st = nn.BCEWithLogitsLoss().to(device)
-        model = Tacotron2(
-            num_chars=24,
-            r=c.r,
-            num_speakers=5,
-            speaker_embedding_dim=55,
-            use_gst=True,
-            gst=c.gst
-        ).to(device)
+        model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, speaker_embedding_dim=55, use_gst=True, gst=c.gst).to(
+            device
+        )
         model.train()
         model_ref = copy.deepcopy(model)
         count = 0

View File

@@ -5,10 +5,9 @@ import numpy as np
 import tensorflow as tf
 import torch
 
+from TTS.tts.configs import Tacotron2Config
 from TTS.tts.tf.models.tacotron2 import Tacotron2
 from TTS.tts.tf.utils.tflite import convert_tacotron2_to_tflite, load_tflite_model
-from TTS.tts.configs import Tacotron2Config
 
 tf.get_logger().setLevel("INFO")

View File

@@ -6,11 +6,10 @@ import torch
 from torch import nn, optim
 
 from tests import get_tests_input_path
+from TTS.tts.configs import TacotronConfig
 from TTS.tts.layers.losses import L1LossMasked
 from TTS.tts.models.tacotron import Tacotron
 from TTS.utils.audio import AudioProcessor
-from TTS.tts.configs import TacotronConfig
 
 # pylint: disable=unused-variable

View File

@@ -4,9 +4,8 @@ import os
 # pylint: disable=wildcard-import
 # pylint: disable=unused-import
 from tests import get_tests_input_path, get_tests_path
-from TTS.tts.utils.text import *
 from TTS.tts.configs import TacotronConfig
+from TTS.tts.utils.text import *
 
 conf = TacotronConfig()

View File

@@ -2,8 +2,7 @@ import glob
 import os
 import shutil
 
-from tests import get_tests_output_path, run_cli, get_device_id
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.tts.configs import AlignTTSConfig
 
 config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
@@ -44,6 +43,8 @@ run_cli(command_train)
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
 
 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_align_tts.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_align_tts.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)

View File

@@ -2,8 +2,7 @@ import glob
 import os
 import shutil
 
-from tests import get_tests_output_path, run_cli, get_device_id
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.tts.configs import GlowTTSConfig
 
 config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
@@ -45,6 +44,8 @@ run_cli(command_train)
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
 
 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_glow_tts.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_glow_tts.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)

View File

@@ -2,8 +2,7 @@ import glob
 import os
 import shutil
 
-from tests import get_tests_output_path, run_cli, get_device_id
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.tts.configs import SpeedySpeechConfig
 
 config_path = os.path.join(get_tests_output_path(), "test_speedy_speech_config.json")
@@ -45,6 +44,8 @@ run_cli(command_train)
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
 
 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_speedy_speech.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_speedy_speech.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)

View File

@@ -2,8 +2,7 @@ import glob
 import os
 import shutil
 
-from tests import get_tests_output_path, run_cli, get_device_id
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.tts.configs import Tacotron2Config
 
 config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
@@ -45,6 +44,8 @@ run_cli(command_train)
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
 
 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_tacotron.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_tacotron.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)

View File

@@ -2,8 +2,7 @@ import glob
 import os
 import shutil
 
-from tests import get_tests_output_path, run_cli, get_device_id
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.tts.configs import TacotronConfig
 
 config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
@@ -44,6 +43,8 @@ run_cli(command_train)
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
 
 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_tacotron.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_tacotron.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)

View File

@@ -2,8 +2,7 @@ import glob
 import os
 import shutil
 
-from tests import get_tests_output_path, run_cli, get_device_id
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.vocoder.configs import FullbandMelganConfig
 
 config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
@@ -29,13 +28,17 @@ config.audio.trim_db = 60
 config.save_json(config_path)
 
 # train the model for one epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+)
 run_cli(command_train)
 
 # Find latest folder
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
 
 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)

View File

@@ -2,8 +2,7 @@ import glob
 import os
 import shutil
 
-from tests import get_tests_output_path, run_cli, get_device_id
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.vocoder.configs import HifiganConfig
 
 config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
@@ -30,13 +29,17 @@ config.audio.trim_db = 60
 config.save_json(config_path)
 
 # train the model for one epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+)
 run_cli(command_train)
 
 # Find latest folder
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
 
 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)

View File

@@ -2,10 +2,9 @@ import glob
 import os
 import shutil
 
-from tests import get_tests_output_path, run_cli, get_device_id
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.vocoder.configs import MelganConfig
 
 config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
 output_path = os.path.join(get_tests_output_path(), "train_outputs")
@@ -29,13 +28,17 @@ config.audio.trim_db = 60
 config.save_json(config_path)
 
 # train the model for one epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+)
 run_cli(command_train)
 
 # Find latest folder
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
 
 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)

View File

@@ -2,8 +2,7 @@ import glob
 import os
 import shutil
 
-from tests import get_tests_output_path, run_cli, get_device_id
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.vocoder.configs import MultibandMelganConfig
 
 config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
@@ -29,13 +28,17 @@ config.audio.trim_db = 60
 config.save_json(config_path)
 
 # train the model for one epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+)
 run_cli(command_train)
 
 # Find latest folder
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
 
 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)

View File

@@ -2,8 +2,7 @@ import glob
 import os
 import shutil
 
-from tests import get_tests_output_path, run_cli, get_device_id
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.vocoder.configs import ParallelWaveganConfig
 
 config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
@@ -29,13 +28,17 @@ config.audio.trim_db = 60
 config.save_json(config_path)
 
 # train the model for one epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
+)
 run_cli(command_train)
 
 # Find latest folder
 continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
 
 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)

View File

@@ -5,9 +5,9 @@ from torch.utils.data import DataLoader
 from tests import get_tests_output_path, get_tests_path
 from TTS.utils.audio import AudioProcessor
+from TTS.vocoder.configs import BaseGANVocoderConfig
 from TTS.vocoder.datasets.gan_dataset import GANDataset
 from TTS.vocoder.datasets.preprocess import load_wav_data
-from TTS.vocoder.configs import BaseGANVocoderConfig
 
 file_path = os.path.dirname(os.path.realpath(__file__))
 OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/")

View File

@@ -3,8 +3,8 @@ import os
 import torch
 
 from tests import get_tests_input_path, get_tests_output_path, get_tests_path
-from TTS.utils.audio import AudioProcessor
 from TTS.config import BaseAudioConfig
+from TTS.utils.audio import AudioProcessor
 from TTS.vocoder.layers.losses import MelganFeatureLoss, MultiScaleSTFTLoss, STFTLoss, TorchSTFT
 
 TESTS_PATH = get_tests_path()

View File

@@ -2,14 +2,14 @@ import glob
 import os
 import shutil
 
-from tests import get_tests_output_path, run_cli, get_device_id
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.vocoder.configs import WavegradConfig
 
 config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
 output_path = os.path.join(get_tests_output_path(), "train_outputs")
 
-config = WavegradConfig(batch_size=8,
+config = WavegradConfig(
+    batch_size=8,
     eval_batch_size=8,
     num_loader_workers=0,
     num_val_loader_workers=0,
@@ -22,22 +22,20 @@ config = WavegradConfig(batch_size=8,
     print_eval=True,
     data_path="tests/data/ljspeech",
     output_path=output_path,
-    test_noise_schedule={
-        "min_val": 1e-6,
-        "max_val": 1e-2,
-        "num_steps": 2
-    })
+    test_noise_schedule={"min_val": 1e-6, "max_val": 1e-2, "num_steps": 2},
+)
 config.audio.do_trim_silence = True
 config.audio.trim_db = 60
 config.save_json(config_path)
 
 # train the model for one epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavegrad.py --config_path {config_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavegrad.py --config_path {config_path} "
+)
 run_cli(command_train)
 
 # Find latest folder
-continue_path = max(glob.glob(os.path.join(output_path, "*/")),
-                    key=os.path.getmtime)
+continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
 
 # restore the model and continue training for one more epoch
 command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavegrad.py --continue_path {continue_path} "

View File

@@ -2,8 +2,7 @@ import glob
 import os
 import shutil
 
-from tests import get_tests_output_path, run_cli, get_device_id
+from tests import get_device_id, get_tests_output_path, run_cli
 from TTS.vocoder.configs import WavernnConfig
 
 config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
@@ -29,14 +28,17 @@ config.audio.trim_db = 60
 config.save_json(config_path)
 
 # train the model for one epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavernn.py --config_path {config_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavernn.py --config_path {config_path} "
+)
 run_cli(command_train)
 
 # Find latest folder
-continue_path = max(glob.glob(os.path.join(output_path, "*/")),
-                    key=os.path.getmtime)
+continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
 
 # restore the model and continue training for one more epoch
-command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavernn.py --continue_path {continue_path} "
+command_train = (
+    f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavernn.py --continue_path {continue_path} "
+)
 run_cli(command_train)
 shutil.rmtree(continue_path)