mirror of https://github.com/coqui-ai/TTS.git
style update
This commit is contained in:
parent
5aee30443f
commit
19fb1d743d
|
@ -16,16 +16,11 @@ from TTS.speaker_encoder.model import SpeakerEncoder
|
||||||
from TTS.speaker_encoder.utils.io import save_best_model, save_checkpoint
|
from TTS.speaker_encoder.utils.io import save_best_model, save_checkpoint
|
||||||
from TTS.speaker_encoder.utils.visual import plot_embeddings
|
from TTS.speaker_encoder.utils.visual import plot_embeddings
|
||||||
from TTS.tts.datasets.preprocess import load_meta_data
|
from TTS.tts.datasets.preprocess import load_meta_data
|
||||||
|
from TTS.utils.arguments import init_training
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.utils.generic_utils import (
|
from TTS.utils.generic_utils import count_parameters, remove_experiment_folder, set_init_dict
|
||||||
count_parameters,
|
|
||||||
remove_experiment_folder,
|
|
||||||
set_init_dict,
|
|
||||||
)
|
|
||||||
from TTS.utils.radam import RAdam
|
from TTS.utils.radam import RAdam
|
||||||
from TTS.utils.training import NoamLR, check_update
|
from TTS.utils.training import NoamLR, check_update
|
||||||
from TTS.utils.arguments import init_training
|
|
||||||
|
|
||||||
|
|
||||||
torch.backends.cudnn.enabled = True
|
torch.backends.cudnn.enabled = True
|
||||||
torch.backends.cudnn.benchmark = True
|
torch.backends.cudnn.benchmark = True
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from TTS.config.shared_configs import *
|
from TTS.config.shared_configs import *
|
||||||
|
@ -34,7 +35,7 @@ def _search_configs(model_name):
|
||||||
|
|
||||||
def _process_model_name(config_dict):
|
def _process_model_name(config_dict):
|
||||||
model_name = config_dict["model"] if "model" in config_dict else config_dict["generator_model"]
|
model_name = config_dict["model"] if "model" in config_dict else config_dict["generator_model"]
|
||||||
model_name = model_name.replace('_generator', '').replace('_discriminator', '')
|
model_name = model_name.replace("_generator", "").replace("_discriminator", "")
|
||||||
return model_name
|
return model_name
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
from coqpit import MISSING
|
from dataclasses import asdict, dataclass, field
|
||||||
from dataclasses import dataclass, field, asdict
|
|
||||||
from typing import List
|
from typing import List
|
||||||
from TTS.config.shared_configs import BaseTrainingConfig, BaseAudioConfig, BaseDatasetConfig
|
|
||||||
|
from coqpit import MISSING
|
||||||
|
|
||||||
|
from TTS.config.shared_configs import BaseAudioConfig, BaseDatasetConfig, BaseTrainingConfig
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
@ -13,23 +15,27 @@ class SpeakerEncoderConfig(BaseTrainingConfig):
|
||||||
datasets: List[BaseDatasetConfig] = field(default_factory=lambda: [BaseDatasetConfig()])
|
datasets: List[BaseDatasetConfig] = field(default_factory=lambda: [BaseDatasetConfig()])
|
||||||
|
|
||||||
# model params
|
# model params
|
||||||
model_params: dict = field(default_factory=lambda: {
|
model_params: dict = field(
|
||||||
"input_dim": 40,
|
default_factory=lambda: {
|
||||||
"proj_dim": 256,
|
"input_dim": 40,
|
||||||
"lstm_dim": 768,
|
"proj_dim": 256,
|
||||||
"num_lstm_layers": 3,
|
"lstm_dim": 768,
|
||||||
"use_lstm_with_projection": True
|
"num_lstm_layers": 3,
|
||||||
})
|
"use_lstm_with_projection": True,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
storage: dict = field(default_factory=lambda:{
|
storage: dict = field(
|
||||||
"sample_from_storage_p": 0.66, # the probability with which we'll sample from the DataSet in-memory storage
|
default_factory=lambda: {
|
||||||
"storage_size": 15, # the size of the in-memory storage with respect to a single batch
|
"sample_from_storage_p": 0.66, # the probability with which we'll sample from the DataSet in-memory storage
|
||||||
"additive_noise": 1e-5 # add very small gaussian noise to the data in order to increase robustness
|
"storage_size": 15, # the size of the in-memory storage with respect to a single batch
|
||||||
})
|
"additive_noise": 1e-5, # add very small gaussian noise to the data in order to increase robustness
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# training params
|
# training params
|
||||||
max_train_step: int = 1000 # end training when number of training steps reaches this value.
|
max_train_step: int = 1000 # end training when number of training steps reaches this value.
|
||||||
loss: str = 'angleproto'
|
loss: str = "angleproto"
|
||||||
grad_clip: float = 3.0
|
grad_clip: float = 3.0
|
||||||
lr: float = 0.0001
|
lr: float = 0.0001
|
||||||
lr_decay: bool = False
|
lr_decay: bool = False
|
||||||
|
@ -51,4 +57,6 @@ class SpeakerEncoderConfig(BaseTrainingConfig):
|
||||||
def check_values(self):
|
def check_values(self):
|
||||||
super().check_values()
|
super().check_values()
|
||||||
c = asdict(self)
|
c = asdict(self)
|
||||||
assert c['model_params']['input_dim'] == self.audio.num_mels, " [!] model input dimendion must be equal to melspectrogram dimension."
|
assert (
|
||||||
|
c["model_params"]["input_dim"] == self.audio.num_mels
|
||||||
|
), " [!] model input dimendion must be equal to melspectrogram dimension."
|
||||||
|
|
|
@ -9,6 +9,10 @@ def to_camel(text):
|
||||||
|
|
||||||
|
|
||||||
def setup_model(c):
|
def setup_model(c):
|
||||||
model = SpeakerEncoder(c.model_params["input_dim"], c.model_params["proj_dim"],
|
model = SpeakerEncoder(
|
||||||
c.model_params["lstm_dim"], c.model_params["num_lstm_layers"])
|
c.model_params["input_dim"],
|
||||||
return model
|
c.model_params["proj_dim"],
|
||||||
|
c.model_params["lstm_dim"],
|
||||||
|
c.model_params["num_lstm_layers"],
|
||||||
|
)
|
||||||
|
return model
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import os
|
|
||||||
import datetime
|
import datetime
|
||||||
|
import os
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|
||||||
|
@ -19,8 +20,7 @@ def save_checkpoint(model, optimizer, model_loss, out_path, current_step):
|
||||||
torch.save(state, checkpoint_path)
|
torch.save(state, checkpoint_path)
|
||||||
|
|
||||||
|
|
||||||
def save_best_model(model, optimizer, model_loss, best_loss, out_path,
|
def save_best_model(model, optimizer, model_loss, best_loss, out_path, current_step):
|
||||||
current_step):
|
|
||||||
if model_loss < best_loss:
|
if model_loss < best_loss:
|
||||||
new_state_dict = model.state_dict()
|
new_state_dict = model.state_dict()
|
||||||
state = {
|
state = {
|
||||||
|
@ -33,7 +33,6 @@ def save_best_model(model, optimizer, model_loss, best_loss, out_path,
|
||||||
best_loss = model_loss
|
best_loss = model_loss
|
||||||
bestmodel_path = "best_model.pth.tar"
|
bestmodel_path = "best_model.pth.tar"
|
||||||
bestmodel_path = os.path.join(out_path, bestmodel_path)
|
bestmodel_path = os.path.join(out_path, bestmodel_path)
|
||||||
print("\n > BEST MODEL ({0:.5f}) : {1:}".format(
|
print("\n > BEST MODEL ({0:.5f}) : {1:}".format(model_loss, bestmodel_path))
|
||||||
model_loss, bestmodel_path))
|
|
||||||
torch.save(state, bestmodel_path)
|
torch.save(state, bestmodel_path)
|
||||||
return best_loss
|
return best_loss
|
||||||
|
|
|
@ -41,7 +41,7 @@ class CharactersConfig(Coqpit):
|
||||||
characters: str = None
|
characters: str = None
|
||||||
punctuations: str = None
|
punctuations: str = None
|
||||||
phonemes: str = None
|
phonemes: str = None
|
||||||
unique: bool = True # for backwards compatibility of models trained with char sets with duplicates
|
unique: bool = True # for backwards compatibility of models trained with char sets with duplicates
|
||||||
|
|
||||||
def check_values(
|
def check_values(
|
||||||
self,
|
self,
|
||||||
|
|
|
@ -6,9 +6,9 @@ from typing import Union
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
from TTS.config import load_config
|
||||||
from TTS.speaker_encoder.utils.generic_utils import setup_model
|
from TTS.speaker_encoder.utils.generic_utils import setup_model
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.config import load_config
|
|
||||||
|
|
||||||
|
|
||||||
def make_speakers_json_path(out_path):
|
def make_speakers_json_path(out_path):
|
||||||
|
|
|
@ -7,10 +7,11 @@ import re
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import torch
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
|
import torch
|
||||||
|
|
||||||
|
|
||||||
def get_cuda():
|
def get_cuda():
|
||||||
use_cuda = torch.cuda.is_available()
|
use_cuda = torch.cuda.is_available()
|
||||||
|
|
|
@ -8,8 +8,8 @@ from shutil import copyfile
|
||||||
import gdown
|
import gdown
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from TTS.utils.generic_utils import get_user_data_dir
|
|
||||||
from TTS.config import load_config
|
from TTS.config import load_config
|
||||||
|
from TTS.utils.generic_utils import get_user_data_dir
|
||||||
|
|
||||||
|
|
||||||
class ModelManager(object):
|
class ModelManager(object):
|
||||||
|
|
|
@ -5,6 +5,7 @@ import numpy as np
|
||||||
import pysbd
|
import pysbd
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
from TTS.config import load_config
|
||||||
from TTS.tts.utils.generic_utils import setup_model
|
from TTS.tts.utils.generic_utils import setup_model
|
||||||
from TTS.tts.utils.speakers import SpeakerManager
|
from TTS.tts.utils.speakers import SpeakerManager
|
||||||
|
|
||||||
|
@ -13,7 +14,6 @@ from TTS.tts.utils.speakers import SpeakerManager
|
||||||
from TTS.tts.utils.synthesis import synthesis, trim_silence
|
from TTS.tts.utils.synthesis import synthesis, trim_silence
|
||||||
from TTS.tts.utils.text import make_symbols, phonemes, symbols
|
from TTS.tts.utils.text import make_symbols, phonemes, symbols
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.config import load_config
|
|
||||||
from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input, setup_generator
|
from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input, setup_generator
|
||||||
|
|
||||||
|
|
||||||
|
@ -117,7 +117,7 @@ class Synthesizer(object):
|
||||||
self.use_phonemes = self.tts_config.use_phonemes
|
self.use_phonemes = self.tts_config.use_phonemes
|
||||||
self.ap = AudioProcessor(verbose=False, **self.tts_config.audio)
|
self.ap = AudioProcessor(verbose=False, **self.tts_config.audio)
|
||||||
|
|
||||||
if self.tts_config.has('characters') and self.tts_config.characters:
|
if self.tts_config.has("characters") and self.tts_config.characters:
|
||||||
symbols, phonemes = make_symbols(**self.tts_config.characters)
|
symbols, phonemes = make_symbols(**self.tts_config.characters)
|
||||||
|
|
||||||
if self.use_phonemes:
|
if self.use_phonemes:
|
||||||
|
|
|
@ -2,8 +2,8 @@ import os
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from tests import get_tests_input_path, get_tests_output_path, get_tests_path
|
from tests import get_tests_input_path, get_tests_output_path, get_tests_path
|
||||||
from TTS.utils.audio import AudioProcessor
|
|
||||||
from TTS.config import BaseAudioConfig
|
from TTS.config import BaseAudioConfig
|
||||||
|
from TTS.utils.audio import AudioProcessor
|
||||||
|
|
||||||
TESTS_PATH = get_tests_path()
|
TESTS_PATH = get_tests_path()
|
||||||
OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests")
|
OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests")
|
||||||
|
|
|
@ -6,10 +6,10 @@ import torch
|
||||||
from torch import optim
|
from torch import optim
|
||||||
|
|
||||||
from tests import get_tests_input_path
|
from tests import get_tests_input_path
|
||||||
|
from TTS.tts.configs import GlowTTSConfig
|
||||||
from TTS.tts.layers.losses import GlowTTSLoss
|
from TTS.tts.layers.losses import GlowTTSLoss
|
||||||
from TTS.tts.models.glow_tts import GlowTTS
|
from TTS.tts.models.glow_tts import GlowTTS
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.tts.configs import GlowTTSConfig
|
|
||||||
|
|
||||||
# pylint: disable=unused-variable
|
# pylint: disable=unused-variable
|
||||||
|
|
||||||
|
|
|
@ -7,10 +7,10 @@ import torch
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
|
|
||||||
from tests import get_tests_input_path, get_tests_output_path
|
from tests import get_tests_input_path, get_tests_output_path
|
||||||
|
from TTS.tts.configs import BaseTTSConfig
|
||||||
from TTS.tts.datasets import TTSDataset
|
from TTS.tts.datasets import TTSDataset
|
||||||
from TTS.tts.datasets.preprocess import ljspeech
|
from TTS.tts.datasets.preprocess import ljspeech
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.tts.configs import BaseTTSConfig
|
|
||||||
|
|
||||||
# pylint: disable=unused-variable
|
# pylint: disable=unused-variable
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@ OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/")
|
||||||
os.makedirs(OUTPATH, exist_ok=True)
|
os.makedirs(OUTPATH, exist_ok=True)
|
||||||
|
|
||||||
# create a dummy config for testing data loaders.
|
# create a dummy config for testing data loaders.
|
||||||
c = BaseTTSConfig(text_cleaner='english_cleaners', num_loader_workers=0, batch_size=2)
|
c = BaseTTSConfig(text_cleaner="english_cleaners", num_loader_workers=0, batch_size=2)
|
||||||
c.r = 5
|
c.r = 5
|
||||||
c.data_path = "tests/data/ljspeech/"
|
c.data_path = "tests/data/ljspeech/"
|
||||||
ok_ljspeech = os.path.exists(c.data_path)
|
ok_ljspeech = os.path.exists(c.data_path)
|
||||||
|
|
|
@ -2,10 +2,9 @@ import glob
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from tests import get_tests_output_path, run_cli, get_device_id
|
from tests import get_device_id, get_tests_output_path, run_cli
|
||||||
|
|
||||||
from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
|
|
||||||
from TTS.config.shared_configs import BaseAudioConfig
|
from TTS.config.shared_configs import BaseAudioConfig
|
||||||
|
from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
|
||||||
|
|
||||||
config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
|
config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
|
||||||
output_path = os.path.join(get_tests_output_path(), "train_outputs")
|
output_path = os.path.join(get_tests_output_path(), "train_outputs")
|
||||||
|
@ -20,7 +19,7 @@ config = SpeakerEncoderConfig(
|
||||||
print_step=1,
|
print_step=1,
|
||||||
save_step=1,
|
save_step=1,
|
||||||
print_eval=True,
|
print_eval=True,
|
||||||
audio=BaseAudioConfig(num_mels=40)
|
audio=BaseAudioConfig(num_mels=40),
|
||||||
)
|
)
|
||||||
config.audio.do_trim_silence = True
|
config.audio.do_trim_silence = True
|
||||||
config.audio.trim_db = 60
|
config.audio.trim_db = 60
|
||||||
|
@ -42,6 +41,8 @@ run_cli(command_train)
|
||||||
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
||||||
|
|
||||||
# restore the model and continue training for one more epoch
|
# restore the model and continue training for one more epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_encoder.py --continue_path {continue_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_encoder.py --continue_path {continue_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
shutil.rmtree(continue_path)
|
shutil.rmtree(continue_path)
|
||||||
|
|
|
@ -5,10 +5,10 @@ import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from tests import get_tests_input_path
|
from tests import get_tests_input_path
|
||||||
from TTS.speaker_encoder.model import SpeakerEncoder
|
|
||||||
from TTS.speaker_encoder.utils.io import save_checkpoint
|
|
||||||
from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
|
|
||||||
from TTS.config import load_config
|
from TTS.config import load_config
|
||||||
|
from TTS.speaker_encoder.model import SpeakerEncoder
|
||||||
|
from TTS.speaker_encoder.speaker_encoder_config import SpeakerEncoderConfig
|
||||||
|
from TTS.speaker_encoder.utils.io import save_checkpoint
|
||||||
from TTS.tts.utils.speakers import SpeakerManager
|
from TTS.tts.utils.speakers import SpeakerManager
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from tests import get_tests_output_path, run_cli, get_device_id
|
from tests import get_device_id, get_tests_output_path, run_cli
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def test_synthesize():
|
def test_synthesize():
|
||||||
|
|
|
@ -2,10 +2,10 @@ import os
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from tests import get_tests_input_path, get_tests_output_path
|
from tests import get_tests_input_path, get_tests_output_path
|
||||||
|
from TTS.config import load_config
|
||||||
from TTS.tts.utils.generic_utils import setup_model
|
from TTS.tts.utils.generic_utils import setup_model
|
||||||
from TTS.tts.utils.io import save_checkpoint
|
from TTS.tts.utils.io import save_checkpoint
|
||||||
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
|
||||||
from TTS.config import load_config
|
|
||||||
from TTS.utils.synthesizer import Synthesizer
|
from TTS.utils.synthesizer import Synthesizer
|
||||||
|
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ class SynthesizerTest(unittest.TestCase):
|
||||||
# pylint: disable=global-statement
|
# pylint: disable=global-statement
|
||||||
global symbols, phonemes
|
global symbols, phonemes
|
||||||
config = load_config(os.path.join(get_tests_output_path(), "dummy_model_config.json"))
|
config = load_config(os.path.join(get_tests_output_path(), "dummy_model_config.json"))
|
||||||
if config.has('characters') and config.characters:
|
if config.has("characters") and config.characters:
|
||||||
symbols, phonemes = make_symbols(**config.characters.to_dict())
|
symbols, phonemes = make_symbols(**config.characters.to_dict())
|
||||||
|
|
||||||
num_chars = len(phonemes) if config.use_phonemes else len(symbols)
|
num_chars = len(phonemes) if config.use_phonemes else len(symbols)
|
||||||
|
@ -26,8 +26,8 @@ class SynthesizerTest(unittest.TestCase):
|
||||||
def test_in_out(self):
|
def test_in_out(self):
|
||||||
self._create_random_model()
|
self._create_random_model()
|
||||||
tts_root_path = get_tests_output_path()
|
tts_root_path = get_tests_output_path()
|
||||||
tts_checkpoint = os.path.join(tts_root_path, 'checkpoint_10.pth.tar')
|
tts_checkpoint = os.path.join(tts_root_path, "checkpoint_10.pth.tar")
|
||||||
tts_config = os.path.join(tts_root_path, 'dummy_model_config.json')
|
tts_config = os.path.join(tts_root_path, "dummy_model_config.json")
|
||||||
synthesizer = Synthesizer(tts_checkpoint, tts_config, None, None)
|
synthesizer = Synthesizer(tts_checkpoint, tts_config, None, None)
|
||||||
synthesizer.tts("Better this test works!!")
|
synthesizer.tts("Better this test works!!")
|
||||||
|
|
||||||
|
|
|
@ -6,10 +6,10 @@ import torch
|
||||||
from torch import nn, optim
|
from torch import nn, optim
|
||||||
|
|
||||||
from tests import get_tests_input_path
|
from tests import get_tests_input_path
|
||||||
|
from TTS.tts.configs import Tacotron2Config
|
||||||
from TTS.tts.layers.losses import MSELossMasked
|
from TTS.tts.layers.losses import MSELossMasked
|
||||||
from TTS.tts.models.tacotron2 import Tacotron2
|
from TTS.tts.models.tacotron2 import Tacotron2
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.tts.configs import Tacotron2Config
|
|
||||||
|
|
||||||
# pylint: disable=unused-variable
|
# pylint: disable=unused-variable
|
||||||
|
|
||||||
|
@ -148,13 +148,7 @@ class TacotronGSTTrainTest(unittest.TestCase):
|
||||||
|
|
||||||
criterion = MSELossMasked(seq_len_norm=False).to(device)
|
criterion = MSELossMasked(seq_len_norm=False).to(device)
|
||||||
criterion_st = nn.BCEWithLogitsLoss().to(device)
|
criterion_st = nn.BCEWithLogitsLoss().to(device)
|
||||||
model = Tacotron2(
|
model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, use_gst=True, gst=c.gst).to(device)
|
||||||
num_chars=24,
|
|
||||||
r=c.r,
|
|
||||||
num_speakers=5,
|
|
||||||
use_gst=True,
|
|
||||||
gst=c.gst
|
|
||||||
).to(device)
|
|
||||||
model.train()
|
model.train()
|
||||||
model_ref = copy.deepcopy(model)
|
model_ref = copy.deepcopy(model)
|
||||||
count = 0
|
count = 0
|
||||||
|
@ -210,13 +204,7 @@ class TacotronGSTTrainTest(unittest.TestCase):
|
||||||
|
|
||||||
criterion = MSELossMasked(seq_len_norm=False).to(device)
|
criterion = MSELossMasked(seq_len_norm=False).to(device)
|
||||||
criterion_st = nn.BCEWithLogitsLoss().to(device)
|
criterion_st = nn.BCEWithLogitsLoss().to(device)
|
||||||
model = Tacotron2(
|
model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, use_gst=True, gst=c.gst).to(device)
|
||||||
num_chars=24,
|
|
||||||
r=c.r,
|
|
||||||
num_speakers=5,
|
|
||||||
use_gst=True,
|
|
||||||
gst =c.gst
|
|
||||||
).to(device)
|
|
||||||
model.train()
|
model.train()
|
||||||
model_ref = copy.deepcopy(model)
|
model_ref = copy.deepcopy(model)
|
||||||
count = 0
|
count = 0
|
||||||
|
@ -271,14 +259,9 @@ class SCGSTMultiSpeakeTacotronTrainTest(unittest.TestCase):
|
||||||
stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze()
|
stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze()
|
||||||
criterion = MSELossMasked(seq_len_norm=False).to(device)
|
criterion = MSELossMasked(seq_len_norm=False).to(device)
|
||||||
criterion_st = nn.BCEWithLogitsLoss().to(device)
|
criterion_st = nn.BCEWithLogitsLoss().to(device)
|
||||||
model = Tacotron2(
|
model = Tacotron2(num_chars=24, r=c.r, num_speakers=5, speaker_embedding_dim=55, use_gst=True, gst=c.gst).to(
|
||||||
num_chars=24,
|
device
|
||||||
r=c.r,
|
)
|
||||||
num_speakers=5,
|
|
||||||
speaker_embedding_dim=55,
|
|
||||||
use_gst=True,
|
|
||||||
gst=c.gst
|
|
||||||
).to(device)
|
|
||||||
model.train()
|
model.train()
|
||||||
model_ref = copy.deepcopy(model)
|
model_ref = copy.deepcopy(model)
|
||||||
count = 0
|
count = 0
|
||||||
|
|
|
@ -5,10 +5,9 @@ import numpy as np
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
from TTS.tts.configs import Tacotron2Config
|
||||||
from TTS.tts.tf.models.tacotron2 import Tacotron2
|
from TTS.tts.tf.models.tacotron2 import Tacotron2
|
||||||
from TTS.tts.tf.utils.tflite import convert_tacotron2_to_tflite, load_tflite_model
|
from TTS.tts.tf.utils.tflite import convert_tacotron2_to_tflite, load_tflite_model
|
||||||
from TTS.tts.configs import Tacotron2Config
|
|
||||||
|
|
||||||
|
|
||||||
tf.get_logger().setLevel("INFO")
|
tf.get_logger().setLevel("INFO")
|
||||||
|
|
||||||
|
|
|
@ -6,11 +6,10 @@ import torch
|
||||||
from torch import nn, optim
|
from torch import nn, optim
|
||||||
|
|
||||||
from tests import get_tests_input_path
|
from tests import get_tests_input_path
|
||||||
|
from TTS.tts.configs import TacotronConfig
|
||||||
from TTS.tts.layers.losses import L1LossMasked
|
from TTS.tts.layers.losses import L1LossMasked
|
||||||
from TTS.tts.models.tacotron import Tacotron
|
from TTS.tts.models.tacotron import Tacotron
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.tts.configs import TacotronConfig
|
|
||||||
|
|
||||||
|
|
||||||
# pylint: disable=unused-variable
|
# pylint: disable=unused-variable
|
||||||
|
|
||||||
|
|
|
@ -4,9 +4,8 @@ import os
|
||||||
# pylint: disable=wildcard-import
|
# pylint: disable=wildcard-import
|
||||||
# pylint: disable=unused-import
|
# pylint: disable=unused-import
|
||||||
from tests import get_tests_input_path, get_tests_path
|
from tests import get_tests_input_path, get_tests_path
|
||||||
from TTS.tts.utils.text import *
|
|
||||||
from TTS.tts.configs import TacotronConfig
|
from TTS.tts.configs import TacotronConfig
|
||||||
|
from TTS.tts.utils.text import *
|
||||||
|
|
||||||
conf = TacotronConfig()
|
conf = TacotronConfig()
|
||||||
|
|
||||||
|
|
|
@ -2,8 +2,7 @@ import glob
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from tests import get_tests_output_path, run_cli, get_device_id
|
from tests import get_device_id, get_tests_output_path, run_cli
|
||||||
|
|
||||||
from TTS.tts.configs import AlignTTSConfig
|
from TTS.tts.configs import AlignTTSConfig
|
||||||
|
|
||||||
config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
|
config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
|
||||||
|
@ -44,6 +43,8 @@ run_cli(command_train)
|
||||||
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
||||||
|
|
||||||
# restore the model and continue training for one more epoch
|
# restore the model and continue training for one more epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_align_tts.py --continue_path {continue_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_align_tts.py --continue_path {continue_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
shutil.rmtree(continue_path)
|
shutil.rmtree(continue_path)
|
||||||
|
|
|
@ -2,8 +2,7 @@ import glob
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from tests import get_tests_output_path, run_cli, get_device_id
|
from tests import get_device_id, get_tests_output_path, run_cli
|
||||||
|
|
||||||
from TTS.tts.configs import GlowTTSConfig
|
from TTS.tts.configs import GlowTTSConfig
|
||||||
|
|
||||||
config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
|
config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
|
||||||
|
@ -45,6 +44,8 @@ run_cli(command_train)
|
||||||
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
||||||
|
|
||||||
# restore the model and continue training for one more epoch
|
# restore the model and continue training for one more epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_glow_tts.py --continue_path {continue_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_glow_tts.py --continue_path {continue_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
shutil.rmtree(continue_path)
|
shutil.rmtree(continue_path)
|
||||||
|
|
|
@ -2,8 +2,7 @@ import glob
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from tests import get_tests_output_path, run_cli, get_device_id
|
from tests import get_device_id, get_tests_output_path, run_cli
|
||||||
|
|
||||||
from TTS.tts.configs import SpeedySpeechConfig
|
from TTS.tts.configs import SpeedySpeechConfig
|
||||||
|
|
||||||
config_path = os.path.join(get_tests_output_path(), "test_speedy_speech_config.json")
|
config_path = os.path.join(get_tests_output_path(), "test_speedy_speech_config.json")
|
||||||
|
@ -45,6 +44,8 @@ run_cli(command_train)
|
||||||
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
||||||
|
|
||||||
# restore the model and continue training for one more epoch
|
# restore the model and continue training for one more epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_speedy_speech.py --continue_path {continue_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_speedy_speech.py --continue_path {continue_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
shutil.rmtree(continue_path)
|
shutil.rmtree(continue_path)
|
||||||
|
|
|
@ -2,8 +2,7 @@ import glob
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from tests import get_tests_output_path, run_cli, get_device_id
|
from tests import get_device_id, get_tests_output_path, run_cli
|
||||||
|
|
||||||
from TTS.tts.configs import Tacotron2Config
|
from TTS.tts.configs import Tacotron2Config
|
||||||
|
|
||||||
config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
|
config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
|
||||||
|
@ -45,6 +44,8 @@ run_cli(command_train)
|
||||||
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
||||||
|
|
||||||
# restore the model and continue training for one more epoch
|
# restore the model and continue training for one more epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_tacotron.py --continue_path {continue_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_tacotron.py --continue_path {continue_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
shutil.rmtree(continue_path)
|
shutil.rmtree(continue_path)
|
||||||
|
|
|
@ -2,8 +2,7 @@ import glob
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from tests import get_tests_output_path, run_cli, get_device_id
|
from tests import get_device_id, get_tests_output_path, run_cli
|
||||||
|
|
||||||
from TTS.tts.configs import TacotronConfig
|
from TTS.tts.configs import TacotronConfig
|
||||||
|
|
||||||
config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
|
config_path = os.path.join(get_tests_output_path(), "test_model_config.json")
|
||||||
|
@ -44,6 +43,8 @@ run_cli(command_train)
|
||||||
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
||||||
|
|
||||||
# restore the model and continue training for one more epoch
|
# restore the model and continue training for one more epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_tacotron.py --continue_path {continue_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_tacotron.py --continue_path {continue_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
shutil.rmtree(continue_path)
|
shutil.rmtree(continue_path)
|
||||||
|
|
|
@ -2,8 +2,7 @@ import glob
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from tests import get_tests_output_path, run_cli, get_device_id
|
from tests import get_device_id, get_tests_output_path, run_cli
|
||||||
|
|
||||||
from TTS.vocoder.configs import FullbandMelganConfig
|
from TTS.vocoder.configs import FullbandMelganConfig
|
||||||
|
|
||||||
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
|
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
|
||||||
|
@ -29,13 +28,17 @@ config.audio.trim_db = 60
|
||||||
config.save_json(config_path)
|
config.save_json(config_path)
|
||||||
|
|
||||||
# train the model for one epoch
|
# train the model for one epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
|
|
||||||
# Find latest folder
|
# Find latest folder
|
||||||
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
||||||
|
|
||||||
# restore the model and continue training for one more epoch
|
# restore the model and continue training for one more epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
shutil.rmtree(continue_path)
|
shutil.rmtree(continue_path)
|
||||||
|
|
|
@ -2,8 +2,7 @@ import glob
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from tests import get_tests_output_path, run_cli, get_device_id
|
from tests import get_device_id, get_tests_output_path, run_cli
|
||||||
|
|
||||||
from TTS.vocoder.configs import HifiganConfig
|
from TTS.vocoder.configs import HifiganConfig
|
||||||
|
|
||||||
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
|
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
|
||||||
|
@ -30,13 +29,17 @@ config.audio.trim_db = 60
|
||||||
config.save_json(config_path)
|
config.save_json(config_path)
|
||||||
|
|
||||||
# train the model for one epoch
|
# train the model for one epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
|
|
||||||
# Find latest folder
|
# Find latest folder
|
||||||
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
||||||
|
|
||||||
# restore the model and continue training for one more epoch
|
# restore the model and continue training for one more epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
shutil.rmtree(continue_path)
|
shutil.rmtree(continue_path)
|
||||||
|
|
|
@ -2,10 +2,9 @@ import glob
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from tests import get_tests_output_path, run_cli, get_device_id
|
from tests import get_device_id, get_tests_output_path, run_cli
|
||||||
from TTS.vocoder.configs import MelganConfig
|
from TTS.vocoder.configs import MelganConfig
|
||||||
|
|
||||||
|
|
||||||
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
|
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
|
||||||
output_path = os.path.join(get_tests_output_path(), "train_outputs")
|
output_path = os.path.join(get_tests_output_path(), "train_outputs")
|
||||||
|
|
||||||
|
@ -29,13 +28,17 @@ config.audio.trim_db = 60
|
||||||
config.save_json(config_path)
|
config.save_json(config_path)
|
||||||
|
|
||||||
# train the model for one epoch
|
# train the model for one epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
|
|
||||||
# Find latest folder
|
# Find latest folder
|
||||||
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
||||||
|
|
||||||
# restore the model and continue training for one more epoch
|
# restore the model and continue training for one more epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
shutil.rmtree(continue_path)
|
shutil.rmtree(continue_path)
|
||||||
|
|
|
@ -2,8 +2,7 @@ import glob
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from tests import get_tests_output_path, run_cli, get_device_id
|
from tests import get_device_id, get_tests_output_path, run_cli
|
||||||
|
|
||||||
from TTS.vocoder.configs import MultibandMelganConfig
|
from TTS.vocoder.configs import MultibandMelganConfig
|
||||||
|
|
||||||
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
|
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
|
||||||
|
@ -29,13 +28,17 @@ config.audio.trim_db = 60
|
||||||
config.save_json(config_path)
|
config.save_json(config_path)
|
||||||
|
|
||||||
# train the model for one epoch
|
# train the model for one epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
|
|
||||||
# Find latest folder
|
# Find latest folder
|
||||||
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
||||||
|
|
||||||
# restore the model and continue training for one more epoch
|
# restore the model and continue training for one more epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
shutil.rmtree(continue_path)
|
shutil.rmtree(continue_path)
|
||||||
|
|
|
@ -2,8 +2,7 @@ import glob
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from tests import get_tests_output_path, run_cli, get_device_id
|
from tests import get_device_id, get_tests_output_path, run_cli
|
||||||
|
|
||||||
from TTS.vocoder.configs import ParallelWaveganConfig
|
from TTS.vocoder.configs import ParallelWaveganConfig
|
||||||
|
|
||||||
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
|
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
|
||||||
|
@ -29,13 +28,17 @@ config.audio.trim_db = 60
|
||||||
config.save_json(config_path)
|
config.save_json(config_path)
|
||||||
|
|
||||||
# train the model for one epoch
|
# train the model for one epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --config_path {config_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
|
|
||||||
# Find latest folder
|
# Find latest folder
|
||||||
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
||||||
|
|
||||||
# restore the model and continue training for one more epoch
|
# restore the model and continue training for one more epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_gan.py --continue_path {continue_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
shutil.rmtree(continue_path)
|
shutil.rmtree(continue_path)
|
||||||
|
|
|
@ -5,9 +5,9 @@ from torch.utils.data import DataLoader
|
||||||
|
|
||||||
from tests import get_tests_output_path, get_tests_path
|
from tests import get_tests_output_path, get_tests_path
|
||||||
from TTS.utils.audio import AudioProcessor
|
from TTS.utils.audio import AudioProcessor
|
||||||
|
from TTS.vocoder.configs import BaseGANVocoderConfig
|
||||||
from TTS.vocoder.datasets.gan_dataset import GANDataset
|
from TTS.vocoder.datasets.gan_dataset import GANDataset
|
||||||
from TTS.vocoder.datasets.preprocess import load_wav_data
|
from TTS.vocoder.datasets.preprocess import load_wav_data
|
||||||
from TTS.vocoder.configs import BaseGANVocoderConfig
|
|
||||||
|
|
||||||
file_path = os.path.dirname(os.path.realpath(__file__))
|
file_path = os.path.dirname(os.path.realpath(__file__))
|
||||||
OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/")
|
OUTPATH = os.path.join(get_tests_output_path(), "loader_tests/")
|
||||||
|
|
|
@ -3,8 +3,8 @@ import os
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from tests import get_tests_input_path, get_tests_output_path, get_tests_path
|
from tests import get_tests_input_path, get_tests_output_path, get_tests_path
|
||||||
from TTS.utils.audio import AudioProcessor
|
|
||||||
from TTS.config import BaseAudioConfig
|
from TTS.config import BaseAudioConfig
|
||||||
|
from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.vocoder.layers.losses import MelganFeatureLoss, MultiScaleSTFTLoss, STFTLoss, TorchSTFT
|
from TTS.vocoder.layers.losses import MelganFeatureLoss, MultiScaleSTFTLoss, STFTLoss, TorchSTFT
|
||||||
|
|
||||||
TESTS_PATH = get_tests_path()
|
TESTS_PATH = get_tests_path()
|
||||||
|
|
|
@ -2,42 +2,40 @@ import glob
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from tests import get_tests_output_path, run_cli, get_device_id
|
from tests import get_device_id, get_tests_output_path, run_cli
|
||||||
|
|
||||||
from TTS.vocoder.configs import WavegradConfig
|
from TTS.vocoder.configs import WavegradConfig
|
||||||
|
|
||||||
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
|
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
|
||||||
output_path = os.path.join(get_tests_output_path(), "train_outputs")
|
output_path = os.path.join(get_tests_output_path(), "train_outputs")
|
||||||
|
|
||||||
config = WavegradConfig(batch_size=8,
|
config = WavegradConfig(
|
||||||
eval_batch_size=8,
|
batch_size=8,
|
||||||
num_loader_workers=0,
|
eval_batch_size=8,
|
||||||
num_val_loader_workers=0,
|
num_loader_workers=0,
|
||||||
run_eval=True,
|
num_val_loader_workers=0,
|
||||||
test_delay_epochs=-1,
|
run_eval=True,
|
||||||
epochs=1,
|
test_delay_epochs=-1,
|
||||||
seq_len=8192,
|
epochs=1,
|
||||||
eval_split_size=1,
|
seq_len=8192,
|
||||||
print_step=1,
|
eval_split_size=1,
|
||||||
print_eval=True,
|
print_step=1,
|
||||||
data_path="tests/data/ljspeech",
|
print_eval=True,
|
||||||
output_path=output_path,
|
data_path="tests/data/ljspeech",
|
||||||
test_noise_schedule={
|
output_path=output_path,
|
||||||
"min_val": 1e-6,
|
test_noise_schedule={"min_val": 1e-6, "max_val": 1e-2, "num_steps": 2},
|
||||||
"max_val": 1e-2,
|
)
|
||||||
"num_steps": 2
|
|
||||||
})
|
|
||||||
config.audio.do_trim_silence = True
|
config.audio.do_trim_silence = True
|
||||||
config.audio.trim_db = 60
|
config.audio.trim_db = 60
|
||||||
config.save_json(config_path)
|
config.save_json(config_path)
|
||||||
|
|
||||||
# train the model for one epoch
|
# train the model for one epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavegrad.py --config_path {config_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavegrad.py --config_path {config_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
|
|
||||||
# Find latest folder
|
# Find latest folder
|
||||||
continue_path = max(glob.glob(os.path.join(output_path, "*/")),
|
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
||||||
key=os.path.getmtime)
|
|
||||||
|
|
||||||
# restore the model and continue training for one more epoch
|
# restore the model and continue training for one more epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavegrad.py --continue_path {continue_path} "
|
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavegrad.py --continue_path {continue_path} "
|
||||||
|
|
|
@ -2,8 +2,7 @@ import glob
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from tests import get_tests_output_path, run_cli, get_device_id
|
from tests import get_device_id, get_tests_output_path, run_cli
|
||||||
|
|
||||||
from TTS.vocoder.configs import WavernnConfig
|
from TTS.vocoder.configs import WavernnConfig
|
||||||
|
|
||||||
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
|
config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json")
|
||||||
|
@ -29,14 +28,17 @@ config.audio.trim_db = 60
|
||||||
config.save_json(config_path)
|
config.save_json(config_path)
|
||||||
|
|
||||||
# train the model for one epoch
|
# train the model for one epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavernn.py --config_path {config_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavernn.py --config_path {config_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
|
|
||||||
# Find latest folder
|
# Find latest folder
|
||||||
continue_path = max(glob.glob(os.path.join(output_path, "*/")),
|
continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime)
|
||||||
key=os.path.getmtime)
|
|
||||||
|
|
||||||
# restore the model and continue training for one more epoch
|
# restore the model and continue training for one more epoch
|
||||||
command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavernn.py --continue_path {continue_path} "
|
command_train = (
|
||||||
|
f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder_wavernn.py --continue_path {continue_path} "
|
||||||
|
)
|
||||||
run_cli(command_train)
|
run_cli(command_train)
|
||||||
shutil.rmtree(continue_path)
|
shutil.rmtree(continue_path)
|
||||||
|
|
Loading…
Reference in New Issue