mirror of https://github.com/coqui-ai/TTS.git
cleanup
This commit is contained in:
parent a273b1a210
commit 6b0621c794
@@ -42,7 +42,8 @@ def setup_loader(ap, is_val=False, verbose=False):
     dataset = MyDataset(ap,
                         meta_data_eval if is_val else meta_data_train,
                         voice_len=1.6,
-                        num_utter_per_speaker=10,
+                        num_utter_per_speaker=c.num_utters_per_speaker,
+                        num_speakers_in_batch=c.num_speakers_in_batch,
                         skip_speakers=False,
                         storage_size=c.storage["storage_size"],
                         sample_from_storage_p=c.storage["sample_from_storage_p"],
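
Note: each speaker-encoder batch is built from num_speakers_in_batch speakers with num_utters_per_speaker utterances each, so the effective batch size is their product (64 * 10 = 640 with the config values below). A minimal sketch of that composition; the helper name and data layout are illustrative, not the actual MyDataset internals:

    # Illustrative only: how the two new arguments shape a batch.
    import random

    def build_batch(utterances_by_speaker, num_speakers_in_batch=64, num_utters_per_speaker=10):
        chosen = random.sample(list(utterances_by_speaker), num_speakers_in_batch)
        batch = []
        for spk in chosen:
            batch.extend(random.sample(utterances_by_speaker[spk], num_utters_per_speaker))
        return batch  # len(batch) == num_speakers_in_batch * num_utters_per_speaker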
@@ -98,11 +99,10 @@ def train(model, criterion, optimizer, scheduler, ap, global_step):
         epoch_time += step_time
 
         # Averaged Loss and Averaged Loader Time
-        dataset_number_prefetched = 2 * c.num_loader_workers # this is hardcoded in pytorch
         avg_loss = 0.01 * loss.item() \
             + 0.99 * avg_loss if avg_loss != 0 else loss.item()
-        avg_loader_time = 1/dataset_number_prefetched * loader_time\
-            + (dataset_number_prefetched-1) / dataset_number_prefetched * avg_loader_time if avg_loader_time != 0 else loader_time
+        avg_loader_time = 1/c.num_loader_workers * loader_time + \
+            (c.num_loader_workers-1) / c.num_loader_workers * avg_loader_time if avg_loader_time != 0 else loader_time
         current_lr = optimizer.param_groups[0]['lr']
 
         if global_step % c.steps_plot_stats == 0:
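
Note: both running statistics are exponential moving averages of the form avg = alpha * new + (1 - alpha) * avg, seeded with the first raw value. The loss keeps a fixed alpha of 0.01; the loader time now uses alpha = 1/num_loader_workers rather than 1/(2 * num_loader_workers). A small self-contained illustration with made-up values:

    # Exponential moving average, as used for avg_loss / avg_loader_time above.
    def ema(new_value, running, alpha):
        # first step: take the raw value; afterwards blend with the running average
        return alpha * new_value + (1 - alpha) * running if running != 0 else new_value

    num_loader_workers = 8
    avg_loss, avg_loader_time = 0, 0
    for step_loss, loader_time in [(2.0, 0.40), (1.5, 0.10), (1.2, 0.09)]:   # made-up values
        avg_loss = ema(step_loss, avg_loss, alpha=0.01)
        avg_loader_time = ema(loader_time, avg_loader_time, alpha=1 / num_loader_workers)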
@@ -36,7 +36,8 @@
     "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
     "steps_plot_stats": 10, // number of steps to plot embeddings.
     "num_speakers_in_batch": 64, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'.
-    "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values.
+    "num_utters_per_speaker": 10, //
+    "num_loader_workers": 8, // number of training data loader processes. Don't set it too big. 4-8 are good values.
     "wd": 0.000001, // Weight decay weight.
     "checkpoint": true, // If true, it saves checkpoints per "save_step"
     "save_step": 1000, // Number of training steps expected to save traning stats and checkpoints.
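
Note: num_loader_workers is handed to PyTorch's DataLoader, which by default keeps roughly two batches per worker in flight (the "hardcoded in pytorch" factor the deleted train.py line referred to). A hedged sketch of how these config values are typically wired into the loader; the exact call in setup_loader may differ:

    # Sketch only: c is the loaded config, dataset the MyDataset instance from setup_loader.
    from torch.utils.data import DataLoader

    loader = DataLoader(dataset,
                        batch_size=c.num_speakers_in_batch,
                        num_workers=c.num_loader_workers,
                        collate_fn=dataset.collate_fn)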
@@ -50,8 +51,8 @@
         "use_lstm_with_projection": true
     },
     "storage": {
-        "sample_from_storage_p": 0.42, // the probability with which we'll sample from the DataSet in-memory storage
-        "storage_size": 5, // the size of the in-memory storage with respect to a single batch
+        "sample_from_storage_p": 0.66, // the probability with which we'll sample from the DataSet in-memory storage
+        "storage_size": 15, // the size of the in-memory storage with respect to a single batch
         "additive_noise": 1e-5 // add very small gaussian noise to the data in order to increase robustness
     },
     "datasets":
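
Note: the storage block configures a small in-memory cache of recently loaded utterances: with probability sample_from_storage_p an item is reused from the cache instead of being read from disk, the cache is sized relative to a batch via storage_size, and additive_noise scales Gaussian noise added for robustness. A rough sketch of that behaviour, assuming a hypothetical load_from_disk callable; it is not the dataset's actual implementation:

    # Rough sketch of the storage behaviour described by the config values above.
    import queue
    import random
    import numpy as np

    storage = queue.Queue(maxsize=15 * 64)   # roughly storage_size batches of num_speakers_in_batch
    sample_from_storage_p = 0.66
    additive_noise = 1e-5

    def next_utterance(load_from_disk):
        if not storage.empty() and random.random() < sample_from_storage_p:
            wav = storage.get()                 # reuse a cached utterance
        else:
            wav = load_from_disk()              # hypothetical disk loader
            if not storage.full():
                storage.put(wav)
        return wav + np.random.normal(0, additive_noise, size=wav.shape)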
@@ -33,8 +33,10 @@ class MyDataset(Dataset):
         self.additive_noise = float(additive_noise)
         if self.verbose:
             print("\n > DataLoader initialization")
+            print(f" | > Speakers per Batch: {num_speakers_in_batch}")
             print(f" | > Storage Size: {self.storage.maxsize} speakers, each with {num_utter_per_speaker} utters")
             print(f" | > Sample_from_storage_p : {self.sample_from_storage_p}")
+            print(f" | > Noise added : {self.additive_noise}")
             print(f" | > Number of instances : {len(self.items)}")
             print(f" | > Sequence length: {self.seq_len}")
             print(f" | > Num speakers: {len(self.speakers)}")
@@ -7,11 +7,9 @@ from TTS.utils.generic_utils import check_argument
 
 
 def split_dataset(items):
-    is_multi_speaker = False
     speakers = [item[-1] for item in items]
     is_multi_speaker = len(set(speakers)) > 1
-    eval_split_size = 500 if len(items) * 0.01 > 500 else int(
-        len(items) * 0.01)
+    eval_split_size = min(500, int(len(items) * 0.01))
     assert eval_split_size > 0, " [!] You do not have enough samples to train. You need at least 100 samples."
     np.random.seed(0)
     np.random.shuffle(items)
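
Note: the rewritten one-liner keeps the same behaviour: the eval split is 1% of the items, capped at 500, and the assert fires when fewer than 100 items are available, since int(n * 0.01) truncates to 0 below that. A quick check with a few example dataset sizes:

    # Same clamping behaviour for a few example dataset sizes.
    for n_items in [80, 5000, 20000, 100000]:
        print(n_items, "->", min(500, int(n_items * 0.01)))
    # 80     -> 0    (assert would reject this: not enough samples)
    # 5000   -> 50
    # 20000  -> 200
    # 100000 -> 500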
@@ -142,6 +140,11 @@ def check_config(c):
     check_argument('do_trim_silence', c['audio'], restricted=True, val_type=bool)
     check_argument('trim_db', c['audio'], restricted=True, val_type=int)
 
+    # storage parameters
+    check_argument('sample_from_storage_p', c['storage'], restricted=True, val_type=float, min_val=0.0, max_val=1.0)
+    check_argument('storage_size', c['storage'], restricted=True, val_type=int, min_val=1, max_val=100)
+    check_argument('additive_noise', c['storage'], restricted=True, val_type=float, min_val=0.0, max_val=1.0)
+
     # training parameters
     check_argument('batch_size', c, restricted=True, val_type=int, min_val=1)
     check_argument('eval_batch_size', c, restricted=True, val_type=int, min_val=1)
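
Note: check_argument comes from TTS.utils.generic_utils and, judging by the calls above, verifies that a required key is present, has the expected type, and falls inside an optional numeric range. A simplified approximation of such a validator, not the library's actual code:

    # Simplified approximation of a check_argument-style validator (not the TTS implementation).
    def check_argument(name, cfg, restricted=False, val_type=None, min_val=None, max_val=None):
        if name not in cfg:
            assert not restricted, f" [!] '{name}' is required in the config."
            return
        value = cfg[name]
        if val_type is not None:
            assert isinstance(value, val_type), f" [!] '{name}' must be of type {val_type.__name__}."
        if min_val is not None:
            assert value >= min_val, f" [!] '{name}' must be >= {min_val}."
        if max_val is not None:
            assert value <= max_val, f" [!] '{name}' must be <= {max_val}."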