{ "datasets": [ { "name": "ljspeech", "path": "DEFINE THIS", "meta_file_train": "metadata.csv", "meta_file_val": null } ], "audio": { "fft_size": 1024, "win_length": 1024, "hop_length": 256, "frame_length_ms": null, "frame_shift_ms": null, "sample_rate": 22050, "preemphasis": 0.0, "ref_level_db": 20, "do_trim_silence": true, "trim_db": 60, "power": 1.5, "griffin_lim_iters": 60, "num_mels": 80, "mel_fmin": 50.0, "mel_fmax": 7600.0, "spec_gain": 1, "signal_norm": true, "min_level_db": -100, "symmetric_norm": true, "max_norm": 4.0, "clip_norm": true, "stats_path": "scale_stats.npy" }, "gst":{ "gst_embedding_dim": 256, "gst_num_heads": 4, "gst_num_style_tokens": 10 }, "distributed_backend": "gloo", "distributed_url": "tcp:\/\/localhost:54321", "model": "Tacotron2", "run_name": "ljspeech-ddc", "run_description": "tacotron2 with double decoder consistency.", "batch_size": 64, "eval_batch_size": 16, "mixed_precision": false, "loss_masking": true, "decoder_loss_alpha": 0.25, "postnet_loss_alpha": 0.25, "postnet_diff_spec_alpha": 0.25, "decoder_diff_spec_alpha": 0.25, "decoder_ssim_alpha": 0.25, "postnet_ssim_alpha": 0.25, "ga_alpha": 5.0, "stopnet_pos_weight": 15.0, "run_eval": true, "test_delay_epochs": 10, "test_sentences_file": null, "max_decoder_steps": 50, "noam_schedule": true, "grad_clip": 0.05, "epochs": 1000, "lr": 0.001, "wd": 1e-06, "warmup_steps": 4000, "memory_size": -1, "prenet_type": "original", "prenet_dropout": true, "attention_type": "original", "location_attn": true, "double_decoder_consistency": true, "ddc_r": 6, "attention_norm": "sigmoid", "r": 6, "gradual_training": [[0, 6, 64], [10000, 4, 32], [50000, 3, 32], [100000, 2, 32]], "stopnet": true, "separate_stopnet": true, "print_step": 25, "tb_plot_step": 100, "print_eval": false, "save_step": 10000, "checkpoint": true, "text_cleaner": "phoneme_cleaners", "num_loader_workers": 4, "num_val_loader_workers": 4, "batch_group_size": 4, "min_seq_len": 6, "max_seq_len": 180, "compute_input_seq_cache": true, "output_path": "DEFINE THIS", "phoneme_cache_path": "DEFINE THIS", "use_phonemes": false, "phoneme_language": "en-us" }