mirror of https://github.com/coqui-ai/TTS.git
Add Mozilla Common Voice, VoxCeleb1+2, and LibriTTS to speaker encoder training
This commit is contained in:
parent
c909ca3855
commit
95d2906307
|
@ -51,36 +51,42 @@
|
|||
},
|
||||
"datasets":
|
||||
[
|
||||
{
|
||||
"name": "common_voice_wav",
|
||||
"path": "../../audio-datasets/en/MozillaCommonVoice",
|
||||
"meta_file_train": "train.tsv",
|
||||
"meta_file_val": "test.tsv"
|
||||
},
|
||||
{
|
||||
"name": "voxceleb1",
|
||||
"path": "../../audio-datasets/en/voxceleb1/",
|
||||
"meta_file_train": null,
|
||||
"meta_file_val": null
|
||||
},
|
||||
// {
|
||||
// "name": "voxceleb2",
|
||||
// "path": "../../audio-datasets/en/voxceleb2/",
|
||||
// "meta_file_train": null,
|
||||
// "meta_file_val": null
|
||||
// },
|
||||
// {
|
||||
// "name": "vctk",
|
||||
// "path": "../../audio-datasets/en/VCTK-Corpus/",
|
||||
// "meta_file_train": null,
|
||||
// "meta_file_val": null
|
||||
// },
|
||||
// {
|
||||
// "name": "libri_tts",
|
||||
// "path": "../../audio-datasets/en/LibriTTS/train-clean-100",
|
||||
// "meta_file_train": null,
|
||||
// "meta_file_val": null
|
||||
// },
|
||||
// {
|
||||
// "name": "libri_tts",
|
||||
// "path": "../../audio-datasets/en/LibriTTS/train-clean-360",
|
||||
// "meta_file_train": null,
|
||||
// "meta_file_val": null
|
||||
// },
|
||||
{
|
||||
"name": "voxceleb2",
|
||||
"path": "../../audio-datasets/en/voxceleb2/",
|
||||
"meta_file_train": null,
|
||||
"meta_file_val": null
|
||||
},
|
||||
{
|
||||
"name": "vctk",
|
||||
"path": "../../audio-datasets/en/VCTK-Corpus/",
|
||||
"meta_file_train": null,
|
||||
"meta_file_val": null
|
||||
},
|
||||
{
|
||||
"name": "libri_tts",
|
||||
"path": "../../audio-datasets/en/LibriTTS/train-clean-100",
|
||||
"meta_file_train": null,
|
||||
"meta_file_val": null
|
||||
},
|
||||
{
|
||||
"name": "libri_tts",
|
||||
"path": "../../audio-datasets/en/LibriTTS/train-clean-360",
|
||||
"meta_file_train": null,
|
||||
"meta_file_val": null
|
||||
},
|
||||
{
|
||||
"name": "libri_tts",
|
||||
"path": "../../audio-datasets/en/LibriTTS/train-other-500",
|
||||
|
|
|
@ -161,7 +161,7 @@ def nancy(root_path, meta_file):
|
|||
return items
|
||||
|
||||
|
||||
def common_voice(root_path, meta_file):
|
||||
def common_voice_wav(root_path, meta_file):
|
||||
"""Normalize the common voice meta data file to TTS format."""
|
||||
txt_file = os.path.join(root_path, meta_file)
|
||||
items = []
|
||||
|
@ -172,8 +172,8 @@ def common_voice(root_path, meta_file):
|
|||
cols = line.split("\t")
|
||||
text = cols[2]
|
||||
speaker_name = cols[0]
|
||||
wav_file = os.path.join(root_path, "clips", cols[1] + ".wav")
|
||||
items.append([text, wav_file, speaker_name])
|
||||
wav_file = os.path.join(root_path, "clips", cols[1].replace(".mp3", ".wav"))
|
||||
items.append([text, wav_file, 'MCV_' + speaker_name])
|
||||
return items
|
||||
|
||||
|
||||
|
@ -251,9 +251,9 @@ def vctk(root_path, meta_files=None, wavs_path='wav48'):
|
|||
continue
|
||||
with open(meta_file) as file_text:
|
||||
text = file_text.readlines()[0]
|
||||
wav_file = os.path.join(root_path, wavs_path, 'VCTK_' + speaker_id,
|
||||
wav_file = os.path.join(root_path, wavs_path, speaker_id,
|
||||
file_id + '.wav')
|
||||
items.append([text, wav_file, speaker_id])
|
||||
items.append([text, wav_file, 'VCTK_' + speaker_id])
|
||||
|
||||
return items
|
||||
|
||||
|
@ -298,3 +298,5 @@ def _voxcel_x(root_path, voxcel_idx):
|
|||
|
||||
with open(str(cache_to), 'r') as f:
|
||||
return [x.strip().split('|') for x in f.readlines()]
|
||||
|
||||
|
||||
|
|
|
@ -50,7 +50,7 @@ def save_best_model(target_loss, best_loss, model, optimizer, current_step, epoc
|
|||
if target_loss < best_loss:
|
||||
file_name = 'best_model.pth.tar'
|
||||
checkpoint_path = os.path.join(output_folder, file_name)
|
||||
print(" > BEST MODEL : {}".format(checkpoint_path))
|
||||
print(" >> BEST MODEL : {}".format(checkpoint_path))
|
||||
save_model(model, optimizer, current_step, epoch, r, checkpoint_path, model_loss=target_loss, **kwargs)
|
||||
best_loss = target_loss
|
||||
return best_loss
|
||||
|
|
Loading…
Reference in New Issue