mirror of https://github.com/coqui-ai/TTS.git
add: Mozilla Commonvoice, VoxCeleb1+2, LibriTTS to Speaker Encoder Training
This commit is contained in:
parent
c909ca3855
commit
95d2906307
|
@ -51,36 +51,42 @@
|
||||||
},
|
},
|
||||||
"datasets":
|
"datasets":
|
||||||
[
|
[
|
||||||
|
{
|
||||||
|
"name": "common_voice_wav",
|
||||||
|
"path": "../../audio-datasets/en/MozillaCommonVoice",
|
||||||
|
"meta_file_train": "train.tsv",
|
||||||
|
"meta_file_val": "test.tsv"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "voxceleb1",
|
"name": "voxceleb1",
|
||||||
"path": "../../audio-datasets/en/voxceleb1/",
|
"path": "../../audio-datasets/en/voxceleb1/",
|
||||||
"meta_file_train": null,
|
"meta_file_train": null,
|
||||||
"meta_file_val": null
|
"meta_file_val": null
|
||||||
},
|
},
|
||||||
// {
|
{
|
||||||
// "name": "voxceleb2",
|
"name": "voxceleb2",
|
||||||
// "path": "../../audio-datasets/en/voxceleb2/",
|
"path": "../../audio-datasets/en/voxceleb2/",
|
||||||
// "meta_file_train": null,
|
"meta_file_train": null,
|
||||||
// "meta_file_val": null
|
"meta_file_val": null
|
||||||
// },
|
},
|
||||||
// {
|
{
|
||||||
// "name": "vctk",
|
"name": "vctk",
|
||||||
// "path": "../../audio-datasets/en/VCTK-Corpus/",
|
"path": "../../audio-datasets/en/VCTK-Corpus/",
|
||||||
// "meta_file_train": null,
|
"meta_file_train": null,
|
||||||
// "meta_file_val": null
|
"meta_file_val": null
|
||||||
// },
|
},
|
||||||
// {
|
{
|
||||||
// "name": "libri_tts",
|
"name": "libri_tts",
|
||||||
// "path": "../../audio-datasets/en/LibriTTS/train-clean-100",
|
"path": "../../audio-datasets/en/LibriTTS/train-clean-100",
|
||||||
// "meta_file_train": null,
|
"meta_file_train": null,
|
||||||
// "meta_file_val": null
|
"meta_file_val": null
|
||||||
// },
|
},
|
||||||
// {
|
{
|
||||||
// "name": "libri_tts",
|
"name": "libri_tts",
|
||||||
// "path": "../../audio-datasets/en/LibriTTS/train-clean-360",
|
"path": "../../audio-datasets/en/LibriTTS/train-clean-360",
|
||||||
// "meta_file_train": null,
|
"meta_file_train": null,
|
||||||
// "meta_file_val": null
|
"meta_file_val": null
|
||||||
// },
|
},
|
||||||
{
|
{
|
||||||
"name": "libri_tts",
|
"name": "libri_tts",
|
||||||
"path": "../../audio-datasets/en/LibriTTS/train-other-500",
|
"path": "../../audio-datasets/en/LibriTTS/train-other-500",
|
||||||
|
|
|
@ -161,7 +161,7 @@ def nancy(root_path, meta_file):
|
||||||
return items
|
return items
|
||||||
|
|
||||||
|
|
||||||
def common_voice(root_path, meta_file):
|
def common_voice_wav(root_path, meta_file):
|
||||||
"""Normalize the common voice meta data file to TTS format."""
|
"""Normalize the common voice meta data file to TTS format."""
|
||||||
txt_file = os.path.join(root_path, meta_file)
|
txt_file = os.path.join(root_path, meta_file)
|
||||||
items = []
|
items = []
|
||||||
|
@ -172,8 +172,8 @@ def common_voice(root_path, meta_file):
|
||||||
cols = line.split("\t")
|
cols = line.split("\t")
|
||||||
text = cols[2]
|
text = cols[2]
|
||||||
speaker_name = cols[0]
|
speaker_name = cols[0]
|
||||||
wav_file = os.path.join(root_path, "clips", cols[1] + ".wav")
|
wav_file = os.path.join(root_path, "clips", cols[1].replace(".mp3", ".wav"))
|
||||||
items.append([text, wav_file, speaker_name])
|
items.append([text, wav_file, 'MCV_' + speaker_name])
|
||||||
return items
|
return items
|
||||||
|
|
||||||
|
|
||||||
|
@ -251,9 +251,9 @@ def vctk(root_path, meta_files=None, wavs_path='wav48'):
|
||||||
continue
|
continue
|
||||||
with open(meta_file) as file_text:
|
with open(meta_file) as file_text:
|
||||||
text = file_text.readlines()[0]
|
text = file_text.readlines()[0]
|
||||||
wav_file = os.path.join(root_path, wavs_path, 'VCTK_' + speaker_id,
|
wav_file = os.path.join(root_path, wavs_path, speaker_id,
|
||||||
file_id + '.wav')
|
file_id + '.wav')
|
||||||
items.append([text, wav_file, speaker_id])
|
items.append([text, wav_file, 'VCTK_' + speaker_id])
|
||||||
|
|
||||||
return items
|
return items
|
||||||
|
|
||||||
|
@ -298,3 +298,5 @@ def _voxcel_x(root_path, voxcel_idx):
|
||||||
|
|
||||||
with open(str(cache_to), 'r') as f:
|
with open(str(cache_to), 'r') as f:
|
||||||
return [x.strip().split('|') for x in f.readlines()]
|
return [x.strip().split('|') for x in f.readlines()]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -50,7 +50,7 @@ def save_best_model(target_loss, best_loss, model, optimizer, current_step, epoc
|
||||||
if target_loss < best_loss:
|
if target_loss < best_loss:
|
||||||
file_name = 'best_model.pth.tar'
|
file_name = 'best_model.pth.tar'
|
||||||
checkpoint_path = os.path.join(output_folder, file_name)
|
checkpoint_path = os.path.join(output_folder, file_name)
|
||||||
print(" > BEST MODEL : {}".format(checkpoint_path))
|
print(" >> BEST MODEL : {}".format(checkpoint_path))
|
||||||
save_model(model, optimizer, current_step, epoch, r, checkpoint_path, model_loss=target_loss, **kwargs)
|
save_model(model, optimizer, current_step, epoch, r, checkpoint_path, model_loss=target_loss, **kwargs)
|
||||||
best_loss = target_loss
|
best_loss = target_loss
|
||||||
return best_loss
|
return best_loss
|
||||||
|
|
Loading…
Reference in New Issue