mirror of https://github.com/coqui-ai/TTS.git
Merge branch 'bokeh-interactive-1' into dev
This commit is contained in:
commit
574de86b9b
|
@ -1,8 +1,12 @@
|
||||||
### Speaker embedding (Experimental)
|
### Speaker embedding (Experimental)
|
||||||
|
|
||||||
This is an implementation of https://arxiv.org/abs/1710.10467. This model can be used for voice and speaker embedding. So you can generate d-vectors for multi-speaker TTS or prune bad samples from your TTS dataset. Below is an example showing embedding results of various speakers. You can generate the same plot with the provided notebook.
|
This is an implementation of https://arxiv.org/abs/1710.10467. This model can be used for voice and speaker embedding.
|
||||||
|
|
||||||

|
With the code here you can generate d-vectors for both multi-speaker and single-speaker TTS datasets, then visualise and explore them along with the associated audio files in an interactive chart.
|
||||||
|
|
||||||
|
Below is an example showing embedding results of various speakers. You can generate the same plot with the provided notebook, as demonstrated in [this video](https://youtu.be/KW3oO7JVa7Q).
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
Download a pretrained model from the [Released Models](https://github.com/mozilla/TTS/wiki/Released-Models) page.
|
Download a pretrained model from the [Released Models](https://github.com/mozilla/TTS/wiki/Released-Models) page.
|
||||||
|
|
||||||
|
|
|
@ -14,27 +14,52 @@ parser = argparse.ArgumentParser(
|
||||||
description="Compute embedding vectors for each wav file in a dataset. "
|
description="Compute embedding vectors for each wav file in a dataset. "
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"model_path", type=str, help="Path to model outputs (checkpoint, tensorboard etc.)."
|
'data_path',
|
||||||
)
|
type=str,
|
||||||
|
help='Data path for wav files - directory or CSV file')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"config_path", type=str, help="Path to config file for training.",
|
"config_path", type=str, help="Path to config file for training.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"data_path", type=str, help="Defines the data path. It overwrites config.json."
|
"data_path", type=str, help="Defines the data path. It overwrites config.json."
|
||||||
)
|
)
|
||||||
parser.add_argument("output_path", type=str, help="path for training outputs.")
|
parser.add_argument(
|
||||||
parser.add_argument("--use_cuda", type=bool, help="flag to set cuda.", default=False)
|
'--separator', type=str, help='Separator used in file if CSV is passed for data_path', default='|'
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
c = load_config(args.config_path)
|
c = load_config(args.config_path)
|
||||||
ap = AudioProcessor(**c["audio"])
|
ap = AudioProcessor(**c["audio"])
|
||||||
|
|
||||||
wav_files = glob.glob(args.data_path + "/**/*.wav", recursive=True)
|
data_path = args.data_path
|
||||||
output_files = [
|
split_ext = os.path.splitext(data_path)
|
||||||
wav_file.replace(args.data_path, args.output_path).replace(".wav", ".npy")
|
sep = args.separator
|
||||||
for wav_file in wav_files
|
|
||||||
]
|
if len(split_ext) > 0 and split_ext[1].lower() == '.csv':
|
||||||
|
# Parse CSV
|
||||||
|
print(f'CSV file: {data_path}')
|
||||||
|
with open(data_path) as f:
|
||||||
|
wav_path = os.path.join(os.path.dirname(data_path), 'wavs')
|
||||||
|
wav_files = []
|
||||||
|
print(f'Separator is: {sep}')
|
||||||
|
for line in f:
|
||||||
|
components = line.split(sep)
|
||||||
|
if len(components) != 2:
|
||||||
|
print("Invalid line")
|
||||||
|
continue
|
||||||
|
wav_file = os.path.join(wav_path, components[0] + '.wav')
|
||||||
|
#print(f'wav_file: {wav_file}')
|
||||||
|
if os.path.exists(wav_file):
|
||||||
|
wav_files.append(wav_file)
|
||||||
|
print(f'Count of wavs imported: {len(wav_files)}')
|
||||||
|
else:
|
||||||
|
# Parse all wav files in data_path
|
||||||
|
wav_path = data_path
|
||||||
|
wav_files = glob.glob(data_path + '/**/*.wav', recursive=True)
|
||||||
|
|
||||||
|
output_files = [wav_file.replace(wav_path, args.output_path).replace(
|
||||||
|
'.wav', '.npy') for wav_file in wav_files]
|
||||||
|
|
||||||
for output_file in output_files:
|
for output_file in output_files:
|
||||||
os.makedirs(os.path.dirname(output_file), exist_ok=True)
|
os.makedirs(os.path.dirname(output_file), exist_ok=True)
|
||||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Before Width: | Height: | Size: 23 KiB After Width: | Height: | Size: 24 KiB |
Loading…
Reference in New Issue