mirror of https://github.com/coqui-ai/TTS.git
Add new arguments to synthesize.py for loading a speaker encoder and speaker wavs
This commit is contained in:
parent
dfa415a8b8
commit
179722e3a7
|
@ -100,6 +100,13 @@ def main():
|
||||||
default=None,
|
default=None,
|
||||||
)
|
)
|
||||||
parser.add_argument("--vocoder_config_path", type=str, help="Path to vocoder model config file.", default=None)
|
parser.add_argument("--vocoder_config_path", type=str, help="Path to vocoder model config file.", default=None)
|
||||||
|
parser.add_argument(
|
||||||
|
"--encoder_path",
|
||||||
|
type=str,
|
||||||
|
help="Path to speaker encoder model file.",
|
||||||
|
default=None,
|
||||||
|
)
|
||||||
|
parser.add_argument("--encoder_config_path", type=str, help="Path to speaker encoder config file.", default=None)
|
||||||
|
|
||||||
# args for multi-speaker synthesis
|
# args for multi-speaker synthesis
|
||||||
parser.add_argument("--speakers_file_path", type=str, help="JSON file for multi-speaker model.", default=None)
|
parser.add_argument("--speakers_file_path", type=str, help="JSON file for multi-speaker model.", default=None)
|
||||||
|
@ -109,6 +116,12 @@ def main():
|
||||||
help="if the tts model is trained with x-vectors, then speaker_idx is a file present in speakers.json else speaker_idx is the speaker id corresponding to a speaker in the speaker embedding layer.",
|
help="if the tts model is trained with x-vectors, then speaker_idx is a file present in speakers.json else speaker_idx is the speaker id corresponding to a speaker in the speaker embedding layer.",
|
||||||
default=None,
|
default=None,
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--speaker_wav",
|
||||||
|
nargs="+",
|
||||||
|
help="wav file(s) to condition a multi-speaker model. You can give multiple file paths. The x_vectors is computed as their average.",
|
||||||
|
default=None,
|
||||||
|
)
|
||||||
parser.add_argument("--gst_style", help="Wav path file for GST stylereference.", default=None)
|
parser.add_argument("--gst_style", help="Wav path file for GST stylereference.", default=None)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--list_speaker_idxs",
|
"--list_speaker_idxs",
|
||||||
|
@ -139,6 +152,8 @@ def main():
|
||||||
speakers_file_path = None
|
speakers_file_path = None
|
||||||
vocoder_path = None
|
vocoder_path = None
|
||||||
vocoder_config_path = None
|
vocoder_config_path = None
|
||||||
|
encoder_path = None
|
||||||
|
encoder_config_path = None
|
||||||
|
|
||||||
# CASE1: list pre-trained TTS models
|
# CASE1: list pre-trained TTS models
|
||||||
if args.list_models:
|
if args.list_models:
|
||||||
|
@ -163,9 +178,14 @@ def main():
|
||||||
vocoder_path = args.vocoder_path
|
vocoder_path = args.vocoder_path
|
||||||
vocoder_config_path = args.vocoder_config_path
|
vocoder_config_path = args.vocoder_config_path
|
||||||
|
|
||||||
|
if args.encoder_path is not None:
|
||||||
|
encoder_path = args.encoder_path
|
||||||
|
encoder_config_path = args.encoder_config_path
|
||||||
|
|
||||||
# load models
|
# load models
|
||||||
synthesizer = Synthesizer(
|
synthesizer = Synthesizer(
|
||||||
model_path, config_path, speakers_file_path, vocoder_path, vocoder_config_path, args.use_cuda
|
model_path, config_path, speakers_file_path, vocoder_path, vocoder_config_path, encoder_path,
|
||||||
|
encoder_config_path, args.use_cuda
|
||||||
)
|
)
|
||||||
|
|
||||||
# query speaker ids of a multi-speaker model.
|
# query speaker ids of a multi-speaker model.
|
||||||
|
@ -180,7 +200,7 @@ def main():
|
||||||
print(" > Text: {}".format(args.text))
|
print(" > Text: {}".format(args.text))
|
||||||
|
|
||||||
# kick it
|
# kick it
|
||||||
wav = synthesizer.tts(args.text, args.speaker_idx)
|
wav = synthesizer.tts(args.text, args.speaker_idx, args.speaker_wav)
|
||||||
|
|
||||||
# save the results
|
# save the results
|
||||||
print(" > Saving output to {}".format(args.out_path))
|
print(" > Saving output to {}".format(args.out_path))
|
||||||
|
|
Loading…
Reference in New Issue