fix: support multi-speaker models in tts_with_vc/tts_with_vc_to_file

This commit is contained in:
Enno Hermann 2023-11-20 15:56:02 +01:00
parent 8a7cc18494
commit cde319874e
1 changed file with 15 additions and 4 deletions

View File

@@ -440,7 +440,7 @@ class TTS(nn.Module):
save_wav(wav=wav, path=file_path, sample_rate=self.voice_converter.vc_config.audio.output_sample_rate) save_wav(wav=wav, path=file_path, sample_rate=self.voice_converter.vc_config.audio.output_sample_rate)
return file_path return file_path
def tts_with_vc(self, text: str, language: str = None, speaker_wav: str = None): def tts_with_vc(self, text: str, language: str = None, speaker_wav: str = None, speaker: str = None):
"""Convert text to speech with voice conversion. """Convert text to speech with voice conversion.
It combines tts with voice conversion to fake voice cloning. It combines tts with voice conversion to fake voice cloning.
@@ -457,17 +457,25 @@ class TTS(nn.Module):
speaker_wav (str, optional): speaker_wav (str, optional):
Path to a reference wav file to use for voice cloning with supporting models like YourTTS. Path to a reference wav file to use for voice cloning with supporting models like YourTTS.
Defaults to None. Defaults to None.
speaker (str, optional):
Speaker name for multi-speaker. You can check whether loaded model is multi-speaker by
`tts.is_multi_speaker` and list speakers by `tts.speakers`. Defaults to None.
""" """
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp: with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
# Lazy code... save it to a temp file to resample it while reading it for VC # Lazy code... save it to a temp file to resample it while reading it for VC
self.tts_to_file(text=text, speaker=None, language=language, file_path=fp.name) self.tts_to_file(text=text, speaker=speaker, language=language, file_path=fp.name)
if self.voice_converter is None: if self.voice_converter is None:
self.load_vc_model_by_name("voice_conversion_models/multilingual/vctk/freevc24") self.load_vc_model_by_name("voice_conversion_models/multilingual/vctk/freevc24")
wav = self.voice_converter.voice_conversion(source_wav=fp.name, target_wav=speaker_wav) wav = self.voice_converter.voice_conversion(source_wav=fp.name, target_wav=speaker_wav)
return wav return wav
def tts_with_vc_to_file( def tts_with_vc_to_file(
self, text: str, language: str = None, speaker_wav: str = None, file_path: str = "output.wav" self,
text: str,
language: str = None,
speaker_wav: str = None,
file_path: str = "output.wav",
speaker: str = None,
): ):
"""Convert text to speech with voice conversion and save to file. """Convert text to speech with voice conversion and save to file.
@@ -484,6 +492,9 @@ class TTS(nn.Module):
Defaults to None. Defaults to None.
file_path (str, optional): file_path (str, optional):
Output file path. Defaults to "output.wav". Output file path. Defaults to "output.wav".
speaker (str, optional):
Speaker name for multi-speaker. You can check whether loaded model is multi-speaker by
`tts.is_multi_speaker` and list speakers by `tts.speakers`. Defaults to None.
""" """
wav = self.tts_with_vc(text=text, language=language, speaker_wav=speaker_wav) wav = self.tts_with_vc(text=text, language=language, speaker_wav=speaker_wav, speaker=speaker)
save_wav(wav=wav, path=file_path, sample_rate=self.voice_converter.vc_config.audio.output_sample_rate) save_wav(wav=wav, path=file_path, sample_rate=self.voice_converter.vc_config.audio.output_sample_rate)