diff --git a/TTS/api.py b/TTS/api.py index e9c9e12a..fdf97d10 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -440,7 +440,7 @@ class TTS(nn.Module): save_wav(wav=wav, path=file_path, sample_rate=self.voice_converter.vc_config.audio.output_sample_rate) return file_path - def tts_with_vc(self, text: str, language: str = None, speaker_wav: str = None): + def tts_with_vc(self, text: str, language: str = None, speaker_wav: str = None, speaker: str = None): """Convert text to speech with voice conversion. It combines tts with voice conversion to fake voice cloning. @@ -457,17 +457,25 @@ class TTS(nn.Module): speaker_wav (str, optional): Path to a reference wav file to use for voice cloning with supporting models like YourTTS. Defaults to None. + speaker (str, optional): + Speaker name for multi-speaker. You can check whether loaded model is multi-speaker by + `tts.is_multi_speaker` and list speakers by `tts.speakers`. Defaults to None. """ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp: # Lazy code... save it to a temp file to resample it while reading it for VC - self.tts_to_file(text=text, speaker=None, language=language, file_path=fp.name) + self.tts_to_file(text=text, speaker=speaker, language=language, file_path=fp.name) if self.voice_converter is None: self.load_vc_model_by_name("voice_conversion_models/multilingual/vctk/freevc24") wav = self.voice_converter.voice_conversion(source_wav=fp.name, target_wav=speaker_wav) return wav def tts_with_vc_to_file( - self, text: str, language: str = None, speaker_wav: str = None, file_path: str = "output.wav" + self, + text: str, + language: str = None, + speaker_wav: str = None, + file_path: str = "output.wav", + speaker: str = None, ): """Convert text to speech with voice conversion and save to file. @@ -484,6 +492,9 @@ class TTS(nn.Module): Defaults to None. file_path (str, optional): Output file path. Defaults to "output.wav". + speaker (str, optional): + Speaker name for multi-speaker. You can check whether loaded model is multi-speaker by + `tts.is_multi_speaker` and list speakers by `tts.speakers`. Defaults to None. """ - wav = self.tts_with_vc(text=text, language=language, speaker_wav=speaker_wav) + wav = self.tts_with_vc(text=text, language=language, speaker_wav=speaker_wav, speaker=speaker) save_wav(wav=wav, path=file_path, sample_rate=self.voice_converter.vc_config.audio.output_sample_rate)