From 5bd1fb6b2c51735e811c185e5f2e4f92ab0121d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Mon, 10 Apr 2023 13:32:16 +0200 Subject: [PATCH] Fix API for voice conversion --- TTS/api.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index 7376cfa4..460e9374 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -85,7 +85,7 @@ class CS_API: self.headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_token}"} if not self.api_token: raise ValueError( - "No API token found for 🐸Coqui Studio voices - https://coqui.ai.\n" + "No API token found for 🐸Coqui Studio voices - https://coqui.ai \n" "Visit 🔗https://app.coqui.ai/account to get one.\n" "Set it as an environment variable `export COQUI_STUDIO_TOKEN=`\n" "" @@ -274,7 +274,10 @@ class TTS: self.model_name = None if model_name: - self.load_tts_model_by_name(model_name, gpu) + if "tts_models" in model_name: + self.load_tts_model_by_name(model_name, gpu) + elif "voice_conversion_models" in model_name: + self.load_vc_model_by_name(model_name, gpu) if model_path: self.load_tts_model_by_path( @@ -565,19 +568,39 @@ class TTS: def voice_conversion( self, - sourve_wav: str, + source_wav: str, target_wav: str, ): """Voice conversion with FreeVC. Convert source wav to target speaker. + Args: + source_wav (str): + Path to the source wav file. + target_wav (str): + Path to the target wav file. + """ + wav = self.voice_converter.voice_conversion(source_wav=source_wav, target_wav=target_wav) + return wav + + def voice_conversion_to_file( + self, + source_wav: str, + target_wav: str, + file_path: str = "output.wav", + ): + """Voice conversion with FreeVC. Convert source wav to target speaker. + Args: source_wav (str): Path to the source wav file. target_wav (str): Path to the target wav file. + file_path (str, optional): + Output file path. Defaults to "output.wav".
""" - wav = self.synthesizer.voice_conversion(source_wav=sourve_wav, target_wav=target_wav) - return wav + wav = self.voice_conversion(source_wav=source_wav, target_wav=target_wav) + save_wav(wav=wav, path=file_path, sample_rate=self.voice_converter.vc_config.audio.output_sample_rate) + return file_path def tts_with_vc(self, text: str, language: str = None, speaker_wav: str = None): """Convert text to speech with voice conversion.