From 21cbfe964e31fecbfcb091e95996236455b200be Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Thu, 21 Apr 2022 08:26:27 -0300 Subject: [PATCH] Add RMS based norm in save_wav method --- TTS/server/server.py | 3 ++- TTS/utils/audio.py | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/TTS/server/server.py b/TTS/server/server.py index aef507fd..0f29b56b 100644 --- a/TTS/server/server.py +++ b/TTS/server/server.py @@ -111,7 +111,8 @@ synthesizer = Synthesizer( use_cuda=args.use_cuda, ) -use_multi_speaker = hasattr(synthesizer.tts_model, "num_speakers") and synthesizer.tts_model.num_speakers > 1 +use_multi_speaker = hasattr(synthesizer.tts_model, "num_speakers") and (synthesizer.tts_model.num_speakers > 1 or synthesizer.tts_speakers_file is not None) +print("Multispeaker?", use_multi_speaker, synthesizer.tts_model.num_speakers) speaker_manager = getattr(synthesizer.tts_model, "speaker_manager", None) # TODO: set this from SpeakerManager use_gst = synthesizer.tts_config.get("use_gst", False) diff --git a/TTS/utils/audio.py b/TTS/utils/audio.py index 4d435162..fc9d1942 100644 --- a/TTS/utils/audio.py +++ b/TTS/utils/audio.py @@ -859,7 +859,11 @@ class AudioProcessor(object): path (str): Path to a output file. sr (int, optional): Sampling rate used for saving to the file. Defaults to None. """ - wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav)))) + if self.do_rms_norm: + wav_norm = self.rms_volume_norm(wav, self.db_level) * 32767 + else: + wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav)))) + scipy.io.wavfile.write(path, sr if sr else self.sample_rate, wav_norm.astype(np.int16)) def get_duration(self, filename: str) -> float: