From 37eaefc0852a9cf39fb325b658d67b9ee5758628 Mon Sep 17 00:00:00 2001 From: George <25833833+george-roussos@users.noreply.github.com> Date: Fri, 29 Oct 2021 17:28:55 +0100 Subject: [PATCH] Optional silence trimming during inference and find_endpoint() fix (#898) * Set find_endpoint db threshold in config.json * Optional silence trimming during inference * Make trim_db value negative --- TTS/utils/audio.py | 4 ++-- TTS/utils/synthesizer.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/TTS/utils/audio.py b/TTS/utils/audio.py index dd9c5701..e64b95e0 100644 --- a/TTS/utils/audio.py +++ b/TTS/utils/audio.py @@ -674,7 +674,7 @@ class AudioProcessor(object): return f0 ### Audio Processing ### - def find_endpoint(self, wav: np.ndarray, threshold_db=-40, min_silence_sec=0.8) -> int: + def find_endpoint(self, wav: np.ndarray, min_silence_sec=0.8) -> int: """Find the last point without silence at the end of a audio signal. Args: @@ -687,7 +687,7 @@ class AudioProcessor(object): """ window_length = int(self.sample_rate * min_silence_sec) hop_length = int(window_length / 4) - threshold = self._db_to_amp(threshold_db) + threshold = self._db_to_amp(-self.trim_db) for x in range(hop_length, len(wav) - window_length, hop_length): if np.max(wav[x : x + window_length]) < threshold: return x + hop_length diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index af07419f..78681967 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -265,7 +265,8 @@ class Synthesizer(object): waveform = waveform.squeeze() # trim silence - waveform = trim_silence(waveform, self.ap) + if self.tts_config.audio["do_trim_silence"] is True: + waveform = trim_silence(waveform, self.ap) wavs += list(waveform) wavs += [0] * 10000