From 92b6d98443212b31ce0798af517e0168cddaab57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?=
Date: Wed, 20 Oct 2021 18:12:38 +0000
Subject: [PATCH] Set pitch frame alignment wrt spec computation

---
Review notes (text in this area is ignored when the patch is applied):

* get_duration() now passes the path as `filename=`. The first positional
  parameter of librosa.get_duration is the audio buffer `y`, so a path
  given positionally is treated as audio data instead of being opened as
  a file. Newer librosa releases deprecate `filename=` in favour of
  `path=`; revisit the keyword when the pinned librosa version is bumped.
* Line breaks and indentation were reconstructed from a
  whitespace-collapsed copy of this patch. Confirm the context-line
  indentation against TTS/utils/audio.py before applying.

 TTS/utils/audio.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/TTS/utils/audio.py b/TTS/utils/audio.py
index 19a16e5e..dd9c5701 100644
--- a/TTS/utils/audio.py
+++ b/TTS/utils/audio.py
@@ -645,6 +645,10 @@ class AudioProcessor(object):
             >>> wav = ap.load_wav(WAV_FILE, sr=22050)[:5 * 22050]
             >>> pitch = ap.compute_f0(wav)
         """
+        # align F0 length to the spectrogram length
+        if len(x) % self.hop_length == 0:
+            x = np.pad(x, (0, self.hop_length // 2), mode="reflect")
+
         f0, t = pw.dio(
             x.astype(np.double),
             fs=self.sample_rate,
@@ -747,6 +751,14 @@ class AudioProcessor(object):
         wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav))))
         scipy.io.wavfile.write(path, sr if sr else self.sample_rate, wav_norm.astype(np.int16))
 
+    def get_duration(self, filename: str) -> float:
+        """Get the duration of a wav file using Librosa.
+
+        Args:
+            filename (str): Path to the wav file.
+        """
+        return librosa.get_duration(filename=filename)
+
     @staticmethod
     def mulaw_encode(wav: np.ndarray, qc: int) -> np.ndarray:
         mu = 2 ** qc - 1