From 92b6d98443212b31ce0798af517e0168cddaab57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?= <egolge@coqui.ai>
Date: Wed, 20 Oct 2021 18:12:38 +0000
Subject: [PATCH] Set pitch frame alignment wrt spec computation

---
 TTS/utils/audio.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/TTS/utils/audio.py b/TTS/utils/audio.py
index 19a16e5e..dd9c5701 100644
--- a/TTS/utils/audio.py
+++ b/TTS/utils/audio.py
@@ -645,6 +645,10 @@ class AudioProcessor(object):
             >>> wav = ap.load_wav(WAV_FILE, sr=22050)[:5 * 22050]
             >>> pitch = ap.compute_f0(wav)
         """
+        # align F0 length to the spectrogram length
+        if len(x) % self.hop_length == 0:
+            x = np.pad(x, (0, self.hop_length // 2), mode="reflect")
+
         f0, t = pw.dio(
             x.astype(np.double),
             fs=self.sample_rate,
@@ -747,6 +751,14 @@ class AudioProcessor(object):
         wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav))))
         scipy.io.wavfile.write(path, sr if sr else self.sample_rate, wav_norm.astype(np.int16))
 
+    def get_duration(self, filename: str) -> float:
+        """Get the duration (in seconds) of a wav file using Librosa.
+
+        Args:
+            filename (str): Path to the wav file.
+        """
+        return librosa.get_duration(filename=filename)
+
     @staticmethod
     def mulaw_encode(wav: np.ndarray, qc: int) -> np.ndarray:
         mu = 2 ** qc - 1