Set pitch frame alignment wrt spec computation

2021-10-20 18:12:38 +00:00 · 2021-10-20 18:12:38 +00:00 · 92b6d98443
parent 0a3d1cc7ee
commit 92b6d98443
1 changed files with 12 additions and 0 deletions
--- a/TTS/utils/audio.py
+++ b/TTS/utils/audio.py
@ -645,6 +645,10 @@ class AudioProcessor(object):
            >>> wav = ap.load_wav(WAV_FILE, sr=22050)[:5 * 22050]
            >>> pitch = ap.compute_f0(wav)
        """
+        # align F0 length to the spectrogram length
+        if len(x) % self.hop_length == 0:
+            x = np.pad(x, (0, self.hop_length // 2), mode="reflect")
+
        f0, t = pw.dio(
            x.astype(np.double),
            fs=self.sample_rate,
@ -747,6 +751,14 @@ class AudioProcessor(object):
        wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav))))
        scipy.io.wavfile.write(path, sr if sr else self.sample_rate, wav_norm.astype(np.int16))

+    def get_duration(self, filename: str) -> float:
+        """Get the duration (as reported by Librosa) of the wav file at ``filename``.
+
+        Args:
+            filename (str): Path to the wav file.
+        """
+        return librosa.get_duration(filename=filename)  # keyword is required: the first positional arg is the signal `y`
+
    @staticmethod
    def mulaw_encode(wav: np.ndarray, qc: int) -> np.ndarray:
        mu = 2 ** qc - 1