mirror of https://github.com/coqui-ai/TTS.git
Set pitch frame alignment with respect to the spectrogram computation
This commit is contained in:
parent
0a3d1cc7ee
commit
92b6d98443
|
@ -645,6 +645,10 @@ class AudioProcessor(object):
|
||||||
>>> wav = ap.load_wav(WAV_FILE, sr=22050)[:5 * 22050]
|
>>> wav = ap.load_wav(WAV_FILE, sr=22050)[:5 * 22050]
|
||||||
>>> pitch = ap.compute_f0(wav)
|
>>> pitch = ap.compute_f0(wav)
|
||||||
"""
|
"""
|
||||||
|
# align F0 length to the spectrogram length
|
||||||
|
if len(x) % self.hop_length == 0:
|
||||||
|
x = np.pad(x, (0, self.hop_length // 2), mode="reflect")
|
||||||
|
|
||||||
f0, t = pw.dio(
|
f0, t = pw.dio(
|
||||||
x.astype(np.double),
|
x.astype(np.double),
|
||||||
fs=self.sample_rate,
|
fs=self.sample_rate,
|
||||||
|
@ -747,6 +751,14 @@ class AudioProcessor(object):
|
||||||
wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav))))
|
wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav))))
|
||||||
scipy.io.wavfile.write(path, sr if sr else self.sample_rate, wav_norm.astype(np.int16))
|
scipy.io.wavfile.write(path, sr if sr else self.sample_rate, wav_norm.astype(np.int16))
|
||||||
|
|
||||||
|
def get_duration(self, filename: str) -> float:
    """Get the duration of a wav file using Librosa.

    Args:
        filename (str): Path to the wav file.

    Returns:
        float: Duration of the audio file in seconds, as reported by Librosa.
    """
    # The first positional parameter of ``librosa.get_duration`` is the audio
    # time series ``y``; a path passed positionally would be treated as audio
    # data, so it has to be passed by keyword.
    return librosa.get_duration(filename=filename)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def mulaw_encode(wav: np.ndarray, qc: int) -> np.ndarray:
|
def mulaw_encode(wav: np.ndarray, qc: int) -> np.ndarray:
|
||||||
mu = 2 ** qc - 1
|
mu = 2 ** qc - 1
|
||||||
|
|
Loading…
Reference in New Issue