mirror of https://github.com/coqui-ai/TTS.git
Add mfcc to AudioProcessor
This commit is contained in:
parent
21cc0517a3
commit
355dfee98d

@@ -122,6 +122,9 @@ class AudioProcessor(object):
         num_mels (int, optional):
             number of melspectrogram dimensions. Defaults to None.
 
+        num_mfcc (int):
+            Number of MFCC values to compute. Defaults to None.
+
         log_func (int, optional):
             log exponent used for converting spectrogram amplitude to DB.
 
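Note: the docstring above only says that num_mfcc sets the number of MFCC values. For background, librosa derives MFCCs by taking the DCT-II of the dB-scaled mel spectrogram, so the method added further down is a thin wrapper over the existing mel pipeline. A minimal sketch of that equivalence, using a synthetic signal and illustrative parameter values (nothing here comes from this commit):

    import librosa
    import numpy as np

    sr = 22050
    y = np.random.default_rng(0).standard_normal(sr).astype(np.float32)  # 1 s of noise

    # Manual path: mel spectrogram -> dB scale -> DCT-II (librosa's default dct_type=2)
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=80)
    mfcc_manual = librosa.feature.mfcc(S=librosa.power_to_db(S), n_mfcc=13)

    # Direct path: same result in one call
    mfcc_direct = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, n_mels=80)
    assert np.allclose(mfcc_manual, mfcc_direct)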
@@ -207,6 +210,7 @@ class AudioProcessor(object):
         sample_rate=None,
         resample=False,
         num_mels=None,
+        num_mfcc=None,
         log_func="np.log10",
         min_level_db=None,
         frame_shift_ms=None,
@@ -240,6 +244,7 @@ class AudioProcessor(object):
         self.sample_rate = sample_rate
         self.resample = resample
         self.num_mels = num_mels
+        self.num_mfcc = num_mfcc
         self.log_func = log_func
         self.min_level_db = min_level_db or 0
         self.frame_shift_ms = frame_shift_ms
@@ -546,6 +551,22 @@ class AudioProcessor(object):
         S = self._linear_to_mel(np.abs(D))
         return self.normalize(S).astype(np.float32)
 
+    def mfcc(self, y: np.ndarray) -> np.ndarray:
+        """Compute MFCC values from a waveform."""
+        mel_args = {
+            "n_fft": self.fft_size,
+            "n_mels": self.num_mels,
+            "hop_length": self.hop_length,
+            "win_length": self.win_length,
+            "window": "hann",
+            "center": True,
+            "pad_mode": self.stft_pad_mode,
+            "fmin": self.mel_fmin,
+            "fmax": self.mel_fmax,
+        }
+        mfcc = librosa.feature.mfcc(y=y, sr=self.sample_rate, n_mfcc=self.num_mfcc, **mel_args)
+        return mfcc
+
     def inv_spectrogram(self, spectrogram: np.ndarray) -> np.ndarray:
         """Convert a spectrogram to a waveform using Griffin-Lim vocoder."""
         S = self.denormalize(spectrogram)
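
For reference, a minimal usage sketch of the new method. The constructor keywords below mirror the attribute names referenced in the hunk above (fft_size, hop_length, win_length, mel_fmin, mel_fmax); the concrete values, the import path, and the input file are illustrative assumptions, not part of this commit:

    from TTS.utils.audio import AudioProcessor  # assumed import path

    ap = AudioProcessor(
        sample_rate=22050,
        num_mels=80,
        num_mfcc=13,       # new argument introduced by this commit
        fft_size=1024,
        hop_length=256,
        win_length=1024,
        mel_fmin=0,
        mel_fmax=8000.0,
    )

    wav = ap.load_wav("speech.wav")  # hypothetical input file
    mfccs = ap.mfcc(wav)             # ndarray of shape (num_mfcc, num_frames)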