Add mfcc to AudioProcessor

This commit is contained in:
Eren Gölge 2021-09-30 14:38:10 +00:00
parent 21cc0517a3
commit 355dfee98d
1 changed files with 21 additions and 0 deletions

View File

@ -122,6 +122,9 @@ class AudioProcessor(object):
num_mels (int, optional):
number of melspectrogram dimensions. Defaults to None.
num_mfcc (int, optional):
Number of MFCC values to compute. Defaults to None.
log_func (str, optional):
log exponent used for converting spectrogram amplitude to DB.
@ -207,6 +210,7 @@ class AudioProcessor(object):
sample_rate=None,
resample=False,
num_mels=None,
num_mfcc=None,
log_func="np.log10",
min_level_db=None,
frame_shift_ms=None,
@ -240,6 +244,7 @@ class AudioProcessor(object):
self.sample_rate = sample_rate
self.resample = resample
self.num_mels = num_mels
self.num_mfcc = num_mfcc
self.log_func = log_func
self.min_level_db = min_level_db or 0
self.frame_shift_ms = frame_shift_ms
@ -546,6 +551,22 @@ class AudioProcessor(object):
S = self._linear_to_mel(np.abs(D))
return self.normalize(S).astype(np.float32)
def mfcc(self, y: np.ndarray) -> np.ndarray:
    """Compute MFCC values from a waveform.

    The computation is delegated to ``librosa.feature.mfcc``. The sample
    rate and the number of coefficients come from ``self.sample_rate`` and
    ``self.num_mfcc``; the remaining keyword arguments carry this
    processor's FFT size, mel band count, hop/window lengths, STFT padding
    mode and mel frequency bounds. The analysis window is fixed to
    ``"hann"`` and frames are centered.

    Args:
        y (np.ndarray): Waveform to analyse.

    Returns:
        np.ndarray: The array returned by ``librosa.feature.mfcc``.
    """
    # NOTE(review): per librosa's documentation the extra keyword arguments
    # are passed on to its mel spectrogram computation -- confirm against
    # the installed librosa version.
    return librosa.feature.mfcc(
        y=y,
        sr=self.sample_rate,
        n_mfcc=self.num_mfcc,
        n_fft=self.fft_size,
        n_mels=self.num_mels,
        hop_length=self.hop_length,
        win_length=self.win_length,
        window="hann",
        center=True,
        pad_mode=self.stft_pad_mode,
        fmin=self.mel_fmin,
        fmax=self.mel_fmax,
    )
def inv_spectrogram(self, spectrogram: np.ndarray) -> np.ndarray:
"""Convert a spectrogram to a waveform using Griffi-Lim vocoder."""
S = self.denormalize(spectrogram)