change the way how ref_level_db is handled in audio.py

This commit is contained in:
erogol 2020-03-17 18:24:05 +01:00
parent 5223678a4b
commit 20dd509430
1 changed files with 9 additions and 8 deletions

View File

@ -113,6 +113,7 @@ class AudioProcessor(object):
else:
raise RuntimeError(' [!] Mean-Var stats does not match the given feature dimensions.')
# range normalization
S -= self.ref_level_db # discard certain range of DB assuming it is air noise
S_norm = ((S - self.min_level_db) / - self.min_level_db)
if self.symmetric_norm:
S_norm = ((2 * self.max_norm) * S_norm) - self.max_norm
@ -144,13 +145,13 @@ class AudioProcessor(object):
if self.clip_norm:
S_denorm = np.clip(S_denorm, -self.max_norm, self.max_norm)
S_denorm = ((S_denorm + self.max_norm) * -self.min_level_db / (2 * self.max_norm)) + self.min_level_db
return S_denorm
return S_denorm + self.ref_level_db
else:
if self.clip_norm:
S_denorm = np.clip(S_denorm, 0, self.max_norm)
S_denorm = (S_denorm * -self.min_level_db /
self.max_norm) + self.min_level_db
return S_denorm
return S_denorm + self.ref_level_db
else:
return S_denorm
@ -208,7 +209,7 @@ class AudioProcessor(object):
D = self._stft(self.apply_preemphasis(y))
else:
D = self._stft(y)
S = self._amp_to_db(np.abs(D)) - self.ref_level_db
S = self._amp_to_db(np.abs(D))
return self._normalize(S)
def melspectrogram(self, y):
@ -216,13 +217,13 @@ class AudioProcessor(object):
D = self._stft(self.apply_preemphasis(y))
else:
D = self._stft(y)
S = self._amp_to_db(self._linear_to_mel(np.abs(D))) - self.ref_level_db
S = self._amp_to_db(self._linear_to_mel(np.abs(D)))
return self._normalize(S)
def inv_spectrogram(self, spectrogram):
"""Converts spectrogram to waveform using librosa"""
S = self._denormalize(spectrogram)
S = self._db_to_amp(S + self.ref_level_db)
S = self._db_to_amp(S)
# Reconstruct phase
if self.preemphasis != 0:
return self.apply_inv_preemphasis(self._griffin_lim(S**self.power))
@ -231,7 +232,7 @@ class AudioProcessor(object):
def inv_melspectrogram(self, mel_spectrogram):
'''Converts melspectrogram to waveform using librosa'''
D = self._denormalize(mel_spectrogram)
S = self._db_to_amp(D + self.ref_level_db)
S = self._db_to_amp(D)
S = self._mel_to_linear(S) # Convert back to linear
if self.preemphasis != 0:
return self.apply_inv_preemphasis(self._griffin_lim(S**self.power))
@ -239,9 +240,9 @@ class AudioProcessor(object):
def out_linear_to_mel(self, linear_spec):
S = self._denormalize(linear_spec)
S = self._db_to_amp(S + self.ref_level_db)
S = self._db_to_amp(S)
S = self._linear_to_mel(np.abs(S))
S = self._amp_to_db(S) - self.ref_level_db
S = self._amp_to_db(S)
mel = self._normalize(S)
return mel