@staticmethod
def mulaw_encode(wav, qc):
    """Mu-law compand ``wav`` and quantize it to ``2 ** qc`` levels.

    Args:
        wav: Scalar or array-like waveform. Samples are expected to lie in
            [-1, 1]. They are not clipped here, so a sample outside that
            range yields a level outside ``[0, 2 ** qc - 1]``.
        qc: Number of quantization bits.

    Returns:
        The quantization level(s) as floating-point whole numbers (the
        output of ``np.floor``; no integer cast is applied).
    """
    mu = 2 ** qc - 1
    # Logarithmic compression of the magnitude, keeping the sign: [-1, 1] -> [-1, 1].
    signal = np.sign(wav) * np.log(1 + mu * np.abs(wav)) / np.log(1. + mu)
    # Shift to [0, mu]; the +0.5 followed by floor rounds to the nearest level.
    signal = (signal + 1) / 2 * mu + 0.5
    return np.floor(signal)


@staticmethod
def mulaw_decode(wav, qc):
    """Invert the mu-law companding performed by ``mulaw_encode``.

    This function only expands the companded signal; it does not rescale
    integer quantization levels. Levels produced by ``mulaw_encode`` must
    first be mapped back to [-1, 1] by the caller
    (``2 * level / (2 ** qc - 1) - 1``).

    Args:
        wav: Scalar or array-like companded signal in [-1, 1].
        qc: Number of quantization bits used when encoding.

    Returns:
        The expanded waveform value(s) in [-1, 1].
    """
    # Must be the same mu as in mulaw_encode, otherwise the expansion is not
    # the exact inverse of the compression.
    mu = 2 ** qc - 1
    return np.sign(wav) / mu * ((1 + mu) ** np.abs(wav) - 1)