refactor(audio.processor): use load_wav from numpy_transforms

This commit is contained in:
Enno Hermann 2023-11-14 13:46:57 +01:00
parent 9a43eafd60
commit 13e640f17e
1 changed file with 4 additions and 9 deletions

View File

@@ -5,7 +5,6 @@ import librosa
import numpy as np import numpy as np
import scipy.io.wavfile import scipy.io.wavfile
import scipy.signal import scipy.signal
import soundfile as sf
from TTS.tts.utils.helpers import StandardScaler from TTS.tts.utils.helpers import StandardScaler
from TTS.utils.audio.numpy_transforms import ( from TTS.utils.audio.numpy_transforms import (
@@ -16,6 +15,7 @@ from TTS.utils.audio.numpy_transforms import (
deemphasis, deemphasis,
find_endpoint, find_endpoint,
griffin_lim, griffin_lim,
load_wav,
mel_to_spec, mel_to_spec,
millisec_to_length, millisec_to_length,
preemphasis, preemphasis,
@@ -587,15 +587,10 @@ class AudioProcessor(object):
Returns: Returns:
np.ndarray: Loaded waveform. np.ndarray: Loaded waveform.
""" """
if self.resample: if sr is not None:
# loading with resampling. It is significantly slower. x = load_wav(filename=filename, sample_rate=sr, resample=True)
x, sr = librosa.load(filename, sr=self.sample_rate)
elif sr is None:
# SF is faster than librosa for loading files
x, sr = sf.read(filename)
assert self.sample_rate == sr, "%s vs %s" % (self.sample_rate, sr)
else: else:
x, sr = librosa.load(filename, sr=sr) x = load_wav(filename=filename, sample_rate=self.sample_rate, resample=self.resample)
if self.do_trim_silence: if self.do_trim_silence:
try: try:
x = self.trim_silence(x) x = self.trim_silence(x)