mirror of https://github.com/coqui-ai/TTS.git
refactor(audio.processor): use load_wav from numpy_transforms
This commit is contained in:
parent
9a43eafd60
commit
13e640f17e
|
@ -5,7 +5,6 @@ import librosa
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import scipy.io.wavfile
|
import scipy.io.wavfile
|
||||||
import scipy.signal
|
import scipy.signal
|
||||||
import soundfile as sf
|
|
||||||
|
|
||||||
from TTS.tts.utils.helpers import StandardScaler
|
from TTS.tts.utils.helpers import StandardScaler
|
||||||
from TTS.utils.audio.numpy_transforms import (
|
from TTS.utils.audio.numpy_transforms import (
|
||||||
|
@ -16,6 +15,7 @@ from TTS.utils.audio.numpy_transforms import (
|
||||||
deemphasis,
|
deemphasis,
|
||||||
find_endpoint,
|
find_endpoint,
|
||||||
griffin_lim,
|
griffin_lim,
|
||||||
|
load_wav,
|
||||||
mel_to_spec,
|
mel_to_spec,
|
||||||
millisec_to_length,
|
millisec_to_length,
|
||||||
preemphasis,
|
preemphasis,
|
||||||
|
@ -587,15 +587,10 @@ class AudioProcessor(object):
|
||||||
Returns:
|
Returns:
|
||||||
np.ndarray: Loaded waveform.
|
np.ndarray: Loaded waveform.
|
||||||
"""
|
"""
|
||||||
if self.resample:
|
if sr is not None:
|
||||||
# loading with resampling. It is significantly slower.
|
x = load_wav(filename=filename, sample_rate=sr, resample=True)
|
||||||
x, sr = librosa.load(filename, sr=self.sample_rate)
|
|
||||||
elif sr is None:
|
|
||||||
# SF is faster than librosa for loading files
|
|
||||||
x, sr = sf.read(filename)
|
|
||||||
assert self.sample_rate == sr, "%s vs %s" % (self.sample_rate, sr)
|
|
||||||
else:
|
else:
|
||||||
x, sr = librosa.load(filename, sr=sr)
|
x = load_wav(filename=filename, sample_rate=self.sample_rate, resample=self.resample)
|
||||||
if self.do_trim_silence:
|
if self.do_trim_silence:
|
||||||
try:
|
try:
|
||||||
x = self.trim_silence(x)
|
x = self.trim_silence(x)
|
||||||
|
|
Loading…
Reference in New Issue