refactor(audio.processor): use load_wav from numpy_transforms

2023-11-14 13:46:57 +01:00 · 2023-11-14 13:46:57 +01:00 · 13e640f17e
parent 9a43eafd60
commit 13e640f17e
1 changed files with 4 additions and 9 deletions
--- a/TTS/utils/audio/processor.py
+++ b/TTS/utils/audio/processor.py
@@ -5,7 +5,6 @@ import librosa
 import numpy as np
 import scipy.io.wavfile
 import scipy.signal
-import soundfile as sf
 from TTS.tts.utils.helpers import StandardScaler
 from TTS.utils.audio.numpy_transforms import (
@@ -16,6 +15,7 @@ from TTS.utils.audio.numpy_transforms import (
    deemphasis,
    find_endpoint,
    griffin_lim,
+    load_wav,
    mel_to_spec,
    millisec_to_length,
    preemphasis,
@@ -587,15 +587,10 @@ class AudioProcessor(object):
        Returns:
            np.ndarray: Loaded waveform.
        """
-        if self.resample:
+        if sr is not None:
-            # loading with resampling. It is significantly slower.
+            x = load_wav(filename=filename, sample_rate=sr, resample=True)
-            x, sr = librosa.load(filename, sr=self.sample_rate)
-        elif sr is None:
-            # SF is faster than librosa for loading files
-            x, sr = sf.read(filename)
-            assert self.sample_rate == sr, "%s vs %s" % (self.sample_rate, sr)
        else:
-            x, sr = librosa.load(filename, sr=sr)
+            x = load_wav(filename=filename, sample_rate=self.sample_rate, resample=self.resample)
        if self.do_trim_silence:
            try:
                x = self.trim_silence(x)