mirror of https://github.com/coqui-ai/TTS.git
Add pitch_fmin pitch_fmax args to the audio
This commit is contained in:
parent
1445a46e9e
commit
cd5d1497cf
|
@ -239,6 +239,12 @@ class AudioProcessor(object):
|
||||||
mel_fmax (int, optional):
|
mel_fmax (int, optional):
|
||||||
maximum filter frequency for computing melspectrograms. Defaults to None.
|
maximum filter frequency for computing melspectrograms. Defaults to None.
|
||||||
|
|
||||||
|
pitch_fmin (int, optional):
|
||||||
|
minimum filter frequency for computing pitch. Defaults to None.
|
||||||
|
|
||||||
|
pitch_fmax (int, optional):
|
||||||
|
maximum filter frequency for computing pitch. Defaults to None.
|
||||||
|
|
||||||
spec_gain (int, optional):
|
spec_gain (int, optional):
|
||||||
gain applied when converting amplitude to DB. Defaults to 20.
|
gain applied when converting amplitude to DB. Defaults to 20.
|
||||||
|
|
||||||
|
@ -300,6 +306,8 @@ class AudioProcessor(object):
|
||||||
max_norm=None,
|
max_norm=None,
|
||||||
mel_fmin=None,
|
mel_fmin=None,
|
||||||
mel_fmax=None,
|
mel_fmax=None,
|
||||||
|
pitch_fmax=None,
|
||||||
|
pitch_fmin=None,
|
||||||
spec_gain=20,
|
spec_gain=20,
|
||||||
stft_pad_mode="reflect",
|
stft_pad_mode="reflect",
|
||||||
clip_norm=True,
|
clip_norm=True,
|
||||||
|
@ -333,6 +341,8 @@ class AudioProcessor(object):
|
||||||
self.symmetric_norm = symmetric_norm
|
self.symmetric_norm = symmetric_norm
|
||||||
self.mel_fmin = mel_fmin or 0
|
self.mel_fmin = mel_fmin or 0
|
||||||
self.mel_fmax = mel_fmax
|
self.mel_fmax = mel_fmax
|
||||||
|
self.pitch_fmin = pitch_fmin
|
||||||
|
self.pitch_fmax = pitch_fmax
|
||||||
self.spec_gain = float(spec_gain)
|
self.spec_gain = float(spec_gain)
|
||||||
self.stft_pad_mode = stft_pad_mode
|
self.stft_pad_mode = stft_pad_mode
|
||||||
self.max_norm = 1.0 if max_norm is None else float(max_norm)
|
self.max_norm = 1.0 if max_norm is None else float(max_norm)
|
||||||
|
@ -726,12 +736,12 @@ class AudioProcessor(object):
|
||||||
>>> WAV_FILE = filename = librosa.util.example_audio_file()
|
>>> WAV_FILE = filename = librosa.util.example_audio_file()
|
||||||
>>> from TTS.config import BaseAudioConfig
|
>>> from TTS.config import BaseAudioConfig
|
||||||
>>> from TTS.utils.audio import AudioProcessor
|
>>> from TTS.utils.audio import AudioProcessor
|
||||||
>>> conf = BaseAudioConfig(mel_fmax=8000)
|
>>> conf = BaseAudioConfig(pitch_fmax=8000)
|
||||||
>>> ap = AudioProcessor(**conf)
|
>>> ap = AudioProcessor(**conf)
|
||||||
>>> wav = ap.load_wav(WAV_FILE, sr=22050)[:5 * 22050]
|
>>> wav = ap.load_wav(WAV_FILE, sr=22050)[:5 * 22050]
|
||||||
>>> pitch = ap.compute_f0(wav)
|
>>> pitch = ap.compute_f0(wav)
|
||||||
"""
|
"""
|
||||||
assert self.mel_fmax is not None, " [!] Set `mel_fmax` before caling `compute_f0`."
|
assert self.pitch_fmax is not None, " [!] Set `pitch_fmax` before caling `compute_f0`."
|
||||||
# align F0 length to the spectrogram length
|
# align F0 length to the spectrogram length
|
||||||
if len(x) % self.hop_length == 0:
|
if len(x) % self.hop_length == 0:
|
||||||
x = np.pad(x, (0, self.hop_length // 2), mode="reflect")
|
x = np.pad(x, (0, self.hop_length // 2), mode="reflect")
|
||||||
|
@ -739,7 +749,7 @@ class AudioProcessor(object):
|
||||||
f0, t = pw.dio(
|
f0, t = pw.dio(
|
||||||
x.astype(np.double),
|
x.astype(np.double),
|
||||||
fs=self.sample_rate,
|
fs=self.sample_rate,
|
||||||
f0_ceil=self.mel_fmax,
|
f0_ceil=self.pitch_fmax,
|
||||||
frame_period=1000 * self.hop_length / self.sample_rate,
|
frame_period=1000 * self.hop_length / self.sample_rate,
|
||||||
)
|
)
|
||||||
f0 = pw.stonemask(x.astype(np.double), f0, t, self.sample_rate)
|
f0 = pw.stonemask(x.astype(np.double), f0, t, self.sample_rate)
|
||||||
|
|
Loading…
Reference in New Issue