From cd5d1497cff1f1616ade65a0962bd72c55f085e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?= <egolge@coqui.ai>
Date: Tue, 25 Jan 2022 09:26:23 +0000
Subject: [PATCH] Add pitch_fmin and pitch_fmax args to AudioProcessor

---
 TTS/utils/audio.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/TTS/utils/audio.py b/TTS/utils/audio.py
index 4d20f468..d0777c11 100644
--- a/TTS/utils/audio.py
+++ b/TTS/utils/audio.py
@@ -239,6 +239,12 @@ class AudioProcessor(object):
         mel_fmax (int, optional):
             maximum filter frequency for computing melspectrograms. Defaults to None.
 
+        pitch_fmin (int, optional):
+            minimum filter frequency for computing pitch. Defaults to None.
+
+        pitch_fmax (int, optional):
+            maximum frequency (F0 ceiling) for computing pitch; required by `compute_f0`. Defaults to None.
+
         spec_gain (int, optional):
             gain applied when converting amplitude to DB. Defaults to 20.
 
@@ -300,6 +306,8 @@ class AudioProcessor(object):
         max_norm=None,
         mel_fmin=None,
         mel_fmax=None,
+        pitch_fmax=None,
+        pitch_fmin=None,
         spec_gain=20,
         stft_pad_mode="reflect",
         clip_norm=True,
@@ -333,6 +341,8 @@ class AudioProcessor(object):
         self.symmetric_norm = symmetric_norm
         self.mel_fmin = mel_fmin or 0
         self.mel_fmax = mel_fmax
+        self.pitch_fmin = pitch_fmin
+        self.pitch_fmax = pitch_fmax
         self.spec_gain = float(spec_gain)
         self.stft_pad_mode = stft_pad_mode
         self.max_norm = 1.0 if max_norm is None else float(max_norm)
@@ -726,12 +736,12 @@ class AudioProcessor(object):
             >>> WAV_FILE = filename = librosa.util.example_audio_file()
             >>> from TTS.config import BaseAudioConfig
             >>> from TTS.utils.audio import AudioProcessor
-            >>> conf = BaseAudioConfig(mel_fmax=8000)
+            >>> conf = BaseAudioConfig(pitch_fmax=8000)
             >>> ap = AudioProcessor(**conf)
             >>> wav = ap.load_wav(WAV_FILE, sr=22050)[:5 * 22050]
             >>> pitch = ap.compute_f0(wav)
         """
-        assert self.mel_fmax is not None, " [!] Set `mel_fmax` before caling `compute_f0`."
+        assert self.pitch_fmax is not None, " [!] Set `pitch_fmax` before caling `compute_f0`."
         # align F0 length to the spectrogram length
         if len(x) % self.hop_length == 0:
             x = np.pad(x, (0, self.hop_length // 2), mode="reflect")
@@ -739,7 +749,7 @@ class AudioProcessor(object):
         f0, t = pw.dio(
             x.astype(np.double),
             fs=self.sample_rate,
-            f0_ceil=self.mel_fmax,
+            f0_ceil=self.pitch_fmax,
             frame_period=1000 * self.hop_length / self.sample_rate,
         )
         f0 = pw.stonemask(x.astype(np.double), f0, t, self.sample_rate)