diff --git a/TTS/config/shared_configs.py b/TTS/config/shared_configs.py
index c52cfe8a..9e9d4692 100644
--- a/TTS/config/shared_configs.py
+++ b/TTS/config/shared_configs.py
@@ -60,6 +60,12 @@ class BaseAudioConfig(Coqpit):
         trim_db (int):
             Silence threshold used for silence trimming. Defaults to 45.
 
+        do_rms_norm (bool, optional):
+            enable/disable RMS volume normalization when loading an audio file. Defaults to False.
+
+        db_level (float, optional):
+            Target dB level used for RMS volume normalization. The range is -99 to 0. Defaults to None.
+
         power (float):
             Exponent used for expanding spectrogra levels before running Griffin Lim. It helps to reduce the
             artifacts in the synthesized voice. Defaults to 1.5.
@@ -116,6 +122,9 @@ class BaseAudioConfig(Coqpit):
     # silence trimming
     do_trim_silence: bool = True
     trim_db: int = 45
+    # rms volume normalization
+    do_rms_norm: bool = False
+    db_level: float = None
     # griffin-lim params
     power: float = 1.5
     griffin_lim_iters: int = 60
diff --git a/TTS/utils/audio.py b/TTS/utils/audio.py
index d01196c4..25f93c34 100644
--- a/TTS/utils/audio.py
+++ b/TTS/utils/audio.py
@@ -266,6 +266,12 @@ class AudioProcessor(object):
         do_amp_to_db_mel (bool, optional):
             enable/disable amplitude to dB conversion of mel spectrograms. Defaults to True.
 
+        do_rms_norm (bool, optional):
+            enable/disable RMS volume normalization when loading an audio file. Defaults to False.
+
+        db_level (float, optional):
+            Target dB level used for RMS volume normalization. The range is -99 to 0. Defaults to None.
+
         stats_path (str, optional):
             Path to the computed stats file. Defaults to None.
 
@@ -303,6 +309,8 @@ class AudioProcessor(object):
         do_sound_norm=False,
         do_amp_to_db_linear=True,
         do_amp_to_db_mel=True,
+        do_rms_norm=False,
+        db_level=None,
         stats_path=None,
         verbose=True,
         **_,
@@ -334,6 +342,8 @@ class AudioProcessor(object):
         self.do_sound_norm = do_sound_norm
         self.do_amp_to_db_linear = do_amp_to_db_linear
         self.do_amp_to_db_mel = do_amp_to_db_mel
+        self.do_rms_norm = do_rms_norm
+        self.db_level = db_level
         self.stats_path = stats_path
         # setup exp_func for db to amp conversion
         if log_func == "np.log":
@@ -726,21 +736,6 @@ class AudioProcessor(object):
             frame_period=1000 * self.hop_length / self.sample_rate,
         )
         f0 = pw.stonemask(x.astype(np.double), f0, t, self.sample_rate)
-        # pad = int((self.win_length / self.hop_length) / 2)
-        # f0 = [0.0] * pad + f0 + [0.0] * pad
-        # f0 = np.pad(f0, (pad, pad), mode="constant", constant_values=0)
-        # f0 = np.array(f0, dtype=np.float32)
-
-        # f01, _, _ = librosa.pyin(
-        #     x,
-        #     fmin=65 if self.mel_fmin == 0 else self.mel_fmin,
-        #     fmax=self.mel_fmax,
-        #     frame_length=self.win_length,
-        #     sr=self.sample_rate,
-        #     fill_na=0.0,
-        # )
-
-        # spec = self.melspectrogram(x)
         return f0
 
     ### Audio Processing ###
@@ -783,10 +778,33 @@ class AudioProcessor(object):
         """
         return x / abs(x).max() * 0.95
 
+    @staticmethod
+    def _rms_norm(wav, db_level=-27):
+        """Scale `wav` by a single gain so that a 1-D signal ends up with RMS 10 ** (db_level / 20)."""
+        target_rms = 10 ** (db_level / 20)
+        return wav * np.sqrt((len(wav) * (target_rms ** 2)) / np.sum(wav ** 2))
+
+    def rms_volume_norm(self, x: np.ndarray, db_level: float = None) -> np.ndarray:
+        """Scale a waveform so that its RMS matches a target dB level.
+
+        Args:
+            x (np.ndarray): Raw waveform.
+            db_level (float, optional): Target level in dB, -99 to 0. Falls back to `self.db_level` if None.
+
+        Returns:
+            np.ndarray: RMS normalized waveform.
+        """
+        target_db = self.db_level if db_level is None else db_level
+        # the dB level must lie within [-99, 0]
+        assert -99 <= target_db <= 0, " [!] db_level should be between -99 and 0"
+        return self._rms_norm(x, target_db)
+
     ### save and load ###
     def load_wav(self, filename: str, sr: int = None) -> np.ndarray:
         """Read a wav file using Librosa and optionally resample, silence trim, volume normalize.
 
+        Resampling slows down loading significantly, so it is recommended to resample the files beforehand.
+
         Args:
             filename (str): Path to the wav file.
             sr (int, optional): Sampling rate for resampling. Defaults to None.
@@ -795,8 +813,10 @@ class AudioProcessor(object):
             np.ndarray: Loaded waveform.
         """
         if self.resample:
+            # loading with resampling. It is significantly slower.
             x, sr = librosa.load(filename, sr=self.sample_rate)
         elif sr is None:
+            # SF is faster than librosa for loading files
             x, sr = sf.read(filename)
             assert self.sample_rate == sr, "%s vs %s" % (self.sample_rate, sr)
         else:
@@ -808,6 +828,8 @@ class AudioProcessor(object):
                 print(f" [!] File cannot be trimmed for silence - {filename}")
         if self.do_sound_norm:
             x = self.sound_norm(x)
+        if self.do_rms_norm:
+            x = self.rms_volume_norm(x, self.db_level)
         return x
 
     def save_wav(self, wav: np.ndarray, path: str, sr: int = None) -> None: