Approximate audio length from file size

Eren Gölge 2021-10-18 08:54:02 +00:00
parent b4b890df03
commit a0a5d580e9
4 changed files with 5 additions and 5 deletions


@@ -106,7 +106,7 @@ class TacotronConfig(BaseTTSConfig):
             Weight decay coefficient. Defaults to `1e-6`.
         grad_clip (float):
             Gradient clipping threshold. Defaults to `5`.
-        seq_len_notm (bool):
+        seq_len_norm (bool):
             enable / disable the sequnce length normalization in the loss functions. If set True, loss of a sample
             is divided by the sequence length. Defaults to False.
         loss_masking (bool):
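
To make the `seq_len_norm` option concrete, here is a minimal sketch of per-sample sequence-length normalization inside a masked loss. The function name, tensor shapes, and the L1 criterion are illustrative assumptions, not the repository's actual loss implementation:

import torch

def masked_l1_loss(pred, target, lengths, seq_len_norm=False):
    # Sketch only: naming and shapes are assumptions, not the library's loss code.
    # pred, target: (batch, time, feats); lengths: (batch,) valid frames per sample.
    time_idx = torch.arange(pred.size(1), device=pred.device)
    mask = (time_idx[None, :] < lengths[:, None]).unsqueeze(-1).float()
    per_sample = (torch.abs(pred - target) * mask).sum(dim=(1, 2))
    if seq_len_norm:
        # Divide each sample's loss by its own valid length so short and long
        # utterances contribute comparably to the batch loss.
        per_sample = per_sample / (lengths.float() * pred.size(2))
    return per_sample.mean()

Without this normalization, long utterances contribute more to the batch loss simply because they have more frames; dividing each sample's loss by its own valid length removes that bias.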


@@ -330,7 +330,7 @@ class TTSDataset(Dataset):
         if by_audio_len:
             lengths = []
             for item in self.items:
-                lengths.append(os.path.getsize(item[1]))
+                lengths.append(os.path.getsize(item[1]) / 16 * 8) # assuming 16bit audio
             lengths = np.array(lengths)
         else:
             lengths = np.array([len(ins[0]) for ins in self.items])
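
For context on the new line: a file's size in bits divided by the bits per sample approximates the number of samples, which is proportional to the audio duration, so the scaled file size works as a sorting key without decoding any audio. A minimal sketch of the same idea expressed as a duration estimate (the function name, sample rate, and channel count below are illustrative assumptions, not part of the commit):

import os

def approx_audio_seconds(wav_path, sample_rate=22050, bit_depth=16, channels=1):
    # Rough estimate from file size alone; assumes uncompressed PCM (e.g. WAV)
    # and ignores header bytes, so it slightly overestimates the true duration.
    n_samples = os.path.getsize(wav_path) * 8 / (bit_depth * channels)
    return n_samples / sample_rate

When every file shares the same sample rate, bit depth, and channel count, those factors cancel for sorting purposes, which is why the commit only rescales the byte count by the assumed 16-bit sample width.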


@@ -242,4 +242,4 @@ class BaseTacotron(BaseTTS):
         self.decoder.set_r(r)
         if trainer.config.bidirectional_decoder:
             trainer.model.decoder_backward.set_r(r)
-        print(f"\n > Number of output frames: {self.decoder.r}")
+        print(f"\n > Number of output frames: {self.decoder.r}")


@@ -20,9 +20,9 @@ from TTS.utils.audio import AudioProcessor
 class BaseTTS(BaseModel):
-    """Abstract `tts` class. Every new `tts` model must inherit this.
+    """Base `tts` class. Every new `tts` model must inherit this.
-    It defines `tts` specific functions on top of `Model`.
+    It defines common `tts` specific functions on top of `Model` implementation.
     Notes on input/output tensor shapes:
         Any input or output tensor of the model must be shaped as