From a0a5d580e97e852939dbe9e3113e4c5cd983d9cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eren=20G=C3=B6lge?= Date: Mon, 18 Oct 2021 08:54:02 +0000 Subject: [PATCH] Approximate audio length from file size --- TTS/tts/configs/tacotron_config.py | 2 +- TTS/tts/datasets/dataset.py | 2 +- TTS/tts/models/base_tacotron.py | 2 +- TTS/tts/models/base_tts.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/TTS/tts/configs/tacotron_config.py b/TTS/tts/configs/tacotron_config.py index 89fb8d81..2577fc51 100644 --- a/TTS/tts/configs/tacotron_config.py +++ b/TTS/tts/configs/tacotron_config.py @@ -106,7 +106,7 @@ class TacotronConfig(BaseTTSConfig): Weight decay coefficient. Defaults to `1e-6`. grad_clip (float): Gradient clipping threshold. Defaults to `5`. - seq_len_notm (bool): + seq_len_norm (bool): enable / disable the sequnce length normalization in the loss functions. If set True, loss of a sample is divided by the sequence length. Defaults to False. loss_masking (bool): diff --git a/TTS/tts/datasets/dataset.py b/TTS/tts/datasets/dataset.py index c81e0e6c..bfe0d778 100644 --- a/TTS/tts/datasets/dataset.py +++ b/TTS/tts/datasets/dataset.py @@ -330,7 +330,7 @@ class TTSDataset(Dataset): if by_audio_len: lengths = [] for item in self.items: - lengths.append(os.path.getsize(item[1])) + lengths.append(os.path.getsize(item[1]) / 16 * 8) # assuming 16bit audio lengths = np.array(lengths) else: lengths = np.array([len(ins[0]) for ins in self.items]) diff --git a/TTS/tts/models/base_tacotron.py b/TTS/tts/models/base_tacotron.py index b47a5751..c661c4cc 100644 --- a/TTS/tts/models/base_tacotron.py +++ b/TTS/tts/models/base_tacotron.py @@ -242,4 +242,4 @@ class BaseTacotron(BaseTTS): self.decoder.set_r(r) if trainer.config.bidirectional_decoder: trainer.model.decoder_backward.set_r(r) - print(f"\n > Number of output frames: {self.decoder.r}") + print(f"\n > Number of output frames: {self.decoder.r}") \ No newline at end of file diff --git 
a/TTS/tts/models/base_tts.py b/TTS/tts/models/base_tts.py index 0c9f60e8..9f4d70c8 100644 --- a/TTS/tts/models/base_tts.py +++ b/TTS/tts/models/base_tts.py @@ -20,9 +20,9 @@ from TTS.utils.audio import AudioProcessor class BaseTTS(BaseModel): - """Abstract `tts` class. Every new `tts` model must inherit this. + """Base `tts` class. Every new `tts` model must inherit this. - It defines `tts` specific functions on top of `Model`. + It defines common `tts`-specific functions on top of the `Model` implementation. Notes on input/output tensor shapes: Any input or output tensor of the model must be shaped as