From adbcba06dac530ce410f213a5a56e773c7f63b84 Mon Sep 17 00:00:00 2001
From: Enno Hermann
Date: Thu, 14 Mar 2024 20:48:29 +0100
Subject: [PATCH] refactor(dataset): get audio length with torchaudio

Removes a (GPL) dependency
---
 TTS/tts/datasets/dataset.py | 8 ++++----
 requirements.txt            | 1 -
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/TTS/tts/datasets/dataset.py b/TTS/tts/datasets/dataset.py
index 9d0c45ad..257d1c31 100644
--- a/TTS/tts/datasets/dataset.py
+++ b/TTS/tts/datasets/dataset.py
@@ -4,9 +4,9 @@ import os
 import random
 from typing import Dict, List, Union
 
-import mutagen
 import numpy as np
 import torch
+import torchaudio
 import tqdm
 from torch.utils.data import Dataset
 
@@ -43,15 +43,15 @@ def string2filename(string):
     return filename
 
 
-def get_audio_size(audiopath):
+def get_audio_size(audiopath) -> int:
+    """Return the number of samples in the audio file."""
     extension = audiopath.rpartition(".")[-1].lower()
     if extension not in {"mp3", "wav", "flac"}:
         raise RuntimeError(
             f"The audio format {extension} is not supported, please convert the audio files to mp3, flac, or wav format!"
         )
 
-    audio_info = mutagen.File(audiopath).info
-    return int(audio_info.length * audio_info.sample_rate)
+    return torchaudio.info(audiopath).num_frames
 
 
 class TTSDataset(Dataset):
diff --git a/requirements.txt b/requirements.txt
index 6d5fbc24..a01efaa6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,7 +12,6 @@ anyascii>=0.3.0
 pyyaml>=6.0
 fsspec[http]>=2023.6.0 # <= 2023.9.1 makes aux tests fail
 packaging>=23.1
-mutagen==1.47.0
 # deps for inference
 pysbd>=0.3.4
 # deps for notebooks