mirror of https://github.com/coqui-ai/TTS.git
Uses mutagen for all audio formats
This commit is contained in:
parent
dc7624adb4
commit
d551a597e0
|
@@ -14,6 +14,8 @@ from TTS.utils.audio import AudioProcessor
|
||||||
from TTS.utils.audio.numpy_transforms import compute_energy as calculate_energy
|
from TTS.utils.audio.numpy_transforms import compute_energy as calculate_energy
|
||||||
|
|
||||||
from mutagen.mp3 import MP3
|
from mutagen.mp3 import MP3
|
||||||
|
from mutagen.flac import FLAC
|
||||||
|
from mutagen.wave import WAVE
|
||||||
|
|
||||||
# to prevent too many open files error as suggested here
|
# to prevent too many open files error as suggested here
|
||||||
# https://github.com/pytorch/pytorch/issues/11201#issuecomment-421146936
|
# https://github.com/pytorch/pytorch/issues/11201#issuecomment-421146936
|
||||||
|
@@ -48,13 +50,15 @@ def get_audio_size(audiopath):
|
||||||
extension = audiopath.rpartition(".")[-1].lower()
|
extension = audiopath.rpartition(".")[-1].lower()
|
||||||
if extension == "mp3":
|
if extension == "mp3":
|
||||||
audio_info = MP3(audiopath).info
|
audio_info = MP3(audiopath).info
|
||||||
return int(audio_info.length * audio_info.sample_rate)
|
elif extension == "wav":
|
||||||
if extension in ("wav", "flac"):
|
audio_info = WAVE(audiopath).info
|
||||||
compress_factor = 8
|
elif extension == "flac":
|
||||||
bitrate = 16 # assuming 16bit audio
|
audio_info = FLAC(audiopath).info
|
||||||
return int(os.path.getsize(audiopath) / bitrate * compress_factor)
|
else:
|
||||||
raise RuntimeError(f"The audio format {extension} is not supported, please convert the audio files for mp3, flac or wav format!")
|
raise RuntimeError(f"The audio format {extension} is not supported, please convert the audio files for mp3, flac or wav format!")
|
||||||
|
|
||||||
|
return int(audio_info.length * audio_info.sample_rate)
|
||||||
|
|
||||||
|
|
||||||
class TTSDataset(Dataset):
|
class TTSDataset(Dataset):
|
||||||
def __init__(
|
def __init__(
|
||||||
|
|
Loading…
Reference in New Issue