# coqui-tts/TTS/stt/datasets/formatters.py
#
# 59 lines
# 2.0 KiB
# Python

import os
import re
import xml.etree.ElementTree as ET
from glob import glob
from pathlib import Path
from typing import List
from tqdm import tqdm
########################
# DATASETS
########################
def ljspeech(root_path, meta_file):
    """Normalizes the LJSpeech meta data file to TTS format
    https://keithito.com/LJ-Speech-Dataset/

    Every non-blank line of the metadata file is split on ``|``. The first
    column is used as the audio file stem and the third column as the text.

    Args:
        root_path (str): Dataset root; audio paths are built as
            ``<root_path>/wavs/<first column>.wav``.
        meta_file (str): Metadata file path, joined onto ``root_path``.

    Returns:
        list: One dict per utterance with the keys ``"text"``,
        ``"audio_file"`` and ``"speaker_name"`` (always ``"ljspeech"``).

    Raises:
        IndexError: If a non-blank line has fewer than three ``|`` columns.
    """
    txt_file = os.path.join(root_path, meta_file)
    speaker_name = "ljspeech"
    items = []
    with open(txt_file, "r", encoding="utf-8") as ttf:
        for line in ttf:
            # Remove the line terminator so it does not leak into the
            # transcript (the text is the last column used on the line).
            line = line.rstrip("\r\n")
            if not line.strip():
                # Blank lines (e.g. a trailing empty line) carry no utterance.
                continue
            cols = line.split("|")
            wav_file = os.path.join(root_path, "wavs", cols[0] + ".wav")
            items.append({"text": cols[2], "audio_file": wav_file, "speaker_name": speaker_name})
    return items
def librispeech(root_path, meta_files=None):
    """Collect LibriSpeech utterances from ``*trans.txt`` transcript files.

    http://www.openslr.org/resources/12/

    Every non-blank transcript line is split at its first space into an
    utterance id and the transcript text. The id is split on ``-``; its first
    two fields are used as speaker id and chapter id, and the audio file is
    expected at ``<root_path>/<speaker>/<chapter>/<utterance id>.flac``.

    Args:
        root_path (str): Directory that contains the speaker folders.
        meta_files (str | list | None): ``None`` or ``""`` searches
            ``root_path`` recursively for files matching ``*trans.txt``.
            A string is treated as one transcript file relative to
            ``root_path``. Any other value is iterated as-is as transcript
            file paths (these are not joined with ``root_path``).

    Returns:
        list: One dict per utterance with the keys ``"text"``,
        ``"audio_file"`` and ``"speaker_name"`` (``"LibriSpeech_"`` followed
        by the speaker id).

    Raises:
        AssertionError: If a referenced audio file does not exist.
    """
    _delimiter = " "
    _audio_ext = ".flac"
    items = []
    if meta_files is None or meta_files == "":
        meta_files = glob(f"{root_path}/**/*trans.txt", recursive=True)
    elif isinstance(meta_files, str):
        meta_files = [os.path.join(root_path, meta_files)]

    for meta_file in meta_files:
        with open(meta_file, "r", encoding="utf-8") as ttf:
            for line in ttf:
                # Remove the line terminator so it does not leak into the
                # transcript, which runs to the end of the line.
                line = line.rstrip("\r\n")
                if not line.strip():
                    # Blank lines carry no utterance and would break the
                    # speaker/chapter unpacking below.
                    continue
                cols = line.split(_delimiter, 1)
                file_name = cols[0]
                speaker_name, chapter_id, *_ = file_name.split("-")
                wav_file = os.path.join(root_path, speaker_name, chapter_id, file_name + _audio_ext)
                items.append(
                    {"text": cols[1], "audio_file": wav_file, "speaker_name": "LibriSpeech_" + speaker_name}
                )

    # Validate after all transcripts are parsed. Raised explicitly (instead of
    # via an ``assert`` statement) so the check is not removed by ``python -O``,
    # while keeping AssertionError as the exception type.
    for item in items:
        if not os.path.exists(item["audio_file"]):
            raise AssertionError(f" [!] wav files don't exist - {item['audio_file']}")
    return items
if __name__ == "__main__":
    # Manual run: load the LibriSpeech items from a fixed local directory,
    # letting librispeech() discover the transcript files itself.
    _dataset_root = "/home/ubuntu/librispeech/pforLibriSpeech/train-clean-100/"
    items_ = librispeech(root_path=_dataset_root, meta_files=None)