mirror of https://github.com/coqui-ai/TTS.git
parent
68cef28a88
commit
f09ea11c71
|
@ -5,6 +5,7 @@ from glob import glob
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
########################
|
########################
|
||||||
|
@ -12,6 +13,34 @@ from tqdm import tqdm
|
||||||
########################
|
########################
|
||||||
|
|
||||||
|
|
||||||
|
def coqui(root_path, meta_file, ignored_speakers=None):
    """Internal dataset formatter.

    Reads a ``|``-separated metadata file with pandas and builds one sample
    dict per row whose audio file exists on disk.

    Args:
        root_path: Directory onto which ``meta_file`` and every ``audio_file``
            entry are joined.
        meta_file: Metadata file path, relative to ``root_path``. It must
            provide the ``audio_file`` and ``text`` columns; ``speaker_name``
            and ``emotion_name`` columns are optional.
        ignored_speakers: Optional container of speaker names to skip. It is
            only consulted when the metadata has a ``speaker_name`` column.

    Returns:
        A list of dicts with the keys ``"text"``, ``"audio_file"``,
        ``"speaker_name"`` and ``"emotion_name"``. When the corresponding
        column is absent, the speaker is ``"coqui"`` and the emotion is
        ``"neutral"``.

    Raises:
        AssertionError: If a required column is missing from the metadata.
    """
    metadata = pd.read_csv(os.path.join(root_path, meta_file), sep="|")

    # Raise explicitly rather than via a bare ``assert``: the check then
    # survives ``python -O`` and the error names the missing columns, while
    # the exception type stays the same for existing callers.
    missing_columns = [col for col in ("audio_file", "text") if col not in metadata.columns]
    if missing_columns:
        raise AssertionError(
            f"Metadata file '{meta_file}' is missing required column(s): {missing_columns}"
        )

    # These column checks do not depend on the row, so do them once.
    has_speaker_column = "speaker_name" in metadata.columns
    has_emotion_column = "emotion_name" in metadata.columns
    filter_speakers = has_speaker_column and ignored_speakers is not None

    items = []
    not_found_counter = 0
    for row in metadata.itertuples():
        if filter_speakers and row.speaker_name in ignored_speakers:
            continue
        audio_path = os.path.join(root_path, row.audio_file)
        if not os.path.exists(audio_path):
            # Rows pointing at missing audio are dropped and only counted.
            not_found_counter += 1
            continue
        items.append(
            {
                "text": row.text,
                "audio_file": audio_path,
                "speaker_name": row.speaker_name if has_speaker_column else "coqui",
                "emotion_name": row.emotion_name if has_emotion_column else "neutral",
            }
        )
    if not_found_counter > 0:
        print(f" | > [!] {not_found_counter} files not found")
    return items
|
||||||
|
|
||||||
|
|
||||||
def tweb(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
|
def tweb(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
|
||||||
"""Normalize TWEB dataset.
|
"""Normalize TWEB dataset.
|
||||||
https://www.kaggle.com/bryanpark/the-world-english-bible-speech-dataset
|
https://www.kaggle.com/bryanpark/the-world-english-bible-speech-dataset
|
||||||
|
|
Loading…
Reference in New Issue