Internal formatter (#1629)

* Add coqui formatter * Make style
2022-06-08 14:31:03 +02:00 · 2022-06-08 14:31:03 +02:00 · f09ea11c71
parent 68cef28a88
commit f09ea11c71
1 changed files with 29 additions and 0 deletions
--- a/TTS/tts/datasets/formatters.py
+++ b/TTS/tts/datasets/formatters.py
@ -5,6 +5,7 @@ from glob import glob
 from pathlib import Path
 from typing import List
 import pandas as pd
 from tqdm import tqdm
 ########################
@ -12,6 +13,34 @@ from tqdm import tqdm
 ########################
 def coqui(root_path, meta_file, ignored_speakers=None):
     """Internal dataset formatter.

     Reads a ``|``-separated metadata file with a header row. The columns
     ``audio_file`` and ``text`` are required; ``speaker_name`` and
     ``emotion_name`` are optional.

     Args:
         root_path (str): Dataset root. ``meta_file`` and every ``audio_file``
             entry are joined onto this path.
         meta_file (str): Metadata file name, relative to ``root_path``.
         ignored_speakers (List[str], optional): Speaker names whose rows are
             skipped. Only applied when the metadata has a ``speaker_name``
             column. Defaults to None.

     Returns:
         List[dict]: One dict per kept row with the keys ``text``,
         ``audio_file`` (joined with ``root_path``), ``speaker_name`` and
         ``emotion_name``. Rows whose audio file does not exist are dropped
         and their count is printed.

     Raises:
         AssertionError: If ``audio_file`` or ``text`` is missing from the
             metadata columns.
     """
     metadata = pd.read_csv(os.path.join(root_path, meta_file), sep="|")

     # Checked explicitly instead of with a bare ``assert`` so that the validation
     # survives ``python -O`` and reports which column is missing. The exception
     # type stays ``AssertionError`` for callers that already catch it.
     missing_columns = [col for col in ("audio_file", "text") if col not in metadata.columns]
     if missing_columns:
         raise AssertionError(
             f" [!] Missing required column(s) {missing_columns} in {meta_file}; "
             f"found columns: {list(metadata.columns)}"
         )

     # ``None`` means "take the value from the row"; otherwise the fixed fallback is used.
     speaker_name = None if "speaker_name" in metadata.columns else "coqui"
     emotion_name = None if "emotion_name" in metadata.columns else "neutral"

     items = []
     not_found_counter = 0
     for row in metadata.itertuples():
         # Speaker filtering is only possible when the file provides per-row speakers.
         if speaker_name is None and ignored_speakers is not None and row.speaker_name in ignored_speakers:
             continue
         audio_path = os.path.join(root_path, row.audio_file)
         if not os.path.exists(audio_path):
             not_found_counter += 1
             continue
         items.append(
             {
                 "text": row.text,
                 "audio_file": audio_path,
                 "speaker_name": speaker_name if speaker_name is not None else row.speaker_name,
                 "emotion_name": emotion_name if emotion_name is not None else row.emotion_name,
             }
         )
     if not_found_counter > 0:
         print(f" | > [!] {not_found_counter} files not found")
     return items
 def tweb(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
    """Normalize TWEB dataset.
    https://www.kaggle.com/bryanpark/the-world-english-bible-speech-dataset