Rename `load_meta_data` as `load_tts_samples`

This commit is contained in:
Eren Gölge 2021-09-30 14:28:53 +00:00
parent 9f23ad6a0f
commit 043dca61b4
6 changed files with 17 additions and 15 deletions

View File

@ -5,7 +5,7 @@ from argparse import RawTextHelpFormatter
from tqdm import tqdm from tqdm import tqdm
from TTS.config import load_config from TTS.config import load_config
from TTS.tts.datasets import load_meta_data from TTS.tts.datasets import load_tts_samples
from TTS.tts.utils.speakers import SpeakerManager from TTS.tts.utils.speakers import SpeakerManager
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
@ -36,7 +36,7 @@ args = parser.parse_args()
c_dataset = load_config(args.config_dataset_path) c_dataset = load_config(args.config_dataset_path)
meta_data_train, meta_data_eval = load_meta_data(c_dataset.datasets, eval_split=args.eval) meta_data_train, meta_data_eval = load_tts_samples(c_dataset.datasets, eval_split=args.eval)
wav_files = meta_data_train + meta_data_eval wav_files = meta_data_train + meta_data_eval
speaker_manager = SpeakerManager( speaker_manager = SpeakerManager(

View File

@ -10,7 +10,7 @@ from tqdm import tqdm
# from TTS.utils.io import load_config # from TTS.utils.io import load_config
from TTS.config import load_config from TTS.config import load_config
from TTS.tts.datasets import load_meta_data from TTS.tts.datasets import load_tts_samples
from TTS.utils.audio import AudioProcessor from TTS.utils.audio import AudioProcessor
@ -41,7 +41,7 @@ def main():
if args.data_path: if args.data_path:
dataset_items = glob.glob(os.path.join(args.data_path, "**", "*.wav"), recursive=True) dataset_items = glob.glob(os.path.join(args.data_path, "**", "*.wav"), recursive=True)
else: else:
dataset_items = load_meta_data(CONFIG.datasets)[0] # take only train data dataset_items = load_tts_samples(CONFIG.datasets)[0] # take only train data
print(f" > There are {len(dataset_items)} files.") print(f" > There are {len(dataset_items)} files.")
mel_sum = 0 mel_sum = 0

View File

@ -10,8 +10,7 @@ from torch.utils.data import DataLoader
from tqdm import tqdm from tqdm import tqdm
from TTS.config import load_config from TTS.config import load_config
from TTS.tts.datasets import load_meta_data from TTS.tts.datasets import TTSDataset, load_tts_samples
from TTS.tts.datasets.TTSDataset import TTSDataset
from TTS.tts.models import setup_model from TTS.tts.models import setup_model
from TTS.tts.utils.speakers import get_speaker_manager from TTS.tts.utils.speakers import get_speaker_manager
from TTS.utils.audio import AudioProcessor from TTS.utils.audio import AudioProcessor
@ -230,7 +229,7 @@ def main(args): # pylint: disable=redefined-outer-name
ap = AudioProcessor(**c.audio) ap = AudioProcessor(**c.audio)
# load data instances # load data instances
meta_data_train, meta_data_eval = load_meta_data(c.datasets, eval_split=args.eval) meta_data_train, meta_data_eval = load_tts_samples(c.datasets, eval_split=args.eval)
# use eval and training partitions # use eval and training partitions
meta_data = meta_data_train + meta_data_eval meta_data = meta_data_train + meta_data_eval

View File

@ -3,7 +3,7 @@ import argparse
from argparse import RawTextHelpFormatter from argparse import RawTextHelpFormatter
from TTS.config import load_config from TTS.config import load_config
from TTS.tts.datasets import load_meta_data from TTS.tts.datasets import load_tts_samples
def main(): def main():
@ -23,7 +23,7 @@ def main():
c = load_config(args.config_path) c = load_config(args.config_path)
# load all datasets # load all datasets
train_items, eval_items = load_meta_data(c.datasets, eval_split=True) train_items, eval_items = load_tts_samples(c.datasets, eval_split=True)
items = train_items + eval_items items = train_items + eval_items
texts = "".join(item[0] for item in items) texts = "".join(item[0] for item in items)

View File

@ -1,12 +1,12 @@
import sys import sys
from collections import Counter from collections import Counter
from pathlib import Path from pathlib import Path
from typing import Dict, List, Tuple from typing import Dict, List, Tuple, Union
import numpy as np import numpy as np
from TTS.tts.datasets.dataset import *
from TTS.tts.datasets.formatters import * from TTS.tts.datasets.formatters import *
from TTS.tts.datasets.TTSDataset import TTSDataset
def split_dataset(items): def split_dataset(items):
@ -31,11 +31,12 @@ def split_dataset(items):
return items[:eval_split_size], items[eval_split_size:] return items[:eval_split_size], items[eval_split_size:]
def load_meta_data(datasets: List[Dict], eval_split=True) -> Tuple[List[List], List[List]]: def load_tts_samples(datasets: Union[List[Dict], Dict], eval_split=True) -> Tuple[List[List], List[List]]:
"""Parse the dataset, load the samples as a list and load the attention alignments if provided. """Parse the dataset, load the samples as a list and load the attention alignments if provided.
Args: Args:
datasets (List[Dict]): A list of dataset dictionaries or dataset configs. datasets (List[Dict], Dict): A list of datasets or a single dataset dictionary. If multiple datasets are
in the list, they are all merged.
eval_split (bool, optional): If true, create an evaluation split. If an eval split provided explicitly, generate eval_split (bool, optional): If true, create an evaluation split. If an eval split provided explicitly, generate
an eval split automatically. Defaults to True. an eval split automatically. Defaults to True.
@ -44,6 +45,8 @@ def load_meta_data(datasets: List[Dict], eval_split=True) -> Tuple[List[List], L
""" """
meta_data_train_all = [] meta_data_train_all = []
meta_data_eval_all = [] if eval_split else None meta_data_eval_all = [] if eval_split else None
if not isinstance(datasets, list):
datasets = [datasets]
for dataset in datasets: for dataset in datasets:
name = dataset["name"] name = dataset["name"]
root_path = dataset["path"] root_path = dataset["path"]

View File

@ -50,7 +50,7 @@
"source": [ "source": [
"# import stuff\n", "# import stuff\n",
"from TTS.utils.io import load_config\n", "from TTS.utils.io import load_config\n",
"from TTS.tts.datasets.formatters import load_meta_data\n", "from TTS.tts.datasets.formatters import load_tts_samples\n",
"from TTS.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme\n", "from TTS.tts.utils.text import phoneme_to_sequence, sequence_to_phoneme\n",
"from tqdm import tqdm\n", "from tqdm import tqdm\n",
"from matplotlib import pylab as plt\n", "from matplotlib import pylab as plt\n",
@ -75,7 +75,7 @@
"CONFIG = load_config(CONFIG_FILE)\n", "CONFIG = load_config(CONFIG_FILE)\n",
"\n", "\n",
"# Load some properties from config.json\n", "# Load some properties from config.json\n",
"CONFIG_METADATA = sorted(load_meta_data(CONFIG.datasets)[0])\n", "CONFIG_METADATA = sorted(load_tts_samples(CONFIG.datasets)[0])\n",
"CONFIG_METADATA = CONFIG_METADATA\n", "CONFIG_METADATA = CONFIG_METADATA\n",
"CONFIG_DATASET = CONFIG.datasets[0]\n", "CONFIG_DATASET = CONFIG.datasets[0]\n",
"CONFIG_PHONEME_LANGUAGE = CONFIG.phoneme_language\n", "CONFIG_PHONEME_LANGUAGE = CONFIG.phoneme_language\n",