From 1896db7e2c539a1f02c6e389726530d145f89fc1 Mon Sep 17 00:00:00 2001
From: Jindrich Matousek <jmatouse@kky.zcu.cz>
Date: Sun, 10 Jul 2022 22:08:11 +0200
Subject: [PATCH] Add formatter for artic multispeaker dataset

---
 TTS/tts/datasets/formatters.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/TTS/tts/datasets/formatters.py b/TTS/tts/datasets/formatters.py
index 4e120bc6..eadf0529 100644
--- a/TTS/tts/datasets/formatters.py
+++ b/TTS/tts/datasets/formatters.py
@@ -579,3 +579,21 @@ def artic(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
             text = cols[-1]
             items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name})
     return items
+
+
+def artic_multispeaker(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
+    """Collect ARTIC items for several speakers by calling `artic` once per directory.
+
+    Args:
+        root_path (str): path to the multi-speaker ARTIC dataset
+        meta_file (str): name of the meta file containing names of wav to select and
+                         transcripts of the corresponding utterances; it is passed
+                         unchanged to `artic` for every directory, so it must be the same for all speakers
+    Returns:
+        list: concatenation of the item lists returned by `artic` for every matched directory
+    """
+    items = []
+    # NOTE(review): with recursive=False `**` acts like `*`, so this matches dirs two levels below `root_path`, not direct subdirs -- confirm layout
+    for pth in glob(f"{root_path}/*/**/", recursive=False):
+        items.extend(artic(pth, meta_file))
+    return items