sam-accenture model preprocessor

This commit is contained in:
Eren Gölge 2021-04-01 03:41:41 +02:00
parent e3c052382b
commit e84f120a04
1 changed file with 19 additions and 2 deletions

View File

@ -1,12 +1,12 @@
import os
from glob import glob
import re
import sys
import xml.etree.ElementTree as ET
from glob import glob
from pathlib import Path
from typing import List
from tqdm import tqdm
from TTS.tts.utils.generic_utils import split_dataset
####################
@ -168,6 +168,23 @@ def ljspeech(root_path, meta_file):
return items
def sam_accenture(root_path, meta_file):
    """Normalizes the sam-accenture meta data file to TTS format
    https://github.com/Sam-Accenture-Non-Binary-Voice/non-binary-voice-files

    Args:
        root_path: Dataset root directory. The XML metadata is read from its
            ``voice_over_recordings`` sub-folder and the wav files are looked
            up in its ``vo_voice_quality_transformation`` sub-folder.
        meta_file: File name of the XML metadata file inside
            ``voice_over_recordings``.

    Returns:
        A list of ``[text, wav_file, speaker_name]`` entries, one for every
        ``fileid`` element directly under the XML root that carries an ``id``
        attribute and whose wav file exists on disk. Entries failing either
        check are reported on stdout and skipped.
    """
    xml_file = os.path.join(root_path, "voice_over_recordings", meta_file)
    xml_root = ET.parse(xml_file).getroot()
    # Loop-invariant: every wav file lives in the same folder.
    wav_dir = os.path.join(root_path, "vo_voice_quality_transformation")
    speaker_name = "sam_accenture"
    items = []
    for item in xml_root.findall("./fileid"):
        file_id = item.get("id")
        if file_id is None:
            # Element.get() returns None for a missing attribute; building the
            # file name from it would raise TypeError and abort the whole load.
            print(" [!] fileid entry without an id attribute in metafile. Skipping...")
            continue
        wav_file = os.path.join(wav_dir, file_id + ".wav")
        if not os.path.exists(wav_file):
            print(f' [!] {wav_file} in metafile does not exist. Skipping...')
            continue
        items.append([item.text, wav_file, speaker_name])
    return items
def ruslan(root_path, meta_file):
"""Normalizes the RUSLAN meta data file to TTS format
https://ruslan-corpus.github.io/"""