mirror of https://github.com/coqui-ai/TTS.git
sam-accenture model preprocessor
This commit is contained in:
parent
e3c052382b
commit
e84f120a04
|
@ -1,12 +1,12 @@
|
|||
import os
|
||||
from glob import glob
|
||||
import re
|
||||
import sys
|
||||
import xml.etree.ElementTree as ET
|
||||
from glob import glob
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from TTS.tts.utils.generic_utils import split_dataset
|
||||
|
||||
####################
|
||||
|
@ -168,6 +168,23 @@ def ljspeech(root_path, meta_file):
|
|||
return items
|
||||
|
||||
|
||||
def sam_accenture(root_path, meta_file):
    """Normalizes the sam-accenture meta data file to TTS format
    https://github.com/Sam-Accenture-Non-Binary-Voice/non-binary-voice-files

    Args:
        root_path (str): dataset root. The meta file is read from its
            ``voice_over_recordings`` sub-folder and wav files are looked up
            in its ``vo_voice_quality_transformation`` sub-folder.
        meta_file (str): file name of the XML meta file.

    Returns:
        list: one ``[text, wav_file, speaker_name]`` entry per ``fileid``
        element that has an ``id`` attribute, an existing wav file and a
        non-empty text. Entries failing any of these checks are reported on
        stdout and skipped.

    Raises:
        OSError: if the meta file cannot be opened.
        xml.etree.ElementTree.ParseError: if the meta file is not valid XML.
    """
    xml_file = os.path.join(root_path, 'voice_over_recordings', meta_file)
    xml_root = ET.parse(xml_file).getroot()
    # The wav folder is the same for every entry, so build it once.
    wav_dir = os.path.join(root_path, 'vo_voice_quality_transformation')
    speaker_name = "sam_accenture"
    items = []
    for item in xml_root.findall('./fileid'):
        file_id = item.get('id')
        if file_id is None:
            # Element.get() returns None for a missing attribute; concatenating
            # it with '.wav' would raise TypeError and abort the whole load.
            print(' [!] fileid entry without an "id" attribute in metafile. Skipping...')
            continue
        wav_file = os.path.join(wav_dir, file_id + '.wav')
        if not os.path.exists(wav_file):
            print(f' [!] {wav_file} in metafile does not exist. Skipping...')
            continue
        text = item.text
        if not text:
            # An element without text content yields None, which is not a
            # usable transcript.
            print(f' [!] {file_id} in metafile has no text. Skipping...')
            continue
        items.append([text, wav_file, speaker_name])
    return items
|
||||
|
||||
|
||||
def ruslan(root_path, meta_file):
|
||||
"""Normalizes the RUSLAN meta data file to TTS format
|
||||
https://ruslan-corpus.github.io/"""
|
||||
|
|
Loading…
Reference in New Issue