mirror of https://github.com/coqui-ai/TTS.git
sam-accenture model preprocessor
This commit is contained in:
parent
e3c052382b
commit
e84f120a04
|
@ -1,12 +1,12 @@
|
||||||
import os
|
import os
|
||||||
from glob import glob
|
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
from glob import glob
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from TTS.tts.utils.generic_utils import split_dataset
|
from TTS.tts.utils.generic_utils import split_dataset
|
||||||
|
|
||||||
####################
|
####################
|
||||||
|
@ -168,6 +168,23 @@ def ljspeech(root_path, meta_file):
|
||||||
return items
|
return items
|
||||||
|
|
||||||
|
|
||||||
|
def sam_accenture(root_path, meta_file):
    """Normalizes the sam-accenture meta data file to TTS format
    https://github.com/Sam-Accenture-Non-Binary-Voice/non-binary-voice-files

    Args:
        root_path (str): dataset root directory. The XML meta data is read
            from its ``voice_over_recordings`` sub-folder and the wav files
            are looked up in its ``vo_voice_quality_transformation``
            sub-folder.
        meta_file (str): name of the XML file inside
            ``voice_over_recordings``.

    Returns:
        list: one ``[text, wav_file, speaker_name]`` entry for every
        ``<fileid>`` child of the XML root that has an ``id`` attribute,
        a non-empty text and an existing wav file. Every other entry is
        reported on stdout and skipped.
    """
    xml_file = os.path.join(root_path, 'voice_over_recordings', meta_file)
    xml_root = ET.parse(xml_file).getroot()
    wav_dir = os.path.join(root_path, 'vo_voice_quality_transformation')
    items = []
    speaker_name = "sam_accenture"
    for item in xml_root.findall('./fileid'):
        file_id = item.get('id')
        if file_id is None:
            # Without an id there is no wav file name to build; concatenating
            # None with '.wav' would abort the whole load with a TypeError.
            print(f' [!] <fileid> entry without an id attribute in {xml_file}. Skipping...')
            continue
        text = item.text
        if text is None or not text.strip():
            # An empty element yields text=None, which is not a usable transcript.
            print(f' [!] <fileid id="{file_id}"> in {xml_file} has no text. Skipping...')
            continue
        wav_file = os.path.join(wav_dir, file_id + '.wav')
        if not os.path.exists(wav_file):
            print(f' [!] {wav_file} in metafile does not exist. Skipping...')
            continue
        items.append([text, wav_file, speaker_name])
    return items
|
||||||
|
|
||||||
|
|
||||||
def ruslan(root_path, meta_file):
|
def ruslan(root_path, meta_file):
|
||||||
"""Normalizes the RUSLAN meta data file to TTS format
|
"""Normalizes the RUSLAN meta data file to TTS format
|
||||||
https://ruslan-corpus.github.io/"""
|
https://ruslan-corpus.github.io/"""
|
||||||
|
|
Loading…
Reference in New Issue