mirror of https://github.com/coqui-ai/TTS.git
Turkish cleaner and data preprocessor
This commit is contained in:
parent
fd4e6d0245
commit
fa795347a9
|
@ -187,3 +187,21 @@ def libri_tts(root_path, meta_files=None):
|
||||||
for item in items:
|
for item in items:
|
||||||
assert os.path.exists(item[1]), f" [!] wav file is not exist - {item[1]}"
|
assert os.path.exists(item[1]), f" [!] wav file is not exist - {item[1]}"
|
||||||
return items
|
return items
|
||||||
|
|
||||||
|
|
||||||
|
def custom_turkish(root_path, meta_file):
    """Load a single-speaker Turkish dataset from a pipe-separated metadata file.

    Each non-blank metadata line is expected to look like
    ``<file id>|<text>[|...]``. The audio for a line is looked up at
    ``<root_path>/wavs/<file id>.wav``.

    Args:
        root_path: Dataset root directory containing ``meta_file`` and ``wavs/``.
        meta_file: Metadata file name, relative to ``root_path``.

    Returns:
        A list of ``[text, wav_file, speaker_name]`` entries, in file order,
        one for every line whose wav file exists and that has a text column.
    """
    txt_file = os.path.join(root_path, meta_file)
    wav_dir = os.path.join(root_path, 'wavs')
    speaker_name = "turkish-female"
    items = []
    skipped_files = []
    malformed_lines = []
    with open(txt_file, 'r', encoding='utf-8') as ttf:
        for line_no, line in enumerate(ttf, start=1):
            if not line.strip():
                # A blank line (e.g. trailing newline) is not a missing wav file.
                continue
            cols = line.split('|')
            wav_file = os.path.join(wav_dir, cols[0].strip() + '.wav')
            if not os.path.exists(wav_file):
                skipped_files.append(wav_file)
                continue
            if len(cols) < 2:
                # The wav exists but the line has no text column; record the
                # line instead of failing with IndexError on cols[1].
                malformed_lines.append(line_no)
                continue
            items.append([cols[1].strip(), wav_file, speaker_name])
    print(f" [!] {len(skipped_files)} files skipped. They are not exist...")
    if malformed_lines:
        print(f" [!] {len(malformed_lines)} lines skipped. They have no text column - lines {malformed_lines}")
    return items
|
||||||
|
|
|
@ -91,6 +91,14 @@ def transliteration_cleaners(text):
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: elaborate it
|
||||||
|
def basic_turkish_cleaners(text):
    '''Pipeline for Turkish text.

    Maps the Turkish capital letters "I" and "İ" to their Turkish lowercase
    forms, then passes the text through the module's `lowercase` and
    `collapse_whitespace` helpers (defined elsewhere in this module).

    Args:
        text: Input string to clean.

    Returns:
        The cleaned string.
    '''
    # Turkish treats dotless and dotted i as distinct letters: "I" lowercases
    # to "ı" and "İ" lowercases to "i". Both are mapped by hand first, because
    # Python's str.lower() maps "I" to "i" and "İ" to "i" followed by the
    # combining dot U+0307.
    text = text.replace("I", "ı").replace("İ", "i")
    text = lowercase(text)
    text = collapse_whitespace(text)
    # Every cleaner pipeline must hand the processed text back to its caller.
    return text
|
||||||
|
|
||||||
|
|
||||||
def english_cleaners(text):
|
def english_cleaners(text):
|
||||||
'''Pipeline for English text, including number and abbreviation expansion.'''
|
'''Pipeline for English text, including number and abbreviation expansion.'''
|
||||||
text = convert_to_ascii(text)
|
text = convert_to_ascii(text)
|
||||||
|
|
Loading…
Reference in New Issue