mirror of https://github.com/coqui-ai/TTS.git
phoneme extraction for training
This commit is contained in:
parent
656542db93
commit
9c9aea276c
|
@@ -87,7 +87,7 @@ class MyDataset(Dataset):
|
||||||
else:
|
else:
|
||||||
text, wav_file = self.items[idx]
|
text, wav_file = self.items[idx]
|
||||||
text = np.asarray(
|
text = np.asarray(
|
||||||
text_to_sequence(text, [self.cleaners]), dtype=np.int32)
|
phoneme_to_sequence(text, [self.cleaners]), dtype=np.int32)
|
||||||
wav = np.asarray(self.load_wav(wav_file), dtype=np.float32)
|
wav = np.asarray(self.load_wav(wav_file), dtype=np.float32)
|
||||||
sample = {'text': text, 'wav': wav, 'item_idx': self.items[idx][1]}
|
sample = {'text': text, 'wav': wav, 'item_idx': self.items[idx][1]}
|
||||||
return sample
|
return sample
|
||||||
|
|
|
@@ -3,7 +3,11 @@
|
||||||
import re
|
import re
|
||||||
import phonemizer
|
import phonemizer
|
||||||
from utils.text import cleaners
|
from utils.text import cleaners
|
||||||
|
<<<<<<< HEAD
|
||||||
from utils.text.symbols import symbols, phonemes, _punctuations
|
from utils.text.symbols import symbols, phonemes, _punctuations
|
||||||
|
=======
|
||||||
|
from utils.text.symbols import symbols, phonemes
|
||||||
|
>>>>>>> phonem extraction for training
|
||||||
|
|
||||||
# Mappings from symbol to numeric ID and vice versa:
|
# Mappings from symbol to numeric ID and vice versa:
|
||||||
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
|
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
|
||||||
|
@@ -67,6 +71,13 @@ def sequence_to_phoneme(sequence):
|
||||||
return result.replace('}{', ' ')
|
return result.replace('}{', ' ')
|
||||||
|
|
||||||
|
|
||||||
|
def phoneme_to_sequence(text, cleaner_names):
    '''Converts a string of text to a sequence built from its phonemes.

    The text is first passed through `_clean_text` together with
    `cleaner_names`; the cleaned result is handed to `_phonem_to_sequence`,
    and the entry stored under '~' in `_phonemes_to_id` is appended as the
    final element.

    Args:
      text: string to convert.
      cleaner_names: forwarded unchanged to `_clean_text`.

    Returns:
      A new list holding the items produced by `_phonem_to_sequence`
      followed by `_phonemes_to_id['~']`.
    '''
    cleaned_text = _clean_text(text, cleaner_names)
    phoneme_ids = list(_phonem_to_sequence(cleaned_text))
    # NOTE(review): '~' presumably marks end-of-sequence -- confirm against
    # the symbol table this module imports.
    phoneme_ids.append(_phonemes_to_id['~'])
    return phoneme_ids
|
||||||
|
|
||||||
|
|
||||||
def text_to_sequence(text, cleaner_names):
|
def text_to_sequence(text, cleaner_names):
|
||||||
'''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
|
'''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
|
||||||
|
|
||||||
|
|
|
@@ -1,6 +1,7 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import phonemizer
|
||||||
|
|
||||||
# valid_symbols = [
|
# valid_symbols = [
|
||||||
# 'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1',
|
# 'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1',
|
||||||
|
|
Loading…
Reference in New Issue