Review notes (text before the first "diff --git" header is not part of the patch):
- The original import hunk for utils/text/__init__.py (@@ -3,7 +3,11 @@) added
  only unresolved merge-conflict markers (<<<<<<< HEAD / ======= / >>>>>>>) and
  a duplicate symbols import. It has been dropped, so HEAD's import line
  (including _punctuations) stays as-is; the remaining hunk's new-file start
  moves from 71 to 67 and its "index" line is omitted because the blob hash no
  longer matches.
- A docstring was added to phoneme_to_sequence (new-side count 13 -> 15).
- Leading indentation and blank context lines were reconstructed from a
  whitespace-collapsed copy of this patch; verify them against the working
  tree (or apply with whitespace-tolerant options) before relying on it.
- TODO confirm: datasets/TTSDataset.py imports phoneme_to_sequence, and
  _phonem_to_sequence / _phonemes_to_id are defined in utils/text/__init__.py;
  neither is shown in this patch.

diff --git a/datasets/TTSDataset.py b/datasets/TTSDataset.py
index d6282de3..36dde917 100644
--- a/datasets/TTSDataset.py
+++ b/datasets/TTSDataset.py
@@ -87,7 +87,7 @@ class MyDataset(Dataset):
         else:
             text, wav_file = self.items[idx]
             text = np.asarray(
-                text_to_sequence(text, [self.cleaners]), dtype=np.int32)
+                phoneme_to_sequence(text, [self.cleaners]), dtype=np.int32)
             wav = np.asarray(self.load_wav(wav_file), dtype=np.float32)
         sample = {'text': text, 'wav': wav, 'item_idx': self.items[idx][1]}
         return sample
diff --git a/utils/text/__init__.py b/utils/text/__init__.py
--- a/utils/text/__init__.py
+++ b/utils/text/__init__.py
@@ -67,6 +67,15 @@ def sequence_to_phoneme(sequence):
     return result.replace('}{', ' ')
 
 
+def phoneme_to_sequence(text, cleaner_names):
+    '''Cleans `text` with `cleaner_names`, maps it through `_phonem_to_sequence`,
+    and appends the ID stored under '~' in `_phonemes_to_id`.'''
+    sequence = []
+    sequence += _phonem_to_sequence(_clean_text(text, cleaner_names))
+    sequence.append(_phonemes_to_id['~'])
+    return sequence
+
+
 def text_to_sequence(text, cleaner_names):
     '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
 
diff --git a/utils/text/cmudict.py b/utils/text/cmudict.py
index 1202bf3d..dbe32af4 100644
--- a/utils/text/cmudict.py
+++ b/utils/text/cmudict.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 
 import re
+import phonemizer
 
 # valid_symbols = [
 #     'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1',