# Patch summary (free-text preamble; patch tools skip text before the first
# "diff --git" header).
#
# Files touched:
#   1. datasets/TTSDataset.py, hunk inside class MyDataset:
#      adds one line after the existing `pad_with_eos_bos(phonemes)` call,
#      rebinding `phonemes` to `np.asarray(phonemes, dtype=np.int32)` before
#      `return phonemes`.
#   2. utils/text/__init__.py, function pad_with_eos_bos:
#      the middle operand of the concatenation changes from
#      `phoneme_sequence` to `list(phoneme_sequence)`, so the function always
#      builds a plain Python list of [BOS id] + sequence items + [EOS id].
#
# NOTE(review): presumably `phoneme_sequence` can arrive as a NumPy array
# (the surrounding context mentions `cache_path`); `list + ndarray` would be
# an element-wise add rather than a concatenation, which is what the
# `list(...)` wrapper avoids. Confirm against the caller.
# NOTE(review): line breaks and indentation of this patch were lost in this
# copy, so it cannot be determined from here whether the added `np.asarray`
# line sits inside the `if self.enable_eos_bos:` branch or after it. Verify
# against the original commit before applying.
diff --git a/datasets/TTSDataset.py b/datasets/TTSDataset.py index ecf8e9ea..0305da7f 100644 --- a/datasets/TTSDataset.py +++ b/datasets/TTSDataset.py @@ -102,6 +102,7 @@ class MyDataset(Dataset): cache_path) if self.enable_eos_bos: phonemes = pad_with_eos_bos(phonemes) + phonemes = np.asarray(phonemes, dtype=np.int32) return phonemes diff --git a/utils/text/__init__.py b/utils/text/__init__.py index 332163d2..77cc23a5 100644 --- a/utils/text/__init__.py +++ b/utils/text/__init__.py @@ -47,7 +47,7 @@ def text2phone(text, language): def pad_with_eos_bos(phoneme_sequence): - return [_PHONEMES_TO_ID[_bos]] + phoneme_sequence + [_PHONEMES_TO_ID[_eos]] + return [_PHONEMES_TO_ID[_bos]] + list(phoneme_sequence) + [_PHONEMES_TO_ID[_eos]] def phoneme_to_sequence(text, cleaner_names, language, enable_eos_bos=False):