mirror of https://github.com/coqui-ai/TTS.git
60 lines
2.2 KiB
Python
60 lines
2.2 KiB
Python
# -*- coding: utf-8 -*-
|
||
'''
|
||
Defines the set of symbols used in text input to the model.
|
||
|
||
The default is a set of ASCII characters that works well for English or text that has been run
|
||
through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details.
|
||
'''
|
||
|
||
|
||
def make_symbols(characters, phonemes=None, punctuations='!\'(),-.:;? ', pad='_', eos='~', bos='^'):# pylint: disable=redefined-outer-name
|
||
''' Function to create symbols and phonemes '''
|
||
_symbols = [pad, eos, bos] + list(characters)
|
||
_phonemes = None
|
||
if phonemes is not None:
|
||
_phonemes_sorted = sorted(list(set(phonemes)))
|
||
# Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as uppercase letters):
|
||
_arpabet = ['@' + s for s in _phonemes_sorted]
|
||
# Export all symbols:
|
||
_phonemes = [pad, eos, bos] + list(_phonemes_sorted) + list(punctuations)
|
||
_symbols += _arpabet
|
||
return _symbols, _phonemes
|
||
|
||
_pad = '_'
|
||
_eos = '~'
|
||
_bos = '^'
|
||
_characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!\'(),-.:;? '
|
||
_punctuations = '!\'(),-.:;? '
|
||
# _phoneme_punctuations = '.!;:,?'
|
||
|
||
# Phonemes definition (All IPA characters)
|
||
_vowels = 'iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ'
|
||
_non_pulmonic_consonants = 'ʘɓǀɗǃʄǂɠǁʛ'
|
||
_pulmonic_consonants = 'pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ'
|
||
_suprasegmentals = 'ˈˌːˑ'
|
||
_other_symbols = 'ʍwɥʜʢʡɕʑɺɧʲ'
|
||
_diacrilics = 'ɚ˞ɫ'
|
||
_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics
|
||
|
||
symbols, phonemes = make_symbols(_characters, _phonemes, _punctuations, _pad, _eos, _bos)
|
||
|
||
# Generate ALIEN language
|
||
# from random import shuffle
|
||
# shuffle(phonemes)
|
||
|
||
|
||
def parse_symbols():
|
||
return {'pad': _pad,
|
||
'eos': _eos,
|
||
'bos': _bos,
|
||
'characters': _characters,
|
||
'punctuations': _punctuations,
|
||
'phonemes': _phonemes}
|
||
|
||
|
||
if __name__ == '__main__':
|
||
print(" > TTS symbols {}".format(len(symbols)))
|
||
print(symbols)
|
||
print(" > TTS phonemes {}".format(len(phonemes)))
|
||
print(''.join(sorted(phonemes)))
|