# -*- coding: utf-8 -*- """ Defines the set of symbols used in text input to the model. The default is a set of ASCII characters that works well for English or text that has been run through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details. """ def make_symbols( characters, phonemes=None, punctuations="!'(),-.:;? ", pad="_", eos="~", bos="^", unique=True, ): # pylint: disable=redefined-outer-name """Function to create symbols and phonemes TODO: create phonemes_to_id and symbols_to_id dicts here.""" _symbols = list(characters) _symbols = [bos] + _symbols if len(bos) > 0 and bos is not None else _symbols _symbols = [eos] + _symbols if len(bos) > 0 and eos is not None else _symbols _symbols = [pad] + _symbols if len(bos) > 0 and pad is not None else _symbols _phonemes = None if phonemes is not None: _phonemes_sorted = ( sorted(list(set(phonemes))) if unique else sorted(list(phonemes)) ) # this is to keep previous models compatible. # Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as uppercase letters): # _arpabet = ["@" + s for s in _phonemes_sorted] # Export all symbols: _phonemes = [pad, eos, bos] + list(_phonemes_sorted) + list(punctuations) # _symbols += _arpabet return _symbols, _phonemes _pad = "_" _eos = "~" _bos = "^" _characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!'(),-.:;? " _punctuations = "!'(),-.:;? " # Phonemes definition (All IPA characters) _vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ" _non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ" _pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ" _suprasegmentals = "ˈˌːˑ" _other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ" _diacrilics = "ɚ˞ɫ" _phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics symbols, phonemes = make_symbols(_characters, _phonemes, _punctuations, _pad, _eos, _bos) # Generate ALIEN language # from random import shuffle # shuffle(phonemes) def parse_symbols(): return { "pad": _pad, "eos": _eos, "bos": _bos, "characters": _characters, "punctuations": _punctuations, "phonemes": _phonemes, } if __name__ == "__main__": print(" > TTS symbols {}".format(len(symbols))) print(symbols) print(" > TTS phonemes {}".format(len(phonemes))) print("".join(sorted(phonemes)))