mirror of https://github.com/coqui-ai/TTS.git

rename text to characters in config.json

This commit is contained in:
parent 4e53896438
commit 36235c5e3f
@@ -28,7 +28,7 @@
     },

     // VOCABULARY PARAMETERS
-    "text":{
+    "characters":{
         "pad": "_",
         "eos": "~",
         "bos": "^",
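For readers updating their own configs, here is a minimal sketch of the renamed vocabulary block, written as a Python dict. Only `pad`, `eos`, and `bos` are taken from the hunk above; the `characters`, `phonemes`, and `punctuations` keys are inferred from the `check_config` hunk further down, and their values here are illustrative placeholders rather than the repository's defaults.

```python
# Illustrative sketch only: the config key is now "characters" (was "text").
# pad/eos/bos come from the diff above; the remaining values are placeholders.
characters_config = {
    "characters": {
        "pad": "_",
        "eos": "~",
        "bos": "^",
        "characters": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!'(),-.:;? ",
        "punctuations": "!'(),-.:;? ",
        "phonemes": "iy\u0268\u0289\u026fu",  # placeholder phoneme set, not the real default
    }
}
```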
@@ -143,8 +143,8 @@
 " speaker_id = None\n",
 "\n",
 "# if the vocabulary was passed, replace the default\n",
-"if 'text' in CONFIG.keys():\n",
-" symbols, phonemes = make_symbols(**CONFIG.text)\n",
+"if 'characters' in CONFIG.keys():\n",
+" symbols, phonemes = make_symbols(**CONFIG.characters)\n",
 "\n",
 "# load the model\n",
 "num_chars = len(phonemes) if CONFIG.use_phonemes else len(symbols)\n",
@@ -150,8 +150,8 @@
 " speaker_id = None\n",
 "\n",
 "# if the vocabulary was passed, replace the default\n",
-"if 'text' in CONFIG.keys():\n",
-" symbols, phonemes = make_symbols(**CONFIG.text)\n",
+"if 'characters' in CONFIG.keys():\n",
+" symbols, phonemes = make_symbols(**CONFIG.characters)\n",
 "\n",
 "# load the model\n",
 "num_chars = len(phonemes) if CONFIG.use_phonemes else len(symbols)\n",
@@ -95,8 +95,8 @@
 "outputs": [],
 "source": [
 "# if the vocabulary was passed, replace the default\n",
-"if 'text' in C.keys():\n",
-" symbols, phonemes = make_symbols(**C.text)\n",
+"if 'characters' in C.keys():\n",
+" symbols, phonemes = make_symbols(**C.characters)\n",
 "\n",
 "# load the model\n",
 "num_chars = len(phonemes) if C.use_phonemes else len(symbols)\n",
@@ -120,7 +120,7 @@
 "preprocessor = importlib.import_module('TTS.datasets.preprocess')\n",
 "preprocessor = getattr(preprocessor, DATASET.lower())\n",
 "meta_data = preprocessor(DATA_PATH,METADATA_FILE)\n",
-"dataset = MyDataset(checkpoint['r'], C.text_cleaner, ap, meta_data,tp=C.text if 'text' in C.keys() else None, use_phonemes=C.use_phonemes, phoneme_cache_path=C.phoneme_cache_path, enable_eos_bos=C.enable_eos_bos_chars)\n",
+"dataset = MyDataset(checkpoint['r'], C.text_cleaner, ap, meta_data,tp=C.characters if 'characters' in C.keys() else None, use_phonemes=C.use_phonemes, phoneme_cache_path=C.phoneme_cache_path, enable_eos_bos=C.enable_eos_bos_chars)\n",
 "loader = DataLoader(dataset, batch_size=BATCH_SIZE, num_workers=4, collate_fn=dataset.collate_fn, shuffle=False, drop_last=False)"
 ]
 },
@@ -111,8 +111,8 @@
 " speaker_id = None\n",
 "\n",
 "# if the vocabulary was passed, replace the default\n",
-"if 'text' in CONFIG.keys():\n",
-" symbols, phonemes = make_symbols(**CONFIG.text)\n",
+"if 'characters' in CONFIG.keys():\n",
+" symbols, phonemes = make_symbols(**CONFIG.characters)\n",
 "\n",
 "# load the model\n",
 "num_chars = len(phonemes) if CONFIG.use_phonemes else len(symbols)\n",
@@ -52,8 +52,8 @@ class Synthesizer(object):
         self.use_phonemes = self.tts_config.use_phonemes
         self.ap = AudioProcessor(**self.tts_config.audio)

-        if 'text' in self.tts_config.keys():
-            symbols, phonemes = make_symbols(**self.tts_config.text)
+        if 'characters' in self.tts_config.keys():
+            symbols, phonemes = make_symbols(**self.tts_config.characters)

         if self.use_phonemes:
             self.input_size = len(phonemes)
@@ -108,8 +108,8 @@ if __name__ == "__main__":
     ap = AudioProcessor(**C.audio)

     # if the vocabulary was passed, replace the default
-    if 'text' in C.keys():
-        symbols, phonemes = make_symbols(**C.text)
+    if 'characters' in C.keys():
+        symbols, phonemes = make_symbols(**C.characters)

     # load speakers
     if args.speakers_json != '':
@@ -15,8 +15,8 @@ class DemoServerTest(unittest.TestCase):
         # pylint: disable=global-statement
         global symbols, phonemes
         config = load_config(os.path.join(get_tests_output_path(), 'dummy_model_config.json'))
-        if 'text' in config.keys():
-            symbols, phonemes = make_symbols(**config.text)
+        if 'characters' in config.keys():
+            symbols, phonemes = make_symbols(**config.characters)

         num_chars = len(phonemes) if config.use_phonemes else len(symbols)
         model = setup_model(num_chars, 0, config)
@@ -38,7 +38,7 @@ class TestTTSDataset(unittest.TestCase):
             c.text_cleaner,
             ap=self.ap,
             meta_data=items,
-            tp=c.text if 'text' in c.keys() else None,
+            tp=c.characters if 'characters' in c.keys() else None,
             batch_group_size=bgs,
             min_seq_len=c.min_seq_len,
             max_seq_len=float("inf"),
train.py
@@ -49,7 +49,7 @@ def setup_loader(ap, r, is_val=False, verbose=False):
             c.text_cleaner,
             meta_data=meta_data_eval if is_val else meta_data_train,
             ap=ap,
-            tp=c.text if 'text' in c.keys() else None,
+            tp=c.characters if 'characters' in c.keys() else None,
             batch_group_size=0 if is_val else c.batch_group_size *
             c.batch_size,
             min_seq_len=c.min_seq_len,
@@ -520,8 +520,8 @@ def main(args): # pylint: disable=redefined-outer-name
     global meta_data_train, meta_data_eval, symbols, phonemes
     # Audio processor
     ap = AudioProcessor(**c.audio)
-    if 'text' in c.keys():
-        symbols, phonemes = make_symbols(**c.text)
+    if 'characters' in c.keys():
+        symbols, phonemes = make_symbols(**c.characters)

     # DISTRUBUTED
     if num_gpus > 1:
@@ -426,13 +426,13 @@ def check_config(c):
     _check_argument('griffin_lim_iters', c['audio'], restricted=True, val_type=int, min_val=10, max_val=1000)

     # vocabulary parameters
-    _check_argument('text', c, restricted=False, val_type=dict)
-    _check_argument('pad', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str)
-    _check_argument('eos', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str)
-    _check_argument('bos', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str)
-    _check_argument('characters', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str)
-    _check_argument('phonemes', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str)
-    _check_argument('punctuations', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str)
+    _check_argument('characters', c, restricted=False, val_type=dict)
+    _check_argument('pad', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str)
+    _check_argument('eos', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str)
+    _check_argument('bos', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str)
+    _check_argument('characters', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str)
+    _check_argument('phonemes', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str)
+    _check_argument('punctuations', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str)

     # normalization parameters
     _check_argument('signal_norm', c['audio'], restricted=True, val_type=bool)
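Pulling the scattered hunks together, the consumption side after the rename looks roughly like the sketch below. This is not the repository's exact code: the import paths are assumptions, and only the calls themselves (`load_config`, `make_symbols(**config.characters)`, the `num_chars` expression, and `setup_model(num_chars, 0, config)`) are taken from the hunks above.

```python
# Sketch assembled from the hunks above; import paths are assumed, not verified.
from TTS.utils.io import load_config
from TTS.utils.generic_utils import setup_model
from TTS.utils.text.symbols import make_symbols, phonemes, symbols

config = load_config("config.json")

# If a custom vocabulary was defined, replace the default symbol set.
# Note the lookup key is now 'characters', not 'text'.
if 'characters' in config.keys():
    symbols, phonemes = make_symbols(**config.characters)

num_chars = len(phonemes) if config.use_phonemes else len(symbols)
model = setup_model(num_chars, 0, config)  # (num_chars, num_speakers, config), as in the test hunk
```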
@@ -10,10 +10,10 @@ def text_to_seqvec(text, CONFIG, use_cuda):
         seq = np.asarray(
             phoneme_to_sequence(text, text_cleaner, CONFIG.phoneme_language,
                                 CONFIG.enable_eos_bos_chars,
-                                tp=CONFIG.text if 'text' in CONFIG.keys() else None),
+                                tp=CONFIG.characters if 'characters' in CONFIG.keys() else None),
             dtype=np.int32)
     else:
-        seq = np.asarray(text_to_sequence(text, text_cleaner, tp=CONFIG.text if 'text' in CONFIG.keys() else None), dtype=np.int32)
+        seq = np.asarray(text_to_sequence(text, text_cleaner, tp=CONFIG.characters if 'characters' in CONFIG.keys() else None), dtype=np.int32)
     # torch tensor
     chars_var = torch.from_numpy(seq).unsqueeze(0)
     if use_cuda:
@@ -54,8 +54,8 @@ def visualize(alignment, spectrogram_postnet, stop_tokens, text, hop_length, CON
     plt.xlabel("Decoder timestamp", fontsize=label_fontsize)
     plt.ylabel("Encoder timestamp", fontsize=label_fontsize)
     if CONFIG.use_phonemes:
-        seq = phoneme_to_sequence(text, [CONFIG.text_cleaner], CONFIG.phoneme_language, CONFIG.enable_eos_bos_chars, tp=CONFIG.text if 'text' in CONFIG.keys() else None)
-        text = sequence_to_phoneme(seq, tp=CONFIG.text if 'text' in CONFIG.keys() else None)
+        seq = phoneme_to_sequence(text, [CONFIG.text_cleaner], CONFIG.phoneme_language, CONFIG.enable_eos_bos_chars, tp=CONFIG.characters if 'characters' in CONFIG.keys() else None)
+        text = sequence_to_phoneme(seq, tp=CONFIG.characters if 'characters' in CONFIG.keys() else None)
         print(text)

     plt.yticks(range(len(text)), list(text))