mirror of https://github.com/coqui-ai/TTS.git
rename text to characters in config.json
This commit is contained in:
parent
4e53896438
commit
36235c5e3f
|
@ -28,7 +28,7 @@
|
||||||
},
|
},
|
||||||
|
|
||||||
// VOCABULARY PARAMETERS
|
// VOCABULARY PARAMETERS
|
||||||
"text":{
|
"characters":{
|
||||||
"pad": "_",
|
"pad": "_",
|
||||||
"eos": "~",
|
"eos": "~",
|
||||||
"bos": "^",
|
"bos": "^",
|
||||||
|
|
|
@ -143,8 +143,8 @@
|
||||||
" speaker_id = None\n",
|
" speaker_id = None\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# if the vocabulary was passed, replace the default\n",
|
"# if the vocabulary was passed, replace the default\n",
|
||||||
"if 'text' in CONFIG.keys():\n",
|
"if 'characters' in CONFIG.keys():\n",
|
||||||
" symbols, phonemes = make_symbols(**CONFIG.text)\n",
|
" symbols, phonemes = make_symbols(**CONFIG.characters)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# load the model\n",
|
"# load the model\n",
|
||||||
"num_chars = len(phonemes) if CONFIG.use_phonemes else len(symbols)\n",
|
"num_chars = len(phonemes) if CONFIG.use_phonemes else len(symbols)\n",
|
||||||
|
|
|
@ -150,8 +150,8 @@
|
||||||
" speaker_id = None\n",
|
" speaker_id = None\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# if the vocabulary was passed, replace the default\n",
|
"# if the vocabulary was passed, replace the default\n",
|
||||||
"if 'text' in CONFIG.keys():\n",
|
"if 'characters' in CONFIG.keys():\n",
|
||||||
" symbols, phonemes = make_symbols(**CONFIG.text)\n",
|
" symbols, phonemes = make_symbols(**CONFIG.characters)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# load the model\n",
|
"# load the model\n",
|
||||||
"num_chars = len(phonemes) if CONFIG.use_phonemes else len(symbols)\n",
|
"num_chars = len(phonemes) if CONFIG.use_phonemes else len(symbols)\n",
|
||||||
|
|
|
@ -95,8 +95,8 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# if the vocabulary was passed, replace the default\n",
|
"# if the vocabulary was passed, replace the default\n",
|
||||||
"if 'text' in C.keys():\n",
|
"if 'characters' in C.keys():\n",
|
||||||
" symbols, phonemes = make_symbols(**C.text)\n",
|
" symbols, phonemes = make_symbols(**C.characters)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# load the model\n",
|
"# load the model\n",
|
||||||
"num_chars = len(phonemes) if C.use_phonemes else len(symbols)\n",
|
"num_chars = len(phonemes) if C.use_phonemes else len(symbols)\n",
|
||||||
|
@ -120,7 +120,7 @@
|
||||||
"preprocessor = importlib.import_module('TTS.datasets.preprocess')\n",
|
"preprocessor = importlib.import_module('TTS.datasets.preprocess')\n",
|
||||||
"preprocessor = getattr(preprocessor, DATASET.lower())\n",
|
"preprocessor = getattr(preprocessor, DATASET.lower())\n",
|
||||||
"meta_data = preprocessor(DATA_PATH,METADATA_FILE)\n",
|
"meta_data = preprocessor(DATA_PATH,METADATA_FILE)\n",
|
||||||
"dataset = MyDataset(checkpoint['r'], C.text_cleaner, ap, meta_data,tp=C.text if 'text' in C.keys() else None, use_phonemes=C.use_phonemes, phoneme_cache_path=C.phoneme_cache_path, enable_eos_bos=C.enable_eos_bos_chars)\n",
|
"dataset = MyDataset(checkpoint['r'], C.text_cleaner, ap, meta_data,tp=C.characters if 'characters' in C.keys() else None, use_phonemes=C.use_phonemes, phoneme_cache_path=C.phoneme_cache_path, enable_eos_bos=C.enable_eos_bos_chars)\n",
|
||||||
"loader = DataLoader(dataset, batch_size=BATCH_SIZE, num_workers=4, collate_fn=dataset.collate_fn, shuffle=False, drop_last=False)"
|
"loader = DataLoader(dataset, batch_size=BATCH_SIZE, num_workers=4, collate_fn=dataset.collate_fn, shuffle=False, drop_last=False)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
|
@ -111,8 +111,8 @@
|
||||||
" speaker_id = None\n",
|
" speaker_id = None\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# if the vocabulary was passed, replace the default\n",
|
"# if the vocabulary was passed, replace the default\n",
|
||||||
"if 'text' in CONFIG.keys():\n",
|
"if 'characters' in CONFIG.keys():\n",
|
||||||
" symbols, phonemes = make_symbols(**CONFIG.text)\n",
|
" symbols, phonemes = make_symbols(**CONFIG.characters)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# load the model\n",
|
"# load the model\n",
|
||||||
"num_chars = len(phonemes) if CONFIG.use_phonemes else len(symbols)\n",
|
"num_chars = len(phonemes) if CONFIG.use_phonemes else len(symbols)\n",
|
||||||
|
|
|
@ -52,8 +52,8 @@ class Synthesizer(object):
|
||||||
self.use_phonemes = self.tts_config.use_phonemes
|
self.use_phonemes = self.tts_config.use_phonemes
|
||||||
self.ap = AudioProcessor(**self.tts_config.audio)
|
self.ap = AudioProcessor(**self.tts_config.audio)
|
||||||
|
|
||||||
if 'text' in self.tts_config.keys():
|
if 'characters' in self.tts_config.keys():
|
||||||
symbols, phonemes = make_symbols(**self.tts_config.text)
|
symbols, phonemes = make_symbols(**self.tts_config.characters)
|
||||||
|
|
||||||
if self.use_phonemes:
|
if self.use_phonemes:
|
||||||
self.input_size = len(phonemes)
|
self.input_size = len(phonemes)
|
||||||
|
|
|
@ -108,8 +108,8 @@ if __name__ == "__main__":
|
||||||
ap = AudioProcessor(**C.audio)
|
ap = AudioProcessor(**C.audio)
|
||||||
|
|
||||||
# if the vocabulary was passed, replace the default
|
# if the vocabulary was passed, replace the default
|
||||||
if 'text' in C.keys():
|
if 'characters' in C.keys():
|
||||||
symbols, phonemes = make_symbols(**C.text)
|
symbols, phonemes = make_symbols(**C.characters)
|
||||||
|
|
||||||
# load speakers
|
# load speakers
|
||||||
if args.speakers_json != '':
|
if args.speakers_json != '':
|
||||||
|
|
|
@ -15,8 +15,8 @@ class DemoServerTest(unittest.TestCase):
|
||||||
# pylint: disable=global-statement
|
# pylint: disable=global-statement
|
||||||
global symbols, phonemes
|
global symbols, phonemes
|
||||||
config = load_config(os.path.join(get_tests_output_path(), 'dummy_model_config.json'))
|
config = load_config(os.path.join(get_tests_output_path(), 'dummy_model_config.json'))
|
||||||
if 'text' in config.keys():
|
if 'characters' in config.keys():
|
||||||
symbols, phonemes = make_symbols(**config.text)
|
symbols, phonemes = make_symbols(**config.characters)
|
||||||
|
|
||||||
num_chars = len(phonemes) if config.use_phonemes else len(symbols)
|
num_chars = len(phonemes) if config.use_phonemes else len(symbols)
|
||||||
model = setup_model(num_chars, 0, config)
|
model = setup_model(num_chars, 0, config)
|
||||||
|
|
|
@ -38,7 +38,7 @@ class TestTTSDataset(unittest.TestCase):
|
||||||
c.text_cleaner,
|
c.text_cleaner,
|
||||||
ap=self.ap,
|
ap=self.ap,
|
||||||
meta_data=items,
|
meta_data=items,
|
||||||
tp=c.text if 'text' in c.keys() else None,
|
tp=c.characters if 'characters' in c.keys() else None,
|
||||||
batch_group_size=bgs,
|
batch_group_size=bgs,
|
||||||
min_seq_len=c.min_seq_len,
|
min_seq_len=c.min_seq_len,
|
||||||
max_seq_len=float("inf"),
|
max_seq_len=float("inf"),
|
||||||
|
|
6
train.py
6
train.py
|
@ -49,7 +49,7 @@ def setup_loader(ap, r, is_val=False, verbose=False):
|
||||||
c.text_cleaner,
|
c.text_cleaner,
|
||||||
meta_data=meta_data_eval if is_val else meta_data_train,
|
meta_data=meta_data_eval if is_val else meta_data_train,
|
||||||
ap=ap,
|
ap=ap,
|
||||||
tp=c.text if 'text' in c.keys() else None,
|
tp=c.characters if 'characters' in c.keys() else None,
|
||||||
batch_group_size=0 if is_val else c.batch_group_size *
|
batch_group_size=0 if is_val else c.batch_group_size *
|
||||||
c.batch_size,
|
c.batch_size,
|
||||||
min_seq_len=c.min_seq_len,
|
min_seq_len=c.min_seq_len,
|
||||||
|
@ -520,8 +520,8 @@ def main(args): # pylint: disable=redefined-outer-name
|
||||||
global meta_data_train, meta_data_eval, symbols, phonemes
|
global meta_data_train, meta_data_eval, symbols, phonemes
|
||||||
# Audio processor
|
# Audio processor
|
||||||
ap = AudioProcessor(**c.audio)
|
ap = AudioProcessor(**c.audio)
|
||||||
if 'text' in c.keys():
|
if 'characters' in c.keys():
|
||||||
symbols, phonemes = make_symbols(**c.text)
|
symbols, phonemes = make_symbols(**c.characters)
|
||||||
|
|
||||||
# DISTRUBUTED
|
# DISTRUBUTED
|
||||||
if num_gpus > 1:
|
if num_gpus > 1:
|
||||||
|
|
|
@ -426,13 +426,13 @@ def check_config(c):
|
||||||
_check_argument('griffin_lim_iters', c['audio'], restricted=True, val_type=int, min_val=10, max_val=1000)
|
_check_argument('griffin_lim_iters', c['audio'], restricted=True, val_type=int, min_val=10, max_val=1000)
|
||||||
|
|
||||||
# vocabulary parameters
|
# vocabulary parameters
|
||||||
_check_argument('text', c, restricted=False, val_type=dict)
|
_check_argument('characters', c, restricted=False, val_type=dict)
|
||||||
_check_argument('pad', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str)
|
_check_argument('pad', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str)
|
||||||
_check_argument('eos', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str)
|
_check_argument('eos', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str)
|
||||||
_check_argument('bos', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str)
|
_check_argument('bos', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str)
|
||||||
_check_argument('characters', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str)
|
_check_argument('characters', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str)
|
||||||
_check_argument('phonemes', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str)
|
_check_argument('phonemes', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str)
|
||||||
_check_argument('punctuations', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str)
|
_check_argument('punctuations', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str)
|
||||||
|
|
||||||
# normalization parameters
|
# normalization parameters
|
||||||
_check_argument('signal_norm', c['audio'], restricted=True, val_type=bool)
|
_check_argument('signal_norm', c['audio'], restricted=True, val_type=bool)
|
||||||
|
|
|
@ -10,10 +10,10 @@ def text_to_seqvec(text, CONFIG, use_cuda):
|
||||||
seq = np.asarray(
|
seq = np.asarray(
|
||||||
phoneme_to_sequence(text, text_cleaner, CONFIG.phoneme_language,
|
phoneme_to_sequence(text, text_cleaner, CONFIG.phoneme_language,
|
||||||
CONFIG.enable_eos_bos_chars,
|
CONFIG.enable_eos_bos_chars,
|
||||||
tp=CONFIG.text if 'text' in CONFIG.keys() else None),
|
tp=CONFIG.characters if 'characters' in CONFIG.keys() else None),
|
||||||
dtype=np.int32)
|
dtype=np.int32)
|
||||||
else:
|
else:
|
||||||
seq = np.asarray(text_to_sequence(text, text_cleaner, tp=CONFIG.text if 'text' in CONFIG.keys() else None), dtype=np.int32)
|
seq = np.asarray(text_to_sequence(text, text_cleaner, tp=CONFIG.characters if 'characters' in CONFIG.keys() else None), dtype=np.int32)
|
||||||
# torch tensor
|
# torch tensor
|
||||||
chars_var = torch.from_numpy(seq).unsqueeze(0)
|
chars_var = torch.from_numpy(seq).unsqueeze(0)
|
||||||
if use_cuda:
|
if use_cuda:
|
||||||
|
|
|
@ -54,8 +54,8 @@ def visualize(alignment, spectrogram_postnet, stop_tokens, text, hop_length, CON
|
||||||
plt.xlabel("Decoder timestamp", fontsize=label_fontsize)
|
plt.xlabel("Decoder timestamp", fontsize=label_fontsize)
|
||||||
plt.ylabel("Encoder timestamp", fontsize=label_fontsize)
|
plt.ylabel("Encoder timestamp", fontsize=label_fontsize)
|
||||||
if CONFIG.use_phonemes:
|
if CONFIG.use_phonemes:
|
||||||
seq = phoneme_to_sequence(text, [CONFIG.text_cleaner], CONFIG.phoneme_language, CONFIG.enable_eos_bos_chars, tp=CONFIG.text if 'text' in CONFIG.keys() else None)
|
seq = phoneme_to_sequence(text, [CONFIG.text_cleaner], CONFIG.phoneme_language, CONFIG.enable_eos_bos_chars, tp=CONFIG.characters if 'characters' in CONFIG.keys() else None)
|
||||||
text = sequence_to_phoneme(seq, tp=CONFIG.text if 'text' in CONFIG.keys() else None)
|
text = sequence_to_phoneme(seq, tp=CONFIG.characters if 'characters' in CONFIG.keys() else None)
|
||||||
print(text)
|
print(text)
|
||||||
|
|
||||||
plt.yticks(range(len(text)), list(text))
|
plt.yticks(range(len(text)), list(text))
|
||||||
|
|
Loading…
Reference in New Issue