From 36235c5e3fc0f47c56253a99941fc769d744469d Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Tue, 3 Mar 2020 09:17:56 -0300 Subject: [PATCH] rename text to characters in config.json --- config.json | 2 +- notebooks/Benchmark-PWGAN.ipynb | 4 ++-- notebooks/Benchmark.ipynb | 4 ++-- notebooks/ExtractTTSpectrogram.ipynb | 6 +++--- notebooks/TestAttention.ipynb | 4 ++-- server/synthesizer.py | 4 ++-- synthesize.py | 4 ++-- tests/test_demo_server.py | 4 ++-- tests/test_loader.py | 2 +- train.py | 6 +++--- utils/generic_utils.py | 14 +++++++------- utils/synthesis.py | 4 ++-- utils/visual.py | 4 ++-- 13 files changed, 31 insertions(+), 31 deletions(-) diff --git a/config.json b/config.json index 2a7c4551..3722de9d 100644 --- a/config.json +++ b/config.json @@ -28,7 +28,7 @@ }, // VOCABULARY PARAMETERS - "text":{ + "characters":{ "pad": "_", "eos": "~", "bos": "^", diff --git a/notebooks/Benchmark-PWGAN.ipynb b/notebooks/Benchmark-PWGAN.ipynb index 19a1a79c..840da10e 100644 --- a/notebooks/Benchmark-PWGAN.ipynb +++ b/notebooks/Benchmark-PWGAN.ipynb @@ -143,8 +143,8 @@ " speaker_id = None\n", "\n", "# if the vocabulary was passed, replace the default\n", - "if 'text' in CONFIG.keys():\n", - " symbols, phonemes = make_symbols(**CONFIG.text)\n", + "if 'characters' in CONFIG.keys():\n", + " symbols, phonemes = make_symbols(**CONFIG.characters)\n", "\n", "# load the model\n", "num_chars = len(phonemes) if CONFIG.use_phonemes else len(symbols)\n", diff --git a/notebooks/Benchmark.ipynb b/notebooks/Benchmark.ipynb index bf6f2774..7d3a45cf 100644 --- a/notebooks/Benchmark.ipynb +++ b/notebooks/Benchmark.ipynb @@ -150,8 +150,8 @@ " speaker_id = None\n", "\n", "# if the vocabulary was passed, replace the default\n", - "if 'text' in CONFIG.keys():\n", - " symbols, phonemes = make_symbols(**CONFIG.text)\n", + "if 'characters' in CONFIG.keys():\n", + " symbols, phonemes = make_symbols(**CONFIG.characters)\n", "\n", "# load the model\n", "num_chars = len(phonemes) if CONFIG.use_phonemes else len(symbols)\n", diff --git a/notebooks/ExtractTTSpectrogram.ipynb b/notebooks/ExtractTTSpectrogram.ipynb index 2313e47e..b5a88611 100644 --- a/notebooks/ExtractTTSpectrogram.ipynb +++ b/notebooks/ExtractTTSpectrogram.ipynb @@ -95,8 +95,8 @@ "outputs": [], "source": [ "# if the vocabulary was passed, replace the default\n", - "if 'text' in C.keys():\n", - " symbols, phonemes = make_symbols(**C.text)\n", + "if 'characters' in C.keys():\n", + " symbols, phonemes = make_symbols(**C.characters)\n", "\n", "# load the model\n", "num_chars = len(phonemes) if C.use_phonemes else len(symbols)\n", @@ -120,7 +120,7 @@ "preprocessor = importlib.import_module('TTS.datasets.preprocess')\n", "preprocessor = getattr(preprocessor, DATASET.lower())\n", "meta_data = preprocessor(DATA_PATH,METADATA_FILE)\n", - "dataset = MyDataset(checkpoint['r'], C.text_cleaner, ap, meta_data,tp=C.text if 'text' in C.keys() else None, use_phonemes=C.use_phonemes, phoneme_cache_path=C.phoneme_cache_path, enable_eos_bos=C.enable_eos_bos_chars)\n", + "dataset = MyDataset(checkpoint['r'], C.text_cleaner, ap, meta_data,tp=C.characters if 'characters' in C.keys() else None, use_phonemes=C.use_phonemes, phoneme_cache_path=C.phoneme_cache_path, enable_eos_bos=C.enable_eos_bos_chars)\n", "loader = DataLoader(dataset, batch_size=BATCH_SIZE, num_workers=4, collate_fn=dataset.collate_fn, shuffle=False, drop_last=False)" ] }, diff --git a/notebooks/TestAttention.ipynb b/notebooks/TestAttention.ipynb index b0599d80..9d3e5e75 100644 --- a/notebooks/TestAttention.ipynb +++ b/notebooks/TestAttention.ipynb @@ -111,8 +111,8 @@ " speaker_id = None\n", "\n", "# if the vocabulary was passed, replace the default\n", - "if 'text' in CONFIG.keys():\n", - " symbols, phonemes = make_symbols(**CONFIG.text)\n", + "if 'characters' in CONFIG.keys():\n", + " symbols, phonemes = make_symbols(**CONFIG.characters)\n", "\n", "# load the model\n", "num_chars = len(phonemes) if CONFIG.use_phonemes else len(symbols)\n", diff --git a/server/synthesizer.py b/server/synthesizer.py index f0921513..f73b73fc 100644 --- a/server/synthesizer.py +++ b/server/synthesizer.py @@ -52,8 +52,8 @@ class Synthesizer(object): self.use_phonemes = self.tts_config.use_phonemes self.ap = AudioProcessor(**self.tts_config.audio) - if 'text' in self.tts_config.keys(): - symbols, phonemes = make_symbols(**self.tts_config.text) + if 'characters' in self.tts_config.keys(): + symbols, phonemes = make_symbols(**self.tts_config.characters) if self.use_phonemes: self.input_size = len(phonemes) diff --git a/synthesize.py b/synthesize.py index 6f3a235f..1f1ce36f 100644 --- a/synthesize.py +++ b/synthesize.py @@ -108,8 +108,8 @@ if __name__ == "__main__": ap = AudioProcessor(**C.audio) # if the vocabulary was passed, replace the default - if 'text' in C.keys(): - symbols, phonemes = make_symbols(**C.text) + if 'characters' in C.keys(): + symbols, phonemes = make_symbols(**C.characters) # load speakers if args.speakers_json != '': diff --git a/tests/test_demo_server.py b/tests/test_demo_server.py index 36848942..a0837686 100644 --- a/tests/test_demo_server.py +++ b/tests/test_demo_server.py @@ -15,8 +15,8 @@ class DemoServerTest(unittest.TestCase): # pylint: disable=global-statement global symbols, phonemes config = load_config(os.path.join(get_tests_output_path(), 'dummy_model_config.json')) - if 'text' in config.keys(): - symbols, phonemes = make_symbols(**config.text) + if 'characters' in config.keys(): + symbols, phonemes = make_symbols(**config.characters) num_chars = len(phonemes) if config.use_phonemes else len(symbols) model = setup_model(num_chars, 0, config) diff --git a/tests/test_loader.py b/tests/test_loader.py index eb23ed19..d835c5d3 100644 --- a/tests/test_loader.py +++ b/tests/test_loader.py @@ -38,7 +38,7 @@ class TestTTSDataset(unittest.TestCase): c.text_cleaner, ap=self.ap, meta_data=items, - tp=c.text if 'text' in c.keys() else None, + tp=c.characters if 'characters' in c.keys() else None, batch_group_size=bgs, min_seq_len=c.min_seq_len, max_seq_len=float("inf"), diff --git a/train.py b/train.py index bf5429e9..4bb22a34 100644 --- a/train.py +++ b/train.py @@ -49,7 +49,7 @@ def setup_loader(ap, r, is_val=False, verbose=False): c.text_cleaner, meta_data=meta_data_eval if is_val else meta_data_train, ap=ap, - tp=c.text if 'text' in c.keys() else None, + tp=c.characters if 'characters' in c.keys() else None, batch_group_size=0 if is_val else c.batch_group_size * c.batch_size, min_seq_len=c.min_seq_len, @@ -520,8 +520,8 @@ def main(args): # pylint: disable=redefined-outer-name global meta_data_train, meta_data_eval, symbols, phonemes # Audio processor ap = AudioProcessor(**c.audio) - if 'text' in c.keys(): - symbols, phonemes = make_symbols(**c.text) + if 'characters' in c.keys(): + symbols, phonemes = make_symbols(**c.characters) # DISTRUBUTED if num_gpus > 1: diff --git a/utils/generic_utils.py b/utils/generic_utils.py index 7c2f033a..cf0a05b4 100644 --- a/utils/generic_utils.py +++ b/utils/generic_utils.py @@ -426,13 +426,13 @@ def check_config(c): _check_argument('griffin_lim_iters', c['audio'], restricted=True, val_type=int, min_val=10, max_val=1000) # vocabulary parameters - _check_argument('text', c, restricted=False, val_type=dict) - _check_argument('pad', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str) - _check_argument('eos', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str) - _check_argument('bos', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str) - _check_argument('characters', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str) - _check_argument('phonemes', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str) - _check_argument('punctuations', c['text'] if 'text' in c.keys() else {}, restricted='text' in c.keys(), val_type=str) + _check_argument('characters', c, restricted=False, val_type=dict) + _check_argument('pad', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str) + _check_argument('eos', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str) + _check_argument('bos', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str) + _check_argument('characters', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str) + _check_argument('phonemes', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str) + _check_argument('punctuations', c['characters'] if 'characters' in c.keys() else {}, restricted='characters' in c.keys(), val_type=str) # normalization parameters _check_argument('signal_norm', c['audio'], restricted=True, val_type=bool) diff --git a/utils/synthesis.py b/utils/synthesis.py index c5ff2e70..42f0408c 100644 --- a/utils/synthesis.py +++ b/utils/synthesis.py @@ -10,10 +10,10 @@ def text_to_seqvec(text, CONFIG, use_cuda): seq = np.asarray( phoneme_to_sequence(text, text_cleaner, CONFIG.phoneme_language, CONFIG.enable_eos_bos_chars, - tp=CONFIG.text if 'text' in CONFIG.keys() else None), + tp=CONFIG.characters if 'characters' in CONFIG.keys() else None), dtype=np.int32) else: - seq = np.asarray(text_to_sequence(text, text_cleaner, tp=CONFIG.text if 'text' in CONFIG.keys() else None), dtype=np.int32) + seq = np.asarray(text_to_sequence(text, text_cleaner, tp=CONFIG.characters if 'characters' in CONFIG.keys() else None), dtype=np.int32) # torch tensor chars_var = torch.from_numpy(seq).unsqueeze(0) if use_cuda: diff --git a/utils/visual.py b/utils/visual.py index 3b24364c..1cb9ac5d 100644 --- a/utils/visual.py +++ b/utils/visual.py @@ -54,8 +54,8 @@ def visualize(alignment, spectrogram_postnet, stop_tokens, text, hop_length, CON plt.xlabel("Decoder timestamp", fontsize=label_fontsize) plt.ylabel("Encoder timestamp", fontsize=label_fontsize) if CONFIG.use_phonemes: - seq = phoneme_to_sequence(text, [CONFIG.text_cleaner], CONFIG.phoneme_language, CONFIG.enable_eos_bos_chars, tp=CONFIG.text if 'text' in CONFIG.keys() else None) - text = sequence_to_phoneme(seq, tp=CONFIG.text if 'text' in CONFIG.keys() else None) + seq = phoneme_to_sequence(text, [CONFIG.text_cleaner], CONFIG.phoneme_language, CONFIG.enable_eos_bos_chars, tp=CONFIG.characters if 'characters' in CONFIG.keys() else None) + text = sequence_to_phoneme(seq, tp=CONFIG.characters if 'characters' in CONFIG.keys() else None) print(text) plt.yticks(range(len(text)), list(text))