Some bug fixes

2019-01-17 15:48:22 +01:00 · 2019-01-17 15:48:22 +01:00 · 562d73d3d1
parent 4431e04b48
commit 562d73d3d1
2 changed files with 5 additions and 5 deletions
--- a/config.json
+++ b/config.json
@@ -10,9 +10,9 @@
        "sample_rate": 16000,   // wav sample-rate. If different than the original data, it is resampled.
        "frame_length_ms": 50,  // stft window length in ms.
        "frame_shift_ms": 12.5, // stft window hop-lengh in ms.
-        "preemphasis": 0.97,    // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis.
+        "preemphasis": 0.98,    // pre-emphasis to reduce spec noise and make it more structured. If 0.0, no -pre-emphasis.
        "min_level_db": -100,   // normalization range
-        "ref_level_db": 20,     // reference level db, theoretically 20db is the sound of air.
+        "ref_level_db": 40,     // reference level db, theoretically 20db is the sound of air.
        "power": 1.5,           // value to sharpen wav signals after GL algorithm.
        "griffin_lim_iters": 60,// #griffin-lim iterations. 30-60 is a good range. Larger the value, slower the generation.
        // Normalization parameters
@@ -52,7 +52,7 @@
    "output_path": "/media/erogol/data_ssd/Data/models/en_UK/",      // DATASET-RELATED: output path for all training outputs.
    "num_loader_workers": 8,        // number of training data loader processes. Don't set it too big. 4-8 are good values.
    "num_val_loader_workers": 4,    // number of evaluation data loader processes.
-    "phoneme_cache_path": "tmp_en_uk",  // phoneme computation is slow, therefore, it caches results in the given folder.
+    "phoneme_cache_path": "tmp_phonemes_gb",  // phoneme computation is slow, therefore, it caches results in the given folder.
    "use_phonemes": true,           // use phonemes instead of raw characters. It is suggested for better pronounciation.
-    "phoneme_language": "en-us"     // depending on your target language, pick one from  https://github.com/bootphon/phonemizer#languages
+    "phoneme_language": "en-gb"     // depending on your target language, pick one from  https://github.com/bootphon/phonemizer#languages
 }
--- a/layers/tacotron.py
+++ b/layers/tacotron.py
@@ -413,7 +413,7 @@ class Decoder(nn.Module):
            for idx in range(len(self.decoder_rnns)):
                decoder_rnn_hiddens[idx] = self.decoder_rnns[idx](
                    decoder_input, decoder_rnn_hiddens[idx])
-                # Residual connectinon
+                # Residual connection
                decoder_input = decoder_rnn_hiddens[idx] + decoder_input
            decoder_output = decoder_input
            del decoder_input