From 0a3dba4279f87f788ddb63db7d3ad0a6cf0744aa Mon Sep 17 00:00:00 2001
From: Eren Golge <erogol@hotmail.com>
Date: Thu, 28 Feb 2019 14:27:36 +0100
Subject: [PATCH 1/3] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 56c81650..2e47f24d 100644
--- a/README.md
+++ b/README.md
@@ -140,7 +140,7 @@ Please feel free to offer new changes and pull things off. We are happy to discu
 - [x] Generate human-like speech on LJSpeech dataset.
 - [x] Generate human-like speech on a different dataset (Nancy) (TWEB).
 - [x] Train TTS with r=1 successfully.
-- [ ] Enable process based distributed training. Similar [to] (https://github.com/fastai/imagenet-fast/).
+- [x] Enable process based distributed training. Similar [to] (https://github.com/fastai/imagenet-fast/).
 - [ ] Adapting Neural Vocoder. The most active work is [here] (https://github.com/erogol/WaveRNN)
 - [ ] Multi-speaker embedding.
 

From 95de2cd5595a403bdd59565bcf6dc4b80b00dfa5 Mon Sep 17 00:00:00 2001
From: gnosly <fabriziogiovannetti@gmail.com>
Date: Mon, 11 Mar 2019 21:56:40 +0100
Subject: [PATCH 2/3] added missing phonemes, synthesizer.py now setup the
 correct input layer

---
 server/synthesizer.py  | 41 ++++++++++++++++++++++++-----------------
 tests/symbols_tests.py |  7 +++++++
 utils/text/symbols.py  | 21 +++++++++++----------
 3 files changed, 42 insertions(+), 27 deletions(-)
 create mode 100644 tests/symbols_tests.py

diff --git a/server/synthesizer.py b/server/synthesizer.py
index 5c88c309..89ceb703 100644
--- a/server/synthesizer.py
+++ b/server/synthesizer.py
@@ -1,16 +1,13 @@
 import io
 import os
-import librosa
-import torch
-import scipy
-import numpy as np
-import soundfile as sf
-from utils.text import text_to_sequence
-from utils.generic_utils import load_config
-from utils.audio import AudioProcessor
-from models.tacotron import Tacotron
-from matplotlib import pylab as plt
 
+import numpy as np
+import torch
+
+from models.tacotron import Tacotron
+from utils.audio import AudioProcessor
+from utils.generic_utils import load_config
+from utils.text import phoneme_to_sequence, phonemes, symbols, text_to_sequence
 
 class Synthesizer(object):
     def load_model(self, model_path, model_name, model_config, use_cuda):
@@ -22,14 +19,22 @@ class Synthesizer(object):
         config = load_config(model_config)
         self.config = config
         self.use_cuda = use_cuda
+        self.use_phonemes = config.use_phonemes
         self.ap = AudioProcessor(**config.audio)
-        self.model = Tacotron(config.embedding_size, self.ap.num_freq, self.ap.num_mels, config.r)
+
+        if self.use_phonemes:
+            self.input_size = len(phonemes)
+            self.input_adapter = lambda sen: phoneme_to_sequence(sen, [self.config.text_cleaner], self.config.phoneme_language)
+        else:
+            self.input_size = len(symbols)
+            self.input_adapter = lambda sen: text_to_sequence(sen, [self.config.text_cleaner])
+
+        self.model = Tacotron(self.input_size, config.embedding_size, self.ap.num_freq, self.ap.num_mels, config.r)
         # load model state
         if use_cuda:
             cp = torch.load(self.model_file)
         else:
-            cp = torch.load(
-                self.model_file, map_location=lambda storage, loc: storage)
+            cp = torch.load(self.model_file, map_location=lambda storage, loc: storage)
         # load the model
         self.model.load_state_dict(cp['model'])
         if use_cuda:
@@ -42,7 +47,6 @@ class Synthesizer(object):
         self.ap.save_wav(wav, path)
 
     def tts(self, text):
-        text_cleaner = [self.config.text_cleaner]
         wavs = []
         for sen in text.split('.'):
             if len(sen) < 3:
@@ -51,7 +55,9 @@ class Synthesizer(object):
             sen += '.'
             print(sen)
             sen = sen.strip()
-            seq = np.array(text_to_sequence(sen, text_cleaner))
+
+            seq = np.array(self.input_adapter(sen))
+
             chars_var = torch.from_numpy(seq).unsqueeze(0).long()
             if self.use_cuda:
                 chars_var = chars_var.cuda()
@@ -59,8 +65,9 @@ class Synthesizer(object):
                 chars_var)
             linear_out = linear_out[0].data.cpu().numpy()
             wav = self.ap.inv_spectrogram(linear_out.T)
-            out = io.BytesIO()
             wavs += list(wav)
             wavs += [0] * 10000
+
+        out = io.BytesIO()
         self.save_wav(wavs, out)
-        return out
\ No newline at end of file
+        return out
diff --git a/tests/symbols_tests.py b/tests/symbols_tests.py
new file mode 100644
index 00000000..68c909c5
--- /dev/null
+++ b/tests/symbols_tests.py
@@ -0,0 +1,7 @@
+import unittest
+
+from utils.text import phonemes
+
+class SymbolsTest(unittest.TestCase):
+    def test_uniqueness(self):
+        assert sorted(phonemes) == sorted(list(set(phonemes)))
diff --git a/utils/text/symbols.py b/utils/text/symbols.py
index a25f7c99..ccec2888 100644
--- a/utils/text/symbols.py
+++ b/utils/text/symbols.py
@@ -5,7 +5,6 @@ Defines the set of symbols used in text input to the model.
 The default is a set of ASCII characters that works well for English or text that has been run
 through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details.
 '''
-from utils.text import cmudict
 
 _pad = '_'
 _eos = '~'
@@ -13,22 +12,24 @@ _characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!\'(),-.:;? '
 _punctuations = '!\'(),-.:;? '
 _phoneme_punctuations = '.!;:,?'
 
-# TODO: include more phoneme characters for other languages.
-_phonemes = ['l','ɹ','ɜ','ɚ','k','u','ʔ','ð','ɐ','ɾ','ɑ','ɔ','b','ɛ','t','v','n','m','ʊ','ŋ','s',
-             'ʌ','o','ʃ','i','p','æ','e','a','ʒ',' ','h','ɪ','ɡ','f','r','w','ɫ','ɬ','d','x','ː',
-             'ᵻ','ə','j','θ','z','ɒ']
-
-_phonemes = sorted(list(set(_phonemes)))
+# Phonemes definition
+_vowels = 'iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ'
+_non_pulmonic_consonants = 'ʘɓǀɗǃʄǂɠǁʛ'
+_pulmonic_consonants = 'pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ'
+_suprasegmentals = 'ˈˌːˑ'
+_other_symbols = 'ʍwɥʜʢʡɕʑɺɧ'
+_diacrilics = 'ɚ˞ɫ'
+_phonemes = sorted(list(_vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics))
 
 # Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as uppercase letters):
 _arpabet = ['@' + s for s in _phonemes]
 
 # Export all symbols:
 symbols = [_pad, _eos] + list(_characters) + _arpabet
-phonemes = [_pad, _eos] + list(_phonemes) + list(_punctuations)
+phonemes = [_pad, _eos] + _phonemes + list(_punctuations)
 
 if __name__ == '__main__':
-    print(" > TTS symbols ")
+    print(" > TTS symbols {}".format(len(symbols)))
     print(symbols)
-    print(" > TTS phonemes ")
+    print(" > TTS phonemes {}".format(len(phonemes)))
     print(phonemes)

From 5acc9db4ac95bb014fa04fdeb473c6d8ad09fb23 Mon Sep 17 00:00:00 2001
From: Eren Golge <erogol@hotmail.com>
Date: Tue, 12 Mar 2019 10:16:42 +0100
Subject: [PATCH 3/3] Add empty character to phonemes

---
 utils/text/symbols.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/text/symbols.py b/utils/text/symbols.py
index ccec2888..f3e69073 100644
--- a/utils/text/symbols.py
+++ b/utils/text/symbols.py
@@ -17,7 +17,7 @@ _vowels = 'iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ'
 _non_pulmonic_consonants = 'ʘɓǀɗǃʄǂɠǁʛ'
 _pulmonic_consonants = 'pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ'
 _suprasegmentals = 'ˈˌːˑ'
-_other_symbols = 'ʍwɥʜʢʡɕʑɺɧ'
+_other_symbols = 'ʍwɥʜʢʡɕʑɺɧ '
 _diacrilics = 'ɚ˞ɫ'
 _phonemes = sorted(list(_vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics))