From d1a7ad545de7115ffabd6124585debf25cbb396d Mon Sep 17 00:00:00 2001
From: Thomas Werkmeister <thomas@twerkmeister.com>
Date: Wed, 24 Apr 2019 11:58:55 +0200
Subject: [PATCH 1/4] common voice preprocessor and tests, small refactoring
 within tests

---
 datasets/preprocess.py        | 21 +++++++++++++++++++--
 tests/__init__.py             | 16 ++++++++++++++++
 tests/audio_tests.py          | 20 +++++++++++---------
 tests/inputs/common_voice.tsv | 10 ++++++++++
 tests/preprocess_tests.py     | 28 ++++++++++++++++++++++++++++
 5 files changed, 84 insertions(+), 11 deletions(-)
 create mode 100644 tests/inputs/common_voice.tsv
 create mode 100644 tests/preprocess_tests.py

diff --git a/datasets/preprocess.py b/datasets/preprocess.py
index c1b8469a..e4651306 100644
--- a/datasets/preprocess.py
+++ b/datasets/preprocess.py
@@ -1,6 +1,7 @@
 import os
 import random
 
+
 def tts_cache(root_path, meta_file):
     """This format is set for the meta-file generated by extract_features.py"""
     txt_file = os.path.join(root_path, meta_file)
@@ -109,7 +110,23 @@ def nancy(root_path, meta_file):
         for line in ttf:
             id = line.split()[1]
             text = line[line.find('"')+1:line.rfind('"')-1]
-            wav_file = root_path + 'wavn/' + id + '.wav'
+            wav_file = os.path.join(root_path, "wavn", id + ".wav")
             items.append([text, wav_file])
     random.shuffle(items)    
-    return items
\ No newline at end of file
+    return items
+
+
+def common_voice(root_path, meta_file):
+    """Normalize Common Voice TSV meta data into [text, wav_path] items."""
+    txt_file = os.path.join(root_path, meta_file)
+    items = []
+    with open(txt_file, 'r', encoding='utf8') as ttf:  # locale-independent
+        for line in ttf:
+            if line.startswith("client_id"):  # skip the TSV header row
+                continue
+            cols = line.split("\t")
+            text = cols[2]
+            # Files need to be first converted to wav...
+            wav_file = os.path.join(root_path, "clips", cols[1] + ".wav")
+            items.append([text, wav_file])
+    return items
diff --git a/tests/__init__.py b/tests/__init__.py
index e69de29b..487a5519 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -0,0 +1,16 @@
+import os
+
+
+def get_tests_path():
+    """Return the symlink-resolved directory that contains this file."""
+    return os.path.dirname(os.path.realpath(__file__))
+
+
+def get_tests_input_path():
+    """Return the path of the "inputs" directory under the tests directory."""
+    return os.path.join(get_tests_path(), "inputs")
+
+
+def get_tests_output_path():
+    """Return the tests "outputs" directory path (it is not created here)."""
+    return os.path.join(get_tests_path(), "outputs")
diff --git a/tests/audio_tests.py b/tests/audio_tests.py
index 8c432d2f..4021a284 100644
--- a/tests/audio_tests.py
+++ b/tests/audio_tests.py
@@ -2,21 +2,23 @@ import os
 import unittest
 import numpy as np
 import torch as T
+
+from tests import get_tests_path, get_tests_input_path, get_tests_output_path
 from utils.audio import AudioProcessor
 from utils.generic_utils import load_config
 
-file_path = os.path.dirname(os.path.realpath(__file__))
-INPUTPATH = os.path.join(file_path, 'inputs')
-OUTPATH = os.path.join(file_path, "outputs/audio_tests")
-os.makedirs(OUTPATH, exist_ok=True)
+TESTS_PATH = get_tests_path()
+OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests")
+WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav")
 
-c = load_config(os.path.join(file_path, 'test_config.json'))
+os.makedirs(OUT_PATH, exist_ok=True)
+conf = load_config(os.path.join(TESTS_PATH, 'test_config.json'))
 
 
 class TestAudio(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super(TestAudio, self).__init__(*args, **kwargs)
-        self.ap = AudioProcessor(**c.audio)
+        self.ap = AudioProcessor(**conf.audio)
 
     def test_audio_synthesis(self):
         """ 1. load wav
@@ -31,13 +33,13 @@ class TestAudio(unittest.TestCase):
             self.ap.signal_norm = signal_norm
             self.ap.symmetric_norm = symmetric_norm
             self.ap.clip_norm = clip_norm
-            wav = self.ap.load_wav(INPUTPATH + "/example_1.wav")
+            wav = self.ap.load_wav(WAV_FILE)
             mel = self.ap.melspectrogram(wav)
             wav_ = self.ap.inv_mel_spectrogram(mel)
             file_name = "/audio_test-melspec_max_norm_{}-signal_norm_{}-symmetric_{}-clip_norm_{}.wav"\
                 .format(max_norm, signal_norm, symmetric_norm, clip_norm)
             print(" | > Creating wav file at : ", file_name)
-            self.ap.save_wav(wav_, OUTPATH + file_name)
+            self.ap.save_wav(wav_, OUT_PATH + file_name)
 
         # maxnorm = 1.0
         _test(1., False, False, False)
@@ -55,7 +57,7 @@ class TestAudio(unittest.TestCase):
     def test_normalize(self):
         """Check normalization and denormalization for range values and consistency """
         print(" > Testing normalization and denormalization.")
-        wav = self.ap.load_wav(INPUTPATH + "/example_1.wav")
+        wav = self.ap.load_wav(WAV_FILE)
         self.ap.signal_norm = False
         x = self.ap.melspectrogram(wav)
         x_old = x
diff --git a/tests/inputs/common_voice.tsv b/tests/inputs/common_voice.tsv
new file mode 100644
index 00000000..a6ea30dd
--- /dev/null
+++ b/tests/inputs/common_voice.tsv
@@ -0,0 +1,10 @@
+client_id	path	sentence	up_votes	down_votes	age	gender	accent
+aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3	21fce545b24d9a5af0403b949e95e8dd3c10c4ff3e371f14e4d5b4ebf588670b7c9e618285fc872d94a89ed7f0217d9019fe5de33f1577b49dcd518eacf63c4b	Man sollte den Länderfinanzausgleich durch einen Bundesliga-Soli ersetzen.	2	0	fourties	male	germany
+aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3	42758baa4e91ef6b82b78b11a04bc5117a035a8d3bc42c33c0bb3084909af17043a194cfd8cd9839f0d6ef1ea5413acda5de5d1936abcc8ca073e2da7f9488ea	Folgende Lektüre kann ich Ihnen zum Thema Kognitionspsychologie empfehlen.	2	0	fourties	male	germany
+aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3	478f172c2dbda6675247e9674ade79a5b49efeefb7c9e99040dcc69a847a01d69398cf180570859b0cdb6fc887717e04cd8b149c723d48d00b5d18f41314667c	Touristen winkten den Leuten am Ufer zu.	2	0	fourties	male	germany
+aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3	4854368d6d21cb44103e432b5332f31e8d14030582a40850501bcf9377d699314a5ff27a8206fa89254ddde7f3f1c65d33836f3dfcfa16bcabec08537f2b5f08	Valentin hat das Handtuch geworfen.	2	0	fourties	male	germany
+aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3	a841a9f3e032495dd47560e65fba99eeacb3618c07de8b1351c20188e5b71e33cc52f73315f721a3a24b65763c65bb52fbf3ae052eb5774e834dcb57f296db5c	Ohne Gehörschutz bei der Arbeit wäre Klaus wohl nach zwei Wochen taub.	2	0	fourties	male	germany
+aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3	03ab970a5bf5410bc3260b073cce1c7f49c688ace83dc8836b1c0f79a09fea45a27725c769f4a9d2e6181defd016d22642789d7ac51da252b42958a9192bd4c7	Gerrit erinnerte sich daran, dass er einst einen Eid geschworen hatte.	2	0	fourties	male	germany
+aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3	c4a94df443ad5f2c7241413ef7145d5f0de41ae929759073917fe96166da3c7d3a612c920ed7b0f3d5950a38d6205e9dba24af5bfb27e390a220d004e6e26744	Auf das, was jetzt kommt, habe ich nämlich absolut keinen Bock.	2	0	fourties	male	germany
+aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3	104695983b1112229b4a48696405d044dad9ddef713aa6eb1a6240cc16b7b7a2a96354ae9da99783850dde08a982091e48d3037288a3a58269cac9fe70a6bd7a	Von Salzburg ist es doch nicht weit bis zum Chiemsee.	2	0	fourties	male	germany
+d5b5da343bb0f65e3580bc2e1902a4f5d004241488d751503f2020bc1c93f89715e355e35f6e25def2b90cb3eea99fda403eb92ae3afbb84d039a54a4ed2d875	ad2f69e053b0e20e01c82b9821fe5787f1cc8e4b0b97f0e4cab1e9a652c577169c8244fb222281a60ee3081854014113e04c4ca43643100b7c01dab0fac11974	Warum werden da keine strafrechtlichen Konsequenzen gezogen?	2	0	thirties	male	germany
diff --git a/tests/preprocess_tests.py b/tests/preprocess_tests.py
new file mode 100644
index 00000000..6f4b6df1
--- /dev/null
+++ b/tests/preprocess_tests.py
@@ -0,0 +1,28 @@
+import unittest
+import os
+from tests import get_tests_input_path
+
+from datasets.preprocess import common_voice
+
+
+class TestPreprocessors(unittest.TestCase):
+
+    def test_common_voice_preprocessor(self):
+        """Header row is skipped; rows map to [sentence, clips/<path>.wav]."""
+        root_path = get_tests_input_path()
+        items = common_voice(root_path, "common_voice.tsv")
+        self.assertEqual(len(items), 9)  # fixture: 1 header + 9 data rows
+        self.assertEqual(items[0][0], "Man sollte den Länderfinanzausgleich "
+                         "durch einen Bundesliga-Soli ersetzen.")
+        self.assertEqual(items[0][1], os.path.join(root_path, "clips",
+                         "21fce545b24d9a5af0403b949e95e8dd3"
+                         "c10c4ff3e371f14e4d5b4ebf588670b7c"
+                         "9e618285fc872d94a89ed7f0217d9019f"
+                         "e5de33f1577b49dcd518eacf63c4b.wav"))
+        self.assertEqual(items[-1][0], "Warum werden da keine "
+                         "strafrechtlichen Konsequenzen gezogen?")
+        self.assertEqual(items[-1][1], os.path.join(root_path, "clips",
+                         "ad2f69e053b0e20e01c82b9821fe5787f1"
+                         "cc8e4b0b97f0e4cab1e9a652c577169c82"
+                         "44fb222281a60ee3081854014113e04c4c"
+                         "a43643100b7c01dab0fac11974.wav"))

From a9f6c400828c88b48046a5d9e60b5ac6d20afecb Mon Sep 17 00:00:00 2001
From: Thomas Werkmeister <thomas@twerkmeister.com>
Date: Wed, 24 Apr 2019 11:58:10 +0200
Subject: [PATCH 2/4] removed duplicate lws requirement

---
 requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 82dfab04..2e145a8f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,4 @@
 numpy==1.14.3
-lws
 torch>=0.4.1
 librosa==0.5.1
 Unidecode==0.4.20

From 41e3e42989e680a184e2b56f64ed33cad9ca2fb7 Mon Sep 17 00:00:00 2001
From: Thomas Werkmeister <thomas@twerkmeister.com>
Date: Wed, 24 Apr 2019 11:57:50 +0200
Subject: [PATCH 3/4] added .idea to gitignore for pycharm users

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index bfc8ea1a..b0fe0bee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+.idea/
 *.pyc
 .DS_Store
 ./__init__.py

From f279fe9e8b7a291fec7d27feb94ca27d26574ed4 Mon Sep 17 00:00:00 2001
From: Thomas Werkmeister <thomas@twerkmeister.com>
Date: Thu, 25 Apr 2019 11:46:11 +0200
Subject: [PATCH 4/4] removed shuffling of data in the preprocessor, uniform
 indentation

---
 datasets/preprocess.py | 97 ++++++++++++++++++++----------------------
 1 file changed, 46 insertions(+), 51 deletions(-)

diff --git a/datasets/preprocess.py b/datasets/preprocess.py
index e4651306..43b13359 100644
--- a/datasets/preprocess.py
+++ b/datasets/preprocess.py
@@ -1,5 +1,4 @@
 import os
-import random
 
 
 def tts_cache(root_path, meta_file):
@@ -9,9 +8,9 @@ def tts_cache(root_path, meta_file):
     with open(txt_file, 'r', encoding='utf8') as f:
         for line in f:
             cols = line.split('| ')
-            items.append(cols)  # text, wav_full_path, mel_name, linear_name, wav_len, mel_len
-    random.shuffle(items)
-    return items            
+            # text, wav_full_path, mel_name, linear_name, wav_len, mel_len
+            items.append(cols)
+    return items
 
 
 def tweb(root_path, meta_file):
@@ -23,12 +22,11 @@ def tweb(root_path, meta_file):
     with open(txt_file, 'r') as ttf:
         for line in ttf:
             cols = line.split('\t')
-            wav_file = os.path.join(root_path, cols[0]+'.wav')
+            wav_file = os.path.join(root_path, cols[0] + '.wav')
             text = cols[1]
             items.append([text, wav_file])
-    random.shuffle(items)
     return items
-    
+
 
 # def kusal(root_path, meta_file):
 #     txt_file = os.path.join(root_path, meta_file)
@@ -44,48 +42,48 @@ def tweb(root_path, meta_file):
 
 
 def mozilla(root_path, meta_files):
-        """Normalizes Mozilla meta data files to TTS format"""
-        import glob
-        meta_files = glob.glob(root_path + "/**/batch*.txt", recursive=True)
-        folders = [os.path.dirname(f.strip()) for f in meta_files]
-        items = []
-        for idx, meta_file in enumerate(meta_files):
-                folder = folders[idx]
-                txt_file = os.path.join(root_path, meta_file)
-                with open(txt_file, 'r') as ttf:
-                        for line in ttf:
-                                cols = line.split('|')
-                                wav_file = os.path.join(root_path, folder, 'wavs_no_processing', cols[1].strip())
-                                if os.path.isfile(wav_file):
-                                        text = cols[0].strip()
-                                        items.append([text, wav_file])
-                                else: 
-                                        print(" > Error: {}".format(cols))
-                                        continue
-        random.shuffle(items)
-        return items
+    """Normalize Mozilla batch*.txt meta files under root_path to TTS format."""
+    import glob
+    # NOTE: the `meta_files` argument is ignored; files are found via glob.
+    meta_files = glob.glob(root_path + "/**/batch*.txt", recursive=True)
+    items = []
+    for meta_file in meta_files:
+        # glob results already include root_path; joining it again broke
+        # relative roots ("data" + "data/x/batch1.txt" -> "data/data/...").
+        folder = os.path.dirname(meta_file)
+        with open(meta_file, 'r') as ttf:
+            for line in ttf:
+                cols = line.split('|')
+                wav_file = os.path.join(folder, 'wavs_no_processing',
+                                        cols[1].strip())
+                if os.path.isfile(wav_file):
+                    text = cols[0].strip()
+                    items.append([text, wav_file])
+                else:
+                    print(" > Error: {}".format(cols))
+    return items
 
 
 def mailabs(root_path, meta_files):
-        """Normalizes M-AI-Labs meta data files to TTS format"""
-        folders = [os.path.dirname(f.strip()) for f in meta_files.split(",")]
-        meta_files = [f.strip() for f in meta_files.split(",")]
-        items = []
-        for idx, meta_file in enumerate(meta_files):
-                print(" | > {}".format(meta_file))
-                folder = folders[idx]
-                txt_file = os.path.join(root_path, meta_file)
-                with open(txt_file, 'r') as ttf:
-                        for line in ttf:
-                                cols = line.split('|')
-                                wav_file = os.path.join(root_path, folder, 'wavs', cols[0]+'.wav')
-                                if os.path.isfile(wav_file):
-                                        text = cols[1]
-                                        items.append([text, wav_file])
-                                else: 
-                                        continue
-        random.shuffle(items)
-        return items
+    """Normalize M-AI-Labs meta files (comma-separated paths) to TTS format."""
+    folders = [os.path.dirname(f.strip()) for f in meta_files.split(",")]
+    meta_files = [f.strip() for f in meta_files.split(",")]
+    items = []
+    for idx, meta_file in enumerate(meta_files):
+        print(" | > {}".format(meta_file))
+        folder = folders[idx]  # directory of this meta file, under root_path
+        txt_file = os.path.join(root_path, meta_file)
+        with open(txt_file, 'r') as ttf:
+            for line in ttf:
+                cols = line.split('|')  # cols[0]: wav base name, cols[1]: text
+                wav_file = os.path.join(root_path, folder, 'wavs',
+                                        cols[0] + '.wav')
+                if os.path.isfile(wav_file):
+                    text = cols[1]
+                    items.append([text, wav_file])
+                else:  # rows whose wav file does not exist are skipped
+                    continue
+    return items
 
 
 def ljspeech(root_path, meta_file):
@@ -95,10 +93,9 @@ def ljspeech(root_path, meta_file):
     with open(txt_file, 'r') as ttf:
         for line in ttf:
             cols = line.split('|')
-            wav_file = os.path.join(root_path, 'wavs', cols[0]+'.wav')
+            wav_file = os.path.join(root_path, 'wavs', cols[0] + '.wav')
             text = cols[1]
             items.append([text, wav_file])
-    random.shuffle(items)
     return items
 
 
@@ -109,10 +106,9 @@ def nancy(root_path, meta_file):
     with open(txt_file, 'r') as ttf:
         for line in ttf:
             id = line.split()[1]
-            text = line[line.find('"')+1:line.rfind('"')-1]
+            text = line[line.find('"') + 1:line.rfind('"') - 1]
             wav_file = os.path.join(root_path, "wavn", id + ".wav")
             items.append([text, wav_file])
-    random.shuffle(items)    
     return items
 
 
@@ -126,7 +122,6 @@ def common_voice(root_path, meta_file):
                 continue
             cols = line.split("\t")
             text = cols[2]
-            # Files need to be first converted to wav...
             wav_file = os.path.join(root_path, "clips", cols[1] + ".wav")
             items.append([text, wav_file])
     return items