From d1a7ad545de7115ffabd6124585debf25cbb396d Mon Sep 17 00:00:00 2001 From: Thomas Werkmeister Date: Wed, 24 Apr 2019 11:58:55 +0200 Subject: [PATCH 1/4] common voice preprocessor and tests, small refactoring within tests --- datasets/preprocess.py | 21 +++++++++++++++++++-- tests/__init__.py | 16 ++++++++++++++++ tests/audio_tests.py | 20 +++++++++++--------- tests/inputs/common_voice.tsv | 10 ++++++++++ tests/preprocess_tests.py | 28 ++++++++++++++++++++++++++++ 5 files changed, 84 insertions(+), 11 deletions(-) create mode 100644 tests/inputs/common_voice.tsv create mode 100644 tests/preprocess_tests.py diff --git a/datasets/preprocess.py b/datasets/preprocess.py index c1b8469a..e4651306 100644 --- a/datasets/preprocess.py +++ b/datasets/preprocess.py @@ -1,6 +1,7 @@ import os import random + def tts_cache(root_path, meta_file): """This format is set for the meta-file generated by extract_features.py""" txt_file = os.path.join(root_path, meta_file) @@ -109,7 +110,23 @@ def nancy(root_path, meta_file): for line in ttf: id = line.split()[1] text = line[line.find('"')+1:line.rfind('"')-1] - wav_file = root_path + 'wavn/' + id + '.wav' + wav_file = os.path.join(root_path, "wavn", id + ".wav") items.append([text, wav_file]) random.shuffle(items) - return items \ No newline at end of file + return items + + +def common_voice(root_path, meta_file): + """Normalize the common voice meta data file to TTS format.""" + txt_file = os.path.join(root_path, meta_file) + items = [] + with open(txt_file, 'r') as ttf: + for line in ttf: + if line.startswith("client_id"): + continue + cols = line.split("\t") + text = cols[2] + # Files need to be first converted to wav... + wav_file = os.path.join(root_path, "clips", cols[1] + ".wav") + items.append([text, wav_file]) + return items diff --git a/tests/__init__.py b/tests/__init__.py index e69de29b..487a5519 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,16 @@ +import os + + +def get_tests_path(): + """Returns the path to the test directory.""" + return os.path.dirname(os.path.realpath(__file__)) + + +def get_tests_input_path(): + """Returns the path to the test data directory.""" + return os.path.join(get_tests_path(), "inputs") + + +def get_tests_output_path(): + """Returns the path to the directory for test outputs.""" + return os.path.join(get_tests_path(), "outputs") diff --git a/tests/audio_tests.py b/tests/audio_tests.py index 8c432d2f..4021a284 100644 --- a/tests/audio_tests.py +++ b/tests/audio_tests.py @@ -2,21 +2,23 @@ import os import unittest import numpy as np import torch as T + +from tests import get_tests_path, get_tests_input_path, get_tests_output_path from utils.audio import AudioProcessor from utils.generic_utils import load_config -file_path = os.path.dirname(os.path.realpath(__file__)) -INPUTPATH = os.path.join(file_path, 'inputs') -OUTPATH = os.path.join(file_path, "outputs/audio_tests") -os.makedirs(OUTPATH, exist_ok=True) +TESTS_PATH = get_tests_path() +OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests") +WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav") -c = load_config(os.path.join(file_path, 'test_config.json')) +os.makedirs(OUT_PATH, exist_ok=True) +conf = load_config(os.path.join(TESTS_PATH, 'test_config.json')) class TestAudio(unittest.TestCase): def __init__(self, *args, **kwargs): super(TestAudio, self).__init__(*args, **kwargs) - self.ap = AudioProcessor(**c.audio) + self.ap = AudioProcessor(**conf.audio) def test_audio_synthesis(self): """ 1. load wav @@ -31,13 +33,13 @@ class TestAudio(unittest.TestCase): self.ap.signal_norm = signal_norm self.ap.symmetric_norm = symmetric_norm self.ap.clip_norm = clip_norm - wav = self.ap.load_wav(INPUTPATH + "/example_1.wav") + wav = self.ap.load_wav(WAV_FILE) mel = self.ap.melspectrogram(wav) wav_ = self.ap.inv_mel_spectrogram(mel) file_name = "/audio_test-melspec_max_norm_{}-signal_norm_{}-symmetric_{}-clip_norm_{}.wav"\ .format(max_norm, signal_norm, symmetric_norm, clip_norm) print(" | > Creating wav file at : ", file_name) - self.ap.save_wav(wav_, OUTPATH + file_name) + self.ap.save_wav(wav_, OUT_PATH + file_name) # maxnorm = 1.0 _test(1., False, False, False) @@ -55,7 +57,7 @@ class TestAudio(unittest.TestCase): def test_normalize(self): """Check normalization and denormalization for range values and consistency """ print(" > Testing normalization and denormalization.") - wav = self.ap.load_wav(INPUTPATH + "/example_1.wav") + wav = self.ap.load_wav(WAV_FILE) self.ap.signal_norm = False x = self.ap.melspectrogram(wav) x_old = x diff --git a/tests/inputs/common_voice.tsv b/tests/inputs/common_voice.tsv new file mode 100644 index 00000000..a6ea30dd --- /dev/null +++ b/tests/inputs/common_voice.tsv @@ -0,0 +1,10 @@ +client_id path sentence up_votes down_votes age gender accent +aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 21fce545b24d9a5af0403b949e95e8dd3c10c4ff3e371f14e4d5b4ebf588670b7c9e618285fc872d94a89ed7f0217d9019fe5de33f1577b49dcd518eacf63c4b Man sollte den Länderfinanzausgleich durch einen Bundesliga-Soli ersetzen. 2 0 fourties male germany +aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 42758baa4e91ef6b82b78b11a04bc5117a035a8d3bc42c33c0bb3084909af17043a194cfd8cd9839f0d6ef1ea5413acda5de5d1936abcc8ca073e2da7f9488ea Folgende Lektüre kann ich Ihnen zum Thema Kognitionspsychologie empfehlen. 2 0 fourties male germany +aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 478f172c2dbda6675247e9674ade79a5b49efeefb7c9e99040dcc69a847a01d69398cf180570859b0cdb6fc887717e04cd8b149c723d48d00b5d18f41314667c Touristen winkten den Leuten am Ufer zu. 2 0 fourties male germany +aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 4854368d6d21cb44103e432b5332f31e8d14030582a40850501bcf9377d699314a5ff27a8206fa89254ddde7f3f1c65d33836f3dfcfa16bcabec08537f2b5f08 Valentin hat das Handtuch geworfen. 2 0 fourties male germany +aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 a841a9f3e032495dd47560e65fba99eeacb3618c07de8b1351c20188e5b71e33cc52f73315f721a3a24b65763c65bb52fbf3ae052eb5774e834dcb57f296db5c Ohne Gehörschutz bei der Arbeit wäre Klaus wohl nach zwei Wochen taub. 2 0 fourties male germany +aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 03ab970a5bf5410bc3260b073cce1c7f49c688ace83dc8836b1c0f79a09fea45a27725c769f4a9d2e6181defd016d22642789d7ac51da252b42958a9192bd4c7 Gerrit erinnerte sich daran, dass er einst einen Eid geschworen hatte. 2 0 fourties male germany +aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 c4a94df443ad5f2c7241413ef7145d5f0de41ae929759073917fe96166da3c7d3a612c920ed7b0f3d5950a38d6205e9dba24af5bfb27e390a220d004e6e26744 Auf das, was jetzt kommt, habe ich nämlich absolut keinen Bock. 2 0 fourties male germany +aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 104695983b1112229b4a48696405d044dad9ddef713aa6eb1a6240cc16b7b7a2a96354ae9da99783850dde08a982091e48d3037288a3a58269cac9fe70a6bd7a Von Salzburg ist es doch nicht weit bis zum Chiemsee. 2 0 fourties male germany +d5b5da343bb0f65e3580bc2e1902a4f5d004241488d751503f2020bc1c93f89715e355e35f6e25def2b90cb3eea99fda403eb92ae3afbb84d039a54a4ed2d875 ad2f69e053b0e20e01c82b9821fe5787f1cc8e4b0b97f0e4cab1e9a652c577169c8244fb222281a60ee3081854014113e04c4ca43643100b7c01dab0fac11974 Warum werden da keine strafrechtlichen Konsequenzen gezogen? 2 0 thirties male germany diff --git a/tests/preprocess_tests.py b/tests/preprocess_tests.py new file mode 100644 index 00000000..6f4b6df1 --- /dev/null +++ b/tests/preprocess_tests.py @@ -0,0 +1,28 @@ +import unittest +import os +from tests import get_tests_input_path + +from datasets.preprocess import common_voice + + +class TestPreprocessors(unittest.TestCase): + + def test_common_voice_preprocessor(self): + root_path = get_tests_input_path() + meta_file = "common_voice.tsv" + items = common_voice(root_path, meta_file) + assert items[0][0] == "Man sollte den Länderfinanzausgleich durch " \ + "einen Bundesliga-Soli ersetzen." + assert items[0][1] == os.path.join(get_tests_input_path(), "clips", + "21fce545b24d9a5af0403b949e95e8dd3" + "c10c4ff3e371f14e4d5b4ebf588670b7c" + "9e618285fc872d94a89ed7f0217d9019f" + "e5de33f1577b49dcd518eacf63c4b.wav") + + assert items[-1][0] == "Warum werden da keine strafrechtlichen " \ + "Konsequenzen gezogen?" + assert items[-1][1] == os.path.join(get_tests_input_path(), "clips", + "ad2f69e053b0e20e01c82b9821fe5787f1" + "cc8e4b0b97f0e4cab1e9a652c577169c82" + "44fb222281a60ee3081854014113e04c4c" + "a43643100b7c01dab0fac11974.wav") From a9f6c400828c88b48046a5d9e60b5ac6d20afecb Mon Sep 17 00:00:00 2001 From: Thomas Werkmeister Date: Wed, 24 Apr 2019 11:58:10 +0200 Subject: [PATCH 2/4] removed duplicate lws requirement --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 82dfab04..2e145a8f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ numpy==1.14.3 -lws torch>=0.4.1 librosa==0.5.1 Unidecode==0.4.20 From 41e3e42989e680a184e2b56f64ed33cad9ca2fb7 Mon Sep 17 00:00:00 2001 From: Thomas Werkmeister Date: Wed, 24 Apr 2019 11:57:50 +0200 Subject: [PATCH 3/4] added .idea to gitignore for pycharm users --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index bfc8ea1a..b0fe0bee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.idea/ *.pyc .DS_Store ./__init__.py From f279fe9e8b7a291fec7d27feb94ca27d26574ed4 Mon Sep 17 00:00:00 2001 From: Thomas Werkmeister Date: Thu, 25 Apr 2019 11:46:11 +0200 Subject: [PATCH 4/4] removed shuffling of data in the preprocessor, uniform indentation --- datasets/preprocess.py | 97 ++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 51 deletions(-) diff --git a/datasets/preprocess.py b/datasets/preprocess.py index e4651306..43b13359 100644 --- a/datasets/preprocess.py +++ b/datasets/preprocess.py @@ -1,5 +1,4 @@ import os -import random def tts_cache(root_path, meta_file): @@ -9,9 +8,9 @@ def tts_cache(root_path, meta_file): with open(txt_file, 'r', encoding='utf8') as f: for line in f: cols = line.split('| ') - items.append(cols) # text, wav_full_path, mel_name, linear_name, wav_len, mel_len - random.shuffle(items) - return items + # text, wav_full_path, mel_name, linear_name, wav_len, mel_len + items.append(cols) + return items def tweb(root_path, meta_file): @@ -23,12 +22,11 @@ def tweb(root_path, meta_file): with open(txt_file, 'r') as ttf: for line in ttf: cols = line.split('\t') - wav_file = os.path.join(root_path, cols[0]+'.wav') + wav_file = os.path.join(root_path, cols[0] + '.wav') text = cols[1] items.append([text, wav_file]) - random.shuffle(items) return items - + # def kusal(root_path, meta_file): # txt_file = os.path.join(root_path, meta_file) @@ -44,48 +42,48 @@ def tweb(root_path, meta_file): def mozilla(root_path, meta_files): - """Normalizes Mozilla meta data files to TTS format""" - import glob - meta_files = glob.glob(root_path + "/**/batch*.txt", recursive=True) - folders = [os.path.dirname(f.strip()) for f in meta_files] - items = [] - for idx, meta_file in enumerate(meta_files): - folder = folders[idx] - txt_file = os.path.join(root_path, meta_file) - with open(txt_file, 'r') as ttf: - for line in ttf: - cols = line.split('|') - wav_file = os.path.join(root_path, folder, 'wavs_no_processing', cols[1].strip()) - if os.path.isfile(wav_file): - text = cols[0].strip() - items.append([text, wav_file]) - else: - print(" > Error: {}".format(cols)) - continue - random.shuffle(items) - return items + """Normalizes Mozilla meta data files to TTS format""" + import glob + meta_files = glob.glob(root_path + "/**/batch*.txt", recursive=True) + folders = [os.path.dirname(f.strip()) for f in meta_files] + items = [] + for idx, meta_file in enumerate(meta_files): + folder = folders[idx] + txt_file = os.path.join(root_path, meta_file) + with open(txt_file, 'r') as ttf: + for line in ttf: + cols = line.split('|') + wav_file = os.path.join(root_path, folder, 'wavs_no_processing', + cols[1].strip()) + if os.path.isfile(wav_file): + text = cols[0].strip() + items.append([text, wav_file]) + else: + print(" > Error: {}".format(cols)) + continue + return items def mailabs(root_path, meta_files): - """Normalizes M-AI-Labs meta data files to TTS format""" - folders = [os.path.dirname(f.strip()) for f in meta_files.split(",")] - meta_files = [f.strip() for f in meta_files.split(",")] - items = [] - for idx, meta_file in enumerate(meta_files): - print(" | > {}".format(meta_file)) - folder = folders[idx] - txt_file = os.path.join(root_path, meta_file) - with open(txt_file, 'r') as ttf: - for line in ttf: - cols = line.split('|') - wav_file = os.path.join(root_path, folder, 'wavs', cols[0]+'.wav') - if os.path.isfile(wav_file): - text = cols[1] - items.append([text, wav_file]) - else: - continue - random.shuffle(items) - return items + """Normalizes M-AI-Labs meta data files to TTS format""" + folders = [os.path.dirname(f.strip()) for f in meta_files.split(",")] + meta_files = [f.strip() for f in meta_files.split(",")] + items = [] + for idx, meta_file in enumerate(meta_files): + print(" | > {}".format(meta_file)) + folder = folders[idx] + txt_file = os.path.join(root_path, meta_file) + with open(txt_file, 'r') as ttf: + for line in ttf: + cols = line.split('|') + wav_file = os.path.join(root_path, folder, 'wavs', + cols[0] + '.wav') + if os.path.isfile(wav_file): + text = cols[1] + items.append([text, wav_file]) + else: + continue + return items def ljspeech(root_path, meta_file): @@ -95,10 +93,9 @@ def ljspeech(root_path, meta_file): with open(txt_file, 'r') as ttf: for line in ttf: cols = line.split('|') - wav_file = os.path.join(root_path, 'wavs', cols[0]+'.wav') + wav_file = os.path.join(root_path, 'wavs', cols[0] + '.wav') text = cols[1] items.append([text, wav_file]) - random.shuffle(items) return items @@ -109,10 +106,9 @@ def nancy(root_path, meta_file): with open(txt_file, 'r') as ttf: for line in ttf: id = line.split()[1] - text = line[line.find('"')+1:line.rfind('"')-1] + text = line[line.find('"') + 1:line.rfind('"') - 1] wav_file = os.path.join(root_path, "wavn", id + ".wav") items.append([text, wav_file]) - random.shuffle(items) return items @@ -126,7 +122,6 @@ def common_voice(root_path, meta_file): continue cols = line.split("\t") text = cols[2] - # Files need to be first converted to wav... wav_file = os.path.join(root_path, "clips", cols[1] + ".wav") items.append([text, wav_file]) return items