Merge pull request #174 from twerkmeister/dev-tacotron2

Mozilla common voice preprocessor
This commit is contained in:
Eren Golge 2019-04-26 09:35:12 +02:00 committed by GitHub
commit 2e8446539f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 133 additions and 64 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
.idea/
*.pyc
.DS_Store
./__init__.py

View File

@ -1,5 +1,5 @@
import os
import random
def tts_cache(root_path, meta_file):
"""This format is set for the meta-file generated by extract_features.py"""
@ -8,9 +8,9 @@ def tts_cache(root_path, meta_file):
with open(txt_file, 'r', encoding='utf8') as f:
for line in f:
cols = line.split('| ')
items.append(cols) # text, wav_full_path, mel_name, linear_name, wav_len, mel_len
random.shuffle(items)
return items
# text, wav_full_path, mel_name, linear_name, wav_len, mel_len
items.append(cols)
return items
def tweb(root_path, meta_file):
@ -22,12 +22,11 @@ def tweb(root_path, meta_file):
with open(txt_file, 'r') as ttf:
for line in ttf:
cols = line.split('\t')
wav_file = os.path.join(root_path, cols[0]+'.wav')
wav_file = os.path.join(root_path, cols[0] + '.wav')
text = cols[1]
items.append([text, wav_file])
random.shuffle(items)
return items
# def kusal(root_path, meta_file):
# txt_file = os.path.join(root_path, meta_file)
@ -43,50 +42,51 @@ def tweb(root_path, meta_file):
def mozilla(root_path, meta_files):
"""Normalizes Mozilla meta data files to TTS format"""
import glob
meta_files = glob.glob(root_path + "/**/batch*.txt", recursive=True)
folders = [os.path.dirname(f.strip()) for f in meta_files]
items = []
for idx, meta_file in enumerate(meta_files):
folder = folders[idx]
# txt_file = os.path.join(root_path, meta_file)
txt_file = meta_file
with open(txt_file, 'r') as ttf:
for line in ttf:
cols = line.split('|')
# wav_file = os.path.join(root_path, folder, 'wavs_no_processing', cols[1].strip())
wav_file = os.path.join(folder, 'wavs_no_processing', cols[1].strip())
if os.path.isfile(wav_file):
text = cols[0].strip()
items.append([text, wav_file])
else:
print(" > Error: {}".format(wav_file))
continue
random.shuffle(items)
return items
"""Normalizes Mozilla meta data files to TTS format"""
import glob
meta_files = glob.glob(root_path + "/**/batch*.txt", recursive=True)
folders = [os.path.dirname(f.strip()) for f in meta_files]
items = []
for idx, meta_file in enumerate(meta_files):
folder = folders[idx]
# txt_file = os.path.join(root_path, meta_file)
txt_file = meta_file
with open(txt_file, 'r') as ttf:
for line in ttf:
cols = line.split('|')
# wav_file = os.path.join(root_path, folder,
# 'wavs_no_processing', cols[1].strip())
wav_file = os.path.join(folder, 'wavs_no_processing',
cols[1].strip())
if os.path.isfile(wav_file):
text = cols[0].strip()
items.append([text, wav_file])
else:
print(" > Error: {}".format(wav_file))
continue
return items
def mailabs(root_path, meta_files):
"""Normalizes M-AI-Labs meta data files to TTS format"""
folders = [os.path.dirname(f.strip()) for f in meta_files.split(",")]
meta_files = [f.strip() for f in meta_files.split(",")]
items = []
for idx, meta_file in enumerate(meta_files):
print(" | > {}".format(meta_file))
folder = folders[idx]
txt_file = os.path.join(root_path, meta_file)
with open(txt_file, 'r') as ttf:
for line in ttf:
cols = line.split('|')
wav_file = os.path.join(root_path, folder, 'wavs', cols[0]+'.wav')
if os.path.isfile(wav_file):
text = cols[1]
items.append([text, wav_file])
else:
continue
random.shuffle(items)
return items
"""Normalizes M-AI-Labs meta data files to TTS format"""
folders = [os.path.dirname(f.strip()) for f in meta_files.split(",")]
meta_files = [f.strip() for f in meta_files.split(",")]
items = []
for idx, meta_file in enumerate(meta_files):
print(" | > {}".format(meta_file))
folder = folders[idx]
txt_file = os.path.join(root_path, meta_file)
with open(txt_file, 'r') as ttf:
for line in ttf:
cols = line.split('|')
wav_file = os.path.join(root_path, folder, 'wavs',
cols[0] + '.wav')
if os.path.isfile(wav_file):
text = cols[1]
items.append([text, wav_file])
else:
continue
return items
def ljspeech(root_path, meta_file):
@ -96,10 +96,9 @@ def ljspeech(root_path, meta_file):
with open(txt_file, 'r') as ttf:
for line in ttf:
cols = line.split('|')
wav_file = os.path.join(root_path, 'wavs', cols[0]+'.wav')
wav_file = os.path.join(root_path, 'wavs', cols[0] + '.wav')
text = cols[1]
items.append([text, wav_file])
random.shuffle(items)
return items
@ -110,8 +109,22 @@ def nancy(root_path, meta_file):
with open(txt_file, 'r') as ttf:
for line in ttf:
id = line.split()[1]
text = line[line.find('"')+1:line.rfind('"')-1]
wav_file = root_path + 'wavn/' + id + '.wav'
text = line[line.find('"') + 1:line.rfind('"') - 1]
wav_file = os.path.join(root_path, "wavn", id + ".wav")
items.append([text, wav_file])
random.shuffle(items)
return items
return items
def common_voice(root_path, meta_file):
    """Normalize the common voice meta data file to TTS format.

    Args:
        root_path: Dataset directory. The meta data file and the ``clips``
            folder are both resolved relative to it.
        meta_file: Name of the tab-separated meta data file in ``root_path``.

    Returns:
        A list of ``[text, wav_file]`` pairs in file order. ``text`` is the
        third column of a row and ``wav_file`` is
        ``<root_path>/clips/<second column>.wav``.
    """
    txt_file = os.path.join(root_path, meta_file)
    items = []
    # Read as UTF-8 explicitly so that non-ASCII sentences are decoded the
    # same way regardless of the platform's default locale encoding.
    with open(txt_file, 'r', encoding='utf-8') as ttf:
        for line in ttf:
            # Skip the header row and blank lines (a blank line has no
            # columns to index and would otherwise raise IndexError).
            if line.startswith("client_id") or not line.strip():
                continue
            cols = line.split("\t")
            text = cols[2]
            wav_file = os.path.join(root_path, "clips", cols[1] + ".wav")
            items.append([text, wav_file])
    return items

View File

@ -1,5 +1,4 @@
numpy==1.14.3
lws
torch>=0.4.1
librosa==0.5.1
Unidecode==0.4.20

View File

@ -0,0 +1,16 @@
import os
def get_tests_path():
    """Return the directory that contains this module (the test directory)."""
    # Resolve symlinks first so the result is the real on-disk location.
    this_file = os.path.realpath(__file__)
    tests_dir, _ = os.path.split(this_file)
    return tests_dir
def get_tests_input_path():
    """Return the path of the ``inputs`` folder inside the test directory."""
    tests_root = get_tests_path()
    return os.path.join(tests_root, "inputs")
def get_tests_output_path():
    """Return the path of the ``outputs`` folder inside the test directory."""
    tests_root = get_tests_path()
    return os.path.join(tests_root, "outputs")

View File

@ -2,21 +2,23 @@ import os
import unittest
import numpy as np
import torch as T
from tests import get_tests_path, get_tests_input_path, get_tests_output_path
from utils.audio import AudioProcessor
from utils.generic_utils import load_config
file_path = os.path.dirname(os.path.realpath(__file__))
INPUTPATH = os.path.join(file_path, 'inputs')
OUTPATH = os.path.join(file_path, "outputs/audio_tests")
os.makedirs(OUTPATH, exist_ok=True)
TESTS_PATH = get_tests_path()
OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests")
WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav")
c = load_config(os.path.join(file_path, 'test_config.json'))
os.makedirs(OUT_PATH, exist_ok=True)
conf = load_config(os.path.join(TESTS_PATH, 'test_config.json'))
class TestAudio(unittest.TestCase):
def __init__(self, *args, **kwargs):
super(TestAudio, self).__init__(*args, **kwargs)
self.ap = AudioProcessor(**c.audio)
self.ap = AudioProcessor(**conf.audio)
def test_audio_synthesis(self):
""" 1. load wav
@ -31,13 +33,13 @@ class TestAudio(unittest.TestCase):
self.ap.signal_norm = signal_norm
self.ap.symmetric_norm = symmetric_norm
self.ap.clip_norm = clip_norm
wav = self.ap.load_wav(INPUTPATH + "/example_1.wav")
wav = self.ap.load_wav(WAV_FILE)
mel = self.ap.melspectrogram(wav)
wav_ = self.ap.inv_mel_spectrogram(mel)
file_name = "/audio_test-melspec_max_norm_{}-signal_norm_{}-symmetric_{}-clip_norm_{}.wav"\
.format(max_norm, signal_norm, symmetric_norm, clip_norm)
print(" | > Creating wav file at : ", file_name)
self.ap.save_wav(wav_, OUTPATH + file_name)
self.ap.save_wav(wav_, OUT_PATH + file_name)
# maxnorm = 1.0
_test(1., False, False, False)
@ -55,7 +57,7 @@ class TestAudio(unittest.TestCase):
def test_normalize(self):
"""Check normalization and denormalization for range values and consistency """
print(" > Testing normalization and denormalization.")
wav = self.ap.load_wav(INPUTPATH + "/example_1.wav")
wav = self.ap.load_wav(WAV_FILE)
self.ap.signal_norm = False
x = self.ap.melspectrogram(wav)
x_old = x

View File

@ -0,0 +1,10 @@
client_id path sentence up_votes down_votes age gender accent
aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 21fce545b24d9a5af0403b949e95e8dd3c10c4ff3e371f14e4d5b4ebf588670b7c9e618285fc872d94a89ed7f0217d9019fe5de33f1577b49dcd518eacf63c4b Man sollte den Länderfinanzausgleich durch einen Bundesliga-Soli ersetzen. 2 0 fourties male germany
aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 42758baa4e91ef6b82b78b11a04bc5117a035a8d3bc42c33c0bb3084909af17043a194cfd8cd9839f0d6ef1ea5413acda5de5d1936abcc8ca073e2da7f9488ea Folgende Lektüre kann ich Ihnen zum Thema Kognitionspsychologie empfehlen. 2 0 fourties male germany
aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 478f172c2dbda6675247e9674ade79a5b49efeefb7c9e99040dcc69a847a01d69398cf180570859b0cdb6fc887717e04cd8b149c723d48d00b5d18f41314667c Touristen winkten den Leuten am Ufer zu. 2 0 fourties male germany
aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 4854368d6d21cb44103e432b5332f31e8d14030582a40850501bcf9377d699314a5ff27a8206fa89254ddde7f3f1c65d33836f3dfcfa16bcabec08537f2b5f08 Valentin hat das Handtuch geworfen. 2 0 fourties male germany
aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 a841a9f3e032495dd47560e65fba99eeacb3618c07de8b1351c20188e5b71e33cc52f73315f721a3a24b65763c65bb52fbf3ae052eb5774e834dcb57f296db5c Ohne Gehörschutz bei der Arbeit wäre Klaus wohl nach zwei Wochen taub. 2 0 fourties male germany
aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 03ab970a5bf5410bc3260b073cce1c7f49c688ace83dc8836b1c0f79a09fea45a27725c769f4a9d2e6181defd016d22642789d7ac51da252b42958a9192bd4c7 Gerrit erinnerte sich daran, dass er einst einen Eid geschworen hatte. 2 0 fourties male germany
aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 c4a94df443ad5f2c7241413ef7145d5f0de41ae929759073917fe96166da3c7d3a612c920ed7b0f3d5950a38d6205e9dba24af5bfb27e390a220d004e6e26744 Auf das, was jetzt kommt, habe ich nämlich absolut keinen Bock. 2 0 fourties male germany
aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 104695983b1112229b4a48696405d044dad9ddef713aa6eb1a6240cc16b7b7a2a96354ae9da99783850dde08a982091e48d3037288a3a58269cac9fe70a6bd7a Von Salzburg ist es doch nicht weit bis zum Chiemsee. 2 0 fourties male germany
d5b5da343bb0f65e3580bc2e1902a4f5d004241488d751503f2020bc1c93f89715e355e35f6e25def2b90cb3eea99fda403eb92ae3afbb84d039a54a4ed2d875 ad2f69e053b0e20e01c82b9821fe5787f1cc8e4b0b97f0e4cab1e9a652c577169c8244fb222281a60ee3081854014113e04c4ca43643100b7c01dab0fac11974 Warum werden da keine strafrechtlichen Konsequenzen gezogen? 2 0 thirties male germany
1 client_id path sentence up_votes down_votes age gender accent
2 aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 21fce545b24d9a5af0403b949e95e8dd3c10c4ff3e371f14e4d5b4ebf588670b7c9e618285fc872d94a89ed7f0217d9019fe5de33f1577b49dcd518eacf63c4b Man sollte den Länderfinanzausgleich durch einen Bundesliga-Soli ersetzen. 2 0 fourties male germany
3 aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 42758baa4e91ef6b82b78b11a04bc5117a035a8d3bc42c33c0bb3084909af17043a194cfd8cd9839f0d6ef1ea5413acda5de5d1936abcc8ca073e2da7f9488ea Folgende Lektüre kann ich Ihnen zum Thema Kognitionspsychologie empfehlen. 2 0 fourties male germany
4 aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 478f172c2dbda6675247e9674ade79a5b49efeefb7c9e99040dcc69a847a01d69398cf180570859b0cdb6fc887717e04cd8b149c723d48d00b5d18f41314667c Touristen winkten den Leuten am Ufer zu. 2 0 fourties male germany
5 aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 4854368d6d21cb44103e432b5332f31e8d14030582a40850501bcf9377d699314a5ff27a8206fa89254ddde7f3f1c65d33836f3dfcfa16bcabec08537f2b5f08 Valentin hat das Handtuch geworfen. 2 0 fourties male germany
6 aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 a841a9f3e032495dd47560e65fba99eeacb3618c07de8b1351c20188e5b71e33cc52f73315f721a3a24b65763c65bb52fbf3ae052eb5774e834dcb57f296db5c Ohne Gehörschutz bei der Arbeit wäre Klaus wohl nach zwei Wochen taub. 2 0 fourties male germany
7 aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 03ab970a5bf5410bc3260b073cce1c7f49c688ace83dc8836b1c0f79a09fea45a27725c769f4a9d2e6181defd016d22642789d7ac51da252b42958a9192bd4c7 Gerrit erinnerte sich daran, dass er einst einen Eid geschworen hatte. 2 0 fourties male germany
8 aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 c4a94df443ad5f2c7241413ef7145d5f0de41ae929759073917fe96166da3c7d3a612c920ed7b0f3d5950a38d6205e9dba24af5bfb27e390a220d004e6e26744 Auf das, was jetzt kommt, habe ich nämlich absolut keinen Bock. 2 0 fourties male germany
9 aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 104695983b1112229b4a48696405d044dad9ddef713aa6eb1a6240cc16b7b7a2a96354ae9da99783850dde08a982091e48d3037288a3a58269cac9fe70a6bd7a Von Salzburg ist es doch nicht weit bis zum Chiemsee. 2 0 fourties male germany
10 d5b5da343bb0f65e3580bc2e1902a4f5d004241488d751503f2020bc1c93f89715e355e35f6e25def2b90cb3eea99fda403eb92ae3afbb84d039a54a4ed2d875 ad2f69e053b0e20e01c82b9821fe5787f1cc8e4b0b97f0e4cab1e9a652c577169c8244fb222281a60ee3081854014113e04c4ca43643100b7c01dab0fac11974 Warum werden da keine strafrechtlichen Konsequenzen gezogen? 2 0 thirties male germany

28
tests/preprocess_tests.py Normal file
View File

@ -0,0 +1,28 @@
import unittest
import os
from tests import get_tests_input_path
from datasets.preprocess import common_voice
class TestPreprocessors(unittest.TestCase):
    """Tests for the dataset meta data preprocessors."""

    def test_common_voice_preprocessor(self):
        """Check the first and last items parsed from ``common_voice.tsv``."""
        root_path = get_tests_input_path()
        meta_file = "common_voice.tsv"
        items = common_voice(root_path, meta_file)

        # Use unittest assertions instead of bare ``assert``: they are not
        # stripped under ``python -O`` and report both values on failure.
        self.assertEqual(
            items[0][0],
            "Man sollte den Länderfinanzausgleich durch "
            "einen Bundesliga-Soli ersetzen.")
        self.assertEqual(
            items[0][1],
            os.path.join(root_path, "clips",
                         "21fce545b24d9a5af0403b949e95e8dd3"
                         "c10c4ff3e371f14e4d5b4ebf588670b7c"
                         "9e618285fc872d94a89ed7f0217d9019f"
                         "e5de33f1577b49dcd518eacf63c4b.wav"))

        self.assertEqual(
            items[-1][0],
            "Warum werden da keine strafrechtlichen "
            "Konsequenzen gezogen?")
        self.assertEqual(
            items[-1][1],
            os.path.join(root_path, "clips",
                         "ad2f69e053b0e20e01c82b9821fe5787f1"
                         "cc8e4b0b97f0e4cab1e9a652c577169c82"
                         "44fb222281a60ee3081854014113e04c4c"
                         "a43643100b7c01dab0fac11974.wav"))