Merge branch 'dev' of https://github.com/mozilla/TTS into dev

erogol 2020-09-21 14:21:55 +02:00
commit 8150d5727e
10 changed files with 175 additions and 65 deletions

View File

@@ -1,10 +1,10 @@
 import argparse
+from difflib import SequenceMatcher
 import os

 import numpy as np
 import tensorflow as tf
 import torch
-from fuzzywuzzy import fuzz

 from TTS.utils.io import load_config
 from TTS.vocoder.tf.utils.convert_torch_to_tf_utils import (
@@ -67,7 +67,7 @@ for tf_name in tf_var_names:
         continue
     tf_name_edited = convert_tf_name(tf_name)
     ratios = [
-        fuzz.ratio(torch_name, tf_name_edited)
+        SequenceMatcher(None, torch_name, tf_name_edited).ratio()
         for torch_name in torch_var_names
     ]
     max_idx = np.argmax(ratios)
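Note: fuzz.ratio returns an integer in 0-100 while SequenceMatcher.ratio returns a float in 0.0-1.0, but both conversion scripts only take the argmax over candidate names, so the rescaling cannot change which torch variable is matched. A minimal sketch of the drop-in replacement (the variable names below are hypothetical, for illustration only):

    from difflib import SequenceMatcher
    import numpy as np

    # hypothetical variable names standing in for real checkpoint keys
    torch_var_names = ["encoder.conv1.weight", "decoder.conv1.weight"]
    tf_name_edited = "encoder.conv1.kernel"
    ratios = [SequenceMatcher(None, name, tf_name_edited).ratio()
              for name in torch_var_names]
    print(np.argmax(ratios))  # -> 0: the encoder variable is the closest match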

View File

@@ -1,6 +1,7 @@
 # %%
 # %%
 import argparse
+from difflib import SequenceMatcher
 import os
 import sys
 # %%
@@ -10,7 +11,6 @@ from pprint import pprint
 import numpy as np
 import tensorflow as tf
 import torch
-from fuzzywuzzy import fuzz
 from TTS.tts.tf.models.tacotron2 import Tacotron2
 from TTS.tts.tf.utils.convert_torch_to_tf_utils import (
     compare_torch_tf, convert_tf_name, transfer_weights_torch_to_tf)
@@ -106,7 +106,7 @@ for tf_name in tf_var_names:
         continue
     tf_name_edited = convert_tf_name(tf_name)
     ratios = [
-        fuzz.ratio(torch_name, tf_name_edited)
+        SequenceMatcher(None, torch_name, tf_name_edited).ratio()
         for torch_name in torch_var_names
     ]
     max_idx = np.argmax(ratios)

View File

@@ -42,8 +42,12 @@ def setup_loader(ap, is_val=False, verbose=False):
         dataset = MyDataset(ap,
                             meta_data_eval if is_val else meta_data_train,
                             voice_len=1.6,
-                            num_utter_per_speaker=10,
+                            num_utter_per_speaker=c.num_utters_per_speaker,
+                            num_speakers_in_batch=c.num_speakers_in_batch,
                             skip_speakers=False,
+                            storage_size=c.storage["storage_size"],
+                            sample_from_storage_p=c.storage["sample_from_storage_p"],
+                            additive_noise=c.storage["additive_noise"],
                             verbose=verbose)
         # sampler = DistributedSampler(dataset) if num_gpus > 1 else None
         loader = DataLoader(dataset,
@@ -60,6 +64,7 @@ def train(model, criterion, optimizer, scheduler, ap, global_step):
     epoch_time = 0
     best_loss = float('inf')
     avg_loss = 0
+    avg_loader_time = 0
     end_time = time.time()
     for _, data in enumerate(data_loader):
         start_time = time.time()
@@ -93,8 +98,11 @@ def train(model, criterion, optimizer, scheduler, ap, global_step):
         step_time = time.time() - start_time
         epoch_time += step_time

-        avg_loss = 0.01 * loss.item(
-        ) + 0.99 * avg_loss if avg_loss != 0 else loss.item()
+        # Averaged Loss and Averaged Loader Time
+        avg_loss = 0.01 * loss.item() \
+                   + 0.99 * avg_loss if avg_loss != 0 else loss.item()
+        avg_loader_time = 1 / c.num_loader_workers * loader_time + \
+                          (c.num_loader_workers - 1) / c.num_loader_workers * avg_loader_time if avg_loader_time != 0 else loader_time
         current_lr = optimizer.param_groups[0]['lr']

         if global_step % c.steps_plot_stats == 0:
@@ -103,7 +111,8 @@ def train(model, criterion, optimizer, scheduler, ap, global_step):
                 "loss": avg_loss,
                 "lr": current_lr,
                 "grad_norm": grad_norm,
-                "step_time": step_time
+                "step_time": step_time,
+                "avg_loader_time": avg_loader_time
             }
             tb_logger.tb_train_epoch_stats(global_step, train_stats)
             figures = {
@@ -116,9 +125,9 @@ def train(model, criterion, optimizer, scheduler, ap, global_step):
         if global_step % c.print_step == 0:
             print(
                 " | > Step:{} Loss:{:.5f} AvgLoss:{:.5f} GradNorm:{:.5f} "
-                "StepTime:{:.2f} LoaderTime:{:.2f} LR:{:.6f}".format(
-                    global_step, loss.item(), avg_loss, grad_norm, step_time,
-                    loader_time, current_lr),
+                "StepTime:{:.2f} LoaderTime:{:.2f} AvgLoaderTime:{:.2f} LR:{:.6f}".format(
+                    global_step, loss.item(), avg_loss, grad_norm, step_time,
+                    loader_time, avg_loader_time, current_lr),
                 flush=True)

         # save best model
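Note: both running statistics above are exponential moving averages seeded with the first observation. avg_loss uses a fixed smoothing factor of 0.01, while avg_loader_time uses 1/num_loader_workers so that loading time is smoothed over roughly one fetch per worker. A minimal sketch of the update rule (illustrative only; the helper name is hypothetical):

    def running_avg(prev, new, alpha):
        # seed with the first value, then blend new observations in slowly
        return new if prev == 0 else alpha * new + (1 - alpha) * prev

    avg_loss = 0
    for loss in (1.0, 0.9, 0.8):
        avg_loss = running_avg(avg_loss, loss, alpha=0.01)
    # avg_loss moves slowly: 1.0 -> 0.999 -> 0.99701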

View File

@@ -1,11 +1,15 @@
+import numpy
 import numpy as np
+import queue
 import torch
 import random
 from torch.utils.data import Dataset
+from tqdm import tqdm


 class MyDataset(Dataset):
     def __init__(self, ap, meta_data, voice_len=1.6, num_speakers_in_batch=64,
+                 storage_size=1, sample_from_storage_p=0.5, additive_noise=0,
                  num_utter_per_speaker=10, skip_speakers=False, verbose=False):
         """
         Args:
@@ -24,8 +28,15 @@ class MyDataset(Dataset):
         self.ap = ap
         self.verbose = verbose
         self.__parse_items()
+        self.storage = queue.Queue(maxsize=storage_size * num_speakers_in_batch)
+        self.sample_from_storage_p = float(sample_from_storage_p)
+        self.additive_noise = float(additive_noise)
         if self.verbose:
             print("\n > DataLoader initialization")
+            print(f" | > Speakers per Batch: {num_speakers_in_batch}")
+            print(f" | > Storage Size: {self.storage.maxsize} speakers, each with {num_utter_per_speaker} utters")
+            print(f" | > Sample_from_storage_p : {self.sample_from_storage_p}")
+            print(f" | > Noise added : {self.additive_noise}")
             print(f" | > Number of instances : {len(self.items)}")
             print(f" | > Sequence length: {self.seq_len}")
             print(f" | > Num speakers: {len(self.speakers)}")
@@ -51,21 +62,37 @@ class MyDataset(Dataset):
         return sample

     def __parse_items(self):
-        """
-        Find unique speaker ids and create a dict mapping utterances from speaker id
-        """
-        speakers = list({item[-1] for item in self.items})
         self.speaker_to_utters = {}
-        self.speakers = []
-        for speaker in speakers:
-            speaker_utters = [item[1] for item in self.items if item[2] == speaker]
-            if len(speaker_utters) < self.num_utter_per_speaker and self.skip_speakers:
-                print(
-                    f" [!] Skipped speaker {speaker}. Not enough utterances {self.num_utter_per_speaker} vs {len(speaker_utters)}."
-                )
-            else:
-                self.speakers.append(speaker)
-                self.speaker_to_utters[speaker] = speaker_utters
+        for i in self.items:
+            path_ = i[1]
+            speaker_ = i[2]
+            if speaker_ in self.speaker_to_utters.keys():
+                self.speaker_to_utters[speaker_].append(path_)
+            else:
+                self.speaker_to_utters[speaker_] = [path_, ]
+
+        if self.skip_speakers:
+            self.speaker_to_utters = {k: v for (k, v) in self.speaker_to_utters.items()
+                                      if len(v) >= self.num_utter_per_speaker}
+
+        self.speakers = [k for (k, v) in self.speaker_to_utters.items()]
+
+    # def __parse_items(self):
+    #     """
+    #     Find unique speaker ids and create a dict mapping utterances from speaker id
+    #     """
+    #     speakers = list({item[-1] for item in self.items})
+    #     self.speaker_to_utters = {}
+    #     self.speakers = []
+    #     for speaker in speakers:
+    #         speaker_utters = [item[1] for item in self.items if item[2] == speaker]
+    #         if len(speaker_utters) < self.num_utter_per_speaker and self.skip_speakers:
+    #             print(f" [!] Skipped speaker {speaker}. Not enough utterances "
+    #                   f"{self.num_utter_per_speaker} vs {len(speaker_utters)}.")
+    #         else:
+    #             self.speakers.append(speaker)
+    #             self.speaker_to_utters[speaker] = speaker_utters

     def __len__(self):
         return int(1e10)
@@ -86,7 +113,7 @@ class MyDataset(Dataset):
         """
         Sample all M utterances for the given speaker.
         """
-        feats = []
+        wavs = []
         labels = []
         for _ in range(self.num_utter_per_speaker):
             # TODO: dummy but works
@@ -102,11 +129,9 @@ class MyDataset(Dataset):
                     break
             self.speaker_to_utters[speaker].remove(utter)

-            offset = random.randint(0, wav.shape[0] - self.seq_len)
-            mel = self.ap.melspectrogram(wav[offset : offset + self.seq_len])
-            feats.append(torch.FloatTensor(mel))
+            wavs.append(wav)
             labels.append(speaker)
-        return feats, labels
+        return wavs, labels

     def __getitem__(self, idx):
         speaker, _ = self.__sample_speaker()
@@ -116,7 +141,28 @@ class MyDataset(Dataset):
         labels = []
         feats = []
         for speaker in batch:
-            feats_, labels_ = self.__sample_speaker_utterances(speaker)
+            if random.random() < self.sample_from_storage_p and self.storage.full():
+                # sample from storage (if full), ignoring the speaker
+                wavs_, labels_ = random.choice(self.storage.queue)
+            else:
+                # don't sample from storage, but from HDD
+                wavs_, labels_ = self.__sample_speaker_utterances(speaker)
+                # if storage is full, remove an item
+                if self.storage.full():
+                    _ = self.storage.get_nowait()
+                # put the newly loaded item into storage
+                self.storage.put_nowait((wavs_, labels_))
+
+            # add random gaussian noise
+            if self.additive_noise > 0:
+                noises_ = [numpy.random.normal(0, self.additive_noise, size=len(w)) for w in wavs_]
+                wavs_ = [wavs_[i] + noises_[i] for i in range(len(wavs_))]
+
+            # take a random crop of each wav and convert it to a mel spectrogram
+            offsets_ = [random.randint(0, wav.shape[0] - self.seq_len) for wav in wavs_]
+            mels_ = [self.ap.melspectrogram(wavs_[i][offsets_[i]: offsets_[i] + self.seq_len])
+                     for i in range(len(wavs_))]
+            feats_ = [torch.FloatTensor(mel) for mel in mels_]
+
             labels.append(labels_)
             feats.extend(feats_)
         feats = torch.stack(feats)
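Note: the new __getitem__ is effectively a small replay buffer: with probability sample_from_storage_p it reuses a previously loaded (wavs, labels) group from a bounded FIFO queue instead of reading from disk, trading sample freshness for fewer disk reads. A stripped-down sketch of the pattern (assumed names, not the committed code):

    import queue
    import random

    storage = queue.Queue(maxsize=4)  # storage_size * num_speakers_in_batch in the real loader

    def fetch(load_from_disk, sample_from_storage_p=0.66):
        if random.random() < sample_from_storage_p and storage.full():
            return random.choice(storage.queue)   # replay a cached group
        item = load_from_disk()                   # fresh read from disk
        if storage.full():
            storage.get_nowait()                  # evict the oldest group
        storage.put_nowait(item)
        return item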

View File

@@ -2,6 +2,10 @@ import os
 from glob import glob
 import re
 import sys
+from pathlib import Path
+
+from tqdm import tqdm

 from TTS.tts.utils.generic_utils import split_dataset
@@ -14,8 +18,8 @@ def load_meta_data(datasets):
         meta_file_train = dataset['meta_file_train']
         meta_file_val = dataset['meta_file_val']
         preprocessor = get_preprocessor_by_name(name)
         meta_data_train = preprocessor(root_path, meta_file_train)
+        print(f" | > Found {len(meta_data_train)} files in {Path(root_path).resolve()}")
         if meta_file_val is None:
             meta_data_eval, meta_data_train = split_dataset(meta_data_train)
         else:
@@ -167,8 +171,8 @@ def common_voice(root_path, meta_file):
             cols = line.split("\t")
             text = cols[2]
             speaker_name = cols[0]
-            wav_file = os.path.join(root_path, "clips", cols[1] + ".wav")
-            items.append([text, wav_file, speaker_name])
+            wav_file = os.path.join(root_path, "clips", cols[1].replace(".mp3", ".wav"))
+            items.append([text, wav_file, 'MCV_' + speaker_name])
     return items
@@ -187,7 +191,7 @@ def libri_tts(root_path, meta_files=None):
             cols = line.split('\t')
             wav_file = os.path.join(_root_path, cols[0] + '.wav')
             text = cols[1]
-            items.append([text, wav_file, speaker_name])
+            items.append([text, wav_file, 'LTTS_' + speaker_name])
     for item in items:
         assert os.path.exists(
             item[1]), f" [!] wav files don't exist - {item[1]}"
@@ -235,8 +239,7 @@ def vctk(root_path, meta_files=None, wavs_path='wav48'):
     """homepages.inf.ed.ac.uk/jyamagis/release/VCTK-Corpus.tar.gz"""
     test_speakers = meta_files
     items = []
-    meta_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt",
-                      recursive=True)
+    meta_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True)
     for meta_file in meta_files:
         _, speaker_id, txt_file = os.path.relpath(meta_file,
                                                   root_path).split(os.sep)
@@ -249,6 +252,70 @@ def vctk(root_path, meta_files=None, wavs_path='wav48'):
             text = file_text.readlines()[0]
         wav_file = os.path.join(root_path, wavs_path, speaker_id,
                                 file_id + '.wav')
-        items.append([text, wav_file, speaker_id])
+        items.append([text, wav_file, 'VCTK_' + speaker_id])
     return items


+def vctk_slim(root_path, meta_files=None, wavs_path='wav48'):
+    """homepages.inf.ed.ac.uk/jyamagis/release/VCTK-Corpus.tar.gz"""
+    items = []
+    txt_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True)
+    for text_file in txt_files:
+        _, speaker_id, txt_file = os.path.relpath(text_file,
+                                                  root_path).split(os.sep)
+        file_id = txt_file.split('.')[0]
+        if isinstance(meta_files, list):  # if a list is given, skip these speaker ids
+            if speaker_id in meta_files:
+                continue
+        wav_file = os.path.join(root_path, wavs_path, speaker_id,
+                                file_id + '.wav')
+        items.append([None, wav_file, 'VCTK_' + speaker_id])
+    return items
+
+
+# ======================================== VOX CELEB ===========================================
+def voxceleb2(root_path, meta_file=None):
+    """
+    :param meta_file: used only for consistency with the load_meta_data api
+    """
+    return _voxcel_x(root_path, meta_file, voxcel_idx="2")
+
+
+def voxceleb1(root_path, meta_file=None):
+    """
+    :param meta_file: used only for consistency with the load_meta_data api
+    """
+    return _voxcel_x(root_path, meta_file, voxcel_idx="1")
+
+
+def _voxcel_x(root_path, meta_file, voxcel_idx):
+    assert voxcel_idx in ["1", "2"]
+    expected_count = 148_000 if voxcel_idx == "1" else 1_000_000
+    voxceleb_path = Path(root_path)
+    cache_to = voxceleb_path / f"metafile_voxceleb{voxcel_idx}.csv"
+    cache_to.parent.mkdir(exist_ok=True)
+
+    # if no meta file is given and no cache exists, crawl recursively for 'wav' files
+    if meta_file is not None:
+        with open(str(meta_file), 'r') as f:
+            return [x.strip().split('|') for x in f.readlines()]
+    elif not cache_to.exists():
+        cnt = 0
+        meta_data = ""
+        wav_files = voxceleb_path.rglob("**/*.wav")
+        for path in tqdm(wav_files,
+                         desc=f"Building VoxCeleb {voxcel_idx} Meta file ... this needs to be done only once.",
+                         total=expected_count):
+            speaker_id = str(Path(path).parent.parent.stem)
+            assert speaker_id.startswith('id')
+            text = None  # VoxCeleb does not provide transcriptions, and they are not needed for training the SE
+            meta_data += f"{text}|{path}|voxcel{voxcel_idx}_{speaker_id}\n"
+            cnt += 1
+        with open(str(cache_to), 'w') as f:
+            f.write(meta_data)
+        if cnt < expected_count:
+            raise ValueError(f"Found too few instances for VoxCeleb. Should be around {expected_count}, is: {cnt}")

+    with open(str(cache_to), 'r') as f:
+        return [x.strip().split('|') for x in f.readlines()]
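Note: the cached VoxCeleb metafile is a pipe-separated list with one utterance per line, so it round-trips through the same strip/split reader used for user-supplied meta files. A small sketch of reading it back (the path below is hypothetical):

    # each line: "<text>|<wav_path>|voxcel1_<speaker_id>"; text is the literal string
    # "None" for VoxCeleb, since transcriptions are not needed for the speaker encoder
    with open("metafile_voxceleb1.csv", 'r') as f:
        items = [line.strip().split('|') for line in f]
    # items[i] == [text, wav_file, speaker_name], the same shape every preprocessor returns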

View File

@@ -7,11 +7,9 @@ from TTS.utils.generic_utils import check_argument

 def split_dataset(items):
-    is_multi_speaker = False
     speakers = [item[-1] for item in items]
     is_multi_speaker = len(set(speakers)) > 1
-    eval_split_size = 500 if len(items) * 0.01 > 500 else int(
-        len(items) * 0.01)
+    eval_split_size = min(500, int(len(items) * 0.01))
     assert eval_split_size > 0, " [!] You do not have enough samples to train. You need at least 100 samples."
     np.random.seed(0)
     np.random.shuffle(items)
@@ -142,6 +140,11 @@ def check_config(c):
     check_argument('do_trim_silence', c['audio'], restricted=True, val_type=bool)
     check_argument('trim_db', c['audio'], restricted=True, val_type=int)

+    # storage parameters
+    check_argument('sample_from_storage_p', c['storage'], restricted=True, val_type=float, min_val=0.0, max_val=1.0)
+    check_argument('storage_size', c['storage'], restricted=True, val_type=int, min_val=1, max_val=100)
+    check_argument('additive_noise', c['storage'], restricted=True, val_type=float, min_val=0.0, max_val=1.0)
+
     # training parameters
     check_argument('batch_size', c, restricted=True, val_type=int, min_val=1)
     check_argument('eval_batch_size', c, restricted=True, val_type=int, min_val=1)
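Note: the three checks above imply a new "storage" section in the training config. A hedged example of what such a section might look like, written as a Python dict (the field names come from the checks; the values are illustrative, not defaults from this commit):

    c = {
        # ... existing audio / training sections ...
        "storage": {
            "sample_from_storage_p": 0.66,  # float in [0.0, 1.0]: chance to replay cached utterances
            "storage_size": 15,             # int in [1, 100]: multiplied by num_speakers_in_batch to size the replay queue
            "additive_noise": 1e-5,         # float in [0.0, 1.0]: std of gaussian noise added to wavs
        },
    }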

View File

@@ -50,7 +50,7 @@ def save_best_model(target_loss, best_loss, model, optimizer, current_step, epoch, r, output_folder, **kwargs):
     if target_loss < best_loss:
         file_name = 'best_model.pth.tar'
         checkpoint_path = os.path.join(output_folder, file_name)
-        print(" > BEST MODEL : {}".format(checkpoint_path))
+        print(" >> BEST MODEL : {}".format(checkpoint_path))
         save_model(model, optimizer, current_step, epoch, r, checkpoint_path, model_loss=target_loss, **kwargs)
         best_loss = target_loss
     return best_loss

View File

@@ -20,5 +20,4 @@ soundfile
 nose==1.3.7
 cardboardlint==1.3.0
 pylint==2.5.3
-fuzzywuzzy
 gdown

View File

@@ -1,10 +1,6 @@
-client_id path sentence up_votes down_votes age gender accent
-aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 21fce545b24d9a5af0403b949e95e8dd3c10c4ff3e371f14e4d5b4ebf588670b7c9e618285fc872d94a89ed7f0217d9019fe5de33f1577b49dcd518eacf63c4b Man sollte den Länderfinanzausgleich durch einen Bundesliga-Soli ersetzen. 2 0 fourties male germany
-aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 42758baa4e91ef6b82b78b11a04bc5117a035a8d3bc42c33c0bb3084909af17043a194cfd8cd9839f0d6ef1ea5413acda5de5d1936abcc8ca073e2da7f9488ea Folgende Lektüre kann ich Ihnen zum Thema Kognitionspsychologie empfehlen. 2 0 fourties male germany
-aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 478f172c2dbda6675247e9674ade79a5b49efeefb7c9e99040dcc69a847a01d69398cf180570859b0cdb6fc887717e04cd8b149c723d48d00b5d18f41314667c Touristen winkten den Leuten am Ufer zu. 2 0 fourties male germany
-aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 4854368d6d21cb44103e432b5332f31e8d14030582a40850501bcf9377d699314a5ff27a8206fa89254ddde7f3f1c65d33836f3dfcfa16bcabec08537f2b5f08 Valentin hat das Handtuch geworfen. 2 0 fourties male germany
-aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 a841a9f3e032495dd47560e65fba99eeacb3618c07de8b1351c20188e5b71e33cc52f73315f721a3a24b65763c65bb52fbf3ae052eb5774e834dcb57f296db5c Ohne Gehörschutz bei der Arbeit wäre Klaus wohl nach zwei Wochen taub. 2 0 fourties male germany
-aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 03ab970a5bf5410bc3260b073cce1c7f49c688ace83dc8836b1c0f79a09fea45a27725c769f4a9d2e6181defd016d22642789d7ac51da252b42958a9192bd4c7 Gerrit erinnerte sich daran, dass er einst einen Eid geschworen hatte. 2 0 fourties male germany
-aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 c4a94df443ad5f2c7241413ef7145d5f0de41ae929759073917fe96166da3c7d3a612c920ed7b0f3d5950a38d6205e9dba24af5bfb27e390a220d004e6e26744 Auf das, was jetzt kommt, habe ich nämlich absolut keinen Bock. 2 0 fourties male germany
-aa7af576605fee2c78c26b85497c64cb9c9fd97228071f8666d9f49f15bce01899bbb930fa60b76d212091d779d83b92e0b54c73cbb21d2c7e1eedc817e41cb3 104695983b1112229b4a48696405d044dad9ddef713aa6eb1a6240cc16b7b7a2a96354ae9da99783850dde08a982091e48d3037288a3a58269cac9fe70a6bd7a Von Salzburg ist es doch nicht weit bis zum Chiemsee. 2 0 fourties male germany
-d5b5da343bb0f65e3580bc2e1902a4f5d004241488d751503f2020bc1c93f89715e355e35f6e25def2b90cb3eea99fda403eb92ae3afbb84d039a54a4ed2d875 ad2f69e053b0e20e01c82b9821fe5787f1cc8e4b0b97f0e4cab1e9a652c577169c8244fb222281a60ee3081854014113e04c4ca43643100b7c01dab0fac11974 Warum werden da keine strafrechtlichen Konsequenzen gezogen? 2 0 thirties male germany
+client_id path sentence up_votes down_votes age gender accent locale segment
+95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005954.mp3 The applicants are invited for coffee and visa is given immediately. 3 0 en
+95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005955.mp3 Developmental robotics is related to, but differs from, evolutionary robotics. 2 0 en
+95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005956.mp3 The musical was originally directed and choreographed by Alan Lund. 2 0 en
+954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737073.mp3 He graduated from Columbia High School, in Brown County, South Dakota. 2 0 en
+954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737074.mp3 Competition for limited resources has also resulted in some local conflicts. 2 0 en

View File

@@ -11,18 +11,8 @@ class TestPreprocessors(unittest.TestCase):
         root_path = get_tests_input_path()
         meta_file = "common_voice.tsv"
         items = common_voice(root_path, meta_file)
-        assert items[0][0] == "Man sollte den Länderfinanzausgleich durch " \
-                              "einen Bundesliga-Soli ersetzen."
-        assert items[0][1] == os.path.join(get_tests_input_path(), "clips",
-                                           "21fce545b24d9a5af0403b949e95e8dd3"
-                                           "c10c4ff3e371f14e4d5b4ebf588670b7c"
-                                           "9e618285fc872d94a89ed7f0217d9019f"
-                                           "e5de33f1577b49dcd518eacf63c4b.wav")
-        assert items[-1][0] == "Warum werden da keine strafrechtlichen " \
-                               "Konsequenzen gezogen?"
-        assert items[-1][1] == os.path.join(get_tests_input_path(), "clips",
-                                            "ad2f69e053b0e20e01c82b9821fe5787f1"
-                                            "cc8e4b0b97f0e4cab1e9a652c577169c82"
-                                            "44fb222281a60ee3081854014113e04c4c"
-                                            "a43643100b7c01dab0fac11974.wav")
+        assert items[0][0] == 'The applicants are invited for coffee and visa is given immediately.'
+        assert items[0][1] == os.path.join(get_tests_input_path(), "clips", "common_voice_en_20005954.wav")
+        assert items[-1][0] == "Competition for limited resources has also resulted in some local conflicts."
+        assert items[-1][1] == os.path.join(get_tests_input_path(), "clips", "common_voice_en_19737074.wav")