From b0b97d636fdd31b07a18c492a00466ce880c6094 Mon Sep 17 00:00:00 2001 From: Qingping Hou Date: Sat, 14 Nov 2020 23:43:03 -0800 Subject: [PATCH] speed up metafile build for voxceleb --- TTS/bin/train_encoder.py | 2 +- TTS/tts/datasets/preprocess.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py index 078f7b84..dba866db 100644 --- a/TTS/bin/train_encoder.py +++ b/TTS/bin/train_encoder.py @@ -35,7 +35,7 @@ print(" > Using CUDA: ", use_cuda) print(" > Number of GPUs: ", num_gpus) -def setup_loader(ap, is_val=False, verbose=False): +def setup_loader(ap: AudioProcessor, is_val: bool=False, verbose: bool=False): if is_val: loader = None else: diff --git a/TTS/tts/datasets/preprocess.py b/TTS/tts/datasets/preprocess.py index 469da07e..31d4b2b5 100644 --- a/TTS/tts/datasets/preprocess.py +++ b/TTS/tts/datasets/preprocess.py @@ -228,7 +228,6 @@ def brspeech(root_path, meta_file): if line.startswith("wav_filename"): continue cols = line.split('|') - #print(cols) wav_file = os.path.join(root_path, cols[0]) text = cols[2] speaker_name = cols[3] @@ -304,17 +303,17 @@ def _voxcel_x(root_path, meta_file, voxcel_idx): elif not cache_to.exists(): cnt = 0 - meta_data = "" + meta_data = [] wav_files = voxceleb_path.rglob("**/*.wav") for path in tqdm(wav_files, desc=f"Building VoxCeleb {voxcel_idx} Meta file ... this needs to be done only once.", total=expected_count): speaker_id = str(Path(path).parent.parent.stem) assert speaker_id.startswith('id') text = None # VoxCel does not provide transciptions, and they are not needed for training the SE - meta_data += f"{text}|{path}|voxcel{voxcel_idx}_{speaker_id}\n" + meta_data.append(f"{text}|{path}|voxcel{voxcel_idx}_{speaker_id}\n") cnt += 1 with open(str(cache_to), 'w') as f: - f.write(meta_data) + f.write("".join(meta_data)) if cnt < expected_count: raise ValueError(f"Found too few instances for Voxceleb. Should be around {expected_count}, is: {cnt}")