Merge pull request #573 from houqp/qp_voxceleb

speed up metafile build for voxceleb
2020-11-17 13:47:08 +01:00 · 2020-11-17 13:47:08 +01:00 · 4f2201c143
parent a2a142dc39 b0b97d636f
commit 4f2201c143
2 changed files with 4 additions and 5 deletions
--- a/TTS/bin/train_encoder.py
+++ b/TTS/bin/train_encoder.py
@@ -35,7 +35,7 @@ print(" > Using CUDA: ", use_cuda)
 print(" > Number of GPUs: ", num_gpus)


-def setup_loader(ap, is_val=False, verbose=False):
+def setup_loader(ap: AudioProcessor, is_val: bool=False, verbose: bool=False):
    if is_val:
        loader = None
    else:
--- a/TTS/tts/datasets/preprocess.py
+++ b/TTS/tts/datasets/preprocess.py
@@ -228,7 +228,6 @@ def brspeech(root_path, meta_file):
            if line.startswith("wav_filename"):
                continue
            cols = line.split('|')
-            #print(cols)
            wav_file = os.path.join(root_path, cols[0])
            text = cols[2]
            speaker_name = cols[3]
@@ -304,17 +303,17 @@ def _voxcel_x(root_path, meta_file, voxcel_idx):

    elif not cache_to.exists():
        cnt = 0
-        meta_data = ""
+        meta_data = []
        wav_files = voxceleb_path.rglob("**/*.wav")
        for path in tqdm(wav_files, desc=f"Building VoxCeleb {voxcel_idx} Meta file ... this needs to be done only once.",
                         total=expected_count):
            speaker_id = str(Path(path).parent.parent.stem)
            assert speaker_id.startswith('id')
            text = None  # VoxCel does not provide transciptions, and they are not needed for training the SE
-            meta_data += f"{text}|{path}|voxcel{voxcel_idx}_{speaker_id}\n"
+            meta_data.append(f"{text}|{path}|voxcel{voxcel_idx}_{speaker_id}\n")
            cnt += 1
        with open(str(cache_to), 'w') as f:
-            f.write(meta_data)
+            f.write("".join(meta_data))
        if cnt < expected_count:
            raise ValueError(f"Found too few instances for Voxceleb. Should be around {expected_count}, is: {cnt}")