def libri_tts(root_path, meta_files=None):
    """Collect ``[text, wav_file, speaker_name]`` items for LibriTTS.

    Dataset: https://ai.google/tools/datasets/libri-tts/

    Each meta file is expected to be named ``<speaker>_<chapter>.trans.tsv``;
    the speaker and chapter ids are taken from that file name and the audio
    is looked up under ``<root_path>/<speaker>/<chapter>/``. Every non-empty
    row is tab-separated: column 0 is the wav file stem and column 1 is the
    text that is returned for the item.

    Args:
        root_path: Root directory of the dataset.
        meta_files: Path(s) of the ``*trans.tsv`` files to read. May be a
            list of paths or a single path string. When ``None``, all
            ``*trans.tsv`` files below ``root_path`` are discovered
            recursively.

    Returns:
        A list of ``[text, wav_file, speaker_name]`` lists.

    Raises:
        AssertionError: If a referenced wav file does not exist.
        IndexError: If a meta file name has no ``_`` separator or a
            non-empty row has fewer than two columns.
    """
    if meta_files is None:
        # Sort so the item order does not depend on filesystem listing order.
        meta_files = sorted(glob(f"{root_path}/**/*trans.tsv", recursive=True))
    elif isinstance(meta_files, str):
        # A bare string would otherwise be iterated character by character.
        meta_files = [meta_files]

    items = []
    for meta_file in meta_files:
        name_parts = os.path.basename(meta_file).split('.')[0].split('_')
        speaker_name = name_parts[0]
        chapter_id = name_parts[1]
        chapter_dir = os.path.join(root_path, speaker_name, chapter_id)
        # Explicit encoding: transcripts must not depend on the OS locale.
        with open(meta_file, 'r', encoding='utf-8') as ttf:
            for line in ttf:
                # Drop the line terminator so it never leaks into the text
                # when the text happens to be the last column of the row.
                line = line.rstrip('\r\n')
                if not line:
                    # Tolerate blank lines (e.g. a trailing empty line).
                    continue
                cols = line.split('\t')
                wav_file = os.path.join(chapter_dir, cols[0] + '.wav')
                text = cols[1]
                items.append([text, wav_file, speaker_name])

    # Validate after parsing everything, as callers rely on the
    # AssertionError raised for missing audio files.
    for item in items:
        assert os.path.exists(item[1]), f" [!] wav file is not exist - {item[1]}"
    return items