Add vctk_wav formatter: it is the same as vctk but uses wav extension instead of flac

This commit is contained in:
Jindrich Matousek 2023-08-23 11:52:14 +01:00
parent 4085a229fe
commit 37807fef8b
1 changed file with 48 additions and 0 deletions

View File

@ -403,6 +403,54 @@ def vctk(root_path, meta_files=None, wavs_path="wav48_silence_trimmed", mic="mic
return items
# JMa: VCTK with wav files (not flac)
def vctk_wav(root_path, meta_files=None, wavs_path="wav48_silence_trimmed", mic="mic1", ignored_speakers=None):
    """Load VCTK v0.92 metadata for a copy of the corpus whose audio is stored as ``.wav``.

    Variant of the ``vctk`` formatter that looks for ``.wav`` audio files instead of ``.flac``.

    URL:
        https://datashare.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip

    This dataset has 2 recordings per speaker that are annotated with ``mic1`` and ``mic2``.
    It is believed that ``mic1`` files are the same as the previous version of the dataset.

    mic1:
        Audio recorded using an omni-directional microphone (DPA 4035).
        Contains very low frequency noises.
        This is the same audio released in previous versions of VCTK:
        https://doi.org/10.7488/ds/1994

    mic2:
        Audio recorded using a small diaphragm condenser microphone with
        very wide bandwidth (Sennheiser MKH 800).
        Two speakers, p280 and p315 had technical issues of the audio
        recordings using MKH 800.

    Transcripts are read from ``<root_path>/txt/<speaker>/<file_id>.txt`` and the matching audio
    is expected at ``<root_path>/<wavs_path>/<speaker>/<file_id>_<mic>.wav``.

    Args:
        root_path: Root directory of the dataset.
        meta_files: Ignored. Transcripts are always discovered under ``<root_path>/txt``; the
            parameter is kept so the signature stays the same as before.
        wavs_path: Directory (relative to ``root_path``) that holds the per-speaker audio folders.
        mic: Microphone suffix used in audio file names. Speaker ``p280`` always uses ``mic1``.
        ignored_speakers: Optional list/tuple/set of speaker ids (e.g. ``"p225"``) to leave out.
            Any other value (including ``None``) means no speaker is ignored.

    Returns:
        A list of dicts with the keys ``text`` (first line of the transcript, including its
        trailing newline if present), ``audio_file``, ``speaker_name`` (``"VCTK_" + speaker id``)
        and ``root_path``, ordered by transcript path. Entries whose audio file is missing, whose
        transcript is empty, or whose transcript is not at the expected directory depth are
        reported on stdout and skipped.
    """
    file_ext = "wav"

    # Only real containers are honoured; anything else means "ignore nobody".
    if isinstance(ignored_speakers, (list, tuple, set, frozenset)):
        skipped_speakers = ignored_speakers
    else:
        skipped_speakers = ()

    items = []
    # Sorted so the result order does not depend on filesystem enumeration order.
    transcript_paths = sorted(glob(os.path.join(root_path, "txt", "**", "*.txt"), recursive=True))
    for meta_file in transcript_paths:
        # The recursive glob also matches files directly in txt/ or in deeper folders;
        # only txt/<speaker>/<file>.txt can be mapped to an audio file.
        parts = os.path.relpath(meta_file, root_path).split(os.sep)
        if len(parts) != 3:
            print(f" [!] unexpected transcript location - {meta_file}")
            continue
        _, speaker_id, txt_file = parts

        # ignore speakers
        if speaker_id in skipped_speakers:
            continue

        file_id = txt_file.split(".")[0]
        # Only the first line is used, so there is no need to read the whole file.
        with open(meta_file, "r", encoding="utf-8") as file_text:
            text = file_text.readline()
        if not text:
            print(f" [!] transcript file is empty - {meta_file}")
            continue

        # p280 has no mic2 recordings
        speaker_mic = "mic1" if speaker_id == "p280" else mic
        wav_file = os.path.join(root_path, wavs_path, speaker_id, f"{file_id}_{speaker_mic}.{file_ext}")
        if os.path.exists(wav_file):
            items.append(
                {"text": text, "audio_file": wav_file, "speaker_name": "VCTK_" + speaker_id, "root_path": root_path}
            )
        else:
            print(f" [!] wav files don't exist - {wav_file}")
    return items
def vctk_old(root_path, meta_files=None, wavs_path="wav48", ignored_speakers=None):
"""homepages.inf.ed.ac.uk/jyamagis/release/VCTK-Corpus.tar.gz"""
items = []