fix: linter issues

This commit is contained in:
mueller91 2020-09-21 12:13:02 +02:00
parent 45b3c3d1b0
commit 9b4aac94a8
3 changed files with 24 additions and 24 deletions

View File

@ -59,43 +59,43 @@
[
{
"name": "vctk_slim",
"path": "../../audio-datasets/en/VCTK-Corpus/",
"path": "../../../audio-datasets/en/VCTK-Corpus/",
"meta_file_train": null,
"meta_file_val": null
},
{
"name": "libri_tts",
"path": "../../audio-datasets/en/LibriTTS/train-clean-100",
"path": "../../../audio-datasets/en/LibriTTS/train-clean-100",
"meta_file_train": null,
"meta_file_val": null
},
{
"name": "libri_tts",
"path": "../../audio-datasets/en/LibriTTS/train-clean-360",
"path": "../../../audio-datasets/en/LibriTTS/train-clean-360",
"meta_file_train": null,
"meta_file_val": null
},
{
"name": "libri_tts",
"path": "../../audio-datasets/en/LibriTTS/train-other-500",
"path": "../../../audio-datasets/en/LibriTTS/train-other-500",
"meta_file_train": null,
"meta_file_val": null
},
{
"name": "voxceleb1",
"path": "../../audio-datasets/en/voxceleb1/",
"path": "../../../audio-datasets/en/voxceleb1/",
"meta_file_train": null,
"meta_file_val": null
},
{
"name": "voxceleb2",
"path": "../../audio-datasets/en/voxceleb2/",
"path": "../../../audio-datasets/en/voxceleb2/",
"meta_file_train": null,
"meta_file_val": null
},
{
"name": "common_voice_wav",
"path": "../../audio-datasets/en/MozillaCommonVoice",
"name": "common_voice",
"path": "../../../audio-datasets/en/MozillaCommonVoice",
"meta_file_train": "train.tsv",
"meta_file_val": "test.tsv"
}

View File

@ -64,7 +64,6 @@ class MyDataset(Dataset):
def __parse_items(self):
self.speaker_to_utters = {}
for i in self.items:
text_ = i[0]
path_ = i[1]
speaker_ = i[2]
if speaker_ in self.speaker_to_utters.keys():

View File

@ -160,7 +160,7 @@ def nancy(root_path, meta_file):
return items
def common_voice_wav(root_path, meta_file):
def common_voice(root_path, meta_file):
"""Normalize the common voice meta data file to TTS format."""
txt_file = os.path.join(root_path, meta_file)
items = []
@ -258,16 +258,15 @@ def vctk(root_path, meta_files=None, wavs_path='wav48'):
def vctk_slim(root_path, meta_files=None, wavs_path='wav48'):
test_speakers = meta_files
"""homepages.inf.ed.ac.uk/jyamagis/release/VCTK-Corpus.tar.gz"""
items = []
meta_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True)
for meta_file in meta_files:
_, speaker_id, txt_file = os.path.relpath(meta_file,
txt_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True)
for text_file in txt_files:
_, speaker_id, txt_file = os.path.relpath(text_file,
root_path).split(os.sep)
file_id = txt_file.split('.')[0]
if isinstance(test_speakers, list): # if is list ignore this speakers ids
if speaker_id in test_speakers:
if isinstance(meta_files, list): # if is list ignore this speakers ids
if speaker_id in meta_files:
continue
wav_file = os.path.join(root_path, wavs_path, speaker_id,
file_id + '.wav')
@ -276,21 +275,21 @@ def vctk_slim(root_path, meta_files=None, wavs_path='wav48'):
return items
# ======================================== VOX CELEB ===========================================
def voxceleb2(root_path, meta_file):
def voxceleb2(root_path, meta_file=None):
"""
:param meta_file: optional path to a '|'-separated meta file; when given, its rows are returned instead of scanning root_path
"""
return _voxcel_x(root_path, voxcel_idx="2")
return _voxcel_x(root_path, meta_file, voxcel_idx="2")
def voxceleb1(root_path, meta_file):
def voxceleb1(root_path, meta_file=None):
"""
:param meta_file: optional path to a '|'-separated meta file; when given, its rows are returned instead of scanning root_path
"""
return _voxcel_x(root_path, voxcel_idx="1")
return _voxcel_x(root_path, meta_file, voxcel_idx="1")
def _voxcel_x(root_path, voxcel_idx):
def _voxcel_x(root_path, meta_file, voxcel_idx):
assert voxcel_idx in ["1", "2"]
expected_count = 148_000 if voxcel_idx == "1" else 1_000_000
voxceleb_path = Path(root_path)
@ -298,7 +297,11 @@ def _voxcel_x(root_path, voxcel_idx):
cache_to.parent.mkdir(exist_ok=True)
# if no cached meta file exists yet, crawl recursively for 'wav' files
if not cache_to.exists():
if meta_file is not None:
with open(str(meta_file), 'r') as f:
return [x.strip().split('|') for x in f.readlines()]
elif not cache_to.exists():
cnt = 0
meta_data = ""
wav_files = voxceleb_path.rglob("**/*.wav")
@ -316,5 +319,3 @@ def _voxcel_x(root_path, voxcel_idx):
with open(str(cache_to), 'r') as f:
return [x.strip().split('|') for x in f.readlines()]