diff --git a/datasets/preprocess.py b/datasets/preprocess.py
index a78abab9..64efc665 100644
--- a/datasets/preprocess.py
+++ b/datasets/preprocess.py
@@ -60,22 +60,6 @@ def tweb(root_path, meta_file):
 # return {'text': texts, 'wavs': wavs}
 
 
-def mozilla_old(root_path, meta_file):
-    """Normalizes Mozilla meta data files to TTS format"""
-    txt_file = os.path.join(root_path, meta_file)
-    items = []
-    speaker_name = "mozilla_old"
-    with open(txt_file, 'r') as ttf:
-        for line in ttf:
-            cols = line.split('|')
-            batch_no = int(cols[1].strip().split("_")[0])
-            wav_folder = "batch{}".format(batch_no)
-            wav_file = os.path.join(root_path, wav_folder, "wavs_no_processing", cols[1].strip())
-            text = cols[0].strip()
-            items.append([text, wav_file, speaker_name])
-    return items
-
-
 def mozilla(root_path, meta_file):
     """Normalizes Mozilla meta data files to TTS format"""
     txt_file = os.path.join(root_path, meta_file)
@@ -91,6 +75,22 @@ def mozilla(root_path, meta_file):
     return items
 
 
+def mozilla_de(root_path, meta_file):
+    """Normalizes Mozilla meta data files to TTS format"""
+    txt_file = os.path.join(root_path, meta_file)
+    items = []
+    speaker_name = "mozilla"
+    with open(txt_file, 'r', encoding="ISO 8859-1") as ttf:
+        for line in ttf:
+            cols = line.strip().split('|')
+            wav_file = cols[0].strip()
+            text = cols[1].strip()
+            folder_name = f"BATCH_{wav_file.split('_')[0]}_FINAL"
+            wav_file = os.path.join(root_path, folder_name, wav_file)
+            items.append([text, wav_file, speaker_name])
+    return items
+
+
 def mailabs(root_path, meta_files=None):
     """Normalizes M-AI-Labs meta data files to TTS format"""
     speaker_regex = re.compile("by_book/(male|female)/(?P<speaker_name>[^/]+)/")
diff --git a/layers/tacotron2.py b/layers/tacotron2.py
index c195b277..fa76a6b2 100644
--- a/layers/tacotron2.py
+++ b/layers/tacotron2.py
@@ -64,7 +64,6 @@ class Encoder(nn.Module):
     def forward(self, x, input_lengths):
         x = self.convolutions(x)
         x = x.transpose(1, 2)
-        input_lengths = input_lengths.cpu().numpy()
         x = nn.utils.rnn.pack_padded_sequence(x,
                                               input_lengths,
                                               batch_first=True)