diff --git a/mozilla_voice_tts/tts/datasets/TTSDataset.py b/mozilla_voice_tts/tts/datasets/TTSDataset.py index 1002a292..2ef78e11 100644 --- a/mozilla_voice_tts/tts/datasets/TTSDataset.py +++ b/mozilla_voice_tts/tts/datasets/TTSDataset.py @@ -199,7 +199,7 @@ class MyDataset(Dataset): for idx in ids_sorted_decreasing] # get speaker embeddings if self.speaker_mapping is not None: - wav_files_names = [batch[idx]['wav_file_name'] for idx in ids_sorted_decreasing] + wav_files_names = [batch[idx]['wav_file_name'] for idx in ids_sorted_decreasing] speaker_embedding = [self.speaker_mapping[w]['embedding'] for w in wav_files_names] else: speaker_embedding = None diff --git a/mozilla_voice_tts/tts/datasets/preprocess.py b/mozilla_voice_tts/tts/datasets/preprocess.py index 2ad414fb..317673e3 100644 --- a/mozilla_voice_tts/tts/datasets/preprocess.py +++ b/mozilla_voice_tts/tts/datasets/preprocess.py @@ -229,14 +229,14 @@ def vctk(root_path, meta_files=None, wavs_path='wav48'): items = [] meta_files = glob(f"{os.path.join(root_path,'txt')}/**/*.txt", recursive=True) for meta_file in meta_files: - txt, speaker_id, txt_file = os.path.relpath(meta_file,root_path).split(os.sep) + _, speaker_id, txt_file = os.path.relpath(meta_file, root_path).split(os.sep) file_id = txt_file.split('.')[0] if isinstance(test_speakers, list): # if is list ignore this speakers ids if speaker_id in test_speakers: continue with open(meta_file) as file_text: text = file_text.readlines()[0] - wav_file = os.path.join(root_path, wavs_path, speaker_id,file_id+'.wav') + wav_file = os.path.join(root_path, wavs_path, speaker_id, file_id+'.wav') items.append([text, wav_file, speaker_id]) return items \ No newline at end of file diff --git a/mozilla_voice_tts/tts/models/tacotron.py b/mozilla_voice_tts/tts/models/tacotron.py index 9dfdbf63..f6bd07ed 100644 --- a/mozilla_voice_tts/tts/models/tacotron.py +++ b/mozilla_voice_tts/tts/models/tacotron.py @@ -55,7 +55,7 @@ class Tacotron(TacotronAbstract): self.embeddings_per_sample = True # speaker and gst embeddings is concat in decoder input - if num_speakers > 1: + if num_speakers > 1: decoder_in_features = decoder_in_features + speaker_embedding_dim # add speaker embedding dim if self.gst: decoder_in_features = decoder_in_features + gst_embedding_dim # add gst embedding dim diff --git a/mozilla_voice_tts/tts/utils/text/cleaners.py b/mozilla_voice_tts/tts/utils/text/cleaners.py index b1930834..a36ebe67 100644 --- a/mozilla_voice_tts/tts/utils/text/cleaners.py +++ b/mozilla_voice_tts/tts/utils/text/cleaners.py @@ -119,7 +119,7 @@ def english_cleaners(text): return text def portuguese_cleaners(text): - '''Basic pipeline for Portuguese text. There is no need to expand abbreviation and + '''Basic pipeline for Portuguese text. There is no need to expand abbreviation and numbers, phonemizer already does that''' text = lowercase(text) text = replace_symbols(text, lang='pt') diff --git a/mozilla_voice_tts/utils/generic_utils.py b/mozilla_voice_tts/utils/generic_utils.py index add5120d..dcfbbdc3 100644 --- a/mozilla_voice_tts/utils/generic_utils.py +++ b/mozilla_voice_tts/utils/generic_utils.py @@ -147,7 +147,6 @@ def check_argument(name, c, enum_list=None, max_val=None, min_val=None, restrict if enum_list: assert c[name].lower() in enum_list, f' [!] {name} is not a valid value' if isinstance(val_type, list): - valid_types = val_type is_valid = False for typ in val_type: if isinstance(c[name], typ):