diff --git a/TTS/tts/utils/measures.py b/TTS/tts/utils/measures.py index fdd31242..90e862e1 100644 --- a/TTS/tts/utils/measures.py +++ b/TTS/tts/utils/measures.py @@ -7,7 +7,7 @@ def alignment_diagonal_score(alignments, binary=False): binary (bool): if True, ignore scores and consider attention as a binary mask. Shape: - alignments : batch x decoder_steps x encoder_steps + - alignments : :math:`[B, T_de, T_en]` """ maxs = alignments.max(dim=1)[0] if binary: diff --git a/TTS/utils/audio.py b/TTS/utils/audio.py index 3d45b325..96b9a1a1 100644 --- a/TTS/utils/audio.py +++ b/TTS/utils/audio.py @@ -693,7 +693,6 @@ class AudioProcessor(object): # # plt.figure() # plt.imshow(spec, aspect="auto", origin="lower") # plt.savefig('save_img2.png') - # breakpoint() return f0 ### Audio Processing ### diff --git a/recipes/ljspeech/fast_pitch/train_fast_pitch.py b/recipes/ljspeech/fast_pitch/train_fast_pitch.py index 91fe4bd2..5bc5f448 100644 --- a/recipes/ljspeech/fast_pitch/train_fast_pitch.py +++ b/recipes/ljspeech/fast_pitch/train_fast_pitch.py @@ -8,7 +8,12 @@ from TTS.utils.manage import ModelManager output_path = os.path.dirname(os.path.abspath(__file__)) # init configs -dataset_config = BaseDatasetConfig(name="ljspeech", meta_file_train="metadata.csv", meta_file_attn_mask=os.path.join(output_path, "../LJSpeech-1.1/metadata_attn_mask.txt"), path=os.path.join(output_path, "../LJSpeech-1.1/")) +dataset_config = BaseDatasetConfig( + name="ljspeech", + meta_file_train="metadata.csv", + meta_file_attn_mask=os.path.join(output_path, "../LJSpeech-1.1/metadata_attn_mask.txt"), + path=os.path.join(output_path, "../LJSpeech-1.1/"), +) audio_config = BaseAudioConfig( sample_rate=22050, do_trim_silence=False,