From e995a63bd6f44d7fc1222aeaf15d227e134deac6 Mon Sep 17 00:00:00 2001
From: WeberJulian
Date: Tue, 2 Nov 2021 19:10:18 +0100
Subject: [PATCH] fix linter

---
 TTS/bin/find_unique_phonemes.py       |  1 -
 TTS/bin/remove_silence_using_vad.py   | 27 ++++++++++-----------------
 TTS/tts/datasets/formatters.py        |  2 +-
 TTS/tts/models/vits.py                |  3 +--
 TTS/tts/utils/speakers.py             |  1 +
 notebooks/dataset_analysis/analyze.py |  2 +-
 6 files changed, 14 insertions(+), 22 deletions(-)

diff --git a/TTS/bin/find_unique_phonemes.py b/TTS/bin/find_unique_phonemes.py
index bbc88fb6..ffad6891 100644
--- a/TTS/bin/find_unique_phonemes.py
+++ b/TTS/bin/find_unique_phonemes.py
@@ -3,7 +3,6 @@ import argparse
 import multiprocessing
 from argparse import RawTextHelpFormatter
 
-import numpy
 from tqdm.contrib.concurrent import process_map
 
 from TTS.config import load_config
diff --git a/TTS/bin/remove_silence_using_vad.py b/TTS/bin/remove_silence_using_vad.py
index 25ae26ef..8951662b 100755
--- a/TTS/bin/remove_silence_using_vad.py
+++ b/TTS/bin/remove_silence_using_vad.py
@@ -6,12 +6,7 @@ import glob
 import multiprocessing
 import os
 import pathlib
-import sys
 import wave
-from itertools import chain
-
-import numpy as np
-import tqdm
 import webrtcvad
 from tqdm.contrib.concurrent import process_map
 
@@ -47,8 +42,8 @@ def write_wave(path, audio, sample_rate):
 class Frame(object):
     """Represents a "frame" of audio data."""
 
-    def __init__(self, bytes, timestamp, duration):
-        self.bytes = bytes
+    def __init__(self, _bytes, timestamp, duration):
+        self.bytes = _bytes
         self.timestamp = timestamp
         self.duration = duration
 
@@ -121,7 +116,7 @@ def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, fram
                 # We want to yield all the audio we see from now until
                 # we are NOTTRIGGERED, but we have to start with the
                 # audio that's already in the ring buffer.
-                for f, s in ring_buffer:
+                for f, _ in ring_buffer:
                     voiced_frames.append(f)
                 ring_buffer.clear()
             else:
@@ -146,11 +141,10 @@
 
 
 def remove_silence(filepath):
-    filename = os.path.basename(filepath)
     output_path = filepath.replace(os.path.join(args.input_dir, ""), os.path.join(args.output_dir, ""))
     # ignore if the file exists
    if os.path.exists(output_path) and not args.force:
-        return False
+        return
     # create all directory structure
     pathlib.Path(output_path).parent.mkdir(parents=True, exist_ok=True)
     padding_duration_ms = 300  # default 300
@@ -166,7 +160,7 @@
     if num_segments != 0:
         for i, segment in reversed(list(enumerate(segments))):
             if i >= 1:
-                if flag == False:
+                if not flag:
                     concat_segment = segment
                     flag = True
                 else:
@@ -176,11 +170,12 @@
                     segment = segment + concat_segment
                 write_wave(output_path, segment, sample_rate)
                 print(output_path)
-                return True
+                return
     else:
         print("> Just Copying the file to:", output_path)
         # if fail to remove silence just write the file
         write_wave(output_path, audio, sample_rate)
+    return
 
 
 def preprocess_audios():
@@ -198,11 +193,9 @@
 
 
 if __name__ == "__main__":
-    """
-    usage
-    python remove_silence.py -i=VCTK-Corpus-bk/ -o=../VCTK-Corpus-removed-silence -g=wav48/*/*.wav -a=2
-    """
-    parser = argparse.ArgumentParser()
+    parser = argparse.ArgumentParser(
+        description="python remove_silence.py -i=VCTK-Corpus-bk/ -o=../VCTK-Corpus-removed-silence -g=wav48/*/*.wav -a=2"
+    )
     parser.add_argument("-i", "--input_dir", type=str, default="../VCTK-Corpus", help="Dataset root dir")
     parser.add_argument(
         "-o", "--output_dir", type=str, default="../VCTK-Corpus-removed-silence", help="Output Dataset dir"
diff --git a/TTS/tts/datasets/formatters.py b/TTS/tts/datasets/formatters.py
index 7e65f21a..49a1ced4 100644
--- a/TTS/tts/datasets/formatters.py
+++ b/TTS/tts/datasets/formatters.py
@@ -59,7 +59,7 @@ def mozilla_de(root_path, meta_file, **kwargs):  # pylint: disable=unused-argume
     return items
 
 
-def mailabs(root_path, meta_files=None):
+def mailabs(root_path, meta_files=None, ununsed_speakers=None):
     """Normalizes M-AI-Labs meta data files to TTS format
 
     Args:
diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py
index c185150b..94d5bfc9 100644
--- a/TTS/tts/models/vits.py
+++ b/TTS/tts/models/vits.py
@@ -1,5 +1,4 @@
 import math
-import random
 from dataclasses import dataclass, field
 from itertools import chain
 from typing import Dict, List, Tuple
@@ -747,7 +746,7 @@ class Vits(BaseTTS):
 
         # inverse decoder and get the output
         z_f_pred = self.flow(z_ft, y_mask, g=g, reverse=True)
-        z_slice, slice_ids = rand_segment(z_f_pred, y_lengths, self.spec_segment_size)
+        z_slice, slice_ids = rand_segments(z_f_pred, y_lengths, self.spec_segment_size)
 
         o = self.waveform_decoder(z_slice, g=g)
 
diff --git a/TTS/tts/utils/speakers.py b/TTS/tts/utils/speakers.py
index d6381a70..8c248658 100644
--- a/TTS/tts/utils/speakers.py
+++ b/TTS/tts/utils/speakers.py
@@ -7,6 +7,7 @@ import fsspec
 import numpy as np
 import torch
 from coqpit import Coqpit
+from torch.utils.data.sampler import WeightedRandomSampler
 
 from TTS.config import load_config
 from TTS.speaker_encoder.utils.generic_utils import setup_model
diff --git a/notebooks/dataset_analysis/analyze.py b/notebooks/dataset_analysis/analyze.py
index 9ba42fb9..4855886e 100644
--- a/notebooks/dataset_analysis/analyze.py
+++ b/notebooks/dataset_analysis/analyze.py
@@ -180,7 +180,7 @@ def plot_phonemes(train_path, cmu_dict_path, save_path):
 
     plt.figure()
     plt.rcParams["figure.figsize"] = (50, 20)
-    barplot = sns.barplot(x, y)
+    barplot = sns.barplot(x=x, y=y)
     if save_path:
         fig = barplot.get_figure()
         fig.savefig(os.path.join(save_path, "phoneme_dist"))