Make lint

2021-12-07 12:51:58 +00:00 · 2021-12-07 12:51:58 +00:00 · c9972e6f14
parent 30cfafce56
commit c9972e6f14
14 changed files with 319 additions and 236 deletions
--- a/TTS/tts/datasets/init.py
+++ b/TTS/tts/datasets/init.py
@ -111,8 +111,8 @@ def load_tts_samples(
            meta_data_eval_all += meta_data_eval
        meta_data_train_all += meta_data_train
        # load attention masks for the duration predictor training
-        if dataset.meta_file_attn_mask:
+        if d.meta_file_attn_mask:
-            meta_data = dict(load_attention_mask_meta_data(dataset["meta_file_attn_mask"]))
+            meta_data = dict(load_attention_mask_meta_data(d["meta_file_attn_mask"]))
            for idx, ins in enumerate(meta_data_train_all):
                attn_file = meta_data[ins["audio_file"]].strip()
                meta_data_train_all[idx].update({"alignment_file": attn_file})
--- a/TTS/tts/datasets/dataset.py
+++ b/TTS/tts/datasets/dataset.py
@ -1,7 +1,6 @@
 import collections
 import os
 import random
 from multiprocessing import Pool
 from typing import Dict, List, Union
 import numpy as np
@ -10,7 +9,6 @@ import tqdm
 from torch.utils.data import Dataset
 from TTS.tts.utils.data import prepare_data, prepare_stop_target, prepare_tensor
 from TTS.tts.utils.text import TTSTokenizer
 from TTS.utils.audio import AudioProcessor
@ -183,7 +181,7 @@ class TTSDataset(Dataset):
    def get_phonemes(self, idx, text):
        out_dict = self.phoneme_dataset[idx]
        assert text == out_dict["text"], f"{text} != {out_dict['text']}"
-        assert out_dict["token_ids"].size > 0
+        assert len(out_dict["token_ids"]) > 0
        return out_dict
    def get_f0(self, idx):
@ -192,7 +190,8 @@ class TTSDataset(Dataset):
        assert wav_file == out_dict["audio_file"]
        return out_dict
-    def get_attn_maks(self, attn_file):
+    @staticmethod
    def get_attn_mask(attn_file):
        return np.load(attn_file)
    def get_token_ids(self, idx, text):
@ -207,7 +206,7 @@ class TTSDataset(Dataset):
        raw_text = item["text"]
-        wav = np.asarray(self.load_wav(item[]), dtype=np.float32)
+        wav = np.asarray(self.load_wav(item["audio_file"]), dtype=np.float32)
        # apply noise for augmentation
        if self.use_noise_augment:
@ -262,7 +261,7 @@ class TTSDataset(Dataset):
        idxs = np.argsort(lengths)  # ascending order
        ignore_idx = []
        keep_idx = []
-        for i, idx in enumerate(idxs):
+        for idx in idxs:
            length = lengths[idx]
            if length < min_len or length > max_len:
                ignore_idx.append(idx)
@ -277,6 +276,7 @@ class TTSDataset(Dataset):
    @staticmethod
    def create_buckets(samples, batch_group_size: int):
        assert batch_group_size > 0
        for i in range(len(samples) // batch_group_size):
            offset = i * batch_group_size
            end_offset = offset + batch_group_size
@ -319,6 +319,7 @@ class TTSDataset(Dataset):
        # shuffle batch groups
        # create batches with similar length items
        # the larger the `batch_group_size`, the higher the length variety in a batch.
        if self.batch_group_size > 0:
            samples = self.create_buckets(samples, self.batch_group_size)
        # update items to the new sorted items
@ -571,6 +572,7 @@ class PhonemeDataset(Dataset):
        We use pytorch dataloader because we are lazy.
        """
        print("[*] Pre-computing phonemes...")
        with tqdm.tqdm(total=len(self)) as pbar:
            batch_size = num_workers if num_workers > 0 else 1
            dataloder = torch.utils.data.DataLoader(
@ -658,16 +660,21 @@ class F0Dataset:
        return len(self.samples)
    def precompute(self, num_workers=0):
        print("[*] Pre-computing F0s...")
        with tqdm.tqdm(total=len(self)) as pbar:
            batch_size = num_workers if num_workers > 0 else 1
            # we do not normalize at preprocessing
            normalize_f0 = self.normalize_f0
            self.normalize_f0 = False
            dataloder = torch.utils.data.DataLoader(
                batch_size=batch_size, dataset=self, shuffle=False, num_workers=num_workers, collate_fn=self.collate_fn
            )
            computed_data = []
            for batch in dataloder:
                f0 = batch["f0"]
-                computed_data.append([f for f in f0])
+                computed_data.append(f for f in f0)
                pbar.update(batch_size)
            self.normalize_f0 = normalize_f0
        if self.normalize_f0:
            computed_data = [tensor for batch in computed_data for tensor in batch]  # flatten
@ -746,80 +753,80 @@ class F0Dataset:
        print(f"{indent}| > Number of instances : {len(self.samples)}")
-if __name__ == "__main__":
+# if __name__ == "__main__":
-    from torch.utils.data import DataLoader
+#     from torch.utils.data import DataLoader
-    from TTS.config.shared_configs import BaseAudioConfig, BaseDatasetConfig
+#     from TTS.config.shared_configs import BaseAudioConfig, BaseDatasetConfig
-    from TTS.tts.datasets import load_tts_samples
+#     from TTS.tts.datasets import load_tts_samples
-    from TTS.tts.utils.text.characters import IPAPhonemes
+#     from TTS.tts.utils.text.characters import IPAPhonemes
-    from TTS.tts.utils.text.phonemizers import ESpeak
+#     from TTS.tts.utils.text.phonemizers import ESpeak
-    dataset_config = BaseDatasetConfig(
+#     dataset_config = BaseDatasetConfig(
-        name="ljspeech",
+#         name="ljspeech",
-        meta_file_train="metadata.csv",
+#         meta_file_train="metadata.csv",
-        path="/Users/erengolge/Projects/TTS/recipes/ljspeech/LJSpeech-1.1",
+#         path="/Users/erengolge/Projects/TTS/recipes/ljspeech/LJSpeech-1.1",
-    )
+#     )
-    train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True)
+#     train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True)
-    samples = train_samples + eval_samples
+#     samples = train_samples + eval_samples
-    phonemizer = ESpeak(language="en-us")
+#     phonemizer = ESpeak(language="en-us")
-    tokenizer = TTSTokenizer(use_phonemes=True, characters=IPAPhonemes(), phonemizer=phonemizer)
+#     tokenizer = TTSTokenizer(use_phonemes=True, characters=IPAPhonemes(), phonemizer=phonemizer)
-    # ph_dataset = PhonemeDataset(samples, tokenizer, phoneme_cache_path="/Users/erengolge/Projects/TTS/phonemes_tests")
+#     # ph_dataset = PhonemeDataset(samples, tokenizer, phoneme_cache_path="/Users/erengolge/Projects/TTS/phonemes_tests")
-    # ph_dataset.precompute(num_workers=4)
+#     # ph_dataset.precompute(num_workers=4)
-    # dataloader = DataLoader(ph_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=ph_dataset.collate_fn)
+#     # dataloader = DataLoader(ph_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=ph_dataset.collate_fn)
 #     # for batch in dataloader:
 #     #     print(batch)
 #     #     break
 #     audio_config = BaseAudioConfig(
 #         sample_rate=22050,
 #         win_length=1024,
 #         hop_length=256,
 #         num_mels=80,
 #         preemphasis=0.0,
 #         ref_level_db=20,
 #         log_func="np.log",
 #         do_trim_silence=True,
 #         trim_db=45,
 #         mel_fmin=0,
 #         mel_fmax=8000,
 #         spec_gain=1.0,
 #         signal_norm=False,
 #         do_amp_to_db_linear=False,
 #     )
 #     ap = AudioProcessor.init_from_config(audio_config)
 #     # f0_dataset = F0Dataset(samples, ap, cache_path="/Users/erengolge/Projects/TTS/f0_tests", verbose=False, precompute_num_workers=4)
 #     # dataloader = DataLoader(f0_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=f0_dataset.collate_fn)
 #     # for batch in dataloader:
 #     #     print(batch)
 #     #     breakpoint()
 #     #     break
 #     dataset = TTSDataset(
 #         outputs_per_step=1,
 #         compute_linear_spec=False,
 #         samples=samples,
 #         ap=ap,
 #         return_wav=False,
 #         batch_group_size=0,
 #         min_seq_len=0,
 #         max_seq_len=500,
 #         use_noise_augment=False,
 #         verbose=True,
 #         speaker_id_mapping=None,
 #         d_vector_mapping=None,
 #         compute_f0=True,
 #         f0_cache_path="/Users/erengolge/Projects/TTS/f0_tests",
 #         tokenizer=tokenizer,
 #         phoneme_cache_path="/Users/erengolge/Projects/TTS/phonemes_tests",
 #         precompute_num_workers=4,
 #     )
 #     dataloader = DataLoader(dataset, batch_size=4, shuffle=False, num_workers=0, collate_fn=dataset.collate_fn)
 #     for batch in dataloader:
 #         print(batch)
 #         break
    audio_config = BaseAudioConfig(
        sample_rate=22050,
        win_length=1024,
        hop_length=256,
        num_mels=80,
        preemphasis=0.0,
        ref_level_db=20,
        log_func="np.log",
        do_trim_silence=True,
        trim_db=45,
        mel_fmin=0,
        mel_fmax=8000,
        spec_gain=1.0,
        signal_norm=False,
        do_amp_to_db_linear=False,
    )
    ap = AudioProcessor.init_from_config(audio_config)
    # f0_dataset = F0Dataset(samples, ap, cache_path="/Users/erengolge/Projects/TTS/f0_tests", verbose=False, precompute_num_workers=4)
    # dataloader = DataLoader(f0_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=f0_dataset.collate_fn)
    # for batch in dataloader:
    #     print(batch)
    #     breakpoint()
    #     break
    dataset = TTSDataset(
        outputs_per_step=1,
        compute_linear_spec=False,
        samples=samples,
        ap=ap,
        return_wav=False,
        batch_group_size=0,
        min_seq_len=0,
        max_seq_len=500,
        use_noise_augment=False,
        verbose=True,
        speaker_id_mapping=None,
        d_vector_mapping=None,
        compute_f0=True,
        f0_cache_path="/Users/erengolge/Projects/TTS/f0_tests",
        tokenizer=tokenizer,
        phoneme_cache_path="/Users/erengolge/Projects/TTS/phonemes_tests",
        precompute_num_workers=4,
    )
    dataloader = DataLoader(dataset, batch_size=4, shuffle=False, num_workers=0, collate_fn=dataset.collate_fn)
    for batch in dataloader:
        print(batch)
        break
--- a/TTS/tts/utils/synthesis.py
+++ b/TTS/tts/utils/synthesis.py
@ -199,10 +199,10 @@ def synthesis(
        wav = model_outputs.squeeze(0)
    else:
        if use_griffin_lim:
-            wav = inv_spectrogram(model_outputs, ap, CONFIG)
+            wav = inv_spectrogram(model_outputs, model.ap, CONFIG)
            # trim silence
            if do_trim_silence:
-                wav = trim_silence(wav, ap)
+                wav = trim_silence(wav, model.ap)
    return_dict = {
        "wav": wav,
        "alignments": alignments,
--- a/TTS/tts/utils/text/characters.py
+++ b/TTS/tts/utils/text/characters.py
@ -1,3 +1,8 @@
 from dataclasses import replace
 from TTS.tts.configs.shared_configs import CharactersConfig
 def parse_symbols():
    return {
        "pad": _pad,
@ -29,46 +34,49 @@ _diacrilics = "ɚ˞ɫ"
 _phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics
-def create_graphemes(
+# def create_graphemes(
-    characters=_characters,
+#     characters=_characters,
-    punctuations=_punctuations,
+#     punctuations=_punctuations,
-    pad=_pad,
+#     pad=_pad,
-    eos=_eos,
+#     eos=_eos,
-    bos=_bos,
+#     bos=_bos,
-    blank=_blank,
+#     blank=_blank,
-    unique=True,
+#     unique=True,
-):  # pylint: disable=redefined-outer-name
+# ):  # pylint: disable=redefined-outer-name
-    """Function to create default characters and phonemes"""
+#     """Function to create default characters and phonemes"""
-    # create graphemes
+#     # create graphemes
-    _graphemes = list(characters)
+#     = (
-    _graphemes = [bos] + _graphemes if len(bos) > 0 and bos is not None else _graphemes
+#         sorted(list(set(phonemes))) if unique else sorted(list(phonemes))
-    _graphemes = [eos] + _graphemes if len(bos) > 0 and eos is not None else _graphemes
+#     )  # this is to keep previous models compatible.
-    _graphemes = [pad] + _graphemes if len(bos) > 0 and pad is not None else _graphemes
+#     _graphemes = list(characters)
-    _graphemes = [blank] + _graphemes if len(bos) > 0 and blank is not None else _graphemes
+#     _graphemes = [bos] + _graphemes if len(bos) > 0 and bos is not None else _graphemes
-    _graphemes = _graphemes + list(punctuations)
+#     _graphemes = [eos] + _graphemes if len(bos) > 0 and eos is not None else _graphemes
-    return _graphemes, _phonemes
+#     _graphemes = [pad] + _graphemes if len(bos) > 0 and pad is not None else _graphemes
 #     _graphemes = [blank] + _graphemes if len(bos) > 0 and blank is not None else _graphemes
 #     _graphemes = _graphemes + list(punctuations)
 #     return _graphemes, _phonemes
-def create_phonemes(
+# def create_phonemes(
-    phonemes=_phonemes, punctuations=_punctuations, pad=_pad, eos=_eos, bos=_bos, blank=_blank, unique=True
+#     phonemes=_phonemes, punctuations=_punctuations, pad=_pad, eos=_eos, bos=_bos, blank=_blank, unique=True
-):
+# ):
-    # create phonemes
+#     # create phonemes
-    _phonemes = None
+#     _phonemes = None
-    _phonemes_sorted = (
+#     _phonemes_sorted = (
-        sorted(list(set(phonemes))) if unique else sorted(list(phonemes))
+#         sorted(list(set(phonemes))) if unique else sorted(list(phonemes))
-    )  # this is to keep previous models compatible.
+#     )  # this is to keep previous models compatible.
-    _phonemes = list(_phonemes_sorted)
+#     _phonemes = list(_phonemes_sorted)
-    _phonemes = [bos] + _phonemes if len(bos) > 0 and bos is not None else _phonemes
+#     _phonemes = [bos] + _phonemes if len(bos) > 0 and bos is not None else _phonemes
-    _phonemes = [eos] + _phonemes if len(bos) > 0 and eos is not None else _phonemes
+#     _phonemes = [eos] + _phonemes if len(bos) > 0 and eos is not None else _phonemes
-    _phonemes = [pad] + _phonemes if len(bos) > 0 and pad is not None else _phonemes
+#     _phonemes = [pad] + _phonemes if len(bos) > 0 and pad is not None else _phonemes
-    _phonemes = [blank] + _phonemes if len(bos) > 0 and blank is not None else _phonemes
+#     _phonemes = [blank] + _phonemes if len(bos) > 0 and blank is not None else _phonemes
-    _phonemes = _phonemes + list(punctuations)
+#     _phonemes = _phonemes + list(punctuations)
-    _phonemes = [pad, eos, bos] + list(_phonemes_sorted) + list(punctuations)
+#     _phonemes = [pad, eos, bos] + list(_phonemes_sorted) + list(punctuations)
-    return _phonemes
+#     return _phonemes
-graphemes = create_graphemes(_characters, _phonemes, _punctuations, _pad, _eos, _bos)
+# DEF_GRAPHEMES = create_graphemes(_characters, _phonemes, _punctuations, _pad, _eos, _bos)
-phonemes = create_phonemes(_phonemes, _punctuations, _pad, _eos, _bos, _blank)
+# DEF_PHONEMES = create_phonemes(_phonemes, _punctuations, _pad, _eos, _bos, _blank)
 class BaseCharacters:
@ -114,7 +122,7 @@ class BaseCharacters:
        eos: str,
        bos: str,
        blank: str,
-        is_unique: bool = True,
+        is_unique: bool = False,
        is_sorted: bool = True,
    ) -> None:
        self._characters = characters
@ -202,14 +210,20 @@ class BaseCharacters:
        _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab
        self._vocab = _vocab + list(self._punctuations)
        self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)}
-        self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)}
+        self._id_to_char = {
            idx: char for idx, char in enumerate(self.vocab)  # pylint: disable=unnecessary-comprehension
        }
        if self.is_unique:
            duplicates = {x for x in self.vocab if self.vocab.count(x) > 1}
            assert (
                len(self.vocab) == len(self._char_to_id) == len(self._id_to_char)
-            ), f" [!] There are duplicate characters in the character set. {set([x for x in self.vocab if self.vocab.count(x) > 1])}"
+            ), f" [!] There are duplicate characters in the character set. {duplicates}"
    def char_to_id(self, char: str) -> int:
        try:
            return self._char_to_id[char]
        except KeyError as e:
            raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e
    def id_to_char(self, idx: int) -> str:
        return self._id_to_char[idx]
@ -229,9 +243,23 @@ class BaseCharacters:
        print(f"{indent}| > Num chars: {self.num_chars}")
    @staticmethod
-    def init_from_config(config: "Coqpit"):
+    def init_from_config(config: "Coqpit"):  # pylint: disable=unused-argument
-        return BaseCharacters(
+        """Init your character class from a config.
-            **config.characters if config.characters is not None else {},
+
        Implement this method for your subclass.
        """
        ...
    def to_config(self) -> "CharactersConfig":
        return CharactersConfig(
            characters=self._characters,
            punctuations=self._punctuations,
            pad=self._pad,
            eos=self._eos,
            bos=self._bos,
            blank=self._blank,
            is_unique=self.is_unique,
            is_sorted=self.is_sorted,
        )
@ -275,18 +303,24 @@ class IPAPhonemes(BaseCharacters):
        eos: str = _eos,
        bos: str = _bos,
        blank: str = _blank,
-        is_unique: bool = True,
+        is_unique: bool = False,
        is_sorted: bool = True,
    ) -> None:
        super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted)
    @staticmethod
    def init_from_config(config: "Coqpit"):
        """Init a IPAPhonemes object from a model config
        If characters are not defined in the config, it will be set to the default characters and the config
        will be updated.
        """
        # band-aid for compatibility with old models
        if "characters" in config and config.characters is not None:
            if "phonemes" in config.characters and config.characters.phonemes is not None:
                config.characters["characters"] = config.characters["phonemes"]
-            return IPAPhonemes(
+            return (
                IPAPhonemes(
                    characters=config.characters["characters"],
                    punctuations=config.characters["punctuations"],
                    pad=config.characters["pad"],
@ -295,11 +329,16 @@ class IPAPhonemes(BaseCharacters):
                    blank=config.characters["blank"],
                    is_unique=config.characters["is_unique"],
                    is_sorted=config.characters["is_sorted"],
                ),
                config,
            )
-        else:
+        # use character set from config
-            return IPAPhonemes(
+        if config.characters is not None:
-                **config.characters if config.characters is not None else {},
+            return IPAPhonemes(**config.characters), config
-            )
+        # return default character set
        characters = IPAPhonemes()
        new_config = replace(config, characters=characters.to_config())
        return characters, new_config
 class Graphemes(BaseCharacters):
@ -339,24 +378,42 @@ class Graphemes(BaseCharacters):
        eos: str = _eos,
        bos: str = _bos,
        blank: str = _blank,
-        is_unique: bool = True,
+        is_unique: bool = False,
        is_sorted: bool = True,
    ) -> None:
        super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted)
    @staticmethod
    def init_from_config(config: "Coqpit"):
-        return Graphemes(
+        """Init a Graphemes object from a model config
-            **config.characters if config.characters is not None else {},
+
        If characters are not defined in the config, it will be set to the default characters and the config
        will be updated.
        """
        if config.characters is not None:
            # band-aid for compatibility with old models
            if "phonemes" in config.characters:
                return (
                    Graphemes(
                        characters=config.characters["characters"],
                        punctuations=config.characters["punctuations"],
                        pad=config.characters["pad"],
                        eos=config.characters["eos"],
                        bos=config.characters["bos"],
                        blank=config.characters["blank"],
                        is_unique=config.characters["is_unique"],
                        is_sorted=config.characters["is_sorted"],
                    ),
                    config,
                )
            return Graphemes(**config.characters), config
        characters = Graphemes()
        new_config = replace(config, characters=characters.to_config())
        return characters, new_config
 if __name__ == "__main__":
    gr = Graphemes()
    ph = IPAPhonemes()
-
+    gr.print_log()
-    print(gr.vocab)
+    ph.print_log()
    print(ph.vocab)
    print(gr.num_chars)
    assert "a" == gr.id_to_char(gr.char_to_id("a"))
--- a/TTS/tts/utils/text/phonemizers/base.py
+++ b/TTS/tts/utils/text/phonemizers/base.py
@ -1,6 +1,5 @@
 import abc
-import itertools
+from typing import List, Tuple
 from typing import List, Tuple, Union
 from TTS.tts.utils.text.punctuation import Punctuation
@ -8,6 +7,19 @@ from TTS.tts.utils.text.punctuation import Punctuation
 class BasePhonemizer(abc.ABC):
    """Base phonemizer class
    Phonemization follows the following steps:
        1. Preprocessing:
            - remove empty lines
            - remove punctuation
            - keep track of punctuation marks
        2. Phonemization:
            - convert text to phonemes
        3. Postprocessing:
            - join phonemes
            - restore punctuation marks
    Args:
        language (str):
            Language used by the phonemizer.
@ -51,40 +63,30 @@ class BasePhonemizer(abc.ABC):
    @abc.abstractmethod
    def name():
        """The name of the backend"""
        ...
    @classmethod
    @abc.abstractmethod
    def is_available(cls):
        """Returns True if the backend is installed, False otherwise"""
        ...
    @classmethod
    @abc.abstractmethod
    def version(cls):
        """Return the backend version as a tuple (major, minor, patch)"""
        ...
    @staticmethod
    @abc.abstractmethod
    def supported_languages():
        """Return a dict of language codes -> name supported by the backend"""
        ...
    def is_supported_language(self, language):
        """Returns True if `language` is supported by the backend"""
        return language in self.supported_languages()
    fr"""
        Phonemization follows the following steps:
            1. Preprocessing:
                - remove empty lines
                - remove punctuation
                - keep track of punctuation marks
            2. Phonemization:
                - convert text to phonemes
            3. Postprocessing:
                - join phonemes
                - restore punctuation marks
    """
    @abc.abstractmethod
    def _phonemize(self, text, separator):
        """The main phonemization method"""
--- a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py
+++ b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py
@ -28,12 +28,13 @@ def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]:
        "1",  # UTF8 text encoding
    ]
    cmd.extend(args)
-    logging.debug("espeakng: executing %s" % repr(cmd))
+    logging.debug("espeakng: executing %s", repr(cmd))
-    p = subprocess.Popen(
+
    with subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
-    )
+    ) as p:
        res = iter(p.stdout.readline, b"")
        if not sync:
            p.stdout.close()
@ -85,7 +86,24 @@ class ESpeak(BasePhonemizer):
    def __init__(self, language: str, backend=None, punctuations=Punctuation.default_puncs(), keep_puncs=True):
        if self._ESPEAK_LIB is None:
            raise Exception("Unknown backend: %s" % backend)
        # band-aid for backwards compatibility
        if language == "en":
            language = "en-us"
        super().__init__(language, punctuations=punctuations, keep_puncs=keep_puncs)
        if backend is not None:
            self.backend = backend
    @property
    def backend(self):
        return self._ESPEAK_LIB
    @backend.setter
    def backend(self, backend):
        if backend not in ["espeak", "espeak-ng"]:
            raise Exception("Unknown backend: %s" % backend)
        self._ESPEAK_LIB = backend
    def auto_set_espeak_lib(self) -> None:
        if is_tool("espeak-ng"):
@ -115,24 +133,25 @@ class ESpeak(BasePhonemizer):
        # espeak and espeak-ng parses `ipa` differently
        if tie:
            # use '͡' between phonemes
-            if _DEF_ESPEAK_LIB == "espeak":
+            if self.backend == "espeak":
                args.append("--ipa=1")
            else:
                args.append("--ipa=3")
        else:
            # split with '_'
-            if _DEF_ESPEAK_LIB == "espeak":
+            if self.backend == "espeak":
                args.append("--ipa=3")
            else:
                args.append("--ipa=1")
        if tie:
            args.append("--tie=%s" % tie)
        args.append('"' + text + '"')
        # compute phonemes
        phonemes = ""
        for line in _espeak_exe(self._ESPEAK_LIB, args, sync=True):
-            logging.debug("line: %s" % repr(line))
+            logging.debug("line: %s", repr(line))
-            phonemes += line.decode("utf8").strip()
+            phonemes += line.decode("utf8").strip()[2:]  # skip two redundant characters
        return phonemes.replace("_", separator)
    def _phonemize(self, text, separator=None):
@ -146,7 +165,7 @@ class ESpeak(BasePhonemizer):
            Dict: Dictionary of language codes.
        """
        if _DEF_ESPEAK_LIB is None:
-            raise {}
+            return {}
        args = ["--voices"]
        langs = {}
        count = 0
@ -157,7 +176,7 @@ class ESpeak(BasePhonemizer):
                lang_code = cols[1]
                lang_name = cols[3]
                langs[lang_code] = lang_name
-            logging.debug("line: %s" % repr(line))
+            logging.debug("line: %s", repr(line))
            count += 1
        return langs
@ -168,9 +187,9 @@ class ESpeak(BasePhonemizer):
            str: Version of the used backend.
        """
        args = ["--version"]
-        for line in _espeak_exe(_DEF_ESPEAK_LIB, args, sync=True):
+        for line in _espeak_exe(self.backend, args, sync=True):
            version = line.decode("utf8").strip().split()[2]
-            logging.debug("line: %s" % repr(line))
+            logging.debug("line: %s", repr(line))
            return version
    @classmethod
--- a/TTS/tts/utils/text/phonemizers/gruut_wrapper.py
+++ b/TTS/tts/utils/text/phonemizers/gruut_wrapper.py
@ -1,5 +1,4 @@
 import importlib
 from os import stat
 from typing import List
 import gruut
@ -55,7 +54,7 @@ class Gruut(BasePhonemizer):
    def name():
        return "gruut"
-    def phonemize_gruut(self, text: str, separator: str = "|", tie=False) -> str:
+    def phonemize_gruut(self, text: str, separator: str = "|", tie=False) -> str:  # pylint: disable=unused-argument
        """Convert input text to phonemes.
        Gruut phonemizes the given `str` by separating each phoneme character with `separator`, even for characters
--- a/TTS/tts/utils/text/phonemizers/ja_jp_phonemizer.py
+++ b/TTS/tts/utils/text/phonemizers/ja_jp_phonemizer.py
@ -30,7 +30,7 @@ class JA_JP_Phonemizer(BasePhonemizer):
    language = "ja-jp"
-    def __init__(self, punctuations=_DEF_JA_PUNCS, keep_puncs=True, **kwargs):
+    def __init__(self, punctuations=_DEF_JA_PUNCS, keep_puncs=True, **kwargs):  # pylint: disable=unused-argument
        super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs)
    @staticmethod
@ -61,12 +61,12 @@ class JA_JP_Phonemizer(BasePhonemizer):
        return True
-if __name__ == "__main__":
+# if __name__ == "__main__":
-    text = "これは、電話をかけるための私の日本語の例のテキストです。"
+#     text = "これは、電話をかけるための私の日本語の例のテキストです。"
-    e = JA_JP_Phonemizer()
+#     e = JA_JP_Phonemizer()
-    print(e.supported_languages())
+#     print(e.supported_languages())
-    print(e.version())
+#     print(e.version())
-    print(e.language)
+#     print(e.language)
-    print(e.name())
+#     print(e.name())
-    print(e.is_available())
+#     print(e.is_available())
-    print("`" + e.phonemize(text) + "`")
+#     print("`" + e.phonemize(text) + "`")
--- a/TTS/tts/utils/text/phonemizers/multi_phonemizer.py
+++ b/TTS/tts/utils/text/phonemizers/multi_phonemizer.py
@ -17,7 +17,7 @@ class MultiPhonemizer:
    lang_to_phonemizer_name = DEF_LANG_TO_PHONEMIZER
    language = "multi-lingual"
-    def __init__(self, custom_lang_to_phonemizer: Dict = {}) -> None:
+    def __init__(self, custom_lang_to_phonemizer: Dict = {}) -> None:  # pylint: disable=dangerous-default-value
        self.lang_to_phonemizer_name.update(custom_lang_to_phonemizer)
        self.lang_to_phonemizer = self.init_phonemizers(self.lang_to_phonemizer_name)
@ -40,16 +40,16 @@ class MultiPhonemizer:
        return list(self.lang_to_phonemizer_name.keys())
-if __name__ == "__main__":
+# if __name__ == "__main__":
-    texts = {
+#     texts = {
-        "tr": "Merhaba, bu Türkçe bit örnek!",
+#         "tr": "Merhaba, bu Türkçe bit örnek!",
-        "en-us": "Hello, this is English example!",
+#         "en-us": "Hello, this is English example!",
-        "de": "Hallo, das ist ein Deutches Beipiel!",
+#         "de": "Hallo, das ist ein Deutches Beipiel!",
-        "zh-cn": "这是中国的例子",
+#         "zh-cn": "这是中国的例子",
-    }
+#     }
-    phonemes = {}
+#     phonemes = {}
-    ph = MultiPhonemizer()
+#     ph = MultiPhonemizer()
-    for lang, text in texts.items():
+#     for lang, text in texts.items():
-        phoneme = ph.phonemize(text, lang)
+#         phoneme = ph.phonemize(text, lang)
-        phonemes[lang] = phoneme
+#         phonemes[lang] = phoneme
-    print(phonemes)
+#     print(phonemes)
--- a/TTS/tts/utils/text/phonemizers/zh_cn_phonemizer.py
+++ b/TTS/tts/utils/text/phonemizers/zh_cn_phonemizer.py
@ -25,14 +25,15 @@ class ZH_CN_Phonemizer(BasePhonemizer):
    language = "zh-cn"
-    def __init__(self, punctuations=_DEF_ZH_PUNCS, keep_puncs=False, **kwargs):
+    def __init__(self, punctuations=_DEF_ZH_PUNCS, keep_puncs=False, **kwargs):  # pylint: disable=unused-argument
        super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs)
    @staticmethod
    def name():
        return "zh_cn_phonemizer"
-    def phonemize_zh_cn(self, text: str, separator: str = "|") -> str:
+    @staticmethod
    def phonemize_zh_cn(text: str, separator: str = "|") -> str:
        ph = chinese_text_to_phonemes(text, separator)
        return ph
@ -50,12 +51,12 @@ class ZH_CN_Phonemizer(BasePhonemizer):
        return True
-if __name__ == "__main__":
+# if __name__ == "__main__":
-    text = "这是，样本中文。"
+#     text = "这是，样本中文。"
-    e = ZH_CN_Phonemizer()
+#     e = ZH_CN_Phonemizer()
-    print(e.supported_languages())
+#     print(e.supported_languages())
-    print(e.version())
+#     print(e.version())
-    print(e.language)
+#     print(e.language)
-    print(e.name())
+#     print(e.name())
-    print(e.is_available())
+#     print(e.is_available())
-    print("`" + e.phonemize(text) + "`")
+#     print("`" + e.phonemize(text) + "`")
--- a/TTS/tts/utils/text/punctuation.py
+++ b/TTS/tts/utils/text/punctuation.py
@ -130,7 +130,7 @@ class Punctuation:
        return cls._restore(text, puncs, 0)
    @classmethod
-    def _restore(cls, text, puncs, num):
+    def _restore(cls, text, puncs, num):  # pylint: disable=too-many-return-statements
        """Auxiliary method for Punctuation.restore()"""
        if not puncs:
            return text
@ -159,14 +159,14 @@ class Punctuation:
        return cls._restore([text[0] + current.punc + text[1]] + text[2:], puncs[1:], num)
-if __name__ == "__main__":
+# if __name__ == "__main__":
-    punc = Punctuation()
+#     punc = Punctuation()
-    text = "This is. This is, example!"
+#     text = "This is. This is, example!"
-    print(punc.strip(text))
+#     print(punc.strip(text))
-    split_text, puncs = punc.strip_to_restore(text)
+#     split_text, puncs = punc.strip_to_restore(text)
-    print(split_text, " ---- ", puncs)
+#     print(split_text, " ---- ", puncs)
-    restored_text = punc.restore(split_text, puncs)
+#     restored_text = punc.restore(split_text, puncs)
-    print(restored_text)
+#     print(restored_text)
--- a/TTS/utils/audio.py
+++ b/TTS/utils/audio.py
@ -383,7 +383,6 @@ class AudioProcessor(object):
    def init_from_config(config: "Coqpit"):
        if "audio" in config:
            return AudioProcessor(**config.audio)
        else:
        return AudioProcessor(**config)
    ### setting up the parameters ###
--- a/TTS/utils/synthesizer.py
+++ b/TTS/utils/synthesizer.py
@ -13,7 +13,6 @@ from TTS.tts.utils.speakers import SpeakerManager
 # pylint: disable=unused-wildcard-import
 # pylint: disable=wildcard-import
 from TTS.tts.utils.synthesis import synthesis, trim_silence
 from TTS.tts.utils.text import TTSTokenizer
 from TTS.utils.audio import AudioProcessor
 from TTS.vocoder.models import setup_model as setup_vocoder_model
 from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input
--- a/TTS/vocoder/models/gan.py
+++ b/TTS/vocoder/models/gan.py
@ -314,7 +314,7 @@ class GAN(BaseVocoder):
        data_items: List,
        verbose: bool,
        num_gpus: int,
-        rank: int = 0,  # pylint: disable=unused-argument
+        rank: int = None,  # pylint: disable=unused-argument
    ):
        """Initiate and return the GAN dataloader.