From c9972e6f145a3cbf3b79c24b5c18c6d654b86f46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?= <egolge@coqui.ai>
Date: Tue, 7 Dec 2021 12:51:58 +0000
Subject: [PATCH] Make lint

---
 TTS/tts/datasets/__init__.py                  |   4 +-
 TTS/tts/datasets/dataset.py                   | 157 +++++++-------
 TTS/tts/utils/synthesis.py                    |   4 +-
 TTS/tts/utils/text/characters.py              | 191 ++++++++++++------
 TTS/tts/utils/text/phonemizers/base.py        |  36 ++--
 .../utils/text/phonemizers/espeak_wrapper.py  |  65 +++---
 .../utils/text/phonemizers/gruut_wrapper.py   |   3 +-
 .../text/phonemizers/ja_jp_phonemizer.py      |  20 +-
 .../text/phonemizers/multi_phonemizer.py      |  28 +--
 .../text/phonemizers/zh_cn_phonemizer.py      |  23 ++-
 TTS/tts/utils/text/punctuation.py             |  18 +-
 TTS/utils/audio.py                            |   3 +-
 TTS/utils/synthesizer.py                      |   1 -
 TTS/vocoder/models/gan.py                     |   2 +-
 14 files changed, 319 insertions(+), 236 deletions(-)

diff --git a/TTS/tts/datasets/__init__.py b/TTS/tts/datasets/__init__.py
index d80e92c9..f0a6ea95 100644
--- a/TTS/tts/datasets/__init__.py
+++ b/TTS/tts/datasets/__init__.py
@@ -111,8 +111,8 @@ def load_tts_samples(
             meta_data_eval_all += meta_data_eval
         meta_data_train_all += meta_data_train
         # load attention masks for the duration predictor training
-        if dataset.meta_file_attn_mask:
-            meta_data = dict(load_attention_mask_meta_data(dataset["meta_file_attn_mask"]))
+        if d.meta_file_attn_mask:
+            meta_data = dict(load_attention_mask_meta_data(d["meta_file_attn_mask"]))
             for idx, ins in enumerate(meta_data_train_all):
                 attn_file = meta_data[ins["audio_file"]].strip()
                 meta_data_train_all[idx].update({"alignment_file": attn_file})
diff --git a/TTS/tts/datasets/dataset.py b/TTS/tts/datasets/dataset.py
index d4a12c07..210de803 100644
--- a/TTS/tts/datasets/dataset.py
+++ b/TTS/tts/datasets/dataset.py
@@ -1,7 +1,6 @@
 import collections
 import os
 import random
-from multiprocessing import Pool
 from typing import Dict, List, Union
 
 import numpy as np
@@ -10,7 +9,6 @@ import tqdm
 from torch.utils.data import Dataset
 
 from TTS.tts.utils.data import prepare_data, prepare_stop_target, prepare_tensor
-from TTS.tts.utils.text import TTSTokenizer
 from TTS.utils.audio import AudioProcessor
 
 
@@ -183,7 +181,7 @@ class TTSDataset(Dataset):
     def get_phonemes(self, idx, text):
         out_dict = self.phoneme_dataset[idx]
         assert text == out_dict["text"], f"{text} != {out_dict['text']}"
-        assert out_dict["token_ids"].size > 0
+        assert len(out_dict["token_ids"]) > 0
         return out_dict
 
     def get_f0(self, idx):
@@ -192,7 +190,8 @@ class TTSDataset(Dataset):
         assert wav_file == out_dict["audio_file"]
         return out_dict
 
-    def get_attn_maks(self, attn_file):
+    @staticmethod
+    def get_attn_mask(attn_file):
         return np.load(attn_file)
 
     def get_token_ids(self, idx, text):
@@ -207,7 +206,7 @@ class TTSDataset(Dataset):
 
         raw_text = item["text"]
 
-        wav = np.asarray(self.load_wav(item[]), dtype=np.float32)
+        wav = np.asarray(self.load_wav(item["audio_file"]), dtype=np.float32)
 
         # apply noise for augmentation
         if self.use_noise_augment:
@@ -262,7 +261,7 @@ class TTSDataset(Dataset):
         idxs = np.argsort(lengths)  # ascending order
         ignore_idx = []
         keep_idx = []
-        for i, idx in enumerate(idxs):
+        for idx in idxs:
             length = lengths[idx]
             if length < min_len or length > max_len:
                 ignore_idx.append(idx)
@@ -277,6 +276,7 @@ class TTSDataset(Dataset):
 
     @staticmethod
     def create_buckets(samples, batch_group_size: int):
+        assert batch_group_size > 0
         for i in range(len(samples) // batch_group_size):
             offset = i * batch_group_size
             end_offset = offset + batch_group_size
@@ -319,7 +319,8 @@ class TTSDataset(Dataset):
         # shuffle batch groups
         # create batches with similar length items
         # the larger the `batch_group_size`, the higher the length variety in a batch.
-        samples = self.create_buckets(samples, self.batch_group_size)
+        if self.batch_group_size > 0:
+            samples = self.create_buckets(samples, self.batch_group_size)
 
         # update items to the new sorted items
         self.samples = samples
@@ -571,6 +572,7 @@ class PhonemeDataset(Dataset):
 
         We use pytorch dataloader because we are lazy.
         """
+        print("[*] Pre-computing phonemes...")
         with tqdm.tqdm(total=len(self)) as pbar:
             batch_size = num_workers if num_workers > 0 else 1
             dataloder = torch.utils.data.DataLoader(
@@ -658,16 +660,21 @@ class F0Dataset:
         return len(self.samples)
 
     def precompute(self, num_workers=0):
+        print("[*] Pre-computing F0s...")
         with tqdm.tqdm(total=len(self)) as pbar:
             batch_size = num_workers if num_workers > 0 else 1
+            # we do not normalize at preprocessing
+            normalize_f0 = self.normalize_f0
+            self.normalize_f0 = False
             dataloder = torch.utils.data.DataLoader(
                 batch_size=batch_size, dataset=self, shuffle=False, num_workers=num_workers, collate_fn=self.collate_fn
             )
             computed_data = []
             for batch in dataloder:
                 f0 = batch["f0"]
-                computed_data.append([f for f in f0])
+                computed_data.append(f for f in f0)
                 pbar.update(batch_size)
+            self.normalize_f0 = normalize_f0
 
         if self.normalize_f0:
             computed_data = [tensor for batch in computed_data for tensor in batch]  # flatten
@@ -746,80 +753,80 @@ class F0Dataset:
         print(f"{indent}| > Number of instances : {len(self.samples)}")
 
 
-if __name__ == "__main__":
-    from torch.utils.data import DataLoader
+# if __name__ == "__main__":
+#     from torch.utils.data import DataLoader
 
-    from TTS.config.shared_configs import BaseAudioConfig, BaseDatasetConfig
-    from TTS.tts.datasets import load_tts_samples
-    from TTS.tts.utils.text.characters import IPAPhonemes
-    from TTS.tts.utils.text.phonemizers import ESpeak
+#     from TTS.config.shared_configs import BaseAudioConfig, BaseDatasetConfig
+#     from TTS.tts.datasets import load_tts_samples
+#     from TTS.tts.utils.text.characters import IPAPhonemes
+#     from TTS.tts.utils.text.phonemizers import ESpeak
 
-    dataset_config = BaseDatasetConfig(
-        name="ljspeech",
-        meta_file_train="metadata.csv",
-        path="/Users/erengolge/Projects/TTS/recipes/ljspeech/LJSpeech-1.1",
-    )
-    train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True)
-    samples = train_samples + eval_samples
+#     dataset_config = BaseDatasetConfig(
+#         name="ljspeech",
+#         meta_file_train="metadata.csv",
+#         path="/Users/erengolge/Projects/TTS/recipes/ljspeech/LJSpeech-1.1",
+#     )
+#     train_samples, eval_samples = load_tts_samples(dataset_config, eval_split=True)
+#     samples = train_samples + eval_samples
 
-    phonemizer = ESpeak(language="en-us")
-    tokenizer = TTSTokenizer(use_phonemes=True, characters=IPAPhonemes(), phonemizer=phonemizer)
-    # ph_dataset = PhonemeDataset(samples, tokenizer, phoneme_cache_path="/Users/erengolge/Projects/TTS/phonemes_tests")
-    # ph_dataset.precompute(num_workers=4)
+#     phonemizer = ESpeak(language="en-us")
+#     tokenizer = TTSTokenizer(use_phonemes=True, characters=IPAPhonemes(), phonemizer=phonemizer)
+#     # ph_dataset = PhonemeDataset(samples, tokenizer, phoneme_cache_path="/Users/erengolge/Projects/TTS/phonemes_tests")
+#     # ph_dataset.precompute(num_workers=4)
 
-    # dataloader = DataLoader(ph_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=ph_dataset.collate_fn)
-    # for batch in dataloader:
-    #     print(batch)
-    #     break
+#     # dataloader = DataLoader(ph_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=ph_dataset.collate_fn)
+#     # for batch in dataloader:
+#     #     print(batch)
+#     #     break
 
-    audio_config = BaseAudioConfig(
-        sample_rate=22050,
-        win_length=1024,
-        hop_length=256,
-        num_mels=80,
-        preemphasis=0.0,
-        ref_level_db=20,
-        log_func="np.log",
-        do_trim_silence=True,
-        trim_db=45,
-        mel_fmin=0,
-        mel_fmax=8000,
-        spec_gain=1.0,
-        signal_norm=False,
-        do_amp_to_db_linear=False,
-    )
+#     audio_config = BaseAudioConfig(
+#         sample_rate=22050,
+#         win_length=1024,
+#         hop_length=256,
+#         num_mels=80,
+#         preemphasis=0.0,
+#         ref_level_db=20,
+#         log_func="np.log",
+#         do_trim_silence=True,
+#         trim_db=45,
+#         mel_fmin=0,
+#         mel_fmax=8000,
+#         spec_gain=1.0,
+#         signal_norm=False,
+#         do_amp_to_db_linear=False,
+#     )
 
-    ap = AudioProcessor.init_from_config(audio_config)
+#     ap = AudioProcessor.init_from_config(audio_config)
 
-    # f0_dataset = F0Dataset(samples, ap, cache_path="/Users/erengolge/Projects/TTS/f0_tests", verbose=False, precompute_num_workers=4)
+#     # f0_dataset = F0Dataset(samples, ap, cache_path="/Users/erengolge/Projects/TTS/f0_tests", verbose=False, precompute_num_workers=4)
 
-    # dataloader = DataLoader(f0_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=f0_dataset.collate_fn)
-    # for batch in dataloader:
-    #     print(batch)
-    #     breakpoint()
-    #     break
+#     # dataloader = DataLoader(f0_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=f0_dataset.collate_fn)
+#     # for batch in dataloader:
+#     #     print(batch)
+#     #     breakpoint()
+#     #     break
 
-    dataset = TTSDataset(
-        outputs_per_step=1,
-        compute_linear_spec=False,
-        samples=samples,
-        ap=ap,
-        return_wav=False,
-        batch_group_size=0,
-        min_seq_len=0,
-        max_seq_len=500,
-        use_noise_augment=False,
-        verbose=True,
-        speaker_id_mapping=None,
-        d_vector_mapping=None,
-        compute_f0=True,
-        f0_cache_path="/Users/erengolge/Projects/TTS/f0_tests",
-        tokenizer=tokenizer,
-        phoneme_cache_path="/Users/erengolge/Projects/TTS/phonemes_tests",
-        precompute_num_workers=4,
-    )
+#     dataset = TTSDataset(
+#         outputs_per_step=1,
+#         compute_linear_spec=False,
+#         samples=samples,
+#         ap=ap,
+#         return_wav=False,
+#         batch_group_size=0,
+#         min_seq_len=0,
+#         max_seq_len=500,
+#         use_noise_augment=False,
+#         verbose=True,
+#         speaker_id_mapping=None,
+#         d_vector_mapping=None,
+#         compute_f0=True,
+#         f0_cache_path="/Users/erengolge/Projects/TTS/f0_tests",
+#         tokenizer=tokenizer,
+#         phoneme_cache_path="/Users/erengolge/Projects/TTS/phonemes_tests",
+#         precompute_num_workers=4,
+#     )
 
-    dataloader = DataLoader(dataset, batch_size=4, shuffle=False, num_workers=0, collate_fn=dataset.collate_fn)
-    for batch in dataloader:
-        print(batch)
-        break
+#     dataloader = DataLoader(dataset, batch_size=4, shuffle=False, num_workers=0, collate_fn=dataset.collate_fn)
+#     for batch in dataloader:
+#         print(batch)
+#         break
diff --git a/TTS/tts/utils/synthesis.py b/TTS/tts/utils/synthesis.py
index 979769a8..65dcc1ad 100644
--- a/TTS/tts/utils/synthesis.py
+++ b/TTS/tts/utils/synthesis.py
@@ -199,10 +199,10 @@ def synthesis(
         wav = model_outputs.squeeze(0)
     else:
         if use_griffin_lim:
-            wav = inv_spectrogram(model_outputs, ap, CONFIG)
+            wav = inv_spectrogram(model_outputs, model.ap, CONFIG)
             # trim silence
             if do_trim_silence:
-                wav = trim_silence(wav, ap)
+                wav = trim_silence(wav, model.ap)
     return_dict = {
         "wav": wav,
         "alignments": alignments,
diff --git a/TTS/tts/utils/text/characters.py b/TTS/tts/utils/text/characters.py
index 24ce51f1..aae6844f 100644
--- a/TTS/tts/utils/text/characters.py
+++ b/TTS/tts/utils/text/characters.py
@@ -1,3 +1,8 @@
+from dataclasses import replace
+
+from TTS.tts.configs.shared_configs import CharactersConfig
+
+
 def parse_symbols():
     return {
         "pad": _pad,
@@ -29,46 +34,49 @@ _diacrilics = "ɚ˞ɫ"
 _phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics
 
 
-def create_graphemes(
-    characters=_characters,
-    punctuations=_punctuations,
-    pad=_pad,
-    eos=_eos,
-    bos=_bos,
-    blank=_blank,
-    unique=True,
-):  # pylint: disable=redefined-outer-name
-    """Function to create default characters and phonemes"""
-    # create graphemes
-    _graphemes = list(characters)
-    _graphemes = [bos] + _graphemes if len(bos) > 0 and bos is not None else _graphemes
-    _graphemes = [eos] + _graphemes if len(bos) > 0 and eos is not None else _graphemes
-    _graphemes = [pad] + _graphemes if len(bos) > 0 and pad is not None else _graphemes
-    _graphemes = [blank] + _graphemes if len(bos) > 0 and blank is not None else _graphemes
-    _graphemes = _graphemes + list(punctuations)
-    return _graphemes, _phonemes
+# def create_graphemes(
+#     characters=_characters,
+#     punctuations=_punctuations,
+#     pad=_pad,
+#     eos=_eos,
+#     bos=_bos,
+#     blank=_blank,
+#     unique=True,
+# ):  # pylint: disable=redefined-outer-name
+#     """Function to create default characters and phonemes"""
+#     # create graphemes
+#     = (
+#         sorted(list(set(phonemes))) if unique else sorted(list(phonemes))
+#     )  # this is to keep previous models compatible.
+#     _graphemes = list(characters)
+#     _graphemes = [bos] + _graphemes if len(bos) > 0 and bos is not None else _graphemes
+#     _graphemes = [eos] + _graphemes if len(bos) > 0 and eos is not None else _graphemes
+#     _graphemes = [pad] + _graphemes if len(bos) > 0 and pad is not None else _graphemes
+#     _graphemes = [blank] + _graphemes if len(bos) > 0 and blank is not None else _graphemes
+#     _graphemes = _graphemes + list(punctuations)
+#     return _graphemes, _phonemes
 
 
-def create_phonemes(
-    phonemes=_phonemes, punctuations=_punctuations, pad=_pad, eos=_eos, bos=_bos, blank=_blank, unique=True
-):
-    # create phonemes
-    _phonemes = None
-    _phonemes_sorted = (
-        sorted(list(set(phonemes))) if unique else sorted(list(phonemes))
-    )  # this is to keep previous models compatible.
-    _phonemes = list(_phonemes_sorted)
-    _phonemes = [bos] + _phonemes if len(bos) > 0 and bos is not None else _phonemes
-    _phonemes = [eos] + _phonemes if len(bos) > 0 and eos is not None else _phonemes
-    _phonemes = [pad] + _phonemes if len(bos) > 0 and pad is not None else _phonemes
-    _phonemes = [blank] + _phonemes if len(bos) > 0 and blank is not None else _phonemes
-    _phonemes = _phonemes + list(punctuations)
-    _phonemes = [pad, eos, bos] + list(_phonemes_sorted) + list(punctuations)
-    return _phonemes
+# def create_phonemes(
+#     phonemes=_phonemes, punctuations=_punctuations, pad=_pad, eos=_eos, bos=_bos, blank=_blank, unique=True
+# ):
+#     # create phonemes
+#     _phonemes = None
+#     _phonemes_sorted = (
+#         sorted(list(set(phonemes))) if unique else sorted(list(phonemes))
+#     )  # this is to keep previous models compatible.
+#     _phonemes = list(_phonemes_sorted)
+#     _phonemes = [bos] + _phonemes if len(bos) > 0 and bos is not None else _phonemes
+#     _phonemes = [eos] + _phonemes if len(bos) > 0 and eos is not None else _phonemes
+#     _phonemes = [pad] + _phonemes if len(bos) > 0 and pad is not None else _phonemes
+#     _phonemes = [blank] + _phonemes if len(bos) > 0 and blank is not None else _phonemes
+#     _phonemes = _phonemes + list(punctuations)
+#     _phonemes = [pad, eos, bos] + list(_phonemes_sorted) + list(punctuations)
+#     return _phonemes
 
 
-graphemes = create_graphemes(_characters, _phonemes, _punctuations, _pad, _eos, _bos)
-phonemes = create_phonemes(_phonemes, _punctuations, _pad, _eos, _bos, _blank)
+# DEF_GRAPHEMES = create_graphemes(_characters, _phonemes, _punctuations, _pad, _eos, _bos)
+# DEF_PHONEMES = create_phonemes(_phonemes, _punctuations, _pad, _eos, _bos, _blank)
 
 
 class BaseCharacters:
@@ -114,7 +122,7 @@ class BaseCharacters:
         eos: str,
         bos: str,
         blank: str,
-        is_unique: bool = True,
+        is_unique: bool = False,
         is_sorted: bool = True,
     ) -> None:
         self._characters = characters
@@ -202,14 +210,20 @@ class BaseCharacters:
         _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab
         self._vocab = _vocab + list(self._punctuations)
         self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)}
-        self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)}
+        self._id_to_char = {
+            idx: char for idx, char in enumerate(self.vocab)  # pylint: disable=unnecessary-comprehension
+        }
         if self.is_unique:
+            duplicates = {x for x in self.vocab if self.vocab.count(x) > 1}
             assert (
                 len(self.vocab) == len(self._char_to_id) == len(self._id_to_char)
-            ), f" [!] There are duplicate characters in the character set. {set([x for x in self.vocab if self.vocab.count(x) > 1])}"
+            ), f" [!] There are duplicate characters in the character set. {duplicates}"
 
     def char_to_id(self, char: str) -> int:
-        return self._char_to_id[char]
+        try:
+            return self._char_to_id[char]
+        except KeyError as e:
+            raise KeyError(f" [!] {repr(char)} is not in the vocabulary.") from e
 
     def id_to_char(self, idx: int) -> str:
         return self._id_to_char[idx]
@@ -229,9 +243,23 @@ class BaseCharacters:
         print(f"{indent}| > Num chars: {self.num_chars}")
 
     @staticmethod
-    def init_from_config(config: "Coqpit"):
-        return BaseCharacters(
-            **config.characters if config.characters is not None else {},
+    def init_from_config(config: "Coqpit"):  # pylint: disable=unused-argument
+        """Init your character class from a config.
+
+        Implement this method for your subclass.
+        """
+        ...
+
+    def to_config(self) -> "CharactersConfig":
+        return CharactersConfig(
+            characters=self._characters,
+            punctuations=self._punctuations,
+            pad=self._pad,
+            eos=self._eos,
+            bos=self._bos,
+            blank=self._blank,
+            is_unique=self.is_unique,
+            is_sorted=self.is_sorted,
         )
 
 
@@ -275,31 +303,42 @@ class IPAPhonemes(BaseCharacters):
         eos: str = _eos,
         bos: str = _bos,
         blank: str = _blank,
-        is_unique: bool = True,
+        is_unique: bool = False,
         is_sorted: bool = True,
     ) -> None:
         super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted)
 
     @staticmethod
     def init_from_config(config: "Coqpit"):
+        """Init an IPAPhonemes object from a model config
+
+        If characters are not defined in the config, it will be set to the default characters and the config
+        will be updated.
+        """
         # band-aid for compatibility with old models
         if "characters" in config and config.characters is not None:
             if "phonemes" in config.characters and config.characters.phonemes is not None:
                 config.characters["characters"] = config.characters["phonemes"]
-            return IPAPhonemes(
-                characters=config.characters["characters"],
-                punctuations=config.characters["punctuations"],
-                pad=config.characters["pad"],
-                eos=config.characters["eos"],
-                bos=config.characters["bos"],
-                blank=config.characters["blank"],
-                is_unique=config.characters["is_unique"],
-                is_sorted=config.characters["is_sorted"],
-            )
-        else:
-            return IPAPhonemes(
-                **config.characters if config.characters is not None else {},
+            return (
+                IPAPhonemes(
+                    characters=config.characters["characters"],
+                    punctuations=config.characters["punctuations"],
+                    pad=config.characters["pad"],
+                    eos=config.characters["eos"],
+                    bos=config.characters["bos"],
+                    blank=config.characters["blank"],
+                    is_unique=config.characters["is_unique"],
+                    is_sorted=config.characters["is_sorted"],
+                ),
+                config,
             )
+        # use character set from config
+        if config.characters is not None:
+            return IPAPhonemes(**config.characters), config
+        # return default character set
+        characters = IPAPhonemes()
+        new_config = replace(config, characters=characters.to_config())
+        return characters, new_config
 
 
 class Graphemes(BaseCharacters):
@@ -339,24 +378,42 @@ class Graphemes(BaseCharacters):
         eos: str = _eos,
         bos: str = _bos,
         blank: str = _blank,
-        is_unique: bool = True,
+        is_unique: bool = False,
         is_sorted: bool = True,
     ) -> None:
         super().__init__(characters, punctuations, pad, eos, bos, blank, is_unique, is_sorted)
 
     @staticmethod
     def init_from_config(config: "Coqpit"):
-        return Graphemes(
-            **config.characters if config.characters is not None else {},
-        )
+        """Init a Graphemes object from a model config
+
+        If characters are not defined in the config, it will be set to the default characters and the config
+        will be updated.
+        """
+        if config.characters is not None:
+            # band-aid for compatibility with old models
+            if "phonemes" in config.characters:
+                return (
+                    Graphemes(
+                        characters=config.characters["characters"],
+                        punctuations=config.characters["punctuations"],
+                        pad=config.characters["pad"],
+                        eos=config.characters["eos"],
+                        bos=config.characters["bos"],
+                        blank=config.characters["blank"],
+                        is_unique=config.characters["is_unique"],
+                        is_sorted=config.characters["is_sorted"],
+                    ),
+                    config,
+                )
+            return Graphemes(**config.characters), config
+        characters = Graphemes()
+        new_config = replace(config, characters=characters.to_config())
+        return characters, new_config
 
 
 if __name__ == "__main__":
     gr = Graphemes()
     ph = IPAPhonemes()
-
-    print(gr.vocab)
-    print(ph.vocab)
-
-    print(gr.num_chars)
-    assert "a" == gr.id_to_char(gr.char_to_id("a"))
+    gr.print_log()
+    ph.print_log()
diff --git a/TTS/tts/utils/text/phonemizers/base.py b/TTS/tts/utils/text/phonemizers/base.py
index 249c8bce..08fa8e13 100644
--- a/TTS/tts/utils/text/phonemizers/base.py
+++ b/TTS/tts/utils/text/phonemizers/base.py
@@ -1,6 +1,5 @@
 import abc
-import itertools
-from typing import List, Tuple, Union
+from typing import List, Tuple
 
 from TTS.tts.utils.text.punctuation import Punctuation
 
@@ -8,6 +7,19 @@ from TTS.tts.utils.text.punctuation import Punctuation
 class BasePhonemizer(abc.ABC):
     """Base phonemizer class
 
+    Phonemization follows the following steps:
+        1. Preprocessing:
+            - remove empty lines
+            - remove punctuation
+            - keep track of punctuation marks
+
+        2. Phonemization:
+            - convert text to phonemes
+
+        3. Postprocessing:
+            - join phonemes
+            - restore punctuation marks
+
     Args:
         language (str):
             Language used by the phonemizer.
@@ -51,40 +63,30 @@ class BasePhonemizer(abc.ABC):
     @abc.abstractmethod
     def name():
         """The name of the backend"""
+        ...
 
     @classmethod
     @abc.abstractmethod
     def is_available(cls):
         """Returns True if the backend is installed, False otherwise"""
+        ...
 
     @classmethod
     @abc.abstractmethod
     def version(cls):
         """Return the backend version as a tuple (major, minor, patch)"""
+        ...
 
+    @staticmethod
     @abc.abstractmethod
     def supported_languages():
         """Return a dict of language codes -> name supported by the backend"""
+        ...
 
     def is_supported_language(self, language):
         """Returns True if `language` is supported by the backend"""
         return language in self.supported_languages()
 
-    fr"""
-        Phonemization follows the following steps:
-            1. Preprocessing:
-                - remove empty lines
-                - remove punctuation
-                - keep track of punctuation marks
-
-            2. Phonemization:
-                - convert text to phonemes
-
-            3. Postprocessing:
-                - join phonemes
-                - restore punctuation marks
-    """
-
     @abc.abstractmethod
     def _phonemize(self, text, separator):
         """The main phonemization method"""
diff --git a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py
index f1d0b6cd..3cccee41 100644
--- a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py
+++ b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py
@@ -28,29 +28,30 @@ def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]:
         "1",  # UTF8 text encoding
     ]
     cmd.extend(args)
-    logging.debug("espeakng: executing %s" % repr(cmd))
-    p = subprocess.Popen(
+    logging.debug("espeakng: executing %s", repr(cmd))
+
+    with subprocess.Popen(
         cmd,
         stdout=subprocess.PIPE,
         stderr=subprocess.STDOUT,
-    )
-    res = iter(p.stdout.readline, b"")
-    if not sync:
+    ) as p:
+        res = iter(p.stdout.readline, b"")
+        if not sync:
+            p.stdout.close()
+            if p.stderr:
+                p.stderr.close()
+            if p.stdin:
+                p.stdin.close()
+            return res
+        res2 = []
+        for line in res:
+            res2.append(line)
         p.stdout.close()
         if p.stderr:
             p.stderr.close()
         if p.stdin:
             p.stdin.close()
-        return res
-    res2 = []
-    for line in res:
-        res2.append(line)
-    p.stdout.close()
-    if p.stderr:
-        p.stderr.close()
-    if p.stdin:
-        p.stdin.close()
-    p.wait()
+        p.wait()
     return res2
 
 
@@ -85,7 +86,24 @@ class ESpeak(BasePhonemizer):
     def __init__(self, language: str, backend=None, punctuations=Punctuation.default_puncs(), keep_puncs=True):
         if self._ESPEAK_LIB is None:
             raise Exception("Unknown backend: %s" % backend)
+
+        # band-aid for backwards compatibility
+        if language == "en":
+            language = "en-us"
+
         super().__init__(language, punctuations=punctuations, keep_puncs=keep_puncs)
+        if backend is not None:
+            self.backend = backend
+
+    @property
+    def backend(self):
+        return self._ESPEAK_LIB
+
+    @backend.setter
+    def backend(self, backend):
+        if backend not in ["espeak", "espeak-ng"]:
+            raise Exception("Unknown backend: %s" % backend)
+        self._ESPEAK_LIB = backend
 
     def auto_set_espeak_lib(self) -> None:
         if is_tool("espeak-ng"):
@@ -115,24 +133,25 @@ class ESpeak(BasePhonemizer):
         # espeak and espeak-ng parses `ipa` differently
         if tie:
             # use '͡' between phonemes
-            if _DEF_ESPEAK_LIB == "espeak":
+            if self.backend == "espeak":
                 args.append("--ipa=1")
             else:
                 args.append("--ipa=3")
         else:
             # split with '_'
-            if _DEF_ESPEAK_LIB == "espeak":
+            if self.backend == "espeak":
                 args.append("--ipa=3")
             else:
                 args.append("--ipa=1")
         if tie:
             args.append("--tie=%s" % tie)
+
         args.append('"' + text + '"')
         # compute phonemes
         phonemes = ""
         for line in _espeak_exe(self._ESPEAK_LIB, args, sync=True):
-            logging.debug("line: %s" % repr(line))
-            phonemes += line.decode("utf8").strip()
+            logging.debug("line: %s", repr(line))
+            phonemes += line.decode("utf8").strip()[2:]  # skip two redundant characters
         return phonemes.replace("_", separator)
 
     def _phonemize(self, text, separator=None):
@@ -146,7 +165,7 @@ class ESpeak(BasePhonemizer):
             Dict: Dictionary of language codes.
         """
         if _DEF_ESPEAK_LIB is None:
-            raise {}
+            return {}
         args = ["--voices"]
         langs = {}
         count = 0
@@ -157,7 +176,7 @@ class ESpeak(BasePhonemizer):
                 lang_code = cols[1]
                 lang_name = cols[3]
                 langs[lang_code] = lang_name
-            logging.debug("line: %s" % repr(line))
+            logging.debug("line: %s", repr(line))
             count += 1
         return langs
 
@@ -168,9 +187,9 @@ class ESpeak(BasePhonemizer):
             str: Version of the used backend.
         """
         args = ["--version"]
-        for line in _espeak_exe(_DEF_ESPEAK_LIB, args, sync=True):
+        for line in _espeak_exe(self.backend, args, sync=True):
             version = line.decode("utf8").strip().split()[2]
-            logging.debug("line: %s" % repr(line))
+            logging.debug("line: %s", repr(line))
             return version
 
     @classmethod
diff --git a/TTS/tts/utils/text/phonemizers/gruut_wrapper.py b/TTS/tts/utils/text/phonemizers/gruut_wrapper.py
index d0aa469e..f3e9c9ab 100644
--- a/TTS/tts/utils/text/phonemizers/gruut_wrapper.py
+++ b/TTS/tts/utils/text/phonemizers/gruut_wrapper.py
@@ -1,5 +1,4 @@
 import importlib
-from os import stat
 from typing import List
 
 import gruut
@@ -55,7 +54,7 @@ class Gruut(BasePhonemizer):
     def name():
         return "gruut"
 
-    def phonemize_gruut(self, text: str, separator: str = "|", tie=False) -> str:
+    def phonemize_gruut(self, text: str, separator: str = "|", tie=False) -> str:  # pylint: disable=unused-argument
         """Convert input text to phonemes.
 
         Gruut phonemizes the given `str` by seperating each phoneme character with `separator`, even for characters
diff --git a/TTS/tts/utils/text/phonemizers/ja_jp_phonemizer.py b/TTS/tts/utils/text/phonemizers/ja_jp_phonemizer.py
index 4f93edeb..60b965f9 100644
--- a/TTS/tts/utils/text/phonemizers/ja_jp_phonemizer.py
+++ b/TTS/tts/utils/text/phonemizers/ja_jp_phonemizer.py
@@ -30,7 +30,7 @@ class JA_JP_Phonemizer(BasePhonemizer):
 
     language = "ja-jp"
 
-    def __init__(self, punctuations=_DEF_JA_PUNCS, keep_puncs=True, **kwargs):
+    def __init__(self, punctuations=_DEF_JA_PUNCS, keep_puncs=True, **kwargs):  # pylint: disable=unused-argument
         super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs)
 
     @staticmethod
@@ -61,12 +61,12 @@ class JA_JP_Phonemizer(BasePhonemizer):
         return True
 
 
-if __name__ == "__main__":
-    text = "これは、電話をかけるための私の日本語の例のテキストです。"
-    e = JA_JP_Phonemizer()
-    print(e.supported_languages())
-    print(e.version())
-    print(e.language)
-    print(e.name())
-    print(e.is_available())
-    print("`" + e.phonemize(text) + "`")
+# if __name__ == "__main__":
+#     text = "これは、電話をかけるための私の日本語の例のテキストです。"
+#     e = JA_JP_Phonemizer()
+#     print(e.supported_languages())
+#     print(e.version())
+#     print(e.language)
+#     print(e.name())
+#     print(e.is_available())
+#     print("`" + e.phonemize(text) + "`")
diff --git a/TTS/tts/utils/text/phonemizers/multi_phonemizer.py b/TTS/tts/utils/text/phonemizers/multi_phonemizer.py
index e8b2ce34..e36b0a2a 100644
--- a/TTS/tts/utils/text/phonemizers/multi_phonemizer.py
+++ b/TTS/tts/utils/text/phonemizers/multi_phonemizer.py
@@ -17,7 +17,7 @@ class MultiPhonemizer:
     lang_to_phonemizer_name = DEF_LANG_TO_PHONEMIZER
     language = "multi-lingual"
 
-    def __init__(self, custom_lang_to_phonemizer: Dict = {}) -> None:
+    def __init__(self, custom_lang_to_phonemizer: Dict = {}) -> None:  # pylint: disable=dangerous-default-value
         self.lang_to_phonemizer_name.update(custom_lang_to_phonemizer)
         self.lang_to_phonemizer = self.init_phonemizers(self.lang_to_phonemizer_name)
 
@@ -40,16 +40,16 @@ class MultiPhonemizer:
         return list(self.lang_to_phonemizer_name.keys())
 
 
-if __name__ == "__main__":
-    texts = {
-        "tr": "Merhaba, bu Türkçe bit örnek!",
-        "en-us": "Hello, this is English example!",
-        "de": "Hallo, das ist ein Deutches Beipiel!",
-        "zh-cn": "这是中国的例子",
-    }
-    phonemes = {}
-    ph = MultiPhonemizer()
-    for lang, text in texts.items():
-        phoneme = ph.phonemize(text, lang)
-        phonemes[lang] = phoneme
-    print(phonemes)
+# if __name__ == "__main__":
+#     texts = {
+#         "tr": "Merhaba, bu Türkçe bit örnek!",
+#         "en-us": "Hello, this is English example!",
+#         "de": "Hallo, das ist ein Deutches Beipiel!",
+#         "zh-cn": "这是中国的例子",
+#     }
+#     phonemes = {}
+#     ph = MultiPhonemizer()
+#     for lang, text in texts.items():
+#         phoneme = ph.phonemize(text, lang)
+#         phonemes[lang] = phoneme
+#     print(phonemes)
diff --git a/TTS/tts/utils/text/phonemizers/zh_cn_phonemizer.py b/TTS/tts/utils/text/phonemizers/zh_cn_phonemizer.py
index e1bd77c7..5a4a5591 100644
--- a/TTS/tts/utils/text/phonemizers/zh_cn_phonemizer.py
+++ b/TTS/tts/utils/text/phonemizers/zh_cn_phonemizer.py
@@ -25,14 +25,15 @@ class ZH_CN_Phonemizer(BasePhonemizer):
 
     language = "zh-cn"
 
-    def __init__(self, punctuations=_DEF_ZH_PUNCS, keep_puncs=False, **kwargs):
+    def __init__(self, punctuations=_DEF_ZH_PUNCS, keep_puncs=False, **kwargs):  # pylint: disable=unused-argument
         super().__init__(self.language, punctuations=punctuations, keep_puncs=keep_puncs)
 
     @staticmethod
     def name():
         return "zh_cn_phonemizer"
 
-    def phonemize_zh_cn(self, text: str, separator: str = "|") -> str:
+    @staticmethod
+    def phonemize_zh_cn(text: str, separator: str = "|") -> str:
         ph = chinese_text_to_phonemes(text, separator)
         return ph
 
@@ -50,12 +51,12 @@ class ZH_CN_Phonemizer(BasePhonemizer):
         return True
 
 
-if __name__ == "__main__":
-    text = "这是，样本中文。"
-    e = ZH_CN_Phonemizer()
-    print(e.supported_languages())
-    print(e.version())
-    print(e.language)
-    print(e.name())
-    print(e.is_available())
-    print("`" + e.phonemize(text) + "`")
+# if __name__ == "__main__":
+#     text = "这是，样本中文。"
+#     e = ZH_CN_Phonemizer()
+#     print(e.supported_languages())
+#     print(e.version())
+#     print(e.language)
+#     print(e.name())
+#     print(e.is_available())
+#     print("`" + e.phonemize(text) + "`")
diff --git a/TTS/tts/utils/text/punctuation.py b/TTS/tts/utils/text/punctuation.py
index 414ac253..09087d5f 100644
--- a/TTS/tts/utils/text/punctuation.py
+++ b/TTS/tts/utils/text/punctuation.py
@@ -130,7 +130,7 @@ class Punctuation:
         return cls._restore(text, puncs, 0)
 
     @classmethod
-    def _restore(cls, text, puncs, num):
+    def _restore(cls, text, puncs, num):  # pylint: disable=too-many-return-statements
         """Auxiliary method for Punctuation.restore()"""
         if not puncs:
             return text
@@ -159,14 +159,14 @@ class Punctuation:
         return cls._restore([text[0] + current.punc + text[1]] + text[2:], puncs[1:], num)
 
 
-if __name__ == "__main__":
-    punc = Punctuation()
-    text = "This is. This is, example!"
+# if __name__ == "__main__":
+#     punc = Punctuation()
+#     text = "This is. This is, example!"
 
-    print(punc.strip(text))
+#     print(punc.strip(text))
 
-    split_text, puncs = punc.strip_to_restore(text)
-    print(split_text, " ---- ", puncs)
+#     split_text, puncs = punc.strip_to_restore(text)
+#     print(split_text, " ---- ", puncs)
 
-    restored_text = punc.restore(split_text, puncs)
-    print(restored_text)
+#     restored_text = punc.restore(split_text, puncs)
+#     print(restored_text)
diff --git a/TTS/utils/audio.py b/TTS/utils/audio.py
index bdee8615..bfa0e5e1 100644
--- a/TTS/utils/audio.py
+++ b/TTS/utils/audio.py
@@ -383,8 +383,7 @@ class AudioProcessor(object):
     def init_from_config(config: "Coqpit"):
         if "audio" in config:
             return AudioProcessor(**config.audio)
-        else:
-            return AudioProcessor(**config)
+        return AudioProcessor(**config)
 
     ### setting up the parameters ###
     def _build_mel_basis(
diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py
index 2e4f4735..f6a1ae6a 100644
--- a/TTS/utils/synthesizer.py
+++ b/TTS/utils/synthesizer.py
@@ -13,7 +13,6 @@ from TTS.tts.utils.speakers import SpeakerManager
 # pylint: disable=unused-wildcard-import
 # pylint: disable=wildcard-import
 from TTS.tts.utils.synthesis import synthesis, trim_silence
-from TTS.tts.utils.text import TTSTokenizer
 from TTS.utils.audio import AudioProcessor
 from TTS.vocoder.models import setup_model as setup_vocoder_model
 from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input
diff --git a/TTS/vocoder/models/gan.py b/TTS/vocoder/models/gan.py
index e56d1db4..f78d69b8 100644
--- a/TTS/vocoder/models/gan.py
+++ b/TTS/vocoder/models/gan.py
@@ -314,7 +314,7 @@ class GAN(BaseVocoder):
         data_items: List,
         verbose: bool,
         num_gpus: int,
-        rank: int = 0,  # pylint: disable=unused-argument
+        rank: int = None,  # pylint: disable=unused-argument
     ):
         """Initiate and return the GAN dataloader.