Merge pull request #19 from idiap/toml

Move from setup.py to pyproject.toml, simplify requirements
2024-05-27 08:59:09 +01:00 · 2024-05-27 08:59:09 +01:00 · df088e99df
parent 642cbd472f 602325021b
commit df088e99df
78 changed files with 445 additions and 374 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -6,4 +6,4 @@ TTS.egg-info/
 tests/outputs/*
 tests/train_outputs/*
 __pycache__/
-*.pyc
+*.pyc
--- a/.github/stale.yml
+++ b/.github/stale.yml
@ -15,4 +15,3 @@ markComment: >
  for your contributions. You might also look our discussion channels.
 # Comment to post when closing a stale issue. Set to `false` to disable
 closeComment: false
-
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@ -42,7 +42,7 @@ jobs:
            branch=${github_ref#*refs/heads/} # strip prefix to get branch name
            tags="${base}:${branch},${base}:${{ github.sha }},"
          elif [[ "${{ startsWith(github.ref, 'refs/tags/') }}" = "true" ]]; then
-            VERSION="v$(cat TTS/VERSION)"
+            VERSION="v$(grep -m 1 version pyproject.toml | grep -P '\d+\.\d+\.\d+' -o)"
            if [[ "${{ github.ref }}" != "refs/tags/${VERSION}" ]]; then
              echo "Pushed tag does not match VERSION file. Aborting push."
              exit 1
@ -63,3 +63,58 @@ jobs:
          push: ${{ github.event_name == 'push' }}
          build-args: "BASE=${{ matrix.base }}"
          tags: ${{ steps.compute-tag.outputs.tags }}
+  docker-dev-build:
+    name: "Build the development Docker image"
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        arch: ["amd64"]
+        base:
+        - "nvidia/cuda:11.8.0-base-ubuntu22.04" # GPU enabled
+    steps:
+      - uses: actions/checkout@v4
+      - name: Log in to the Container registry
+        uses: docker/login-action@v1
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Compute Docker tags, check pyproject.toml version matches tag
+        id: compute-tag
+        run: |
+          set -ex
+          base="ghcr.io/idiap/coqui-tts-dev"
+          tags="" # PR build
+
+          if [[ ${{ matrix.base }} = "python:3.10.8-slim" ]]; then
+            base="ghcr.io/idiap/coqui-tts-dev-cpu"
+          fi
+
+          if [[ "${{ startsWith(github.ref, 'refs/heads/') }}" = "true" ]]; then
+            # Push to branch
+            github_ref="${{ github.ref }}"
+            branch=${github_ref#*refs/heads/} # strip prefix to get branch name
+            tags="${base}:${branch},${base}:${{ github.sha }},"
+          elif [[ "${{ startsWith(github.ref, 'refs/tags/') }}" = "true" ]]; then
+            VERSION="v$(grep -m 1 version pyproject.toml | grep -P '\d+\.\d+\.\d+' -o)"
+            if [[ "${{ github.ref }}" != "refs/tags/${VERSION}" ]]; then
+              echo "Pushed tag does not match version in pyproject.toml. Aborting push."
+              exit 1
+            fi
+            tags="${base}:${VERSION},${base}:latest,${base}:${{ github.sha }}"
+          fi
+          echo "tags=${tags}" >> "$GITHUB_OUTPUT" # read later as steps.compute-tag.outputs.tags
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v1
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v1
+      - name: Build and push
+        uses: docker/build-push-action@v2
+        with:
+          context: .
+          file: dockerfiles/Dockerfile.dev
+          platforms: linux/${{ matrix.arch }}
+          push: false
+          build-args: "BASE=${{ matrix.base }}"
+          tags: ${{ steps.compute-tag.outputs.tags }}
--- a/.github/workflows/pypi-release.yml
+++ b/.github/workflows/pypi-release.yml
@ -14,7 +14,7 @@ jobs:
      - name: Verify tag matches version
        run: |
          set -ex
-          version=$(cat TTS/VERSION)
+          version=$(grep -m 1 version pyproject.toml | grep -P '\d+\.\d+\.\d+' -o)
          tag="${GITHUB_REF/refs\/tags\/}"
          if [[ "v$version" != "$tag" ]]; then
            exit 1
@ -42,10 +42,9 @@ jobs:
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
-      - name: Install pip requirements
+      - name: Install build requirements
        run: |
-          python -m pip install -U pip setuptools wheel build
-          python -m pip install -r requirements.txt
+          python -m pip install -U pip setuptools wheel build numpy cython
      - name: Setup and install manylinux1_x86_64 wheel
        run: |
          python setup.py bdist_wheel --plat-name=manylinux1_x86_64
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@ -45,7 +45,7 @@ jobs:
          sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
      - name: Install TTS
        run: |
-          python3 -m uv pip install --system "coqui-tts[dev,server,ja] @ ."
+          python3 -m uv pip install --system "coqui-tts[dev,server,languages] @ ."
          python3 setup.py egg_info
      - name: Unit tests
        run: make ${{ matrix.subset }}
--- a/.gitignore
+++ b/.gitignore
@ -169,4 +169,4 @@ wandb
 depot/*
 coqui_recipes/*
 local_scripts/*
-coqui_demos/*
+coqui_demos/*
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -3,10 +3,8 @@ repos:
    rev: v4.5.0
    hooks:
      - id: check-yaml
-      # TODO: enable these later; there are plenty of violating
-      #       files that need to be fixed first
-      # - id: end-of-file-fixer
-      # - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
  - repo: "https://github.com/psf/black"
    rev: 24.2.0
    hooks:
@ -17,3 +15,10 @@ repos:
    hooks:
      - id: ruff
        args: [--fix, --exit-non-zero-on-fix]
+  - repo: local
+    hooks:
+      - id: generate_requirements.py
+        name: generate_requirements.py
+        language: system
+        entry: python scripts/generate_requirements.py
+        files: "pyproject.toml|requirements.*\\.txt|scripts/generate_requirements.py" # also re-run when the generator script itself changes
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@ -14,8 +14,9 @@ build:
 # Optionally set the version of Python and requirements required to build your docs
 python:
  install:
-    - requirements: docs/requirements.txt
-    - requirements: requirements.txt
+    - path: .
+      extra_requirements:
+        - docs

 # Build documentation in the docs/ directory with Sphinx
 sphinx:
--- a/CITATION.cff
+++ b/CITATION.cff
@ -17,4 +17,4 @@ keywords:
  - deep learning
  - artificial intelligence
  - text to speech
-  - TTS
+  - TTS
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@ -119,11 +119,11 @@ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
 version 2.0, available at
 [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0].

-Community Impact Guidelines were inspired by 
+Community Impact Guidelines were inspired by
 [Mozilla's code of conduct enforcement ladder][Mozilla CoC].

 For answers to common questions about this code of conduct, see the FAQ at
-[https://www.contributor-covenant.org/faq][FAQ]. Translations are available 
+[https://www.contributor-covenant.org/faq][FAQ]. Translations are available
 at [https://www.contributor-covenant.org/translations][translations].

 [homepage]: https://www.contributor-covenant.org
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -60,7 +60,7 @@ The following steps are tested on an Ubuntu system.

    ```bash
    $ make system-deps  # intended to be used on Ubuntu (Debian). Let us know if you have a different OS.
-    $ make install
+    $ make install_dev
    ```

 4. Create a new branch with an informative name for your goal.
--- a/Dockerfile
+++ b/Dockerfile
@ -3,6 +3,7 @@ FROM ${BASE}

 RUN apt-get update && apt-get upgrade -y
 RUN apt-get install -y --no-install-recommends gcc g++ make python3 python3-dev python3-pip python3-venv python3-wheel espeak-ng libsndfile1-dev && rm -rf /var/lib/apt/lists/*
+RUN pip3 install -U pip setuptools
 RUN pip3 install llvmlite --ignore-installed

 # Install Dependencies:
--- a/LICENSE.txt
+++ b/LICENSE.txt
@ -35,7 +35,7 @@ Mozilla Public License Version 2.0
    means any form of the work other than Source Code Form.

 1.7. "Larger Work"
-    means a work that combines Covered Software with other material, in 
+    means a work that combines Covered Software with other material, in
    a separate file or files, that is not Covered Software.

 1.8. "License"
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -1,9 +1,6 @@
 include README.md
 include LICENSE.txt
-include requirements.*.txt
 include *.cff
-include requirements.txt
-include TTS/VERSION
 recursive-include TTS *.json
 recursive-include TTS *.html
 recursive-include TTS *.png
@ -11,5 +8,3 @@ recursive-include TTS *.md
 recursive-include TTS *.py
 recursive-include TTS *.pyx
 recursive-include images *.png
-recursive-exclude tests *
-prune tests*
--- a/Makefile
+++ b/Makefile
@ -1,5 +1,5 @@
 .DEFAULT_GOAL := help
-.PHONY: test system-deps dev-deps deps style lint install help docs
+.PHONY: test system-deps dev-deps style lint install install_dev help docs

 help:
 	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
@ -62,20 +62,15 @@ system-deps:	## install linux system deps
 dev-deps:  ## install development deps
 	pip install -r requirements.dev.txt

-doc-deps:  ## install docs dependencies
-	pip install -r docs/requirements.txt
-
 build-docs: ## build the docs
 	cd docs && make clean && make build

-hub-deps:  ## install deps for torch hub use
-	pip install -r requirements.hub.txt
-
-deps:	## install 🐸 requirements.
-	pip install -r requirements.txt
-
-install:	## install 🐸 TTS for development.
+install:	## install 🐸 TTS
 	pip install -e .[all]

+install_dev:	## install 🐸 TTS for development.
+	pip install -e .[all,dev]
+	pre-commit install
+
 docs:	## build the docs
 	$(MAKE) -C docs clean && $(MAKE) -C docs html
--- a/README.md
+++ b/README.md
@ -143,9 +143,35 @@ If you plan to code or train models, clone 🐸TTS and install it locally.

 ```bash
 git clone https://github.com/idiap/coqui-ai-TTS
-pip install -e .[all,dev,notebooks,server]  # Select the relevant extras
+pip install -e .
 ```

+### Optional dependencies
+
+The following extras allow the installation of optional dependencies:
+
+| Name | Description |
+|------|-------------|
+| `all` | All optional dependencies, except `dev` and `docs` |
+| `dev` | Development dependencies |
+| `docs` | Dependencies for building the documentation |
+| `notebooks` | Dependencies only used in notebooks |
+| `server` | Dependencies to run the TTS server |
+| `bn` | Bangla G2P |
+| `ja` | Japanese G2P |
+| `ko` | Korean G2P |
+| `zh` | Chinese G2P |
+| `languages` | All language-specific dependencies |
+
+You can install extras with one of the following commands:
+
+```bash
+pip install coqui-tts[server,ja]
+pip install -e .[server,ja]
+```
+
+### Platforms
+
 If you are on Ubuntu (Debian), you can also run following commands for installation.

 ```bash
--- a/TTS/VERSION
+++ b/TTS/VERSION
@ -1 +0,0 @@
-0.23.1
--- a/TTS/__init__.py
+++ b/TTS/__init__.py
@ -1,6 +0,0 @@
-import os
-
-with open(os.path.join(os.path.dirname(__file__), "VERSION"), "r", encoding="utf-8") as f:
-    version = f.read().strip()
-
-__version__ = version
--- a/TTS/demos/xtts_ft_demo/requirements.txt
+++ b/TTS/demos/xtts_ft_demo/requirements.txt
@ -1,2 +1,2 @@
 faster_whisper==0.9.0
-gradio==4.7.1
+gradio==4.7.1
--- a/TTS/server/templates/details.html
+++ b/TTS/server/templates/details.html
@ -128,4 +128,4 @@

 </body>

-</html>
+</html>
--- a/TTS/tts/layers/glow_tts/glow.py
+++ b/TTS/tts/layers/glow_tts/glow.py
@ -1,5 +1,4 @@
 import torch
-from packaging.version import Version
 from torch import nn
 from torch.nn import functional as F

@ -90,10 +89,7 @@ class InvConvNear(nn.Module):
        self.no_jacobian = no_jacobian
        self.weight_inv = None

-        if Version(torch.__version__) < Version("1.9"):
-            w_init = torch.qr(torch.FloatTensor(self.num_splits, self.num_splits).normal_())[0]
-        else:
-            w_init = torch.linalg.qr(torch.FloatTensor(self.num_splits, self.num_splits).normal_(), "complete")[0]
+        w_init = torch.linalg.qr(torch.FloatTensor(self.num_splits, self.num_splits).normal_(), "complete")[0]

        if torch.det(w_init) < 0:
            w_init[:, 0] = -1 * w_init[:, 0]
--- a/TTS/tts/layers/xtts/perceiver_encoder.py
+++ b/TTS/tts/layers/xtts/perceiver_encoder.py
@ -7,7 +7,6 @@ import torch
 import torch.nn.functional as F
 from einops import rearrange, repeat
 from einops.layers.torch import Rearrange
-from packaging import version
 from torch import einsum, nn


@ -44,9 +43,6 @@ class Attend(nn.Module):
        self.register_buffer("mask", None, persistent=False)

        self.use_flash = use_flash
-        assert not (
-            use_flash and version.parse(torch.__version__) < version.parse("2.0.0")
-        ), "in order to use flash attention, you must be using pytorch 2.0 or above"

        # determine efficient attention configs for cuda and cpu
        self.config = namedtuple("EfficientAttentionConfig", ["enable_flash", "enable_math", "enable_mem_efficient"])
--- a/TTS/tts/layers/xtts/tokenizer.py
+++ b/TTS/tts/layers/xtts/tokenizer.py
@ -4,10 +4,7 @@ import re
 import textwrap
 from functools import cached_property

-import pypinyin
 import torch
-from hangul_romanize import Transliter
-from hangul_romanize.rule import academic
 from num2words import num2words
 from spacy.lang.ar import Arabic
 from spacy.lang.en import English
@ -577,6 +574,10 @@ def basic_cleaners(text):


 def chinese_transliterate(text):
+    try:
+        import pypinyin
+    except ImportError as e:
+        raise ImportError("Chinese requires: pypinyin") from e
    return "".join(
        [p[0] for p in pypinyin.pinyin(text, style=pypinyin.Style.TONE3, heteronym=False, neutral_tone_with_five=True)]
    )
@ -589,6 +590,11 @@ def japanese_cleaners(text, katsu):


 def korean_transliterate(text):
+    try:
+        from hangul_romanize import Transliter
+        from hangul_romanize.rule import academic
+    except ImportError as e:
+        raise ImportError("Korean requires: hangul_romanize") from e
    r = Transliter(academic)
    return r.translit(text)

--- a/TTS/tts/utils/assets/tortoise/tokenizer.json
+++ b/TTS/tts/utils/assets/tortoise/tokenizer.json
@ -1 +1 @@
-{"version":"1.0","truncation":null,"padding":null,"added_tokens":[{"id":0,"special":true,"content":"[STOP]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":1,"special":true,"content":"[UNK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":2,"special":true,"content":"[SPACE]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false}],"normalizer":null,"pre_tokenizer":{"type":"Whitespace"},"post_processor":null,"decoder":null,"model":{"type":"BPE","dropout":null,"unk_token":"[UNK]","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"vocab":{"[STOP]":0,"[UNK]":1,"[SPACE]":2,"!":3,"'":4,"(":5,")":6,",":7,"-":8,".":9,"/":10,":":11,";":12,"?":13,"a":14,"b":15,"c":16,"d":17,"e":18,"f":19,"g":20,"h":21,"i":22,"j":23,"k":24,"l":25,"m":26,"n":27,"o":28,"p":29,"q":30,"r":31,"s":32,"t":33,"u":34,"v":35,"w":36,"x":37,"y":38,"z":39,"th":40,"in":41,"the":42,"an":43,"er":44,"ou":45,"re":46,"on":47,"at":48,"ed":49,"en":50,"to":51,"ing":52,"and":53,"is":54,"as":55,"al":56,"or":57,"of":58,"ar":59,"it":60,"es":61,"he":62,"st":63,"le":64,"om":65,"se":66,"be":67,"ad":68,"ow":69,"ly":70,"ch":71,"wh":72,"that":73,"you":74,"li":75,"ve":76,"ac":77,"ti":78,"ld":79,"me":80,"was":81,"gh":82,"id":83,"ll":84,"wi":85,"ent":86,"for":87,"ay":88,"ro":89,"ver":90,"ic":91,"her":92,"ke":93,"his":94,"no":95,"ut":96,"un":97,"ir":98,"lo":99,"we":100,"ri":101,"ha":102,"with":103,"ght":104,"out":105,"im":106,"ion":107,"all":108,"ab":109,"one":110,"ne":111,"ge":112,"ould":113,"ter":114,"mo":115,"had":116,"ce":117,"she":118,"go":119,"sh":120,"ur":121,"am":122,"so":123,"pe":124,"my":125,"de":126,"are":127,"but":128,"ome":129,"fr":130,"ther":131,"fe":132,"su":133,"do":134,"con":135,"te":136,"ain":137,"ere":138,"po":139,"if":140,"they":141,"us":142,"ag":143,"tr":144,"now":145,"oun":146,"this":147,"have":148,"not":149,"sa":150,"il":151,"up":152,"thing":153,"from":154,"ap":155,"him":156,"ack":157,"ation":158,"ant":159,"our
":160,"op":161,"like":162,"ust":163,"ess":164,"bo":165,"ok":166,"ul":167,"ind":168,"ex":169,"com":170,"some":171,"there":172,"ers":173,"co":174,"res":175,"man":176,"ard":177,"pl":178,"wor":179,"way":180,"tion":181,"fo":182,"ca":183,"were":184,"by":185,"ate":186,"pro":187,"ted":188,"ound":189,"own":190,"would":191,"ts":192,"what":193,"qu":194,"ally":195,"ight":196,"ck":197,"gr":198,"when":199,"ven":200,"can":201,"ough":202,"ine":203,"end":204,"per":205,"ous":206,"od":207,"ide":208,"know":209,"ty":210,"very":211,"si":212,"ak":213,"who":214,"about":215,"ill":216,"them":217,"est":218,"red":219,"ye":220,"could":221,"ong":222,"your":223,"their":224,"em":225,"just":226,"other":227,"into":228,"any":229,"whi":230,"um":231,"tw":232,"ast":233,"der":234,"did":235,"ie":236,"been":237,"ace":238,"ink":239,"ity":240,"back":241,"ting":242,"br":243,"more":244,"ake":245,"pp":246,"then":247,"sp":248,"el":249,"use":250,"bl":251,"said":252,"over":253,"get":254},"merges":["t h","i n","th e","a n","e r","o u","r e","o n","a t","e d","e n","t o","in g","an d","i s","a s","a l","o r","o f","a r","i t","e s","h e","s t","l e","o m","s e","b e","a d","o w","l y","c h","w h","th at","y ou","l i","v e","a c","t i","l d","m e","w as","g h","i d","l l","w i","en t","f or","a y","r o","v er","i c","h er","k e","h is","n o","u t","u n","i r","l o","w e","r i","h a","wi th","gh t","ou t","i m","i on","al l","a b","on e","n e","g e","ou ld","t er","m o","h ad","c e","s he","g o","s h","u r","a m","s o","p e","m y","d e","a re","b ut","om e","f r","the r","f e","s u","d o","c on","t e","a in","er e","p o","i f","the y","u s","a g","t r","n ow","ou n","th is","ha ve","no t","s a","i l","u p","th ing","fr om","a p","h im","ac k","at ion","an t","ou r","o p","li ke","u st","es s","b o","o k","u l","in d","e x","c om","s ome","the re","er s","c o","re s","m an","ar d","p l","w or","w ay","ti on","f o","c a","w ere","b y","at e","p ro","t ed","oun d","ow n","w ould","t s","wh at","q u","al ly","i ght","c 
k","g r","wh en","v en","c an","ou gh","in e","en d","p er","ou s","o d","id e","k now","t y","ver y","s i","a k","wh o","ab out","i ll","the m","es t","re d","y e","c ould","on g","you r","the ir","e m","j ust","o ther","in to","an y","wh i","u m","t w","as t","d er","d id","i e","be en","ac e","in k","it y","b ack","t ing","b r","mo re","a ke","p p","the n","s p","e l","u se","b l","sa id","o ver","ge t"]}}
+{"version":"1.0","truncation":null,"padding":null,"added_tokens":[{"id":0,"special":true,"content":"[STOP]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":1,"special":true,"content":"[UNK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":2,"special":true,"content":"[SPACE]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false}],"normalizer":null,"pre_tokenizer":{"type":"Whitespace"},"post_processor":null,"decoder":null,"model":{"type":"BPE","dropout":null,"unk_token":"[UNK]","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"vocab":{"[STOP]":0,"[UNK]":1,"[SPACE]":2,"!":3,"'":4,"(":5,")":6,",":7,"-":8,".":9,"/":10,":":11,";":12,"?":13,"a":14,"b":15,"c":16,"d":17,"e":18,"f":19,"g":20,"h":21,"i":22,"j":23,"k":24,"l":25,"m":26,"n":27,"o":28,"p":29,"q":30,"r":31,"s":32,"t":33,"u":34,"v":35,"w":36,"x":37,"y":38,"z":39,"th":40,"in":41,"the":42,"an":43,"er":44,"ou":45,"re":46,"on":47,"at":48,"ed":49,"en":50,"to":51,"ing":52,"and":53,"is":54,"as":55,"al":56,"or":57,"of":58,"ar":59,"it":60,"es":61,"he":62,"st":63,"le":64,"om":65,"se":66,"be":67,"ad":68,"ow":69,"ly":70,"ch":71,"wh":72,"that":73,"you":74,"li":75,"ve":76,"ac":77,"ti":78,"ld":79,"me":80,"was":81,"gh":82,"id":83,"ll":84,"wi":85,"ent":86,"for":87,"ay":88,"ro":89,"ver":90,"ic":91,"her":92,"ke":93,"his":94,"no":95,"ut":96,"un":97,"ir":98,"lo":99,"we":100,"ri":101,"ha":102,"with":103,"ght":104,"out":105,"im":106,"ion":107,"all":108,"ab":109,"one":110,"ne":111,"ge":112,"ould":113,"ter":114,"mo":115,"had":116,"ce":117,"she":118,"go":119,"sh":120,"ur":121,"am":122,"so":123,"pe":124,"my":125,"de":126,"are":127,"but":128,"ome":129,"fr":130,"ther":131,"fe":132,"su":133,"do":134,"con":135,"te":136,"ain":137,"ere":138,"po":139,"if":140,"they":141,"us":142,"ag":143,"tr":144,"now":145,"oun":146,"this":147,"have":148,"not":149,"sa":150,"il":151,"up":152,"thing":153,"from":154,"ap":155,"him":156,"ack":157,"ation":158,"ant":159,"our
":160,"op":161,"like":162,"ust":163,"ess":164,"bo":165,"ok":166,"ul":167,"ind":168,"ex":169,"com":170,"some":171,"there":172,"ers":173,"co":174,"res":175,"man":176,"ard":177,"pl":178,"wor":179,"way":180,"tion":181,"fo":182,"ca":183,"were":184,"by":185,"ate":186,"pro":187,"ted":188,"ound":189,"own":190,"would":191,"ts":192,"what":193,"qu":194,"ally":195,"ight":196,"ck":197,"gr":198,"when":199,"ven":200,"can":201,"ough":202,"ine":203,"end":204,"per":205,"ous":206,"od":207,"ide":208,"know":209,"ty":210,"very":211,"si":212,"ak":213,"who":214,"about":215,"ill":216,"them":217,"est":218,"red":219,"ye":220,"could":221,"ong":222,"your":223,"their":224,"em":225,"just":226,"other":227,"into":228,"any":229,"whi":230,"um":231,"tw":232,"ast":233,"der":234,"did":235,"ie":236,"been":237,"ace":238,"ink":239,"ity":240,"back":241,"ting":242,"br":243,"more":244,"ake":245,"pp":246,"then":247,"sp":248,"el":249,"use":250,"bl":251,"said":252,"over":253,"get":254},"merges":["t h","i n","th e","a n","e r","o u","r e","o n","a t","e d","e n","t o","in g","an d","i s","a s","a l","o r","o f","a r","i t","e s","h e","s t","l e","o m","s e","b e","a d","o w","l y","c h","w h","th at","y ou","l i","v e","a c","t i","l d","m e","w as","g h","i d","l l","w i","en t","f or","a y","r o","v er","i c","h er","k e","h is","n o","u t","u n","i r","l o","w e","r i","h a","wi th","gh t","ou t","i m","i on","al l","a b","on e","n e","g e","ou ld","t er","m o","h ad","c e","s he","g o","s h","u r","a m","s o","p e","m y","d e","a re","b ut","om e","f r","the r","f e","s u","d o","c on","t e","a in","er e","p o","i f","the y","u s","a g","t r","n ow","ou n","th is","ha ve","no t","s a","i l","u p","th ing","fr om","a p","h im","ac k","at ion","an t","ou r","o p","li ke","u st","es s","b o","o k","u l","in d","e x","c om","s ome","the re","er s","c o","re s","m an","ar d","p l","w or","w ay","ti on","f o","c a","w ere","b y","at e","p ro","t ed","oun d","ow n","w ould","t s","wh at","q u","al ly","i ght","c 
k","g r","wh en","v en","c an","ou gh","in e","en d","p er","ou s","o d","id e","k now","t y","ver y","s i","a k","wh o","ab out","i ll","the m","es t","re d","y e","c ould","on g","you r","the ir","e m","j ust","o ther","in to","an y","wh i","u m","t w","as t","d er","d id","i e","be en","ac e","in k","it y","b ack","t ing","b r","mo re","a ke","p p","the n","s p","e l","u se","b l","sa id","o ver","ge t"]}}
--- a/TTS/tts/utils/monotonic_align/setup.py
+++ b/TTS/tts/utils/monotonic_align/setup.py
@ -1,7 +0,0 @@
-# from distutils.core import setup
-# from Cython.Build import cythonize
-# import numpy
-
-# setup(name='monotonic_align',
-#       ext_modules=cythonize("core.pyx"),
-#       include_dirs=[numpy.get_include()])
--- a/TTS/tts/utils/text/bangla/phonemizer.py
+++ b/TTS/tts/utils/text/bangla/phonemizer.py
@ -1,8 +1,11 @@
 import re

-import bangla
-from bnnumerizer import numerize
-from bnunicodenormalizer import Normalizer
+try:
+    import bangla
+    from bnnumerizer import numerize
+    from bnunicodenormalizer import Normalizer
+except ImportError as e:
+    raise ImportError("Bangla requires: bangla, bnnumerizer, bnunicodenormalizer") from e

 # initialize
 bnorm = Normalizer()
--- a/TTS/tts/utils/text/chinese_mandarin/phonemizer.py
+++ b/TTS/tts/utils/text/chinese_mandarin/phonemizer.py
@ -1,7 +1,10 @@
 from typing import List

-import jieba
-import pypinyin
+try:
+    import jieba
+    import pypinyin
+except ImportError as e:
+    raise ImportError("Chinese requires: jieba, pypinyin") from e

 from .pinyinToPhonemes import PINYIN_DICT

--- a/TTS/tts/utils/text/korean/phonemizer.py
+++ b/TTS/tts/utils/text/korean/phonemizer.py
@ -1,4 +1,7 @@
-from jamo import hangul_to_jamo
+try:
+    from jamo import hangul_to_jamo
+except ImportError as e:
+    raise ImportError("Korean requires: g2pkk, jamo") from e

 from TTS.tts.utils.text.korean.korean import normalize

--- a/TTS/tts/utils/text/phonemizers/__init__.py
+++ b/TTS/tts/utils/text/phonemizers/__init__.py
@ -1,17 +1,29 @@
-from TTS.tts.utils.text.phonemizers.bangla_phonemizer import BN_Phonemizer
 from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
 from TTS.tts.utils.text.phonemizers.belarusian_phonemizer import BEL_Phonemizer
 from TTS.tts.utils.text.phonemizers.espeak_wrapper import ESpeak
 from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut
-from TTS.tts.utils.text.phonemizers.ko_kr_phonemizer import KO_KR_Phonemizer
-from TTS.tts.utils.text.phonemizers.zh_cn_phonemizer import ZH_CN_Phonemizer
+
+try:
+    from TTS.tts.utils.text.phonemizers.bangla_phonemizer import BN_Phonemizer
+except ImportError:
+    BN_Phonemizer = None

 try:
    from TTS.tts.utils.text.phonemizers.ja_jp_phonemizer import JA_JP_Phonemizer
 except ImportError:
    JA_JP_Phonemizer = None

-PHONEMIZERS = {b.name(): b for b in (ESpeak, Gruut, KO_KR_Phonemizer, BN_Phonemizer)}
+try:
+    from TTS.tts.utils.text.phonemizers.ko_kr_phonemizer import KO_KR_Phonemizer
+except ImportError:
+    KO_KR_Phonemizer = None
+
+try:
+    from TTS.tts.utils.text.phonemizers.zh_cn_phonemizer import ZH_CN_Phonemizer
+except ImportError:
+    ZH_CN_Phonemizer = None
+
+PHONEMIZERS = {b.name(): b for b in (ESpeak, Gruut)}


 ESPEAK_LANGS = list(ESpeak.supported_languages().keys())
@ -32,17 +44,21 @@ DEF_LANG_TO_PHONEMIZER.update(_new_dict)

 # Force default for some languages
 DEF_LANG_TO_PHONEMIZER["en"] = DEF_LANG_TO_PHONEMIZER["en-us"]
-DEF_LANG_TO_PHONEMIZER["zh-cn"] = ZH_CN_Phonemizer.name()
-DEF_LANG_TO_PHONEMIZER["ko-kr"] = KO_KR_Phonemizer.name()
-DEF_LANG_TO_PHONEMIZER["bn"] = BN_Phonemizer.name()
 DEF_LANG_TO_PHONEMIZER["be"] = BEL_Phonemizer.name()


-# JA phonemizer has deal breaking dependencies like MeCab for some systems.
-# So we only have it when we have it.
+if BN_Phonemizer is not None:
+    PHONEMIZERS[BN_Phonemizer.name()] = BN_Phonemizer
+    DEF_LANG_TO_PHONEMIZER["bn"] = BN_Phonemizer.name()
 if JA_JP_Phonemizer is not None:
    PHONEMIZERS[JA_JP_Phonemizer.name()] = JA_JP_Phonemizer
    DEF_LANG_TO_PHONEMIZER["ja-jp"] = JA_JP_Phonemizer.name()
+if KO_KR_Phonemizer is not None:
+    PHONEMIZERS[KO_KR_Phonemizer.name()] = KO_KR_Phonemizer
+    DEF_LANG_TO_PHONEMIZER["ko-kr"] = KO_KR_Phonemizer.name()
+if ZH_CN_Phonemizer is not None:
+    PHONEMIZERS[ZH_CN_Phonemizer.name()] = ZH_CN_Phonemizer
+    DEF_LANG_TO_PHONEMIZER["zh-cn"] = ZH_CN_Phonemizer.name()


 def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer:
@ -60,14 +76,20 @@ def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer:
    if name == "gruut":
        return Gruut(**kwargs)
    if name == "zh_cn_phonemizer":
+        if ZH_CN_Phonemizer is None:
+            raise ValueError("You need to install ZH phonemizer dependencies. Try `pip install coqui-tts[zh]`.")
        return ZH_CN_Phonemizer(**kwargs)
    if name == "ja_jp_phonemizer":
        if JA_JP_Phonemizer is None:
-            raise ValueError(" ❗ You need to install JA phonemizer dependencies. Try `pip install coqui-tts[ja]`.")
+            raise ValueError("You need to install JA phonemizer dependencies. Try `pip install coqui-tts[ja]`.")
        return JA_JP_Phonemizer(**kwargs)
    if name == "ko_kr_phonemizer":
+        if KO_KR_Phonemizer is None:
+            raise ValueError("You need to install KO phonemizer dependencies. Try `pip install coqui-tts[ko]`.")
        return KO_KR_Phonemizer(**kwargs)
    if name == "bn_phonemizer":
+        if BN_Phonemizer is None:
+            raise ValueError("You need to install BN phonemizer dependencies. Try `pip install coqui-tts[bn]`.")
        return BN_Phonemizer(**kwargs)
    if name == "be_phonemizer":
        return BEL_Phonemizer(**kwargs)
--- a/TTS/vc/modules/freevc/wavlm/config.json
+++ b/TTS/vc/modules/freevc/wavlm/config.json
@ -96,4 +96,4 @@
    "transformers_version": "4.15.0.dev0",
    "use_weighted_layer_sum": false,
    "vocab_size": 32
-  }
+  }
--- a/dockerfiles/Dockerfile.dev
+++ b/dockerfiles/Dockerfile.dev
@ -11,34 +11,13 @@ RUN apt-get install -y --no-install-recommends \
    && rm -rf /var/lib/apt/lists/*

 # Install Major Python Dependencies:
+RUN pip3 install -U pip setuptools
 RUN pip3 install llvmlite --ignore-installed
 RUN pip3 install torch torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
 RUN rm -rf /root/.cache/pip

-WORKDIR /root
-
-# Copy Dependency Lock Files:
-COPY \
-    Makefile \
-    pyproject.toml \
-    setup.py \
-    requirements.dev.txt \
-    requirements.ja.txt \
-    requirements.notebooks.txt \
-    requirements.txt \
-    /root/
-
-# Install Project Dependencies
-# Separate stage to limit re-downloading:
-RUN pip install \
-    -r requirements.txt \
-    -r requirements.dev.txt \
-    -r requirements.ja.txt \
-    -r requirements.notebooks.txt
-
 # Copy TTS repository contents:
+WORKDIR /root
 COPY . /root

-# Installing the TTS package itself:
 RUN make install
-
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@ -3,4 +3,4 @@ myst-parser == 2.0.0
 sphinx == 7.2.5
 sphinx_inline_tabs
 sphinx_copybutton
-linkify-it-py
+linkify-it-py
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@ -10,26 +10,24 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
+import importlib.metadata
 import os
 import sys

-sys.path.insert(0, os.path.abspath('../..'))
+sys.path.insert(0, os.path.abspath("../.."))

 # mock deps with system level requirements.
 autodoc_mock_imports = ["soundfile"]

 # -- Project information -----------------------------------------------------
-project = 'TTS'
+project = "TTS"
 copyright = "2021 Coqui GmbH, 2020 TTS authors"
-author = 'Coqui GmbH'
-
-with open("../../TTS/VERSION", "r") as ver:
-    version = ver.read().strip()
+author = "Coqui GmbH"

 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
-release = version
+release = importlib.metadata.version(project)

 # The main toctree document.
 master_doc = "index"
@ -40,32 +38,34 @@ master_doc = "index"
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-    'sphinx.ext.autodoc',
-    'sphinx.ext.autosummary',
-    'sphinx.ext.doctest',
-    'sphinx.ext.intersphinx',
-    'sphinx.ext.todo',
-    'sphinx.ext.coverage',
-    'sphinx.ext.napoleon',
-    'sphinx.ext.viewcode',
-    'sphinx.ext.autosectionlabel',
-    'myst_parser',
+    "sphinx.ext.autodoc",
+    "sphinx.ext.autosummary",
+    "sphinx.ext.doctest",
+    "sphinx.ext.intersphinx",
+    "sphinx.ext.todo",
+    "sphinx.ext.coverage",
+    "sphinx.ext.napoleon",
+    "sphinx.ext.viewcode",
+    "sphinx.ext.autosectionlabel",
+    "myst_parser",
    "sphinx_copybutton",
    "sphinx_inline_tabs",
 ]


 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]

 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'TODO/*']
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "TODO/*"]

 source_suffix = [".rst", ".md"]

-myst_enable_extensions = ['linkify',]
+myst_enable_extensions = [
+    "linkify",
+]

 # 'sphinxcontrib.katex',
 # 'sphinx.ext.autosectionlabel',
@ -76,17 +76,17 @@ myst_enable_extensions = ['linkify',]
 # duplicated section names that are in different documents.
 autosectionlabel_prefix_document = True

-language = 'en'
+language = "en"

 autodoc_inherit_docstrings = False

 # Disable displaying type annotations, these can be very verbose
-autodoc_typehints = 'none'
+autodoc_typehints = "none"

 # Enable overriding of function signatures in the first line of the docstring.
 autodoc_docstring_signature = True

-napoleon_custom_sections = [('Shapes', 'shape')]
+napoleon_custom_sections = [("Shapes", "shape")]


 # -- Options for HTML output -------------------------------------------------
@ -94,7 +94,7 @@ napoleon_custom_sections = [('Shapes', 'shape')]
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'furo'
-html_tite = "TTS"
+html_theme = "furo"
+html_title = "TTS"
 html_theme_options = {
    "light_logo": "logo.png",
@ -103,18 +103,18 @@ html_theme_options = {
 }

 html_sidebars = {
-        '**': [
-               "sidebar/scroll-start.html",
-    "sidebar/brand.html",
-    "sidebar/search.html",
-    "sidebar/navigation.html",
-    "sidebar/ethical-ads.html",
-    "sidebar/scroll-end.html",
-        ]
-    }
+    "**": [
+        "sidebar/scroll-start.html",
+        "sidebar/brand.html",
+        "sidebar/search.html",
+        "sidebar/navigation.html",
+        "sidebar/ethical-ads.html",
+        "sidebar/scroll-end.html",
+    ]
+}


 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
--- a/docs/source/docker_images.md
+++ b/docs/source/docker_images.md
@ -53,4 +53,4 @@ python3 TTS/server/server.py --list_models #To get the list of available models
 python3 TTS/server/server.py --model_name tts_models/en/vctk/vits --use_cuda true
 ```

-Click [there](http://[::1]:5002/) and have fun with the server!
+Click [there](http://[::1]:5002/) and have fun with the server!
--- a/docs/source/finetuning.md
+++ b/docs/source/finetuning.md
@ -111,4 +111,3 @@ them and fine-tune it for your own dataset. This will help you in two main ways:
        --coqpit.run_name "glow-tts-finetune" \
        --coqpit.lr 0.00001
    ```
-
--- a/docs/source/installation.md
+++ b/docs/source/installation.md
@ -26,7 +26,12 @@ This is recommended for development and more control over 🐸TTS.
 git clone https://github.com/idiap/coqui-ai-TTS
 cd coqui-ai-TTS
 make system-deps  # only on Linux systems.
+
+# Install package and optional extras
 make install
+
+# Same as above + dev dependencies and pre-commit
+make install_dev
 ```

 ## On Windows
--- a/docs/source/main_classes/audio_processor.md
+++ b/docs/source/main_classes/audio_processor.md
@ -22,4 +22,4 @@ also must inherit or initiate `BaseAudioConfig`.
 ```{eval-rst}
 .. autoclass:: TTS.config.shared_configs.BaseAudioConfig
    :members:
-```
+```
--- a/docs/source/main_classes/dataset.md
+++ b/docs/source/main_classes/dataset.md
@ -22,4 +22,4 @@
 ```{eval-rst}
 .. autoclass:: TTS.vocoder.datasets.wavernn_dataset.WaveRNNDataset
    :members:
-```
+```
--- a/docs/source/main_classes/gan.md
+++ b/docs/source/main_classes/gan.md
@ -9,4 +9,4 @@ to do its ✨️.
 ```{eval-rst}
 .. autoclass:: TTS.vocoder.models.gan.GAN
    :members:
-```
+```
--- a/docs/source/main_classes/model_api.md
+++ b/docs/source/main_classes/model_api.md
@ -21,4 +21,4 @@ Model API provides you a set of functions that easily make your model compatible
 ```{eval-rst}
 .. autoclass:: TTS.vocoder.models.base_vocoder.BaseVocoder
    :members:
-```
+```
--- a/docs/source/main_classes/speaker_manager.md
+++ b/docs/source/main_classes/speaker_manager.md
@ -8,4 +8,4 @@ especially useful for multi-speaker models.
 ```{eval-rst}
 .. automodule:: TTS.tts.utils.speakers
    :members:
-```
+```
--- a/docs/source/models/forward_tts.md
+++ b/docs/source/models/forward_tts.md
@ -61,5 +61,3 @@ Currently we provide the following pre-configured architectures:
 .. autoclass:: TTS.tts.configs.fast_speech_config.FastSpeechConfig
    :members:
 ```
-
-
--- a/docs/source/models/overflow.md
+++ b/docs/source/models/overflow.md
@ -33,4 +33,4 @@ are available at https://shivammehta25.github.io/OverFlow/.
 ```{eval-rst}
 .. autoclass:: TTS.tts.models.overflow.Overflow
    :members:
-```
+```
--- a/docs/source/models/tacotron1-2.md
+++ b/docs/source/models/tacotron1-2.md
@ -59,5 +59,3 @@ If you have a limited VRAM, then you can try using the Guided Attention Loss or
 .. autoclass:: TTS.tts.configs.tacotron2_config.Tacotron2Config
    :members:
 ```
-
-
--- a/docs/source/what_makes_a_good_dataset.md
+++ b/docs/source/what_makes_a_good_dataset.md
@ -17,4 +17,4 @@ If you like to use a bespoken dataset, you might like to perform a couple of qua
 * **CheckSpectrograms** is to measure the noise level of the clips and find good audio processing parameters. The noise level might be observed by checking spectrograms. If spectrograms look cluttered, especially in silent parts, this dataset might not be a good candidate for a TTS project. If your voice clips are too noisy in the background, it makes things harder for your model to learn the alignment, and the final result might be different than the voice you are given.
 If the spectrograms look good, then the next step is to find a good set of audio processing parameters, defined in ```config.json```. In the notebook, you can compare different sets of parameters and see the resynthesis results in relation to the given ground-truth. Find the best parameters that give the best possible synthesis performance.

-Another practical detail is the quantization level of the clips. If your dataset has a very high bit-rate, that might cause slow data-load time and consequently slow training. It is better to reduce the sample-rate of your dataset to around 16000-22050.
+Another practical detail is the quantization level of the clips. If your dataset has a very high bit-rate, that might cause slow data-load time and consequently slow training. It is better to reduce the sample-rate of your dataset to around 16000-22050.
--- a/hubconf.py
+++ b/hubconf.py
@ -1,15 +1,11 @@
-dependencies = [
-    'torch', 'gdown', 'pysbd', 'gruut', 'anyascii', 'pypinyin', 'coqpit', 'mecab-python3', 'unidic-lite'
-]
+dependencies = ["torch", "gdown", "pysbd", "gruut", "anyascii", "pypinyin", "coqpit", "mecab-python3", "unidic-lite"]
 import torch

 from TTS.utils.manage import ModelManager
 from TTS.utils.synthesizer import Synthesizer


-def tts(model_name='tts_models/en/ljspeech/tacotron2-DCA',
-        vocoder_name=None,
-        use_cuda=False):
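+def tts(model_name="tts_models/en/ljspeech/tacotron2-DCA", vocoder_name=None, use_cuda=False):
-    """TTS entry point for PyTorch Hub that provides a Synthesizer object to synthesize speech from a give text.
+    """TTS entry point for PyTorch Hub that provides a Synthesizer object to synthesize speech from a given text.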

    Example:
@ -28,19 +24,20 @@ def tts(model_name='tts_models/en/ljspeech/tacotron2-DCA',
    manager = ModelManager()

    model_path, config_path, model_item = manager.download_model(model_name)
-    vocoder_name = model_item[
-        'default_vocoder'] if vocoder_name is None else vocoder_name
+    vocoder_name = model_item["default_vocoder"] if vocoder_name is None else vocoder_name
    vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # create synthesizer
-    synt = Synthesizer(tts_checkpoint=model_path,
-                       tts_config_path=config_path,
-                       vocoder_checkpoint=vocoder_path,
-                       vocoder_config=vocoder_config_path,
-                       use_cuda=use_cuda)
+    synt = Synthesizer(
+        tts_checkpoint=model_path,
+        tts_config_path=config_path,
+        vocoder_checkpoint=vocoder_path,
+        vocoder_config=vocoder_config_path,
+        use_cuda=use_cuda,
+    )
    return synt


-if __name__ == '__main__':
-    synthesizer = torch.hub.load('coqui-ai/TTS:dev', 'tts', source='github')
+if __name__ == "__main__":
+    synthesizer = torch.hub.load("coqui-ai/TTS:dev", "tts", source="github")
    synthesizer.tts("This is a test!")
--- a/notebooks/TestAttention.ipynb
+++ b/notebooks/TestAttention.ipynb
@ -185,4 +185,4 @@
    },
    "nbformat": 4,
    "nbformat_minor": 4
-}
+}
--- a/notebooks/dataset_analysis/CheckPitch.ipynb
+++ b/notebooks/dataset_analysis/CheckPitch.ipynb
@ -176,4 +176,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 2
-}
+}
--- a/notebooks/dataset_analysis/README.md
+++ b/notebooks/dataset_analysis/README.md
@ -2,6 +2,6 @@

 By the use of this notebook, you can easily analyze a brand new dataset, find exceptional cases and define your training set.

-What we are looking in here is reasonable distribution of instances in terms of sequence-length, audio-length and word-coverage. 
+What we are looking for here is a reasonable distribution of instances in terms of sequence-length, audio-length and word-coverage.

 This notebook is inspired from https://github.com/MycroftAI/mimic2
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,11 +1,151 @@
 [build-system]
 requires = [
    "setuptools",
-    "wheel",
    "cython~=0.29.30",
-    "numpy>=1.22.0",
-    "packaging",
+    "numpy>=1.24.3",
 ]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools.packages.find]
+include = ["TTS*"]
+
+[project]
+name = "coqui-tts"
+version = "0.24.0"
+description = "Deep learning for Text to Speech."
+readme = "README.md"
+requires-python = ">=3.9, <3.13"
+license = {text = "MPL-2.0"}
+authors = [
+    {name = "Eren Gölge", email = "egolge@coqui.ai"}
+]
+maintainers = [
+    {name = "Enno Hermann", email = "enno.hermann@gmail.com"}
+]
+classifiers = [
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Science/Research",
+    "Intended Audience :: Developers",
+    "Operating System :: POSIX :: Linux",
+    "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
+    "Topic :: Software Development",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+    "Topic :: Multimedia :: Sound/Audio :: Speech",
+    "Topic :: Multimedia :: Sound/Audio",
+    "Topic :: Multimedia",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+]
+dependencies = [
+    # Core
+    "numpy>=1.24.3",
+    "cython>=0.29.30",
+    "scipy>=1.11.2",
+    "torch>=2.1",
+    "torchaudio",
+    "soundfile>=0.12.0",
+    "librosa>=0.10.1",
+    "inflect>=5.6.0",
+    "tqdm>=4.64.1",
+    "anyascii>=0.3.0",
+    "pyyaml>=6.0",
+    "fsspec[http]>=2023.6.0",
+    "packaging>=23.1",
+    # Inference
+    "pysbd>=0.3.4",
+    # Notebooks
+    "umap-learn>=0.5.1",
+    # Training
+    "matplotlib>=3.7.0",
+    # Coqui stack
+    "coqui-tts-trainer>=0.1",
+    "coqpit>=0.0.16",
+    # Gruut + supported languages
+    "gruut[de,es,fr]==2.2.3",
+    # Tortoise
+    "einops>=0.6.0",
+    "transformers>=4.33.0",
+    # Bark
+    "encodec>=0.1.1",
+    # XTTS
+    "num2words",
+    "spacy[ja]>=3"
+]
+
+[project.optional-dependencies]
+# Development dependencies
+dev = [
+    "black==24.2.0",
+    "coverage[toml]",
+    "nose2",
+    "pre-commit",
+    "ruff==0.3.0",
+    "tomli; python_version < '3.11'",
+]
+# Dependencies for building the documentation
+docs = [
+    "furo",
+    "myst-parser==2.0.0",
+    "sphinx==7.2.5",
+    "sphinx_inline_tabs",
+    "sphinx_copybutton",
+    "linkify-it-py",
+]
+# Only used in notebooks
+notebooks = [
+    "bokeh==1.4.0",
+    "pandas>=1.4,<2.0",
+]
+# For running the TTS server
+server = ["flask>=2.0.1"]
+# Language-specific dependencies, mainly for G2P
+# Bangla
+bn = [
+    "bangla",
+    "bnnumerizer",
+    "bnunicodenormalizer",
+]
+# Korean
+ko = [
+    "hangul_romanize",
+    "jamo",
+    "g2pkk>=0.1.1",
+]
+# Japanese
+ja = [
+    "mecab-python3",
+    "unidic-lite==1.0.8",
+    "cutlet",
+]
+# Chinese
+zh = [
+    "jieba",
+    "pypinyin",
+]
+# All language-specific dependencies
+languages = [
+    "coqui-tts[bn,ja,ko,zh]",
+]
+# Installs all extras (except dev and docs)
+all = [
+    "coqui-tts[notebooks,server,bn,ja,ko,zh]",
+]
+
+[project.urls]
+Homepage = "https://github.com/idiap/coqui-ai-TTS"
+Documentation = "https://coqui-tts.readthedocs.io"
+Repository = "https://github.com/idiap/coqui-ai-TTS"
+Issues = "https://github.com/idiap/coqui-ai-TTS/issues"
+Discussions = "https://github.com/idiap/coqui-ai-TTS/discussions"
+
+[project.scripts]
+tts = "TTS.bin.synthesize:main"
+tts-server = "TTS.server.server:main"

 [tool.ruff]
 target-version = "py39"
@ -32,14 +172,9 @@ lint.extend-select = [
 ]

 lint.ignore = [
-    "E501", # line too long
    "E722", # bare except (TODO: fix these)
    "E731", # don't use lambdas
    "E741", # ambiguous variable name
-    "PLR0912", # too-many-branches
-    "PLR0913", # too-many-arguments
-    "PLR0915", # too-many-statements
-    "UP004", # useless-object-inheritance
    "F821", # TODO: enable
    "F841", # TODO: enable
    "PLW0602", # TODO: enable
--- a/recipes/README.md
+++ b/recipes/README.md
@ -19,4 +19,4 @@ python TTS/bin/resample.py --input_dir recipes/vctk/VCTK/wav48_silence_trimmed -

 If you train a new model using TTS, feel free to share your training to expand the list of recipes.

-You can also open a new discussion and share your progress with the 🐸 community.
+You can also open a new discussion and share your progress with the 🐸 community.
--- a/recipes/blizzard2013/README.md
+++ b/recipes/blizzard2013/README.md
@ -9,4 +9,4 @@ To get a license and download link for this dataset, you need to visit the [webs
 You get access to the raw dataset in a couple of days. There are a few preprocessing steps you need to do to be able to use the high fidelity dataset.

 1. Get the forced time alignments for the blizzard dataset from [here](https://github.com/mueller91/tts_alignments).
-2. Segment the high fidelity audio-book files based on the instructions [here](https://github.com/Tomiinek/Blizzard2013_Segmentation).
+2. Segment the high fidelity audio-book files based on the instructions [here](https://github.com/Tomiinek/Blizzard2013_Segmentation).
--- a/recipes/kokoro/tacotron2-DDC/run.sh
+++ b/recipes/kokoro/tacotron2-DDC/run.sh
@ -20,4 +20,4 @@ CUDA_VISIBLE_DEVICES="0" python TTS/bin/train_tts.py --config_path $RUN_DIR/taco
                                                     --coqpit.output_path $RUN_DIR \
                                                     --coqpit.datasets.0.path $RUN_DIR/$CORPUS \
                                                     --coqpit.audio.stats_path $RUN_DIR/scale_stats.npy \
-                                                     --coqpit.phoneme_cache_path $RUN_DIR/phoneme_cache \
+                                                     --coqpit.phoneme_cache_path $RUN_DIR/phoneme_cache \
--- a/recipes/kokoro/tacotron2-DDC/tacotron2-DDC.json
+++ b/recipes/kokoro/tacotron2-DDC/tacotron2-DDC.json
@ -122,4 +122,4 @@
    "use_gst": false,
    "use_external_speaker_embedding_file": false,
    "external_speaker_embedding_file": "../../speakers-vctk-en.json"
-}
+}
--- a/recipes/ljspeech/download_ljspeech.sh
+++ b/recipes/ljspeech/download_ljspeech.sh
@ -11,4 +11,4 @@ shuf LJSpeech-1.1/metadata.csv > LJSpeech-1.1/metadata_shuf.csv
 head -n 12000 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_train.csv
 tail -n 1100 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_val.csv
 mv LJSpeech-1.1 $RUN_DIR/recipes/ljspeech/
-rm LJSpeech-1.1.tar.bz2
+rm LJSpeech-1.1.tar.bz2
--- a/requirements.dev.txt
+++ b/requirements.dev.txt
@ -1,4 +1,8 @@
+# Generated via scripts/generate_requirements.py and pre-commit hook.
+# Do not edit this file; modify pyproject.toml instead.
 black==24.2.0
 coverage[toml]
 nose2
+pre-commit
 ruff==0.3.0
+tomli; python_version < '3.11'
--- a/requirements.ja.txt
+++ b/requirements.ja.txt
@ -1,5 +0,0 @@
-# These cause some compatibility issues on some systems and are not strictly necessary
-# japanese g2p deps
-mecab-python3
-unidic-lite==1.0.8
-cutlet
--- a/requirements.notebooks.txt
+++ b/requirements.notebooks.txt
@ -1,2 +0,0 @@
-bokeh==1.4.0
-pandas>=1.4,<2.0
--- a/requirements.txt
+++ b/requirements.txt
@ -1,46 +0,0 @@
-# core deps
-numpy>=1.24.3
-cython>=0.29.30
-scipy>=1.11.2
-torch>=2.1
-torchaudio
-soundfile>=0.12.0
-librosa>=0.10.1
-inflect>=5.6.0
-tqdm>=4.64.1
-anyascii>=0.3.0
-pyyaml>=6.0
-fsspec[http]>=2023.6.0 # <= 2023.9.1 makes aux tests fail
-packaging>=23.1
-# deps for inference
-pysbd>=0.3.4
-# deps for notebooks
-umap-learn>=0.5.1
-# deps for training
-matplotlib>=3.7.0
-# coqui stack
-coqui-tts-trainer>=0.1
-# config management
-coqpit>=0.0.16
-# chinese g2p deps
-jieba
-pypinyin
-# korean
-hangul_romanize
-# gruut+supported langs
-gruut[de,es,fr]==2.2.3
-# deps for korean
-jamo
-g2pkk>=0.1.1
-# deps for bangla
-bangla
-bnnumerizer
-bnunicodenormalizer
-#deps for tortoise
-einops>=0.6.0
-transformers>=4.33.0
-#deps for bark
-encodec>=0.1.1
-# deps for XTTS
-num2words
-spacy[ja]>=3
--- a/scripts/generate_requirements.py
+++ b/scripts/generate_requirements.py
@ -0,0 +1,48 @
+#!/usr/bin/env python
+"""Generate requirements.*.txt files in the repository root from pyproject.toml.
+
+Adapted from:
+https://github.com/numpy/numpydoc/blob/e7c6baf00f5f73a4a8f8318d0cb4e04949c9a5d1/tools/generate_requirements.py
+"""
+
+import sys
+from pathlib import Path
+
+try:  # standard module since Python 3.11
+    import tomllib as toml
+except ImportError:
+    try:  # available for older Python via pip
+        import tomli as toml
+    except ImportError:
+        sys.exit("Please install `tomli` first: `pip install tomli`")
+
+script_pth = Path(__file__)
+repo_dir = script_pth.parent.parent
+script_relpth = script_pth.relative_to(repo_dir)
+header = [
+    f"# Generated via {script_relpth.as_posix()} and pre-commit hook.",
+    "# Do not edit this file; modify pyproject.toml instead.",
+]
+
+
+def generate_requirement_file(name: str, req_list: list[str]) -> None:
+    """Write ``requirements.<name>.txt`` in the repository root.
+
+    Args:
+        name: Label inserted into the output file name.
+        req_list: Requirement specifiers, written one per line below the header.
+    """
+    req_fname = repo_dir / f"requirements.{name}.txt"
+    # Pin the encoding so the generated file does not depend on the locale.
+    req_fname.write_text("\n".join(header + req_list) + "\n", encoding="utf-8")
+
+
+def main() -> None:
+    """Regenerate ``requirements.dev.txt`` from the ``dev`` extra in pyproject.toml."""
+    # TOML is UTF-8 by specification; do not rely on the locale's default encoding.
+    pyproject = toml.loads((repo_dir / "pyproject.toml").read_text(encoding="utf-8"))
+    generate_requirement_file("dev", pyproject["project"]["optional-dependencies"]["dev"])
+
+
+if __name__ == "__main__":
+    main()
--- a/setup.cfg
+++ b/setup.cfg
@ -1,8 +0,0 @@
-[build_py]
-build_lib=temp_build
-
-[bdist_wheel]
-bdist_dir=temp_build
-
-[install_lib]
-build_dir=temp_build
--- a/setup.py
+++ b/setup.py
@ -20,54 +20,9 @@
 #                           .,*++++::::::++++*,.
 #                                  ``````

-import os
-import subprocess
-import sys
-
 import numpy
-import setuptools.command.build_py
-import setuptools.command.develop
 from Cython.Build import cythonize
-from setuptools import Extension, find_packages, setup
-
-if sys.version_info < (3, 9) or sys.version_info >= (3, 13):
-    raise RuntimeError("Trainer requires python >= 3.6 and <3.13 " "but your Python version is {}".format(sys.version))
-
-cwd = os.path.dirname(os.path.abspath(__file__))
-with open(os.path.join(cwd, "TTS", "VERSION")) as fin:
-    version = fin.read().strip()
-
-
-class build_py(setuptools.command.build_py.build_py):  # pylint: disable=too-many-ancestors
-    def run(self):
-        setuptools.command.build_py.build_py.run(self)
-
-
-class develop(setuptools.command.develop.develop):
-    def run(self):
-        setuptools.command.develop.develop.run(self)
-
-
-# The documentation for this feature is in server/README.md
-package_data = ["TTS/server/templates/*"]
-
-
-def pip_install(package_name):
-    subprocess.call([sys.executable, "-m", "pip", "install", package_name])
-
-
-requirements = open(os.path.join(cwd, "requirements.txt"), "r").readlines()
-with open(os.path.join(cwd, "requirements.notebooks.txt"), "r") as f:
-    requirements_notebooks = f.readlines()
-with open(os.path.join(cwd, "requirements.dev.txt"), "r") as f:
-    requirements_dev = f.readlines()
-with open(os.path.join(cwd, "requirements.ja.txt"), "r") as f:
-    requirements_ja = f.readlines()
-requirements_server = ["flask>=2.0.1"]
-requirements_all = requirements_dev + requirements_notebooks + requirements_ja + requirements_server
-
-with open("README.md", "r", encoding="utf-8") as readme_file:
-    README = readme_file.read()
+from setuptools import Extension, setup

 exts = [
    Extension(
@ -76,68 +31,7 @@ exts = [
    )
 ]
 setup(
-    name="coqui-tts",
-    version=version,
-    url="https://github.com/idiap/coqui-ai-TTS",
-    author="Eren Gölge",
-    author_email="egolge@coqui.ai",
-    maintainer="Enno Hermann",
-    maintainer_email="enno.hermann@gmail.com",
-    description="Deep learning for Text to Speech.",
-    long_description=README,
-    long_description_content_type="text/markdown",
-    license="MPL-2.0",
-    # cython
    include_dirs=numpy.get_include(),
    ext_modules=cythonize(exts, language_level=3),
-    # ext_modules=find_cython_extensions(),
-    # package
-    include_package_data=True,
-    packages=find_packages(include=["TTS"], exclude=["*.tests", "*tests.*", "tests.*", "*tests", "tests"]),
-    package_data={
-        "TTS": [
-            "VERSION",
-        ]
-    },
-    project_urls={
-        "Documentation": "https://coqui-tts.readthedocs.io",
-        "Tracker": "https://github.com/idiap/coqui-ai-TTS/issues",
-        "Repository": "https://github.com/idiap/coqui-ai-TTS",
-        "Discussions": "https://github.com/idiap/coqui-ai-TTS/discussions",
-    },
-    cmdclass={
-        "build_py": build_py,
-        "develop": develop,
-        # 'build_ext': build_ext
-    },
-    install_requires=requirements,
-    extras_require={
-        "all": requirements_all,
-        "dev": requirements_dev,
-        "notebooks": requirements_notebooks,
-        "server": requirements_server,
-        "ja": requirements_ja,
-    },
-    python_requires=">=3.9.0, <3.13",
-    entry_points={"console_scripts": ["tts=TTS.bin.synthesize:main", "tts-server = TTS.server.server:main"]},
-    classifiers=[
-        "Programming Language :: Python",
-        "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.9",
-        "Programming Language :: Python :: 3.10",
-        "Programming Language :: Python :: 3.11",
-        "Programming Language :: Python :: 3.12",
-        "Development Status :: 3 - Alpha",
-        "Intended Audience :: Science/Research",
-        "Intended Audience :: Developers",
-        "Operating System :: POSIX :: Linux",
-        "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
-        "Topic :: Software Development",
-        "Topic :: Software Development :: Libraries :: Python Modules",
-        "Topic :: Multimedia :: Sound/Audio :: Speech",
-        "Topic :: Multimedia :: Sound/Audio",
-        "Topic :: Multimedia",
-        "Topic :: Scientific/Engineering :: Artificial Intelligence",
-    ],
    zip_safe=False,
 )
--- a/tests/bash_tests/test_compute_statistics.sh
+++ b/tests/bash_tests/test_compute_statistics.sh
@ -4,4 +4,3 @@ BASEDIR=$(dirname "$0")
 echo "$BASEDIR"
 # run training
 CUDA_VISIBLE_DEVICES="" python TTS/bin/compute_statistics.py --config_path $BASEDIR/../inputs/test_glow_tts.json --out_path $BASEDIR/../outputs/scale_stats.npy
-
--- a/tests/data/dummy_speakers.json
+++ b/tests/data/dummy_speakers.json
@ -100222,5 +100222,5 @@
            0.04999300092458725,
            -0.12125937640666962
        ]
-    } 
+    }
 }
--- a/tests/data/ljspeech/metadata_flac.csv
+++ b/tests/data/ljspeech/metadata_flac.csv
@ -6,4 +6,4 @@ wavs/LJ001-0004.flac|produced the block books, which were the immediate predeces
 wavs/LJ001-0005.flac|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|ljspeech-2
 wavs/LJ001-0006.flac|And it is worth mention in passing that, as an example of fine typography,|And it is worth mention in passing that, as an example of fine typography,|ljspeech-2
 wavs/LJ001-0007.flac|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about 1455,|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about fourteen fifty-five,|ljspeech-3
-wavs/LJ001-0008.flac|has never been surpassed.|has never been surpassed.|ljspeech-3
+wavs/LJ001-0008.flac|has never been surpassed.|has never been surpassed.|ljspeech-3
--- a/tests/data/ljspeech/metadata_mp3.csv
+++ b/tests/data/ljspeech/metadata_mp3.csv
@ -6,4 +6,4 @@ wavs/LJ001-0004.mp3|produced the block books, which were the immediate predecess
 wavs/LJ001-0005.mp3|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|ljspeech-2
 wavs/LJ001-0006.mp3|And it is worth mention in passing that, as an example of fine typography,|And it is worth mention in passing that, as an example of fine typography,|ljspeech-2
 wavs/LJ001-0007.mp3|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about 1455,|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about fourteen fifty-five,|ljspeech-3
-wavs/LJ001-0008.mp3|has never been surpassed.|has never been surpassed.|ljspeech-3
+wavs/LJ001-0008.mp3|has never been surpassed.|has never been surpassed.|ljspeech-3
--- a/tests/data/ljspeech/metadata_wav.csv
+++ b/tests/data/ljspeech/metadata_wav.csv
@ -6,4 +6,4 @@ wavs/LJ001-0004.wav|produced the block books, which were the immediate predecess
 wavs/LJ001-0005.wav|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|ljspeech-2
 wavs/LJ001-0006.wav|And it is worth mention in passing that, as an example of fine typography,|And it is worth mention in passing that, as an example of fine typography,|ljspeech-2
 wavs/LJ001-0007.wav|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about 1455,|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about fourteen fifty-five,|ljspeech-3
-wavs/LJ001-0008.wav|has never been surpassed.|has never been surpassed.|ljspeech-3
+wavs/LJ001-0008.wav|has never been surpassed.|has never been surpassed.|ljspeech-3
--- a/tests/inputs/common_voice.tsv
+++ b/tests/inputs/common_voice.tsv
@ -1,6 +1,6 @@
 client_id	path	sentence	up_votes	down_votes	age	gender	accent	locale	segment
-95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b	common_voice_en_20005954.mp3	The applicants are invited for coffee and visa is given immediately.	3	0				en	
-95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b	common_voice_en_20005955.mp3	Developmental robotics is related to, but differs from, evolutionary robotics.	2	0				en	
-95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b	common_voice_en_20005956.mp3	The musical was originally directed and choreographed by Alan Lund.	2	0				en	
-954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6	common_voice_en_19737073.mp3	He graduated from Columbia High School, in Brown County, South Dakota.	2	0				en	
-954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6	common_voice_en_19737074.mp3	Competition for limited resources has also resulted in some local conflicts.	2	0				en	
+95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b	common_voice_en_20005954.mp3	The applicants are invited for coffee and visa is given immediately.	3	0				en
+95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b	common_voice_en_20005955.mp3	Developmental robotics is related to, but differs from, evolutionary robotics.	2	0				en
+95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b	common_voice_en_20005956.mp3	The musical was originally directed and choreographed by Alan Lund.	2	0				en
+954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6	common_voice_en_19737073.mp3	He graduated from Columbia High School, in Brown County, South Dakota.	2	0				en
+954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6	common_voice_en_19737074.mp3	Competition for limited resources has also resulted in some local conflicts.	2	0				en
--- a/tests/inputs/dummy_model_config.json
+++ b/tests/inputs/dummy_model_config.json
@ -98,5 +98,3 @@
        "gst_style_tokens": 10
        }
 }
-
-
--- a/tests/inputs/language_ids.json
+++ b/tests/inputs/language_ids.json
@ -2,4 +2,4 @@
    "en": 0,
    "fr-fr": 1,
    "pt-br": 2
-}
+}
--- a/tests/inputs/test_align_tts.json
+++ b/tests/inputs/test_align_tts.json
@ -155,4 +155,4 @@
            "meta_file_attn_mask": null
        }
    ]
-}
+}
--- a/tests/inputs/test_speaker_encoder_config.json
+++ b/tests/inputs/test_speaker_encoder_config.json
@ -58,4 +58,4 @@
        "storage_size": 15  // the size of the in-memory storage with respect to a single batch
    },
    "datasets":null
-}
+}
--- a/tests/inputs/test_speedy_speech.json
+++ b/tests/inputs/test_speedy_speech.json
@ -152,4 +152,4 @@
            "meta_file_attn_mask": "tests/data/ljspeech/metadata_attn_mask.txt"
        }
    ]
-}
+}
--- a/tests/inputs/test_vocoder_audio_config.json
+++ b/tests/inputs/test_vocoder_audio_config.json
@ -21,4 +21,3 @@
        "do_trim_silence": false
    }
 }
-
--- a/tests/inputs/test_vocoder_multiband_melgan_config.json
+++ b/tests/inputs/test_vocoder_multiband_melgan_config.json
@ -163,4 +163,3 @@
    // PATHS
    "output_path": "tests/train_outputs/"
 }
-
--- a/tests/inputs/test_vocoder_wavegrad.json
+++ b/tests/inputs/test_vocoder_wavegrad.json
@ -113,4 +113,3 @@
    // PATHS
    "output_path": "tests/train_outputs/"
 }
-
--- a/tests/inputs/test_vocoder_wavernn_config.json
+++ b/tests/inputs/test_vocoder_wavernn_config.json
@ -109,4 +109,3 @@
    // PATHS
    "output_path": "tests/train_outputs/"
 }
-
--- a/tests/inputs/xtts_vocab.json
+++ b/tests/inputs/xtts_vocab.json
@ -12666,4 +12666,4 @@
            "da kara"
        ]
    }
-}
+}