diff --git a/.github/workflows/main.yml b/.github/workflows/aux_tests.yml
similarity index 93%
rename from .github/workflows/main.yml
rename to .github/workflows/aux_tests.yml
index 68be9274..d5fe1bb3 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/aux_tests.yml
@@ -1,4 +1,4 @@
-name: CI
+name: aux-tests
on:
push:
@@ -45,8 +45,5 @@ jobs:
run: |
python3 -m pip install .[all]
python3 setup.py egg_info
- - name: Lint check
- run: |
- make lint
- name: Unit tests
- run: make test
+ run: make test_aux
diff --git a/.github/workflows/style_check.yml b/.github/workflows/style_check.yml
new file mode 100644
index 00000000..4a30c26d
--- /dev/null
+++ b/.github/workflows/style_check.yml
@@ -0,0 +1,50 @@
+name: style-check
+
+on:
+ push:
+ branches:
+ - main
+ pull_request:
+ types: [opened, synchronize, reopened]
+jobs:
+ check_skip:
+ runs-on: ubuntu-latest
+ if: "! contains(github.event.head_commit.message, '[ci skip]')"
+ steps:
+ - run: echo "${{ github.event.head_commit.message }}"
+
+ test:
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: [3.9]
+ experimental: [false]
+ steps:
+ - uses: actions/checkout@v2
+ - uses: actions/cache@v1
+ with:
+ path: ~/.cache/pip
+ key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/setup.py') }}
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python-version }}
+ architecture: x64
+ - name: check OS
+ run: cat /etc/os-release
+ - name: Install dependencies
+ run: |
+ sudo apt update
+ sudo apt install -y git make
+ sudo apt install -y python3-wheel gcc
+ make system-deps
+ - name: Upgrade pip
+ run: python3 -m pip install --upgrade pip
+ - name: Install TTS
+ run: |
+ python3 -m pip install .[all]
+ python3 setup.py egg_info
+ - name: Lint check
+ run: |
+ make lint
\ No newline at end of file
diff --git a/.github/workflows/tts_tests.yml b/.github/workflows/tts_tests.yml
new file mode 100644
index 00000000..d05dca90
--- /dev/null
+++ b/.github/workflows/tts_tests.yml
@@ -0,0 +1,49 @@
+name: tts-tests
+
+on:
+ push:
+ branches:
+ - main
+ pull_request:
+ types: [opened, synchronize, reopened]
+jobs:
+ check_skip:
+ runs-on: ubuntu-latest
+ if: "! contains(github.event.head_commit.message, '[ci skip]')"
+ steps:
+ - run: echo "${{ github.event.head_commit.message }}"
+
+ test:
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: [3.6, 3.7, 3.8, 3.9]
+ experimental: [false]
+ steps:
+ - uses: actions/checkout@v2
+ - uses: actions/cache@v1
+ with:
+ path: ~/.cache/pip
+ key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/setup.py') }}
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python-version }}
+ architecture: x64
+ - name: check OS
+ run: cat /etc/os-release
+ - name: Install dependencies
+ run: |
+ sudo apt update
+ sudo apt install -y git make
+ sudo apt install -y python3-wheel gcc
+ make system-deps
+ - name: Upgrade pip
+ run: python3 -m pip install --upgrade pip
+ - name: Install TTS
+ run: |
+ python3 -m pip install .[all]
+ python3 setup.py egg_info
+ - name: Unit tests
+ run: make test_tts
diff --git a/.github/workflows/vocoder_tests.yml b/.github/workflows/vocoder_tests.yml
new file mode 100644
index 00000000..69e74dbf
--- /dev/null
+++ b/.github/workflows/vocoder_tests.yml
@@ -0,0 +1,49 @@
+name: vocoder-tests
+
+on:
+ push:
+ branches:
+ - main
+ pull_request:
+ types: [opened, synchronize, reopened]
+jobs:
+ check_skip:
+ runs-on: ubuntu-latest
+ if: "! contains(github.event.head_commit.message, '[ci skip]')"
+ steps:
+ - run: echo "${{ github.event.head_commit.message }}"
+
+ test:
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ python-version: [3.6, 3.7, 3.8, 3.9]
+ experimental: [false]
+ steps:
+ - uses: actions/checkout@v2
+ - uses: actions/cache@v1
+ with:
+ path: ~/.cache/pip
+ key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('**/setup.py') }}
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python-version }}
+ architecture: x64
+ - name: check OS
+ run: cat /etc/os-release
+ - name: Install dependencies
+ run: |
+ sudo apt update
+ sudo apt install -y git make
+ sudo apt install -y python3-wheel gcc
+ make system-deps
+ - name: Upgrade pip
+ run: python3 -m pip install --upgrade pip
+ - name: Install TTS
+ run: |
+ python3 -m pip install .[all]
+ python3 setup.py egg_info
+ - name: Unit tests
+ run: make test_vocoder
diff --git a/.gitignore b/.gitignore
index 95939d32..64d1f0d5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -124,6 +124,15 @@ version.py
# jupyter dummy files
core
+# ignore local datasets
+recipes/WIP/*
+recipes/ljspeech/LJSpeech-1.1/*
+recipes/vctk/VCTK/*
+VCTK-Corpus-removed-silence/*
+
+# ignore training logs
+trainer_*_log.txt
+
# files used internally fro dev, test etc.
tests/outputs/*
tests/train_outputs/*
@@ -131,12 +140,9 @@ TODO.txt
.vscode/*
data/*
notebooks/data/*
-TTS/tts/layers/glow_tts/monotonic_align/core.c
+TTS/tts/utils/monotonic_align/core.c
.vscode-upload.json
temp_build/*
-recipes/WIP/*
-recipes/ljspeech/LJSpeech-1.1/*
-recipes/ljspeech/tacotron2-DDC/LJSpeech-1.1/*
events.out*
old_configs/*
model_importers/*
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 89138e47..7175cf34 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -76,7 +76,8 @@ The following steps are tested on an Ubuntu system.
8. Run the tests to see how your updates work with the rest of the project. You can repeat this step multiple times as you implement your changes to make sure you are on the right direction.
```bash
- $ make tests
+ $ make test # stop at the first error
+ $ make test_all # run all the tests, report all the errors
```
9. Format your code. We use ```black``` for code and ```isort``` for ```import``` formatting.
diff --git a/Makefile b/Makefile
index bffed7ff..c2091ca0 100644
--- a/Makefile
+++ b/Makefile
@@ -12,6 +12,15 @@ test_all: ## run tests and don't stop on an error.
test: ## run tests.
nosetests -x --with-cov -cov --cover-erase --cover-package TTS tests --nologcapture --with-id
+
+test_vocoder: ## run vocoder tests.
+ nosetests tests.vocoder_tests -x --with-cov -cov --cover-erase --cover-package TTS tests.vocoder_tests --nologcapture --with-id
+
+test_tts: ## run tts tests.
+ nosetests tests.tts_tests -x --with-cov -cov --cover-erase --cover-package TTS tests.tts_tests --nologcapture --with-id
+
+test_aux: ## run aux tests.
+ nosetests tests.aux_tests -x --with-cov -cov --cover-erase --cover-package TTS tests.aux_tests --nologcapture --with-id
./run_bash_tests.sh
test_failed: ## only run tests failed the last time.
diff --git a/README.md b/README.md
index 577eb3e9..fd9cd27c 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,8 @@
📄 [Text-to-Speech paper collection](https://github.com/erogol/TTS-papers)
+
+
## 💬 Where to ask questions
Please use our dedicated channels for questions and discussion. Help is much more valuable if it's shared publicly so that more people can benefit from it.
@@ -154,5 +156,3 @@ If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](ht
|- vocoder/ (Vocoder models.)
|- (same)
```
-
-
\ No newline at end of file
diff --git a/TTS/.models.json b/TTS/.models.json
index 37288dc5..6d353764 100644
--- a/TTS/.models.json
+++ b/TTS/.models.json
@@ -4,7 +4,7 @@
"ek1": {
"tacotron2": {
"description": "EK1 en-rp tacotron2 by NMStoker",
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.1.0/tts_models--en--ek1--tacotron2.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.1.0/tts_models--en--ek1--tacotron2.zip",
"default_vocoder": "vocoder_models/en/ek1/wavegrad",
"commit": "c802255"
}
@@ -12,7 +12,7 @@
"ljspeech": {
"tacotron2-DDC": {
"description": "Tacotron2 with Double Decoder Consistency.",
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.12/tts_models--en--ljspeech--tacotron2-DDC.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.12/tts_models--en--ljspeech--tacotron2-DDC.zip",
"default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
"commit": "bae2ad0f",
"author": "Eren Gölge @erogol",
@@ -21,7 +21,7 @@
},
"tacotron2-DDC_ph": {
"description": "Tacotron2 with Double Decoder Consistency with phonemes.",
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.2.0/tts_models--en--ljspeech--tacotronDDC_ph.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.2.0/tts_models--en--ljspeech--tacotronDDC_ph.zip",
"default_vocoder": "vocoder_models/en/ljspeech/univnet",
"commit": "3900448",
"author": "Eren Gölge @erogol",
@@ -30,7 +30,7 @@
},
"glow-tts": {
"description": "",
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.9/tts_models--en--ljspeech--glow-tts.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/tts_models--en--ljspeech--glow-tts.zip",
"stats_file": null,
"default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan",
"commit": "",
@@ -40,7 +40,7 @@
},
"speedy-speech": {
"description": "Speedy Speech model trained on LJSpeech dataset using the Alignment Network for learning the durations.",
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.3.0/tts_models--en--ljspeech--speedy_speech.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.3.0/tts_models--en--ljspeech--speedy_speech.zip",
"stats_file": null,
"default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
"commit": "4581e3d",
@@ -50,7 +50,7 @@
},
"tacotron2-DCA": {
"description": "",
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.9/tts_models--en--ljspeech--tacotron2-DCA.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/tts_models--en--ljspeech--tacotron2-DCA.zip",
"default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan",
"commit": "",
"author": "Eren Gölge @erogol",
@@ -59,7 +59,7 @@
},
"vits": {
"description": "VITS is an End2End TTS model trained on LJSpeech dataset with phonemes.",
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.2.0/tts_models--en--ljspeech--vits.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.2.0/tts_models--en--ljspeech--vits.zip",
"default_vocoder": null,
"commit": "3900448",
"author": "Eren Gölge @erogol",
@@ -68,7 +68,7 @@
},
"fast_pitch": {
"description": "FastPitch model trained on LJSpeech using the Aligner Network",
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.2.2/tts_models--en--ljspeech--fast_pitch.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.2.2/tts_models--en--ljspeech--fast_pitch.zip",
"default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
"commit": "b27b3ba",
"author": "Eren Gölge @erogol",
@@ -79,7 +79,7 @@
"vctk": {
"sc-glow-tts": {
"description": "Multi-Speaker Transformers based SC-Glow model from https://arxiv.org/abs/2104.05557.",
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.1.0/tts_models--en--vctk--sc-glow-tts.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.1.0/tts_models--en--vctk--sc-glow-tts.zip",
"default_vocoder": "vocoder_models/en/vctk/hifigan_v2",
"commit": "b531fa69",
"author": "Edresson Casanova",
@@ -88,18 +88,27 @@
},
"vits": {
"description": "VITS End2End TTS model trained on VCTK dataset with 109 different speakers with EN accent.",
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.2.0/tts_models--en--vctk--vits.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.2.0/tts_models--en--vctk--vits.zip",
"default_vocoder": null,
"commit": "3900448",
"author": "Eren @erogol",
"license": "",
"contact": "egolge@coqui.ai"
+ },
+ "fast_pitch":{
+ "description": "FastPitch model trained on VCTK dataset.",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.4.0/tts_models--en--vctk--fast_pitch.zip",
+ "default_vocoder": "vocoder_models/en/vctk/hifigan_v2",
+ "commit": "bdab788d",
+ "author": "Eren @erogol",
+ "license": "CC BY-NC-ND 4.0",
+ "contact": "egolge@coqui.ai"
}
},
"sam": {
"tacotron-DDC": {
"description": "Tacotron2 with Double Decoder Consistency trained with Aceenture's Sam dataset.",
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.13/tts_models--en--sam--tacotron_DDC.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.13/tts_models--en--sam--tacotron_DDC.zip",
"default_vocoder": "vocoder_models/en/sam/hifigan_v2",
"commit": "bae2ad0f",
"author": "Eren Gölge @erogol",
@@ -111,7 +120,7 @@
"es": {
"mai": {
"tacotron2-DDC": {
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.9/tts_models--es--mai--tacotron2-DDC.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/tts_models--es--mai--tacotron2-DDC.zip",
"default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
"commit": "",
"author": "Eren Gölge @erogol",
@@ -123,7 +132,7 @@
"fr": {
"mai": {
"tacotron2-DDC": {
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.9/tts_models--fr--mai--tacotron2-DDC.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/tts_models--fr--mai--tacotron2-DDC.zip",
"default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
"commit": "",
"author": "Eren Gölge @erogol",
@@ -132,10 +141,21 @@
}
}
},
+ "uk":{
+ "mai": {
+ "glow-tts": {
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.4.0/tts_models--uk--mailabs--glow-tts.zip",
+ "author":"@robinhad",
+ "commit": "bdab788d",
+ "license": "MIT",
+ "contact": ""
+ }
+ }
+ },
"zh-CN": {
"baker": {
"tacotron2-DDC-GST": {
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.10/tts_models--zh-CN--baker--tacotron2-DDC-GST.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.10/tts_models--zh-CN--baker--tacotron2-DDC-GST.zip",
"commit": "unknown",
"author": "@kirianguiller",
"default_vocoder": null
@@ -145,7 +165,7 @@
"nl": {
"mai": {
"tacotron2-DDC": {
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.10/tts_models--nl--mai--tacotron2-DDC.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.10/tts_models--nl--mai--tacotron2-DDC.zip",
"author": "@r-dh",
"default_vocoder": "vocoder_models/nl/mai/parallel-wavegan",
"stats_file": null,
@@ -156,7 +176,7 @@
"de": {
"thorsten": {
"tacotron2-DCA": {
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.11/tts_models--de--thorsten--tacotron2-DCA.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.11/tts_models--de--thorsten--tacotron2-DCA.zip",
"default_vocoder": "vocoder_models/de/thorsten/fullband-melgan",
"author": "@thorstenMueller",
"commit": "unknown"
@@ -166,7 +186,7 @@
"ja": {
"kokoro": {
"tacotron2-DDC": {
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.15/tts_models--jp--kokoro--tacotron2-DDC.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.15/tts_models--jp--kokoro--tacotron2-DDC.zip",
"default_vocoder": "vocoder_models/ja/kokoro/hifigan_v1",
"description": "Tacotron2 with Double Decoder Consistency trained with Kokoro Speech Dataset.",
"author": "@kaiidams",
@@ -179,14 +199,14 @@
"universal": {
"libri-tts": {
"wavegrad": {
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.9/vocoder_models--universal--libri-tts--wavegrad.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/vocoder_models--universal--libri-tts--wavegrad.zip",
"commit": "ea976b0",
"author": "Eren Gölge @erogol",
"license": "MPL",
"contact": "egolge@coqui.com"
},
"fullband-melgan": {
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.9/vocoder_models--universal--libri-tts--fullband-melgan.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/vocoder_models--universal--libri-tts--fullband-melgan.zip",
"commit": "4132240",
"author": "Eren Gölge @erogol",
"license": "MPL",
@@ -198,13 +218,13 @@
"ek1": {
"wavegrad": {
"description": "EK1 en-rp wavegrad by NMStoker",
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.10/vocoder_models--en--ek1--wavegrad.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.10/vocoder_models--en--ek1--wavegrad.zip",
"commit": "c802255"
}
},
"ljspeech": {
"multiband-melgan": {
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.9/vocoder_models--en--ljspeech--mulitband-melgan.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.9/vocoder_models--en--ljspeech--mulitband-melgan.zip",
"commit": "ea976b0",
"author": "Eren Gölge @erogol",
"license": "MPL",
@@ -212,7 +232,7 @@
},
"hifigan_v2": {
"description": "HiFiGAN_v2 LJSpeech vocoder from https://arxiv.org/abs/2010.05646.",
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.12/vocoder_model--en--ljspeech-hifigan_v2.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.12/vocoder_model--en--ljspeech-hifigan_v2.zip",
"commit": "bae2ad0f",
"author": "@erogol",
"license": "",
@@ -220,7 +240,7 @@
},
"univnet": {
"description": "UnivNet model finetuned on TacotronDDC_ph spectrograms for better compatibility.",
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.3.0/vocoder_models--en--ljspeech--univnet_v2.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.3.0/vocoder_models--en--ljspeech--univnet_v2.zip",
"commit": "4581e3d",
"author": "Eren @erogol",
"license": "TBD",
@@ -230,7 +250,7 @@
"vctk": {
"hifigan_v2": {
"description": "Finetuned and intended to be used with tts_models/en/vctk/sc-glow-tts",
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.12/vocoder_model--en--vctk--hifigan_v2.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.12/vocoder_model--en--vctk--hifigan_v2.zip",
"commit": "2f07160",
"author": "Edresson Casanova",
"license": "",
@@ -240,7 +260,7 @@
"sam": {
"hifigan_v2": {
"description": "Finetuned and intended to be used with tts_models/en/sam/tacotron_DDC",
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.13/vocoder_models--en--sam--hifigan_v2.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.13/vocoder_models--en--sam--hifigan_v2.zip",
"commit": "2f07160",
"author": "Eren Gölge @erogol",
"license": "",
@@ -251,7 +271,7 @@
"nl": {
"mai": {
"parallel-wavegan": {
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.10/vocoder_models--nl--mai--parallel-wavegan.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.10/vocoder_models--nl--mai--parallel-wavegan.zip",
"author": "@r-dh",
"commit": "unknown"
}
@@ -260,12 +280,12 @@
"de": {
"thorsten": {
"wavegrad": {
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.11/vocoder_models--de--thorsten--wavegrad.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.0.11/vocoder_models--de--thorsten--wavegrad.zip",
"author": "@thorstenMueller",
"commit": "unknown"
},
"fullband-melgan": {
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.1.3/vocoder_models--de--thorsten--fullband-melgan.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.1.3/vocoder_models--de--thorsten--fullband-melgan.zip",
"author": "@thorstenMueller",
"commit": "unknown"
}
@@ -274,7 +294,7 @@
"ja": {
"kokoro": {
"hifigan_v1": {
- "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.2.0/vocoder_models--ja--kokoro--hifigan_v1.zip",
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.2.0/vocoder_models--ja--kokoro--hifigan_v1.zip",
"description": "HifiGAN model trained for kokoro dataset by @kaiidams",
"author": "@kaiidams",
"commit": "3900448"
diff --git a/TTS/bin/compute_embeddings.py b/TTS/bin/compute_embeddings.py
index 8c4d275f..83a5aeae 100644
--- a/TTS/bin/compute_embeddings.py
+++ b/TTS/bin/compute_embeddings.py
@@ -5,7 +5,7 @@ from argparse import RawTextHelpFormatter
from tqdm import tqdm
from TTS.config import load_config
-from TTS.tts.datasets import load_meta_data
+from TTS.tts.datasets import load_tts_samples
from TTS.tts.utils.speakers import SpeakerManager
parser = argparse.ArgumentParser(
@@ -36,7 +36,7 @@ args = parser.parse_args()
c_dataset = load_config(args.config_dataset_path)
-meta_data_train, meta_data_eval = load_meta_data(c_dataset.datasets, eval_split=args.eval)
+meta_data_train, meta_data_eval = load_tts_samples(c_dataset.datasets, eval_split=args.eval)
wav_files = meta_data_train + meta_data_eval
speaker_manager = SpeakerManager(
diff --git a/TTS/bin/compute_statistics.py b/TTS/bin/compute_statistics.py
index 6179dafc..e1974ae7 100755
--- a/TTS/bin/compute_statistics.py
+++ b/TTS/bin/compute_statistics.py
@@ -10,7 +10,7 @@ from tqdm import tqdm
# from TTS.utils.io import load_config
from TTS.config import load_config
-from TTS.tts.datasets import load_meta_data
+from TTS.tts.datasets import load_tts_samples
from TTS.utils.audio import AudioProcessor
@@ -41,7 +41,7 @@ def main():
if args.data_path:
dataset_items = glob.glob(os.path.join(args.data_path, "**", "*.wav"), recursive=True)
else:
- dataset_items = load_meta_data(CONFIG.datasets)[0] # take only train data
+ dataset_items = load_tts_samples(CONFIG.datasets)[0] # take only train data
print(f" > There are {len(dataset_items)} files.")
mel_sum = 0
diff --git a/TTS/bin/extract_tts_spectrograms.py b/TTS/bin/extract_tts_spectrograms.py
index 681fcc36..0af98ff1 100755
--- a/TTS/bin/extract_tts_spectrograms.py
+++ b/TTS/bin/extract_tts_spectrograms.py
@@ -10,8 +10,7 @@ from torch.utils.data import DataLoader
from tqdm import tqdm
from TTS.config import load_config
-from TTS.tts.datasets import load_meta_data
-from TTS.tts.datasets.TTSDataset import TTSDataset
+from TTS.tts.datasets import TTSDataset, load_tts_samples
from TTS.tts.models import setup_model
from TTS.tts.utils.speakers import get_speaker_manager
from TTS.utils.audio import AudioProcessor
@@ -230,7 +229,7 @@ def main(args): # pylint: disable=redefined-outer-name
ap = AudioProcessor(**c.audio)
# load data instances
- meta_data_train, meta_data_eval = load_meta_data(c.datasets, eval_split=args.eval)
+ meta_data_train, meta_data_eval = load_tts_samples(c.datasets, eval_split=args.eval)
# use eval and training partitions
meta_data = meta_data_train + meta_data_eval
diff --git a/TTS/bin/find_unique_chars.py b/TTS/bin/find_unique_chars.py
index 16768e43..437c2d60 100644
--- a/TTS/bin/find_unique_chars.py
+++ b/TTS/bin/find_unique_chars.py
@@ -3,7 +3,7 @@ import argparse
from argparse import RawTextHelpFormatter
from TTS.config import load_config
-from TTS.tts.datasets import load_meta_data
+from TTS.tts.datasets import load_tts_samples
def main():
@@ -23,7 +23,7 @@ def main():
c = load_config(args.config_path)
# load all datasets
- train_items, eval_items = load_meta_data(c.datasets, eval_split=True)
+ train_items, eval_items = load_tts_samples(c.datasets, eval_split=True)
items = train_items + eval_items
texts = "".join(item[0] for item in items)
diff --git a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py
index 7ff35486..ad6d95f7 100644
--- a/TTS/bin/train_encoder.py
+++ b/TTS/bin/train_encoder.py
@@ -12,9 +12,9 @@ from torch.utils.data import DataLoader
from TTS.speaker_encoder.dataset import SpeakerEncoderDataset
from TTS.speaker_encoder.losses import AngleProtoLoss, GE2ELoss, SoftmaxAngleProtoLoss
from TTS.speaker_encoder.utils.generic_utils import save_best_model, setup_model
+from TTS.speaker_encoder.utils.training import init_training
from TTS.speaker_encoder.utils.visual import plot_embeddings
-from TTS.trainer import init_training
-from TTS.tts.datasets import load_meta_data
+from TTS.tts.datasets import load_tts_samples
from TTS.utils.audio import AudioProcessor
from TTS.utils.generic_utils import count_parameters, remove_experiment_folder, set_init_dict
from TTS.utils.io import load_fsspec
@@ -156,7 +156,7 @@ def main(args): # pylint: disable=redefined-outer-name
optimizer = RAdam(model.parameters(), lr=c.lr)
# pylint: disable=redefined-outer-name
- meta_data_train, meta_data_eval = load_meta_data(c.datasets, eval_split=False)
+ meta_data_train, meta_data_eval = load_tts_samples(c.datasets, eval_split=False)
data_loader, num_speakers = setup_loader(ap, is_val=False, verbose=True)
@@ -208,7 +208,7 @@ def main(args): # pylint: disable=redefined-outer-name
if __name__ == "__main__":
- args, c, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger = init_training(sys.argv)
+ args, c, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger = init_training()
try:
main(args)
diff --git a/TTS/bin/train_tts.py b/TTS/bin/train_tts.py
index 863bd3b9..e28e9dec 100644
--- a/TTS/bin/train_tts.py
+++ b/TTS/bin/train_tts.py
@@ -1,12 +1,71 @@
-import sys
+import os
-from TTS.trainer import Trainer, init_training
+from TTS.config import load_config, register_config
+from TTS.trainer import Trainer, TrainingArgs
+from TTS.tts.datasets import load_tts_samples
+from TTS.tts.models import setup_model
+from TTS.tts.utils.speakers import SpeakerManager
+from TTS.utils.audio import AudioProcessor
def main():
- """Run 🐸TTS trainer from terminal. This is also necessary to run DDP training by ```distribute.py```"""
- args, config, output_path, _, c_logger, dashboard_logger = init_training(sys.argv)
- trainer = Trainer(args, config, output_path, c_logger, dashboard_logger, cudnn_benchmark=False)
+ """Run `tts` model training directly by a `config.json` file."""
+ # init trainer args
+ train_args = TrainingArgs()
+ parser = train_args.init_argparse(arg_prefix="")
+
+ # override trainer args from command-line args
+ args, config_overrides = parser.parse_known_args()
+ train_args.parse_args(args)
+
+ # load config.json and register
+ if args.config_path or args.continue_path:
+ if args.config_path:
+ # init from a file
+ config = load_config(args.config_path)
+ if len(config_overrides) > 0:
+ config.parse_known_args(config_overrides, relaxed_parser=True)
+ elif args.continue_path:
+ # continue from a prev experiment
+ config = load_config(os.path.join(args.continue_path, "config.json"))
+ if len(config_overrides) > 0:
+ config.parse_known_args(config_overrides, relaxed_parser=True)
+ else:
+ # init from console args
+ from TTS.config.shared_configs import BaseTrainingConfig # pylint: disable=import-outside-toplevel
+
+ config_base = BaseTrainingConfig()
+ config_base.parse_known_args(config_overrides)
+ config = register_config(config_base.model)()
+
+ # load training samples
+ train_samples, eval_samples = load_tts_samples(config.datasets, eval_split=True)
+
+ # setup audio processor
+ ap = AudioProcessor(**config.audio)
+
+ # init speaker manager
+ if config.use_speaker_embedding:
+ speaker_manager = SpeakerManager(data_items=train_samples + eval_samples)
+ elif config.use_d_vector_file:
+ speaker_manager = SpeakerManager(d_vectors_file_path=config.d_vector_file)
+ else:
+ speaker_manager = None
+
+ # init the model from config
+ model = setup_model(config, speaker_manager)
+
+ # init the trainer and 🚀
+ trainer = Trainer(
+ train_args,
+ config,
+ config.output_path,
+ model=model,
+ train_samples=train_samples,
+ eval_samples=eval_samples,
+ training_assets={"audio_processor": ap},
+ parse_command_line_args=False,
+ )
trainer.fit()
diff --git a/TTS/bin/train_vocoder.py b/TTS/bin/train_vocoder.py
index 000083e0..cd665f29 100644
--- a/TTS/bin/train_vocoder.py
+++ b/TTS/bin/train_vocoder.py
@@ -1,26 +1,69 @@
import os
-import sys
-import traceback
-from TTS.trainer import Trainer, init_training
-from TTS.utils.generic_utils import remove_experiment_folder
+from TTS.config import load_config, register_config
+from TTS.trainer import Trainer, TrainingArgs
+from TTS.utils.audio import AudioProcessor
+from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data
+from TTS.vocoder.models import setup_model
def main():
- try:
- args, config, output_path, _, c_logger, dashboard_logger = init_training(sys.argv)
- trainer = Trainer(args, config, output_path, c_logger, dashboard_logger)
- trainer.fit()
- except KeyboardInterrupt:
- remove_experiment_folder(output_path)
- try:
- sys.exit(0)
- except SystemExit:
- os._exit(0) # pylint: disable=protected-access
- except Exception: # pylint: disable=broad-except
- remove_experiment_folder(output_path)
- traceback.print_exc()
- sys.exit(1)
+ """Run `vocoder` model training directly by a `config.json` file."""
+ # init trainer args
+ train_args = TrainingArgs()
+ parser = train_args.init_argparse(arg_prefix="")
+
+ # override trainer args from command-line args
+ args, config_overrides = parser.parse_known_args()
+ train_args.parse_args(args)
+
+ # load config.json and register
+ if args.config_path or args.continue_path:
+ if args.config_path:
+ # init from a file
+ config = load_config(args.config_path)
+ if len(config_overrides) > 0:
+ config.parse_known_args(config_overrides, relaxed_parser=True)
+ elif args.continue_path:
+ # continue from a prev experiment
+ config = load_config(os.path.join(args.continue_path, "config.json"))
+ if len(config_overrides) > 0:
+ config.parse_known_args(config_overrides, relaxed_parser=True)
+ else:
+ # init from console args
+ from TTS.config.shared_configs import BaseTrainingConfig # pylint: disable=import-outside-toplevel
+
+ config_base = BaseTrainingConfig()
+ config_base.parse_known_args(config_overrides)
+ config = register_config(config_base.model)()
+
+ # load training samples
+ if "feature_path" in config and config.feature_path:
+ # load pre-computed features
+ print(f" > Loading features from: {config.feature_path}")
+ eval_samples, train_samples = load_wav_feat_data(config.data_path, config.feature_path, config.eval_split_size)
+ else:
+ # load data raw wav files
+ eval_samples, train_samples = load_wav_data(config.data_path, config.eval_split_size)
+
+ # setup audio processor
+ ap = AudioProcessor(**config.audio)
+
+ # init the model from config
+ model = setup_model(config)
+
+ # init the trainer and 🚀
+ trainer = Trainer(
+ train_args,
+ config,
+ config.output_path,
+ model=model,
+ train_samples=train_samples,
+ eval_samples=eval_samples,
+ training_assets={"audio_processor": ap},
+ parse_command_line_args=False,
+ )
+ trainer.fit()
if __name__ == "__main__":
diff --git a/TTS/config/__init__.py b/TTS/config/__init__.py
index ea98f431..f626163f 100644
--- a/TTS/config/__init__.py
+++ b/TTS/config/__init__.py
@@ -36,10 +36,11 @@ def register_config(model_name: str) -> Coqpit:
Coqpit: config class.
"""
config_class = None
+ config_name = model_name + "_config"
paths = ["TTS.tts.configs", "TTS.vocoder.configs", "TTS.speaker_encoder"]
for path in paths:
try:
- config_class = find_module(path, model_name + "_config")
+ config_class = find_module(path, config_name)
except ModuleNotFoundError:
pass
if config_class is None:
diff --git a/TTS/model.py b/TTS/model.py
index cfd1ec62..532d05a6 100644
--- a/TTS/model.py
+++ b/TTS/model.py
@@ -6,8 +6,6 @@ import torch
from coqpit import Coqpit
from torch import nn
-from TTS.utils.audio import AudioProcessor
-
# pylint: skip-file
@@ -22,6 +20,14 @@ class BaseModel(nn.Module, ABC):
- 1D tensors `batch x 1`
"""
+ def __init__(self, config: Coqpit):
+ super().__init__()
+ self._set_model_args(config)
+
+ def _set_model_args(self, config: Coqpit):
+ """Set model arguments from the config. Override this."""
+ pass
+
@abstractmethod
def forward(self, input: torch.Tensor, *args, aux_input={}, **kwargs) -> Dict:
"""Forward pass for the model mainly used in training.
@@ -73,7 +79,7 @@ class BaseModel(nn.Module, ABC):
...
return outputs_dict, loss_dict
- def train_log(self, ap: AudioProcessor, batch: Dict, outputs: Dict) -> Tuple[Dict, np.ndarray]:
+ def train_log(self, batch: Dict, outputs: Dict, logger: "Logger", assets: Dict, steps: int) -> None:
"""Create visualizations and waveform examples for training.
For example, here you can plot spectrograms and generate sample sample waveforms from these spectrograms to
@@ -87,7 +93,7 @@ class BaseModel(nn.Module, ABC):
Returns:
Tuple[Dict, np.ndarray]: training plots and output waveform.
"""
- return None, None
+ pass
@abstractmethod
def eval_step(self, batch: Dict, criterion: nn.Module) -> Tuple[Dict, Dict]:
@@ -106,9 +112,9 @@ class BaseModel(nn.Module, ABC):
...
return outputs_dict, loss_dict
- def eval_log(self, ap: AudioProcessor, batch: Dict, outputs: Dict) -> Tuple[Dict, np.ndarray]:
+ def eval_log(self, batch: Dict, outputs: Dict, logger: "Logger", assets: Dict, steps: int) -> None:
"""The same as `train_log()`"""
- return None, None
+ pass
@abstractmethod
def load_checkpoint(self, config: Coqpit, checkpoint_path: str, eval: bool = False) -> None:
diff --git a/TTS/speaker_encoder/utils/training.py b/TTS/speaker_encoder/utils/training.py
new file mode 100644
index 00000000..a32f43bd
--- /dev/null
+++ b/TTS/speaker_encoder/utils/training.py
@@ -0,0 +1,94 @@
+import os
+
+from coqpit import Coqpit
+
+from TTS.config import load_config, register_config
+from TTS.trainer import TrainingArgs
+from TTS.tts.utils.text.symbols import parse_symbols
+from TTS.utils.generic_utils import get_experiment_folder_path, get_git_branch
+from TTS.utils.io import copy_model_files
+from TTS.utils.logging import init_dashboard_logger
+from TTS.utils.logging.console_logger import ConsoleLogger
+from TTS.utils.trainer_utils import get_last_checkpoint
+
+
+def getarguments():
+ train_config = TrainingArgs()
+ parser = train_config.init_argparse(arg_prefix="")
+ return parser
+
+
+def process_args(args, config=None):
+ """Process parsed comand line arguments and initialize the config if not provided.
+ Args:
+ args (argparse.Namespace or dict like): Parsed input arguments.
+ config (Coqpit): Model config. If none, it is generated from `args`. Defaults to None.
+ Returns:
+        c (TTS.utils.io.AttrDict): Config parameters.
+ out_path (str): Path to save models and logging.
+ audio_path (str): Path to save generated test audios.
+ c_logger (TTS.utils.console_logger.ConsoleLogger): Class that does
+ logging to the console.
+ dashboard_logger (WandbLogger or TensorboardLogger): Class that does the dashboard Logging
+ TODO:
+ - Interactive config definition.
+ """
+ if isinstance(args, tuple):
+ args, coqpit_overrides = args
+ if args.continue_path:
+ # continue a previous training from its output folder
+ experiment_path = args.continue_path
+ args.config_path = os.path.join(args.continue_path, "config.json")
+ args.restore_path, best_model = get_last_checkpoint(args.continue_path)
+ if not args.best_path:
+ args.best_path = best_model
+ # init config if not already defined
+ if config is None:
+ if args.config_path:
+ # init from a file
+ config = load_config(args.config_path)
+ else:
+ # init from console args
+ from TTS.config.shared_configs import BaseTrainingConfig # pylint: disable=import-outside-toplevel
+
+ config_base = BaseTrainingConfig()
+ config_base.parse_known_args(coqpit_overrides)
+ config = register_config(config_base.model)()
+ # override values from command-line args
+ config.parse_known_args(coqpit_overrides, relaxed_parser=True)
+ experiment_path = args.continue_path
+ if not experiment_path:
+ experiment_path = get_experiment_folder_path(config.output_path, config.run_name)
+ audio_path = os.path.join(experiment_path, "test_audios")
+ config.output_log_path = experiment_path
+ # setup rank 0 process in distributed training
+ dashboard_logger = None
+ if args.rank == 0:
+ new_fields = {}
+ if args.restore_path:
+ new_fields["restore_path"] = args.restore_path
+ new_fields["github_branch"] = get_git_branch()
+ # if model characters are not set in the config file
+ # save the default set to the config file for future
+ # compatibility.
+ if config.has("characters") and config.characters is None:
+ used_characters = parse_symbols()
+ new_fields["characters"] = used_characters
+ copy_model_files(config, experiment_path, new_fields)
+ dashboard_logger = init_dashboard_logger(config)
+ c_logger = ConsoleLogger()
+ return config, experiment_path, audio_path, c_logger, dashboard_logger
+
+
+def init_arguments():
+ train_config = TrainingArgs()
+ parser = train_config.init_argparse(arg_prefix="")
+ return parser
+
+
+def init_training(config: Coqpit = None):
+ """Initialization of a training run."""
+ parser = init_arguments()
+ args = parser.parse_known_args()
+ config, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger = process_args(args, config)
+ return args[0], config, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger
diff --git a/TTS/trainer.py b/TTS/trainer.py
index 8589ae5c..9fcd77a7 100644
--- a/TTS/trainer.py
+++ b/TTS/trainer.py
@@ -4,16 +4,14 @@ import importlib
import multiprocessing
import os
import platform
-import re
import sys
import time
import traceback
from argparse import Namespace
from dataclasses import dataclass, field
-from typing import Dict, List, Tuple, Union
-from urllib.parse import urlparse
+from inspect import signature
+from typing import Callable, Dict, List, Tuple, Union
-import fsspec
import torch
import torch.distributed as dist
from coqpit import Coqpit
@@ -21,11 +19,6 @@ from torch import nn
from torch.nn.parallel import DistributedDataParallel as DDP_th
from torch.utils.data import DataLoader
-from TTS.config import load_config, register_config
-from TTS.tts.datasets import load_meta_data
-from TTS.tts.models import setup_model as setup_tts_model
-from TTS.tts.utils.text.symbols import parse_symbols
-from TTS.utils.audio import AudioProcessor
from TTS.utils.callbacks import TrainerCallback
from TTS.utils.distribute import init_distributed
from TTS.utils.generic_utils import (
@@ -39,9 +32,13 @@ from TTS.utils.generic_utils import (
)
from TTS.utils.io import copy_model_files, load_fsspec, save_best_model, save_checkpoint
from TTS.utils.logging import ConsoleLogger, TensorboardLogger, WandbLogger, init_dashboard_logger
-from TTS.utils.trainer_utils import get_optimizer, get_scheduler, is_apex_available, setup_torch_training_env
-from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data
-from TTS.vocoder.models import setup_model as setup_vocoder_model
+from TTS.utils.trainer_utils import (
+ get_last_checkpoint,
+ get_optimizer,
+ get_scheduler,
+ is_apex_available,
+ setup_torch_training_env,
+)
multiprocessing.set_start_method("fork")
@@ -80,6 +77,9 @@ class TrainingArgs(Coqpit):
"help": "Best model file to be used for extracting the best loss. If not specified, the latest best model in continue path is used"
},
)
+ skip_train_epoch: bool = field(
+ default=False, metadata={"help": "Run only evaluation iteration. Useful for debugging."}
+ )
config_path: str = field(default="", metadata={"help": "Path to the configuration file."})
rank: int = field(default=0, metadata={"help": "Process rank in distributed training."})
group_id: str = field(default="", metadata={"help": "Process group id in distributed training."})
@@ -90,7 +90,7 @@ class TrainingArgs(Coqpit):
class Trainer:
- def __init__(
+ def __init__( # pylint: disable=dangerous-default-value
self,
args: Union[Coqpit, Namespace],
config: Coqpit,
@@ -98,7 +98,13 @@ class Trainer:
c_logger: ConsoleLogger = None,
dashboard_logger: Union[TensorboardLogger, WandbLogger] = None,
model: nn.Module = None,
+ get_model: Callable = None,
+ get_data_samples: Callable = None,
+ train_samples: List = None,
+ eval_samples: List = None,
cudnn_benchmark: bool = False,
+ training_assets: Dict = {},
+ parse_command_line_args: bool = True,
) -> None:
"""Simple yet powerful 🐸💬 TTS trainer for PyTorch. It can train all the available `tts` and `vocoder` models
or easily be customized.
@@ -127,24 +133,44 @@ class Trainer:
model (nn.Module, optional): Initialized and ready-to-train model. If it is not defined, `Trainer`
initializes a model from the provided config. Defaults to None.
+ get_model (Callable):
+ A function that returns a model. It is used to initialize the model when `model` is not provided.
+ It either takes the config as the only argument or does not take any argument.
+ Defaults to None
+
+ get_data_samples (Callable):
+ A function that returns a list of training and evaluation samples. Used if `train_samples` and
+ `eval_samples` are None. Defaults to None.
+
+ train_samples (List):
+ A list of training samples used by the model's `get_data_loader` to init the `dataset` and the
+ `data_loader`. Defaults to None.
+
+ eval_samples (List):
+ A list of evaluation samples used by the model's `get_data_loader` to init the `dataset` and the
+ `data_loader`. Defaults to None.
+
cudnn_benchmark (bool): enable/disable PyTorch cudnn benchmarking. It is better to disable if the model input
length is changing batch to batch along the training.
+ training_assets (Dict):
+ A dictionary of assets to be used at training and passed to the model's ```train_log(), eval_log(), get_data_loader()```
+ during training. It can include `AudioProcessor` or/and `Tokenizer`. Defaults to {}.
+
+ parse_command_line_args (bool):
+ If true, parse command-line arguments and update `TrainingArgs` and model `config` values. Set it
+ to false if you parse the arguments yourself. Defaults to True.
+
Examples:
- Running trainer on a model.
+ Running trainer with HifiGAN model.
>>> args = TrainingArgs(...)
>>> config = HifiganConfig(...)
>>> model = GANModel(config)
- >>> trainer = Trainer(args, config, output_path, model=model)
- >>> trainer.fit()
-
- Running trainer on a config.
-
- >>> config = WavegradConfig(data_path="/home/erogol/nvme/gdrive/Datasets/LJSpeech-1.1/wavs/", output_path=output_path,)
- >>> args, config, output_path, _, c_logger, dashboard_logger = init_training(TrainingArgs(), config)
- >>> trainer = Trainer(args, config, output_path, c_logger, dashboard_logger)
+ >>> ap = AudioProcessor(**config.audio)
+ >>> assets = {"audio_processor": ap}
+ >>> trainer = Trainer(args, config, output_path, model=model, training_assets=assets)
>>> trainer.fit()
TODO:
@@ -154,20 +180,41 @@ class Trainer:
- Profiler integration.
- Overfitting to a batch.
- TPU training
+ - NOTE: Consider moving `training_assets` to the model implementation.
"""
- if config is None:
- # parse config from console arguments
- config, output_path, _, c_logger, dashboard_logger = process_args(args)
+ if parse_command_line_args:
+ # parse command-line arguments for TrainerArgs()
+ args, coqpit_overrides = self.parse_argv(args)
+ # get ready for training and parse command-line arguments for the model config
+ config = self.init_training(args, coqpit_overrides, config)
+
+ # set the output path
+ if args.continue_path:
+ # use the same path as the continuing run
+ output_path = args.continue_path
+ else:
+ # override the output path if it is provided
+ output_path = config.output_path if output_path is None else output_path
+ # create a new output folder name
+ output_path = get_experiment_folder_path(config.output_path, config.run_name)
+ os.makedirs(output_path, exist_ok=True)
+
+ # copy training assets to the output folder
+ copy_model_files(config, output_path, new_fields=None)
+
+ # init class members
self.args = args
self.config = config
self.output_path = output_path
self.config.output_log_path = output_path
+ self.training_assets = training_assets
# setup logging
log_file = os.path.join(self.output_path, f"trainer_{args.rank}_log.txt")
self._setup_logger_config(log_file)
+ time.sleep(1.0) # wait for the logger to be ready
# set and initialize Pytorch runtime
self.use_cuda, self.num_gpus = setup_torch_training_env(True, cudnn_benchmark, args.use_ddp)
@@ -196,33 +243,22 @@ class Trainer:
self.use_apex = self._is_apex_available()
self.use_amp_scaler = self.config.mixed_precision and self.use_cuda
- # init audio processor
- self.ap = AudioProcessor(**self.config.audio.to_dict())
-
# load data samples
- # TODO: refactor this
- if "datasets" in self.config:
- # load data for `tts` models
- self.data_train, self.data_eval = load_meta_data(self.config.datasets)
- elif self.config.feature_path is not None:
- # load pre-comnputed features for `vocoder`models
- print(f" > Loading features from: {self.config.feature_path}")
- self.data_eval, self.data_train = load_wav_feat_data(
- self.config.data_path, self.config.feature_path, self.config.eval_split_size
- )
+ if train_samples is None and get_data_samples is None:
+ raise ValueError("[!] `train_samples` and `get_data_samples` cannot both be None.")
+ if train_samples is not None:
+ self.train_samples = train_samples
+ self.eval_samples = eval_samples
else:
- # load data for `vocoder`models
- self.data_eval, self.data_train = load_wav_data(self.config.data_path, self.config.eval_split_size)
+ self.train_samples, self.eval_samples = self.run_get_data_samples(config, get_data_samples)
# init TTS model
+ if model is None and get_model is None:
+ raise ValueError("[!] `model` and `get_model` cannot both be None.")
if model is not None:
self.model = model
else:
- self.model = self.get_model(self.config)
-
- # init multispeaker settings of the model
- if hasattr(self.model, "init_multispeaker"):
- self.model.init_multispeaker(self.config, self.data_train + self.data_eval)
+ self.run_get_model(self.config, get_model)
# setup criterion
self.criterion = self.get_criterion(self.model)
@@ -247,7 +283,7 @@ class Trainer:
# setup optimizer
self.optimizer = self.get_optimizer(self.model, self.config)
- # callback
+ # CALLBACK
self.callbacks = TrainerCallback(self)
self.callbacks.on_init_start()
@@ -280,7 +316,7 @@ class Trainer:
else:
self.scheduler.last_epoch = self.restore_step
- # DISTRUBUTED
+ # DISTRIBUTED
if self.num_gpus > 1:
self.model = DDP_th(self.model, device_ids=[args.rank], output_device=args.rank)
@@ -291,8 +327,56 @@ class Trainer:
self.callbacks.on_init_end()
@staticmethod
- def get_model(config: Coqpit) -> nn.Module:
- """Initialize model from config.
+ def parse_argv(args: Union[Coqpit, List]):
+ """Parse command line arguments to init or override `TrainingArgs()`."""
+ if isinstance(args, Coqpit):
+ parser = args.init_argparse(arg_prefix="")
+ else:
+ train_config = TrainingArgs()
+ parser = train_config.init_argparse(arg_prefix="")
+ training_args, coqpit_overrides = parser.parse_known_args()
+ args.parse_args(training_args)
+ return args, coqpit_overrides
+
+ def init_training(
+ self, args: TrainingArgs, coqpit_overrides: Dict, config: Coqpit = None
+ ): # pylint: disable=no-self-use
+ """Initialize training and update model configs from command line arguments.
+
+ Args:
+ args (argparse.Namespace or dict like): Parsed input arguments.
+ config_overrides (argparse.Namespace or dict like): Parsed config overriding arguments.
+ config (Coqpit): Model config. If none, it is generated from `args`. Defaults to None.
+
+ Returns:
+            c (TTS.utils.io.AttrDict): Config parameters.
+ """
+ # set arguments for continuing training
+ if args.continue_path:
+ experiment_path = args.continue_path
+ args.config_path = os.path.join(args.continue_path, "config.json")
+ args.restore_path, best_model = get_last_checkpoint(args.continue_path)
+ if not args.best_path:
+ args.best_path = best_model
+
+ # override config values from command-line args
+ # TODO: Maybe it is better to do it outside
+ if len(coqpit_overrides) > 0:
+ config.parse_known_args(coqpit_overrides, arg_prefix="coqpit", relaxed_parser=True)
+ experiment_path = args.continue_path
+
+ # update the config.json fields and copy it to the output folder
+ if args.rank == 0:
+ new_fields = {}
+ if args.restore_path:
+ new_fields["restore_path"] = args.restore_path
+ new_fields["github_branch"] = get_git_branch()
+ copy_model_files(config, experiment_path, new_fields)
+ return config
+
+ @staticmethod
+ def run_get_model(config: Coqpit, get_model: Callable) -> nn.Module:
+ """Run the `get_model` function and return the model.
Args:
config (Coqpit): Model config.
@@ -300,12 +384,22 @@ class Trainer:
Returns:
nn.Module: initialized model.
"""
- try:
- model = setup_vocoder_model(config)
- except ModuleNotFoundError:
- model = setup_tts_model(config)
+        if len(signature(get_model).parameters) == 1:
+ model = get_model(config)
+ else:
+ model = get_model()
return model
+ @staticmethod
+ def run_get_data_samples(config: Coqpit, get_data_samples: Callable) -> nn.Module:
+ if callable(get_data_samples):
+            if len(signature(get_data_samples).parameters) == 1:
+ train_samples, eval_samples = get_data_samples(config)
+ else:
+ train_samples, eval_samples = get_data_samples()
+ return train_samples, eval_samples
+ return None, None
+
def restore_model(
self,
config: Coqpit,
@@ -366,11 +460,15 @@ class Trainer:
torch.cuda.empty_cache()
return model, optimizer, scaler, restore_step
+ #########################
+ # DATA LOADING FUNCTIONS
+ #########################
+
def _get_loader(
self,
model: nn.Module,
config: Coqpit,
- ap: AudioProcessor,
+ assets: Dict,
is_eval: bool,
data_items: List,
verbose: bool,
@@ -379,14 +477,14 @@ class Trainer:
if num_gpus > 1:
if hasattr(model.module, "get_data_loader"):
loader = model.module.get_data_loader(
- config, ap, is_eval, data_items, verbose, num_gpus, self.args.rank
+ config, assets, is_eval, data_items, verbose, num_gpus, self.args.rank
)
else:
if hasattr(model, "get_data_loader"):
- loader = model.get_data_loader(config, ap, is_eval, data_items, verbose, num_gpus)
+ loader = model.get_data_loader(config, assets, is_eval, data_items, verbose, num_gpus)
return loader
- def get_train_dataloader(self, ap: AudioProcessor, data_items: List, verbose: bool) -> DataLoader:
+ def get_train_dataloader(self, training_assets: Dict, data_items: List, verbose: bool) -> DataLoader:
"""Initialize and return a training data loader.
Args:
@@ -397,10 +495,10 @@ class Trainer:
Returns:
DataLoader: Initialized training data loader.
"""
- return self._get_loader(self.model, self.config, ap, False, data_items, verbose, self.num_gpus)
+ return self._get_loader(self.model, self.config, training_assets, False, data_items, verbose, self.num_gpus)
- def get_eval_dataloader(self, ap: AudioProcessor, data_items: List, verbose: bool) -> DataLoader:
- return self._get_loader(self.model, self.config, ap, True, data_items, verbose, self.num_gpus)
+ def get_eval_dataloader(self, training_assets: Dict, data_items: List, verbose: bool) -> DataLoader:
+ return self._get_loader(self.model, self.config, training_assets, True, data_items, verbose, self.num_gpus)
def format_batch(self, batch: List) -> Dict:
"""Format the dataloader output and return a batch.
@@ -420,6 +518,10 @@ class Trainer:
batch[k] = to_cuda(v)
return batch
+ ######################
+ # TRAIN FUNCTIONS
+ ######################
+
@staticmethod
def master_params(optimizer: torch.optim.Optimizer):
"""Generator over parameters owned by the optimizer.
@@ -516,10 +618,8 @@ class Trainer:
else:
grad_clip = 0.0 # meaning no gradient clipping
- if grad_clip <= 0:
- grad_norm = 0
-
# optimizer step
+ grad_norm = 0
update_lr_scheduler = True
if self.use_amp_scaler:
if self.use_apex:
@@ -527,31 +627,29 @@ class Trainer:
# https://nvidia.github.io/apex/advanced.html?highlight=accumulate#backward-passes-with-multiple-optimizers
with amp.scale_loss(loss_dict["loss"], optimizer) as scaled_loss:
scaled_loss.backward()
- grad_norm = torch.nn.utils.clip_grad_norm_(
- amp.master_params(optimizer), grad_clip, error_if_nonfinite=False
- )
+ grad_norm = torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), grad_clip)
else:
# model optimizer step in mixed precision mode
scaler.scale(loss_dict["loss"]).backward()
if grad_clip > 0:
scaler.unscale_(optimizer)
- grad_norm = torch.nn.utils.clip_grad_norm_(
- self.master_params(optimizer), grad_clip, error_if_nonfinite=False
- )
- # pytorch skips the step when the norm is 0. So ignore the norm value when it is NaN
- if torch.isnan(grad_norm) or torch.isinf(grad_norm):
- grad_norm = 0
+ grad_norm = torch.nn.utils.clip_grad_norm_(self.master_params(optimizer), grad_clip)
scale_prev = scaler.get_scale()
scaler.step(optimizer)
scaler.update()
update_lr_scheduler = scale_prev <= scaler.get_scale()
+ loss_dict["amp_scaler"] = scaler.get_scale() # for logging
else:
# main model optimizer step
loss_dict["loss"].backward()
if grad_clip > 0:
- grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip, error_if_nonfinite=False)
+ grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
optimizer.step()
+ # pytorch skips the step when the norm is 0. So ignore the norm value when it is NaN
+ if isinstance(grad_norm, torch.Tensor) and (torch.isnan(grad_norm) or torch.isinf(grad_norm)):
+ grad_norm = 0
+
step_time = time.time() - step_start_time
# setup lr
@@ -567,24 +665,6 @@ class Trainer:
loss_dict["grad_norm"] = grad_norm
return outputs, loss_dict, step_time
- @staticmethod
- def _detach_loss_dict(loss_dict: Dict) -> Dict:
- """Detach loss values from autograp.
-
- Args:
- loss_dict (Dict): losses.
-
- Returns:
- Dict: losses detached from autograph.
- """
- loss_dict_detached = {}
- for key, value in loss_dict.items():
- if isinstance(value, (int, float)):
- loss_dict_detached[key] = value
- else:
- loss_dict_detached[key] = value.item()
- return loss_dict_detached
-
def train_step(self, batch: Dict, batch_n_steps: int, step: int, loader_start_time: float) -> Tuple[Dict, Dict]:
"""Perform a training step on a batch of inputs and log the process.
@@ -700,15 +780,14 @@ class Trainer:
self.dashboard_logger.log_artifact(self.output_path, "checkpoint", "model", aliases)
# training visualizations
- figures, audios = None, None
if hasattr(self.model, "module") and hasattr(self.model.module, "train_log"):
- figures, audios = self.model.module.train_log(self.ap, batch, outputs)
+ self.model.module.train_log(
+ batch, outputs, self.dashboard_logger, self.training_assets, self.total_steps_done
+ )
elif hasattr(self.model, "train_log"):
- figures, audios = self.model.train_log(self.ap, batch, outputs)
- if figures is not None:
- self.dashboard_logger.train_figures(self.total_steps_done, figures)
- if audios is not None:
- self.dashboard_logger.train_audios(self.total_steps_done, audios, self.ap.sample_rate)
+ self.model.train_log(
+ batch, outputs, self.dashboard_logger, self.training_assets, self.total_steps_done
+ )
self.dashboard_logger.flush()
@@ -718,11 +797,13 @@ class Trainer:
def train_epoch(self) -> None:
"""Main entry point for the training loop. Run training on the all training samples."""
+ # initialize the data loader
self.train_loader = self.get_train_dataloader(
- self.ap,
- self.data_train,
+ self.training_assets,
+ self.train_samples,
verbose=True,
)
+ # set model to training mode
if self.num_gpus > 1:
self.model.module.train()
else:
@@ -734,11 +815,12 @@ class Trainer:
batch_num_steps = int(len(self.train_loader.dataset) / self.config.batch_size)
self.c_logger.print_train_start()
loader_start_time = time.time()
+ # iterate over the training samples
for cur_step, batch in enumerate(self.train_loader):
_, _ = self.train_step(batch, batch_num_steps, cur_step, loader_start_time)
loader_start_time = time.time()
epoch_time = time.time() - epoch_start_time
- # Plot self.epochs_done Stats
+ # plot self.epochs_done Stats
if self.args.rank == 0:
epoch_stats = {"epoch_time": epoch_time}
epoch_stats.update(self.keep_avg_train.avg_values)
@@ -754,6 +836,10 @@ class Trainer:
else:
self.scheduler.step()
+ #######################
+ # EVAL FUNCTIONS
+ #######################
+
@staticmethod
def _model_eval_step(
batch: Dict, model: nn.Module, criterion: nn.Module, optimizer_idx: int = None
@@ -803,7 +889,7 @@ class Trainer:
loss_dict_new[f"loss_{idx}"] = loss_dict_new.pop("loss")
loss_dict.update(loss_dict_new)
- loss_dict = self._detach_loss_dict(loss_dict)
+ loss_dict = self._detach_loss_dict(loss_dict)
# update avg stats
update_eval_values = {}
@@ -819,8 +905,8 @@ class Trainer:
"""Main entry point for the evaluation loop. Run evaluation on the all validation samples."""
self.eval_loader = (
self.get_eval_dataloader(
- self.ap,
- self.data_eval,
+ self.training_assets,
+ self.eval_samples,
verbose=True,
)
if self.config.run_eval
@@ -840,15 +926,12 @@ class Trainer:
loader_start_time = time.time()
# plot epoch stats, artifacts and figures
if self.args.rank == 0:
- figures, audios = None, None
if hasattr(self.model, "module") and hasattr(self.model.module, "eval_log"):
- figures, audios = self.model.module.eval_log(self.ap, batch, outputs)
+ self.model.module.eval_log(
+ batch, outputs, self.dashboard_logger, self.training_assets, self.total_steps_done
+ )
elif hasattr(self.model, "eval_log"):
- figures, audios = self.model.eval_log(self.ap, batch, outputs)
- if figures is not None:
- self.dashboard_logger.eval_figures(self.total_steps_done, figures)
- if audios is not None:
- self.dashboard_logger.eval_audios(self.total_steps_done, audios, self.ap.sample_rate)
+ self.model.eval_log(batch, outputs, self.dashboard_logger, self.training_assets, self.total_steps_done)
self.dashboard_logger.eval_stats(self.total_steps_done, self.keep_avg_eval.avg_values)
def test_run(self) -> None:
@@ -857,22 +940,22 @@ class Trainer:
if hasattr(self.model, "test_run") or (self.num_gpus > 1 and hasattr(self.model.module, "test_run")):
if self.eval_loader is None:
self.eval_loader = self.get_eval_dataloader(
- self.ap,
- self.data_eval,
+ self.training_assets,
+ self.eval_samples,
verbose=True,
)
if hasattr(self.eval_loader.dataset, "load_test_samples"):
samples = self.eval_loader.dataset.load_test_samples(1)
if self.num_gpus > 1:
- figures, audios = self.model.module.test_run(self.ap, samples, None)
+ figures, audios = self.model.module.test_run(self.training_assets, samples, None)
else:
- figures, audios = self.model.test_run(self.ap, samples, None)
+ figures, audios = self.model.test_run(self.training_assets, samples, None)
else:
if self.num_gpus > 1:
- figures, audios = self.model.module.test_run(self.ap)
+ figures, audios = self.model.module.test_run(self.training_assets)
else:
- figures, audios = self.model.test_run(self.ap)
+ figures, audios = self.model.test_run(self.training_assets)
self.dashboard_logger.test_audios(self.total_steps_done, audios, self.config.audio["sample_rate"])
self.dashboard_logger.test_figures(self.total_steps_done, figures)
@@ -886,6 +969,10 @@ class Trainer:
self.best_loss = ch["model_loss"]
print(f" > Starting with loaded last best loss {self.best_loss}.")
+ ###################################
+ # FIT FUNCTIONS
+ ###################################
+
def _fit(self) -> None:
"""🏃 train -> evaluate -> test for the number of epochs."""
self._restore_best_loss()
@@ -901,7 +988,8 @@ class Trainer:
self.keep_avg_eval = KeepAverage() if self.config.run_eval else None
self.epochs_done = epoch
self.c_logger.print_epoch_start(epoch, self.config.epochs, self.output_path)
- self.train_epoch()
+ if not self.args.skip_train_epoch:
+ self.train_epoch()
if self.config.run_eval:
self.eval_epoch()
if epoch >= self.config.test_delay_epochs and self.args.rank <= 0:
@@ -939,24 +1027,6 @@ class Trainer:
traceback.print_exc()
sys.exit(1)
- def _pick_target_avg_loss(self, keep_avg_target: KeepAverage) -> Dict:
- """Pick the target loss to compare models"""
- target_avg_loss = None
-
- # return if target loss defined in the model config
- if "target_loss" in self.config and self.config.target_loss:
- return keep_avg_target[f"avg_{self.config.target_loss}"]
-
- # take the average of loss_{optimizer_idx} as the target loss when there are multiple optimizers
- if isinstance(self.optimizer, list):
- target_avg_loss = 0
- for idx in range(len(self.optimizer)):
- target_avg_loss += keep_avg_target[f"avg_loss_{idx}"]
- target_avg_loss /= len(self.optimizer)
- else:
- target_avg_loss = keep_avg_target["avg_loss"]
- return target_avg_loss
-
def save_best_model(self) -> None:
"""Save the best model. It only saves if the current target loss is smaller then the previous."""
@@ -978,35 +1048,9 @@ class Trainer:
keep_after=self.config.keep_after,
)
- def _setup_logger_config(self, log_file: str) -> None:
- """Write log strings to a file and print logs to the terminal.
- TODO: Causes formatting issues in pdb debugging."""
-
- class Logger(object):
- def __init__(self, print_to_terminal=True):
- self.print_to_terminal = print_to_terminal
- self.terminal = sys.stdout
- self.log_file = log_file
-
- def write(self, message):
- if self.print_to_terminal:
- self.terminal.write(message)
- with open(self.log_file, "a", encoding="utf-8") as f:
- f.write(message)
-
- def flush(self):
- # this flush method is needed for python 3 compatibility.
- # this handles the flush command by doing nothing.
- # you might want to specify some extra behavior here.
- pass
-
- # don't let processes rank > 0 write to the terminal
- sys.stdout = Logger(self.args.rank == 0)
-
- @staticmethod
- def _is_apex_available() -> bool:
- """Check if Nvidia's APEX is available."""
- return importlib.util.find_spec("apex") is not None
+ #####################
+ # GET FUNCTIONS
+ #####################
@staticmethod
def get_optimizer(model: nn.Module, config: Coqpit) -> Union[torch.optim.Optimizer, List]:
@@ -1084,154 +1128,72 @@ class Trainer:
criterion = model.get_criterion()
return criterion
+ ####################
+ # HELPER FUNCTIONS
+ ####################
-def getarguments():
- train_config = TrainingArgs()
- parser = train_config.init_argparse(arg_prefix="")
- return parser
+ @staticmethod
+ def _detach_loss_dict(loss_dict: Dict) -> Dict:
+        """Detach loss values from autograd.
+ Args:
+ loss_dict (Dict): losses.
-def get_last_checkpoint(path: str) -> Tuple[str, str]:
- """Get latest checkpoint or/and best model in path.
+ Returns:
+            Dict: losses detached from autograd.
+ """
+ loss_dict_detached = {}
+ for key, value in loss_dict.items():
+ if isinstance(value, (int, float)):
+ loss_dict_detached[key] = value
+ else:
+ loss_dict_detached[key] = value.detach().item()
+ return loss_dict_detached
- It is based on globbing for `*.pth.tar` and the RegEx
- `(checkpoint|best_model)_([0-9]+)`.
+ def _pick_target_avg_loss(self, keep_avg_target: KeepAverage) -> Dict:
+ """Pick the target loss to compare models"""
+ target_avg_loss = None
- Args:
- path: Path to files to be compared.
+ # return if target loss defined in the model config
+ if "target_loss" in self.config and self.config.target_loss:
+ return keep_avg_target[f"avg_{self.config.target_loss}"]
- Raises:
- ValueError: If no checkpoint or best_model files are found.
-
- Returns:
- Path to the last checkpoint
- Path to best checkpoint
- """
- fs = fsspec.get_mapper(path).fs
- file_names = fs.glob(os.path.join(path, "*.pth.tar"))
- scheme = urlparse(path).scheme
- if scheme: # scheme is not preserved in fs.glob, add it back
- file_names = [scheme + "://" + file_name for file_name in file_names]
- last_models = {}
- last_model_nums = {}
- for key in ["checkpoint", "best_model"]:
- last_model_num = None
- last_model = None
- # pass all the checkpoint files and find
- # the one with the largest model number suffix.
- for file_name in file_names:
- match = re.search(f"{key}_([0-9]+)", file_name)
- if match is not None:
- model_num = int(match.groups()[0])
- if last_model_num is None or model_num > last_model_num:
- last_model_num = model_num
- last_model = file_name
-
- # if there is no checkpoint found above
- # find the checkpoint with the latest
- # modification date.
- key_file_names = [fn for fn in file_names if key in fn]
- if last_model is None and len(key_file_names) > 0:
- last_model = max(key_file_names, key=os.path.getctime)
- last_model_num = load_fsspec(last_model)["step"]
-
- if last_model is not None:
- last_models[key] = last_model
- last_model_nums[key] = last_model_num
-
- # check what models were found
- if not last_models:
- raise ValueError(f"No models found in continue path {path}!")
- if "checkpoint" not in last_models: # no checkpoint just best model
- last_models["checkpoint"] = last_models["best_model"]
- elif "best_model" not in last_models: # no best model
- # this shouldn't happen, but let's handle it just in case
- last_models["best_model"] = last_models["checkpoint"]
- # finally check if last best model is more recent than checkpoint
- elif last_model_nums["best_model"] > last_model_nums["checkpoint"]:
- last_models["checkpoint"] = last_models["best_model"]
-
- return last_models["checkpoint"], last_models["best_model"]
-
-
-def process_args(args, config=None):
- """Process parsed comand line arguments and initialize the config if not provided.
-
- Args:
- args (argparse.Namespace or dict like): Parsed input arguments.
- config (Coqpit): Model config. If none, it is generated from `args`. Defaults to None.
-
- Returns:
- c (TTS.utils.io.AttrDict): Config paramaters.
- out_path (str): Path to save models and logging.
- audio_path (str): Path to save generated test audios.
- c_logger (TTS.utils.console_logger.ConsoleLogger): Class that does
- logging to the console.
-
- dashboard_logger (WandbLogger or TensorboardLogger): Class that does the dashboard Logging
-
- TODO:
- - Interactive config definition.
- """
- if isinstance(args, tuple):
- args, coqpit_overrides = args
- if args.continue_path:
- # continue a previous training from its output folder
- experiment_path = args.continue_path
- args.config_path = os.path.join(args.continue_path, "config.json")
- args.restore_path, best_model = get_last_checkpoint(args.continue_path)
- if not args.best_path:
- args.best_path = best_model
- # init config if not already defined
- if config is None:
- if args.config_path:
- # init from a file
- config = load_config(args.config_path)
+ # take the average of loss_{optimizer_idx} as the target loss when there are multiple optimizers
+ if isinstance(self.optimizer, list):
+ target_avg_loss = 0
+ for idx in range(len(self.optimizer)):
+ target_avg_loss += keep_avg_target[f"avg_loss_{idx}"]
+ target_avg_loss /= len(self.optimizer)
else:
- # init from console args
- from TTS.config.shared_configs import BaseTrainingConfig # pylint: disable=import-outside-toplevel
+ target_avg_loss = keep_avg_target["avg_loss"]
+ return target_avg_loss
- config_base = BaseTrainingConfig()
- config_base.parse_known_args(coqpit_overrides)
- config = register_config(config_base.model)()
- # override values from command-line args
- config.parse_known_args(coqpit_overrides, relaxed_parser=True)
- experiment_path = args.continue_path
- if not experiment_path:
- experiment_path = get_experiment_folder_path(config.output_path, config.run_name)
- audio_path = os.path.join(experiment_path, "test_audios")
- config.output_log_path = experiment_path
- # setup rank 0 process in distributed training
- dashboard_logger = None
- if args.rank == 0:
- new_fields = {}
- if args.restore_path:
- new_fields["restore_path"] = args.restore_path
- new_fields["github_branch"] = get_git_branch()
- # if model characters are not set in the config file
- # save the default set to the config file for future
- # compatibility.
- if config.has("characters") and config.characters is None:
- used_characters = parse_symbols()
- new_fields["characters"] = used_characters
- copy_model_files(config, experiment_path, new_fields)
- dashboard_logger = init_dashboard_logger(config)
- c_logger = ConsoleLogger()
- return config, experiment_path, audio_path, c_logger, dashboard_logger
+ def _setup_logger_config(self, log_file: str) -> None:
+ """Write log strings to a file and print logs to the terminal.
+ TODO: Causes formatting issues in pdb debugging."""
+ class Logger(object):
+ def __init__(self, print_to_terminal=True):
+ self.print_to_terminal = print_to_terminal
+ self.terminal = sys.stdout
+ self.log_file = log_file
-def init_arguments():
- train_config = TrainingArgs()
- parser = train_config.init_argparse(arg_prefix="")
- return parser
+ def write(self, message):
+ if self.print_to_terminal:
+ self.terminal.write(message)
+ with open(self.log_file, "a", encoding="utf-8") as f:
+ f.write(message)
+ def flush(self):
+ # this flush method is needed for python 3 compatibility.
+ # this handles the flush command by doing nothing.
+ # you might want to specify some extra behavior here.
+ pass
-def init_training(argv: Union[List, Coqpit], config: Coqpit = None):
- """Initialization of a training run."""
- if isinstance(argv, Coqpit):
- parser = argv.init_argparse(arg_prefix="")
- else:
- parser = init_arguments()
- args = parser.parse_known_args()
- config, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger = process_args(args, config)
- return args[0], config, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger
+ # don't let processes rank > 0 write to the terminal
+ sys.stdout = Logger(self.args.rank == 0)
+
+ @staticmethod
+ def _is_apex_available() -> bool:
+ """Check if Nvidia's APEX is available."""
+ return importlib.util.find_spec("apex") is not None
diff --git a/TTS/tts/configs/__init__.py b/TTS/tts/configs/__init__.py
index 5ad4fe8c..3146ac1c 100644
--- a/TTS/tts/configs/__init__.py
+++ b/TTS/tts/configs/__init__.py
@@ -3,15 +3,15 @@ import os
from inspect import isclass
# import all files under configs/
-configs_dir = os.path.dirname(__file__)
-for file in os.listdir(configs_dir):
- path = os.path.join(configs_dir, file)
- if not file.startswith("_") and not file.startswith(".") and (file.endswith(".py") or os.path.isdir(path)):
- config_name = file[: file.find(".py")] if file.endswith(".py") else file
- module = importlib.import_module("TTS.tts.configs." + config_name)
- for attribute_name in dir(module):
- attribute = getattr(module, attribute_name)
+# configs_dir = os.path.dirname(__file__)
+# for file in os.listdir(configs_dir):
+# path = os.path.join(configs_dir, file)
+# if not file.startswith("_") and not file.startswith(".") and (file.endswith(".py") or os.path.isdir(path)):
+# config_name = file[: file.find(".py")] if file.endswith(".py") else file
+# module = importlib.import_module("TTS.tts.configs." + config_name)
+# for attribute_name in dir(module):
+# attribute = getattr(module, attribute_name)
- if isclass(attribute):
- # Add the class to this package's variables
- globals()[attribute_name] = attribute
+# if isclass(attribute):
+# # Add the class to this package's variables
+# globals()[attribute_name] = attribute
diff --git a/TTS/tts/configs/align_tts_config.py b/TTS/tts/configs/align_tts_config.py
index 837cd519..317a01af 100644
--- a/TTS/tts/configs/align_tts_config.py
+++ b/TTS/tts/configs/align_tts_config.py
@@ -10,7 +10,7 @@ class AlignTTSConfig(BaseTTSConfig):
"""Defines parameters for AlignTTS model.
Example:
- >>> from TTS.tts.configs import AlignTTSConfig
+ >>> from TTS.tts.configs.align_tts_config import AlignTTSConfig
>>> config = AlignTTSConfig()
Args:
diff --git a/TTS/tts/configs/fast_pitch_config.py b/TTS/tts/configs/fast_pitch_config.py
index 668ea227..8f063102 100644
--- a/TTS/tts/configs/fast_pitch_config.py
+++ b/TTS/tts/configs/fast_pitch_config.py
@@ -11,7 +11,7 @@ class FastPitchConfig(BaseTTSConfig):
Example:
- >>> from TTS.tts.configs import FastPitchConfig
+ >>> from TTS.tts.configs.fast_pitch_config import FastPitchConfig
>>> config = FastPitchConfig()
Args:
@@ -30,6 +30,10 @@ class FastPitchConfig(BaseTTSConfig):
Activation Normalization that pre-computes normalization stats at the beginning and use the same values
for the rest. Defaults to 10.
+ speakers_file (str):
+ Path to the file containing the list of speakers. Needed at inference for loading matching speaker ids to
+ speaker names. Defaults to `None`.
+
use_speaker_embedding (bool):
enable / disable using speaker embeddings for multi-speaker models. If set True, the model is
in the multi-speaker mode. Defaults to False.
@@ -105,6 +109,8 @@ class FastPitchConfig(BaseTTSConfig):
model_args: ForwardTTSArgs = ForwardTTSArgs()
# multi-speaker settings
+ num_speakers: int = 0
+ speakers_file: str = None
use_speaker_embedding: bool = False
use_d_vector_file: bool = False
d_vector_file: str = False
@@ -149,3 +155,22 @@ class FastPitchConfig(BaseTTSConfig):
"Prior to November 22, 1963.",
]
)
+
+ def __post_init__(self):
+ # Pass multi-speaker parameters to the model args as `model.init_multispeaker()` looks for it there.
+ if self.num_speakers > 0:
+ self.model_args.num_speakers = self.num_speakers
+
+ # speaker embedding settings
+ if self.use_speaker_embedding:
+ self.model_args.use_speaker_embedding = True
+ if self.speakers_file:
+ self.model_args.speakers_file = self.speakers_file
+
+ # d-vector settings
+ if self.use_d_vector_file:
+ self.model_args.use_d_vector_file = True
+ if self.d_vector_dim is not None and self.d_vector_dim > 0:
+ self.model_args.d_vector_dim = self.d_vector_dim
+ if self.d_vector_file:
+ self.model_args.d_vector_file = self.d_vector_file
diff --git a/TTS/tts/configs/fast_speech_config.py b/TTS/tts/configs/fast_speech_config.py
index bba47bb3..040a8910 100644
--- a/TTS/tts/configs/fast_speech_config.py
+++ b/TTS/tts/configs/fast_speech_config.py
@@ -11,7 +11,7 @@ class FastSpeechConfig(BaseTTSConfig):
Example:
- >>> from TTS.tts.configs import FastSpeechConfig
+ >>> from TTS.tts.configs.fast_speech_config import FastSpeechConfig
>>> config = FastSpeechConfig()
Args:
@@ -30,6 +30,11 @@ class FastSpeechConfig(BaseTTSConfig):
Activation Normalization that pre-computes normalization stats at the beginning and use the same values
for the rest. Defaults to 10.
+ speakers_file (str):
+ Path to the file containing the list of speakers. Needed at inference for loading matching speaker ids to
+ speaker names. Defaults to `None`.
+
+
use_speaker_embedding (bool):
enable / disable using speaker embeddings for multi-speaker models. If set True, the model is
in the multi-speaker mode. Defaults to False.
@@ -105,6 +110,7 @@ class FastSpeechConfig(BaseTTSConfig):
model_args: ForwardTTSArgs = ForwardTTSArgs(use_pitch=False)
# multi-speaker settings
+ speakers_file: str = None
use_speaker_embedding: bool = False
use_d_vector_file: bool = False
d_vector_file: str = False
@@ -149,3 +155,22 @@ class FastSpeechConfig(BaseTTSConfig):
"Prior to November 22, 1963.",
]
)
+
+ def __post_init__(self):
+ # Pass multi-speaker parameters to the model args as `model.init_multispeaker()` looks for it there.
+ if self.num_speakers > 0:
+ self.model_args.num_speakers = self.num_speakers
+
+ # speaker embedding settings
+ if self.use_speaker_embedding:
+ self.model_args.use_speaker_embedding = True
+ if self.speakers_file:
+ self.model_args.speakers_file = self.speakers_file
+
+ # d-vector settings
+ if self.use_d_vector_file:
+ self.model_args.use_d_vector_file = True
+ if self.d_vector_dim is not None and self.d_vector_dim > 0:
+ self.model_args.d_vector_dim = self.d_vector_dim
+ if self.d_vector_file:
+ self.model_args.d_vector_file = self.d_vector_file
diff --git a/TTS/tts/configs/glow_tts_config.py b/TTS/tts/configs/glow_tts_config.py
index 97fd3577..ce8eee6d 100644
--- a/TTS/tts/configs/glow_tts_config.py
+++ b/TTS/tts/configs/glow_tts_config.py
@@ -10,7 +10,7 @@ class GlowTTSConfig(BaseTTSConfig):
Example:
- >>> from TTS.tts.configs import GlowTTSConfig
+ >>> from TTS.tts.configs.glow_tts_config import GlowTTSConfig
>>> config = GlowTTSConfig()
Args:
diff --git a/TTS/tts/configs/shared_configs.py b/TTS/tts/configs/shared_configs.py
index e208c16c..60ef7276 100644
--- a/TTS/tts/configs/shared_configs.py
+++ b/TTS/tts/configs/shared_configs.py
@@ -218,7 +218,3 @@ class BaseTTSConfig(BaseTrainingConfig):
lr_scheduler_params: dict = field(default_factory=lambda: {})
# testing
test_sentences: List[str] = field(default_factory=lambda: [])
- # multi-speaker
- use_speaker_embedding: bool = False
- use_d_vector_file: bool = False
- d_vector_dim: int = 0
diff --git a/TTS/tts/configs/speedy_speech_config.py b/TTS/tts/configs/speedy_speech_config.py
index ba561c89..ea6866ed 100644
--- a/TTS/tts/configs/speedy_speech_config.py
+++ b/TTS/tts/configs/speedy_speech_config.py
@@ -11,7 +11,7 @@ class SpeedySpeechConfig(BaseTTSConfig):
Example:
- >>> from TTS.tts.configs import SpeedySpeechConfig
+ >>> from TTS.tts.configs.speedy_speech_config import SpeedySpeechConfig
>>> config = SpeedySpeechConfig()
Args:
@@ -30,6 +30,10 @@ class SpeedySpeechConfig(BaseTTSConfig):
Activation Normalization that pre-computes normalization stats at the beginning and use the same values
for the rest. Defaults to 10.
+ speakers_file (str):
+ Path to the file containing the list of speakers. Needed at inference for loading matching speaker ids to
+ speaker names. Defaults to `None`.
+
use_speaker_embedding (bool):
enable / disable using speaker embeddings for multi-speaker models. If set True, the model is
in the multi-speaker mode. Defaults to False.
@@ -117,12 +121,13 @@ class SpeedySpeechConfig(BaseTTSConfig):
},
out_channels=80,
hidden_channels=128,
- num_speakers=0,
positional_encoding=True,
detach_duration_predictor=True,
)
# multi-speaker settings
+ num_speakers: int = 0
+ speakers_file: str = None
use_speaker_embedding: bool = False
use_d_vector_file: bool = False
d_vector_file: str = False
@@ -166,3 +171,22 @@ class SpeedySpeechConfig(BaseTTSConfig):
"Prior to November 22, 1963.",
]
)
+
+ def __post_init__(self):
+ # Pass multi-speaker parameters to the model args as `model.init_multispeaker()` looks for it there.
+ if self.num_speakers > 0:
+ self.model_args.num_speakers = self.num_speakers
+
+ # speaker embedding settings
+ if self.use_speaker_embedding:
+ self.model_args.use_speaker_embedding = True
+ if self.speakers_file:
+ self.model_args.speakers_file = self.speakers_file
+
+ # d-vector settings
+ if self.use_d_vector_file:
+ self.model_args.use_d_vector_file = True
+ if self.d_vector_dim is not None and self.d_vector_dim > 0:
+ self.model_args.d_vector_dim = self.d_vector_dim
+ if self.d_vector_file:
+ self.model_args.d_vector_file = self.d_vector_file
diff --git a/TTS/tts/configs/tacotron2_config.py b/TTS/tts/configs/tacotron2_config.py
index b622e640..95b65202 100644
--- a/TTS/tts/configs/tacotron2_config.py
+++ b/TTS/tts/configs/tacotron2_config.py
@@ -9,7 +9,7 @@ class Tacotron2Config(TacotronConfig):
Example:
- >>> from TTS.tts.configs import Tacotron2Config
+ >>> from TTS.tts.configs.tacotron2_config import Tacotron2Config
>>> config = Tacotron2Config()
Check `TacotronConfig` for argument descriptions.
diff --git a/TTS/tts/configs/tacotron_config.py b/TTS/tts/configs/tacotron_config.py
index 89fb8d81..d6edd267 100644
--- a/TTS/tts/configs/tacotron_config.py
+++ b/TTS/tts/configs/tacotron_config.py
@@ -10,7 +10,7 @@ class TacotronConfig(BaseTTSConfig):
Example:
- >>> from TTS.tts.configs import TacotronConfig
+ >>> from TTS.tts.configs.tacotron_config import TacotronConfig
>>> config = TacotronConfig()
Args:
@@ -106,7 +106,7 @@ class TacotronConfig(BaseTTSConfig):
Weight decay coefficient. Defaults to `1e-6`.
grad_clip (float):
Gradient clipping threshold. Defaults to `5`.
- seq_len_notm (bool):
+ seq_len_norm (bool):
enable / disable the sequnce length normalization in the loss functions. If set True, loss of a sample
is divided by the sequence length. Defaults to False.
loss_masking (bool):
diff --git a/TTS/tts/configs/vits_config.py b/TTS/tts/configs/vits_config.py
index 39479231..d490e6e6 100644
--- a/TTS/tts/configs/vits_config.py
+++ b/TTS/tts/configs/vits_config.py
@@ -90,7 +90,7 @@ class VitsConfig(BaseTTSConfig):
Example:
- >>> from TTS.tts.configs import VitsConfig
+ >>> from TTS.tts.configs.vits_config import VitsConfig
>>> config = VitsConfig()
"""
@@ -139,3 +139,36 @@ class VitsConfig(BaseTTSConfig):
"Prior to November 22, 1963.",
]
)
+
+ # multi-speaker settings
+ # use speaker embedding layer
+ num_speakers: int = 0
+ use_speaker_embedding: bool = False
+ speakers_file: str = None
+ speaker_embedding_channels: int = 256
+
+ # use d-vectors
+ use_d_vector_file: bool = False
+ d_vector_file: str = False
+ d_vector_dim: int = None
+
+ def __post_init__(self):
+ # Pass multi-speaker parameters to the model args as `model.init_multispeaker()` looks for it there.
+ if self.num_speakers > 0:
+ self.model_args.num_speakers = self.num_speakers
+
+ # speaker embedding settings
+ if self.use_speaker_embedding:
+ self.model_args.use_speaker_embedding = True
+ if self.speakers_file:
+ self.model_args.speakers_file = self.speakers_file
+ if self.speaker_embedding_channels:
+ self.model_args.speaker_embedding_channels = self.speaker_embedding_channels
+
+ # d-vector settings
+ if self.use_d_vector_file:
+ self.model_args.use_d_vector_file = True
+ if self.d_vector_dim is not None and self.d_vector_dim > 0:
+ self.model_args.d_vector_dim = self.d_vector_dim
+ if self.d_vector_file:
+ self.model_args.d_vector_file = self.d_vector_file
diff --git a/TTS/tts/datasets/__init__.py b/TTS/tts/datasets/__init__.py
index c2e55038..78024936 100644
--- a/TTS/tts/datasets/__init__.py
+++ b/TTS/tts/datasets/__init__.py
@@ -1,15 +1,20 @@
import sys
from collections import Counter
from pathlib import Path
-from typing import Dict, List, Tuple
+from typing import Callable, Dict, List, Tuple, Union
import numpy as np
+from TTS.tts.datasets.dataset import *
from TTS.tts.datasets.formatters import *
-from TTS.tts.datasets.TTSDataset import TTSDataset
def split_dataset(items):
+ """Split a dataset into train and eval. Consider speaker distribution in multi-speaker training.
+
+ Args:
+ items (List[List]): A list of samples. Each sample is a list of `[audio_path, text, speaker_id]`.
+ """
speakers = [item[-1] for item in items]
is_multi_speaker = len(set(speakers)) > 1
eval_split_size = min(500, int(len(items) * 0.01))
@@ -31,33 +36,47 @@ def split_dataset(items):
return items[:eval_split_size], items[eval_split_size:]
-def load_meta_data(datasets: List[Dict], eval_split=True) -> Tuple[List[List], List[List]]:
- """Parse the dataset, load the samples as a list and load the attention alignments if provided.
+def load_tts_samples(
+ datasets: Union[List[Dict], Dict], eval_split=True, formatter: Callable = None
+) -> Tuple[List[List], List[List]]:
+ """Parse the dataset from the datasets config, load the samples as a List and load the attention alignments if provided.
+    If `formatter` is not None, apply the formatter to the samples, else pick the formatter from the available ones based
+ on the dataset name.
Args:
- datasets (List[Dict]): A list of dataset dictionaries or dataset configs.
+ datasets (List[Dict], Dict): A list of datasets or a single dataset dictionary. If multiple datasets are
+ in the list, they are all merged.
+
eval_split (bool, optional): If true, create a evaluation split. If an eval split provided explicitly, generate
an eval split automatically. Defaults to True.
+ formatter (Callable, optional): The preprocessing function to be applied to create the list of samples. It
+ must take the root_path and the meta_file name and return a list of samples in the format of
+            `[[audio_path, text, speaker_id], ...]`. See the available formatters in `TTS.tts.datasets.formatters` as
+ example. Defaults to None.
+
Returns:
Tuple[List[List], List[List]: training and evaluation splits of the dataset.
"""
meta_data_train_all = []
meta_data_eval_all = [] if eval_split else None
+ if not isinstance(datasets, list):
+ datasets = [datasets]
for dataset in datasets:
name = dataset["name"]
root_path = dataset["path"]
meta_file_train = dataset["meta_file_train"]
meta_file_val = dataset["meta_file_val"]
# setup the right data processor
- preprocessor = _get_preprocessor_by_name(name)
+ if formatter is None:
+ formatter = _get_formatter_by_name(name)
# load train set
- meta_data_train = preprocessor(root_path, meta_file_train)
+ meta_data_train = formatter(root_path, meta_file_train)
print(f" | > Found {len(meta_data_train)} files in {Path(root_path).resolve()}")
# load evaluation split if set
if eval_split:
if meta_file_val:
- meta_data_eval = preprocessor(root_path, meta_file_val)
+ meta_data_eval = formatter(root_path, meta_file_val)
else:
meta_data_eval, meta_data_train = split_dataset(meta_data_train)
meta_data_eval_all += meta_data_eval
@@ -87,7 +106,7 @@ def load_attention_mask_meta_data(metafile_path):
return meta_data
-def _get_preprocessor_by_name(name):
+def _get_formatter_by_name(name):
"""Returns the respective preprocessing function."""
thismodule = sys.modules[__name__]
return getattr(thismodule, name.lower())
diff --git a/TTS/tts/datasets/TTSDataset.py b/TTS/tts/datasets/dataset.py
similarity index 99%
rename from TTS/tts/datasets/TTSDataset.py
rename to TTS/tts/datasets/dataset.py
index c81e0e6c..04314bab 100644
--- a/TTS/tts/datasets/TTSDataset.py
+++ b/TTS/tts/datasets/dataset.py
@@ -330,7 +330,7 @@ class TTSDataset(Dataset):
if by_audio_len:
lengths = []
for item in self.items:
- lengths.append(os.path.getsize(item[1]))
+ lengths.append(os.path.getsize(item[1]) / 16 * 8) # assuming 16bit audio
lengths = np.array(lengths)
else:
lengths = np.array([len(ins[0]) for ins in self.items])
@@ -419,6 +419,7 @@ class TTSDataset(Dataset):
d_vectors = [self.d_vector_mapping[w]["embedding"] for w in wav_files_names]
else:
d_vectors = None
+
# get numerical speaker ids from speaker names
if self.speaker_id_mapping:
speaker_ids = [self.speaker_id_mapping[sn] for sn in batch["speaker_name"]]
diff --git a/TTS/tts/datasets/formatters.py b/TTS/tts/datasets/formatters.py
index eee407a8..dcd18740 100644
--- a/TTS/tts/datasets/formatters.py
+++ b/TTS/tts/datasets/formatters.py
@@ -308,14 +308,14 @@ def mls(root_path, meta_files=None):
# ======================================== VOX CELEB ===========================================
def voxceleb2(root_path, meta_file=None):
"""
- :param meta_file Used only for consistency with load_meta_data api
+ :param meta_file Used only for consistency with load_tts_samples api
"""
return _voxcel_x(root_path, meta_file, voxcel_idx="2")
def voxceleb1(root_path, meta_file=None):
"""
- :param meta_file Used only for consistency with load_meta_data api
+ :param meta_file Used only for consistency with load_tts_samples api
"""
return _voxcel_x(root_path, meta_file, voxcel_idx="1")
diff --git a/TTS/tts/layers/glow_tts/glow.py b/TTS/tts/layers/glow_tts/glow.py
index 392447de..ff1b99e8 100644
--- a/TTS/tts/layers/glow_tts/glow.py
+++ b/TTS/tts/layers/glow_tts/glow.py
@@ -106,7 +106,6 @@ class InvConvNear(nn.Module):
- x: :math:`[B, C, T]`
- x_mask: :math:`[B, 1, T]`
"""
-
b, c, t = x.size()
assert c % self.num_splits == 0
if x_mask is None:
diff --git a/TTS/tts/layers/losses.py b/TTS/tts/layers/losses.py
index f465c638..0ea342e8 100644
--- a/TTS/tts/layers/losses.py
+++ b/TTS/tts/layers/losses.py
@@ -410,11 +410,6 @@ class TacotronLoss(torch.nn.Module):
return_dict["postnet_ssim_loss"] = postnet_ssim_loss
return_dict["loss"] = loss
-
- # check if any loss is NaN
- for key, loss in return_dict.items():
- if torch.isnan(loss):
- raise RuntimeError(f" [!] NaN loss with {key}.")
return return_dict
diff --git a/TTS/tts/layers/tacotron/attentions.py b/TTS/tts/layers/tacotron/attentions.py
index a01ccc49..8c30a00a 100644
--- a/TTS/tts/layers/tacotron/attentions.py
+++ b/TTS/tts/layers/tacotron/attentions.py
@@ -126,27 +126,24 @@ class GravesAttention(nn.Module):
class OriginalAttention(nn.Module):
- """Bahdanau Attention with various optional modifications. Proposed below.
+ """Bahdanau Attention with various optional modifications.
- Location sensitive attnetion: https://arxiv.org/abs/1712.05884
- Forward Attention: https://arxiv.org/abs/1807.06736 + state masking at inference
- Using sigmoid instead of softmax normalization
- Attention windowing at inference time
Note:
- Location Sensitive Attention is an attention mechanism that extends the additive attention mechanism
- to use cumulative attention weights from previous decoder time steps as an additional feature.
+ Location Sensitive Attention extends the additive attention mechanism
+ to use cumulative attention weights from previous decoder time steps with the current time step features.
- Forward attention considers only the alignment paths that satisfy the monotonic condition at each
- decoder timestep. The modified attention probabilities at each timestep are computed recursively
- using a forward algorithm.
+ Forward attention computes most probable monotonic alignment. The modified attention probabilities at each
+ timestep are computed recursively by the forward algorithm.
- Transition agent for forward attention is further proposed, which helps the attention mechanism
- to make decisions whether to move forward or stay at each decoder timestep.
-
- Attention windowing applies a sliding windows to time steps of the input tensor centering at the last
- time step with the largest attention weight. It is especially useful at inference to keep the attention
- alignment diagonal.
+ Transition agent in the forward attention explicitly gates the attention mechanism whether to move forward or
+ stay at each decoder timestep.
+    Attention windowing is an inductive prior that prevents the model from attending to previous and future timesteps
+ beyond a certain window.
Args:
query_dim (int): number of channels in the query tensor.
diff --git a/TTS/tts/models/__init__.py b/TTS/tts/models/__init__.py
index 1236fa76..780f22cd 100644
--- a/TTS/tts/models/__init__.py
+++ b/TTS/tts/models/__init__.py
@@ -2,7 +2,7 @@ from TTS.tts.utils.text.symbols import make_symbols, parse_symbols
from TTS.utils.generic_utils import find_module
-def setup_model(config):
+def setup_model(config, speaker_manager: "SpeakerManager" = None):
print(" > Using model: {}".format(config.model))
# fetch the right model implementation.
if "base_model" in config and config["base_model"] is not None:
@@ -31,7 +31,7 @@ def setup_model(config):
config.model_params.num_chars = num_chars
if "model_args" in config:
config.model_args.num_chars = num_chars
- model = MyModel(config)
+ model = MyModel(config, speaker_manager=speaker_manager)
return model
diff --git a/TTS/tts/models/align_tts.py b/TTS/tts/models/align_tts.py
index 78fbaeab..2fc00b0b 100644
--- a/TTS/tts/models/align_tts.py
+++ b/TTS/tts/models/align_tts.py
@@ -1,5 +1,4 @@
from dataclasses import dataclass, field
-from typing import Dict, Tuple
import torch
from coqpit import Coqpit
@@ -12,8 +11,8 @@ from TTS.tts.layers.feed_forward.encoder import Encoder
from TTS.tts.layers.generic.pos_encoding import PositionalEncoding
from TTS.tts.models.base_tts import BaseTTS
from TTS.tts.utils.helpers import generate_path, maximum_path, sequence_mask
+from TTS.tts.utils.speakers import SpeakerManager
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
-from TTS.utils.audio import AudioProcessor
from TTS.utils.io import load_fsspec
@@ -93,7 +92,7 @@ class AlignTTS(BaseTTS):
differently based on your requirements using ```encoder_type``` and ```decoder_type``` parameters.
Examples:
- >>> from TTS.tts.configs import AlignTTSConfig
+ >>> from TTS.tts.configs.align_tts_config import AlignTTSConfig
>>> config = AlignTTSConfig()
>>> model = AlignTTS(config)
@@ -101,9 +100,10 @@ class AlignTTS(BaseTTS):
# pylint: disable=dangerous-default-value
- def __init__(self, config: Coqpit):
+ def __init__(self, config: Coqpit, speaker_manager: SpeakerManager = None):
- super().__init__()
+ super().__init__(config)
+ self.speaker_manager = speaker_manager
self.config = config
self.phase = -1
self.length_scale = (
@@ -360,9 +360,7 @@ class AlignTTS(BaseTTS):
return outputs, loss_dict
- def train_log(
- self, ap: AudioProcessor, batch: dict, outputs: dict
- ) -> Tuple[Dict, Dict]: # pylint: disable=no-self-use
+ def _create_logs(self, batch, outputs, ap): # pylint: disable=no-self-use
model_outputs = outputs["model_outputs"]
alignments = outputs["alignments"]
mel_input = batch["mel_input"]
@@ -381,11 +379,22 @@ class AlignTTS(BaseTTS):
train_audio = ap.inv_melspectrogram(pred_spec.T)
return figures, {"audio": train_audio}
+ def train_log(
+ self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
+ ) -> None: # pylint: disable=no-self-use
+ ap = assets["audio_processor"]
+ figures, audios = self._create_logs(batch, outputs, ap)
+ logger.train_figures(steps, figures)
+ logger.train_audios(steps, audios, ap.sample_rate)
+
def eval_step(self, batch: dict, criterion: nn.Module):
return self.train_step(batch, criterion)
- def eval_log(self, ap: AudioProcessor, batch: dict, outputs: dict):
- return self.train_log(ap, batch, outputs)
+ def eval_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None:
+ ap = assets["audio_processor"]
+ figures, audios = self._create_logs(batch, outputs, ap)
+ logger.eval_figures(steps, figures)
+ logger.eval_audios(steps, audios, ap.sample_rate)
def load_checkpoint(
self, config, checkpoint_path, eval=False
diff --git a/TTS/tts/models/base_tacotron.py b/TTS/tts/models/base_tacotron.py
index 8cfd750d..ca8f3bb9 100644
--- a/TTS/tts/models/base_tacotron.py
+++ b/TTS/tts/models/base_tacotron.py
@@ -1,59 +1,26 @@
import copy
from abc import abstractmethod
-from dataclasses import dataclass
-from typing import Dict, List
+from typing import Dict
import torch
-from coqpit import MISSING, Coqpit
+from coqpit import Coqpit
from torch import nn
from TTS.tts.layers.losses import TacotronLoss
from TTS.tts.models.base_tts import BaseTTS
from TTS.tts.utils.helpers import sequence_mask
-from TTS.tts.utils.speakers import SpeakerManager, get_speaker_manager
-from TTS.tts.utils.text import make_symbols
from TTS.utils.generic_utils import format_aux_input
from TTS.utils.io import load_fsspec
from TTS.utils.training import gradual_training_scheduler
-@dataclass
-class BaseTacotronArgs(Coqpit):
- """TODO: update Tacotron configs using it"""
-
- num_chars: int = MISSING
- num_speakers: int = MISSING
- r: int = MISSING
- out_channels: int = 80
- decoder_output_dim: int = 80
- attn_type: str = "original"
- attn_win: bool = False
- attn_norm: str = "softmax"
- prenet_type: str = "original"
- prenet_dropout: bool = True
- prenet_dropout_at_inference: bool = False
- forward_attn: bool = False
- trans_agent: bool = False
- forward_attn_mask: bool = False
- location_attn: bool = True
- attn_K: int = 5
- separate_stopnet: bool = True
- bidirectional_decoder: bool = False
- double_decoder_consistency: bool = False
- ddc_r: int = None
- encoder_in_features: int = 512
- decoder_in_features: int = 512
- d_vector_dim: int = None
- use_gst: bool = False
- gst: bool = None
- gradual_training: bool = None
-
-
class BaseTacotron(BaseTTS):
- def __init__(self, config: Coqpit):
- """Abstract Tacotron class"""
- super().__init__()
+ """Base class shared by Tacotron and Tacotron2"""
+ def __init__(self, config: Coqpit):
+ super().__init__(config)
+
+ # pass all config fields as class attributes
for key in config:
setattr(self, key, config[key])
@@ -78,6 +45,7 @@ class BaseTacotron(BaseTTS):
@staticmethod
def _format_aux_input(aux_input: Dict) -> Dict:
+ """Set missing fields to their default values"""
if aux_input:
return format_aux_input({"d_vectors": None, "speaker_ids": None}, aux_input)
return None
@@ -86,14 +54,12 @@ class BaseTacotron(BaseTTS):
# INIT FUNCTIONS
#############################
- def _init_states(self):
- self.embedded_speakers = None
- self.embedded_speakers_projected = None
-
def _init_backward_decoder(self):
+ """Init the backward decoder for Forward-Backward decoding."""
self.decoder_backward = copy.deepcopy(self.decoder)
def _init_coarse_decoder(self):
+ """Init the coarse decoder for Double-Decoder Consistency."""
self.coarse_decoder = copy.deepcopy(self.decoder)
self.coarse_decoder.r_init = self.ddc_r
self.coarse_decoder.set_r(self.ddc_r)
@@ -113,6 +79,13 @@ class BaseTacotron(BaseTTS):
def load_checkpoint(
self, config, checkpoint_path, eval=False
): # pylint: disable=unused-argument, redefined-builtin
+ """Load model checkpoint and set up internals.
+
+ Args:
+ config (Coqpit): model configuration.
+ checkpoint_path (str): path to checkpoint file.
+ eval (bool): whether to load model for evaluation.
+ """
state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"))
self.load_state_dict(state["model"])
# TODO: set r in run-time by taking it from the new config
@@ -131,61 +104,9 @@ class BaseTacotron(BaseTTS):
assert not self.training
def get_criterion(self) -> nn.Module:
+ """Get the model criterion used in training."""
return TacotronLoss(self.config)
- @staticmethod
- def get_characters(config: Coqpit) -> str:
- # TODO: implement CharacterProcessor
- if config.characters is not None:
- symbols, phonemes = make_symbols(**config.characters)
- else:
- from TTS.tts.utils.text.symbols import ( # pylint: disable=import-outside-toplevel
- parse_symbols,
- phonemes,
- symbols,
- )
-
- config.characters = parse_symbols()
- model_characters = phonemes if config.use_phonemes else symbols
- return model_characters, config
-
- @staticmethod
- def get_speaker_manager(config: Coqpit, restore_path: str, data: List, out_path: str = None) -> SpeakerManager:
- return get_speaker_manager(config, restore_path, data, out_path)
-
- def get_aux_input(self, **kwargs) -> Dict:
- """Compute Tacotron's auxiliary inputs based on model config.
- - speaker d_vector
- - style wav for GST
- - speaker ID for speaker embedding
- """
- # setup speaker_id
- if self.config.use_speaker_embedding:
- speaker_id = kwargs.get("speaker_id", 0)
- else:
- speaker_id = None
- # setup d_vector
- d_vector = (
- self.speaker_manager.get_d_vectors_by_speaker(self.speaker_manager.speaker_names[0])
- if self.config.use_d_vector_file and self.config.use_speaker_embedding
- else None
- )
- # setup style_mel
- if "style_wav" in kwargs:
- style_wav = kwargs["style_wav"]
- elif self.config.has("gst_style_input"):
- style_wav = self.config.gst_style_input
- else:
- style_wav = None
- if style_wav is None and "use_gst" in self.config and self.config.use_gst:
- # inicialize GST with zero dict.
- style_wav = {}
- print("WARNING: You don't provided a gst style wav, for this reason we use a zero tensor!")
- for i in range(self.config.gst["gst_num_style_tokens"]):
- style_wav[str(i)] = 0
- aux_inputs = {"speaker_id": speaker_id, "style_wav": style_wav, "d_vector": d_vector}
- return aux_inputs
-
#############################
# COMMON COMPUTE FUNCTIONS
#############################
@@ -231,15 +152,6 @@ class BaseTacotron(BaseTTS):
# EMBEDDING FUNCTIONS
#############################
- def compute_speaker_embedding(self, speaker_ids):
- """Compute speaker embedding vectors"""
- if hasattr(self, "speaker_embedding") and speaker_ids is None:
- raise RuntimeError(" [!] Model has speaker embedding layer but speaker_id is not provided")
- if hasattr(self, "speaker_embedding") and speaker_ids is not None:
- self.embedded_speakers = self.speaker_embedding(speaker_ids).unsqueeze(1)
- if hasattr(self, "speaker_project_mel") and speaker_ids is not None:
- self.embedded_speakers_projected = self.speaker_project_mel(self.embedded_speakers).squeeze(1)
-
def compute_gst(self, inputs, style_input, speaker_embedding=None):
"""Compute global style token"""
if isinstance(style_input, dict):
diff --git a/TTS/tts/models/base_tts.py b/TTS/tts/models/base_tts.py
index 06c7cb2b..b77c1e23 100644
--- a/TTS/tts/models/base_tts.py
+++ b/TTS/tts/models/base_tts.py
@@ -1,4 +1,5 @@
import os
+import random
from typing import Dict, List, Tuple
import torch
@@ -9,20 +10,20 @@ from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler
from TTS.model import BaseModel
-from TTS.tts.datasets import TTSDataset
+from TTS.tts.configs.shared_configs import CharactersConfig
+from TTS.tts.datasets.dataset import TTSDataset
from TTS.tts.utils.speakers import SpeakerManager, get_speaker_manager
from TTS.tts.utils.synthesis import synthesis
from TTS.tts.utils.text import make_symbols
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
-from TTS.utils.audio import AudioProcessor
# pylint: skip-file
class BaseTTS(BaseModel):
- """Abstract `tts` class. Every new `tts` model must inherit this.
+ """Base `tts` class. Every new `tts` model must inherit this.
- It defines `tts` specific functions on top of `Model`.
+ It defines common `tts` specific functions on top of `Model` implementation.
Notes on input/output tensor shapes:
Any input or output tensor of the model must be shaped as
@@ -32,6 +33,30 @@ class BaseTTS(BaseModel):
- 1D tensors `batch x 1`
"""
+ def _set_model_args(self, config: Coqpit):
+ """Setup model args based on the config type.
+
+ If the config is for training with a name like "*Config", then the model args are embedded in the
+ config.model_args
+
+ If the config is for the model with a name like "*Args", then we assign them directly.
+ """
+ # don't use isinstance here to avoid a recursive import
+ if "Config" in config.__class__.__name__:
+ if "characters" in config:
+ _, self.config, num_chars = self.get_characters(config)
+ self.config.num_chars = num_chars
+ if hasattr(self.config, "model_args"):
+ config.model_args.num_chars = num_chars
+ self.args = self.config.model_args
+ else:
+ self.config = config
+ self.args = config.model_args
+ elif "Args" in config.__class__.__name__:
+ self.args = config
+ else:
+ raise ValueError("config must be either a *Config or *Args")
+
@staticmethod
def get_characters(config: Coqpit) -> str:
# TODO: implement CharacterProcessor
@@ -40,7 +65,7 @@ class BaseTTS(BaseModel):
else:
from TTS.tts.utils.text.symbols import parse_symbols, phonemes, symbols
- config.characters = parse_symbols()
+ config.characters = CharactersConfig(**parse_symbols())
model_characters = phonemes if config.use_phonemes else symbols
num_chars = len(model_characters) + getattr(config, "add_blank", False)
return model_characters, config, num_chars
@@ -48,35 +73,18 @@ class BaseTTS(BaseModel):
def get_speaker_manager(config: Coqpit, restore_path: str, data: List, out_path: str = None) -> SpeakerManager:
return get_speaker_manager(config, restore_path, data, out_path)
- def init_multispeaker(self, config: Coqpit, data: List = None):
- """Initialize a speaker embedding layer if needen and define expected embedding channel size for defining
- `in_channels` size of the connected layers.
-
- This implementation yields 3 possible outcomes:
-
- 1. If `config.use_speaker_embedding` and `config.use_d_vector_file are False, do nothing.
- 2. If `config.use_d_vector_file` is True, set expected embedding channel size to `config.d_vector_dim` or 512.
- 3. If `config.use_speaker_embedding`, initialize a speaker embedding layer with channel size of
- `config.d_vector_dim` or 512.
-
- You can override this function for new models.0
+ def init_multispeaker(self, config: Coqpit):
+ """Init speaker embedding layer if `use_speaker_embedding` is True and set the expected speaker embedding
+ vector dimension in the network. If model uses d-vectors, then it only sets the expected dimension.
Args:
config (Coqpit): Model configuration.
- data (List, optional): Dataset items to infer number of speakers. Defaults to None.
"""
- # init speaker manager
- self.speaker_manager = get_speaker_manager(config, data=data)
-
- # set number of speakers - if num_speakers is set in config, use it, otherwise use speaker_manager
- if data is not None or self.speaker_manager.speaker_ids:
+ # set number of speakers
+ if self.speaker_manager is not None:
self.num_speakers = self.speaker_manager.num_speakers
- else:
- self.num_speakers = (
- config.num_speakers
- if "num_speakers" in config and config.num_speakers != 0
- else self.speaker_manager.num_speakers
- )
+ elif hasattr(config, "num_speakers"):
+ self.num_speakers = config.num_speakers
# set ultimate speaker embedding size
if config.use_speaker_embedding or config.use_d_vector_file:
@@ -85,13 +93,10 @@ class BaseTTS(BaseModel):
)
# init speaker embedding layer
if config.use_speaker_embedding and not config.use_d_vector_file:
+ print(" > Init speaker_embedding layer.")
self.speaker_embedding = nn.Embedding(self.num_speakers, self.embedded_speaker_dim)
self.speaker_embedding.weight.data.normal_(0, 0.3)
- def get_aux_input(self, **kwargs) -> Dict:
- """Prepare and return `aux_input` used by `forward()`"""
- return {"speaker_id": None, "style_wav": None, "d_vector": None}
-
def format_batch(self, batch: Dict) -> Dict:
"""Generic batch formatting for `TTSDataset`.
@@ -169,7 +174,7 @@ class BaseTTS(BaseModel):
def get_data_loader(
self,
config: Coqpit,
- ap: AudioProcessor,
+ assets: Dict,
is_eval: bool,
data_items: List,
verbose: bool,
@@ -179,14 +184,12 @@ class BaseTTS(BaseModel):
if is_eval and not config.run_eval:
loader = None
else:
+ ap = assets["audio_processor"]
+
# setup multi-speaker attributes
- if hasattr(self, "speaker_manager"):
+ if hasattr(self, "speaker_manager") and self.speaker_manager is not None:
speaker_id_mapping = self.speaker_manager.speaker_ids if config.use_speaker_embedding else None
- d_vector_mapping = (
- self.speaker_manager.d_vectors
- if config.use_speaker_embedding and config.use_d_vector_file
- else None
- )
+ d_vector_mapping = self.speaker_manager.d_vectors if config.use_d_vector_file else None
else:
speaker_id_mapping = None
d_vector_mapping = None
@@ -219,9 +222,7 @@ class BaseTTS(BaseModel):
use_noise_augment=not is_eval,
verbose=verbose,
speaker_id_mapping=speaker_id_mapping,
- d_vector_mapping=d_vector_mapping
- if config.use_speaker_embedding and config.use_d_vector_file
- else None,
+ d_vector_mapping=d_vector_mapping if config.use_d_vector_file else None,
)
# pre-compute phonemes
@@ -280,19 +281,41 @@ class BaseTTS(BaseModel):
)
return loader
- def test_run(self, ap) -> Tuple[Dict, Dict]:
+ def _get_test_aux_input(
+ self,
+ ) -> Dict:
+
+ d_vector = None
+ if self.config.use_d_vector_file:
+ d_vector = [self.speaker_manager.d_vectors[name]["embedding"] for name in self.speaker_manager.d_vectors]
+ d_vector = (random.sample(sorted(d_vector), 1),)
+
+ aux_inputs = {
+ "speaker_id": None
+ if not self.config.use_speaker_embedding
+ else random.sample(sorted(self.speaker_manager.speaker_ids.values()), 1),
+ "d_vector": d_vector,
+ "style_wav": None, # TODO: handle GST style input
+ }
+ return aux_inputs
+
+ def test_run(self, assets: Dict) -> Tuple[Dict, Dict]:
"""Generic test run for `tts` models used by `Trainer`.
You can override this for a different behaviour.
+ Args:
+ assets (dict): A dict of training assets. For `tts` models, it must include `{'audio_processor': ap}`.
+
Returns:
Tuple[Dict, Dict]: Test figures and audios to be projected to Tensorboard.
"""
+ ap = assets["audio_processor"]
print(" | > Synthesizing test sentences.")
test_audios = {}
test_figures = {}
test_sentences = self.config.test_sentences
- aux_inputs = self.get_aux_input()
+ aux_inputs = self._get_test_aux_input()
for idx, sen in enumerate(test_sentences):
outputs_dict = synthesis(
self,
@@ -315,3 +338,17 @@ class BaseTTS(BaseModel):
outputs_dict["outputs"]["alignments"], output_fig=False
)
return test_figures, test_audios
+
+ def on_init_start(self, trainer):
+ """Save `speakers.json` at the beginning of the training and update the config.json with the
+ speakers.json file path."""
+ if self.speaker_manager is not None:
+ output_path = os.path.join(trainer.output_path, "speakers.json")
+ self.speaker_manager.save_speaker_ids_to_file(output_path)
+ trainer.config.speakers_file = output_path
+ # some models don't have `model_args` set
+ if hasattr(trainer.config, "model_args"):
+ trainer.config.model_args.speakers_file = output_path
+ trainer.config.save_json(os.path.join(trainer.output_path, "config.json"))
+ print(f" > `speakers.json` is saved to {output_path}.")
+ print(" > `speakers_file` is updated in the config.json.")
diff --git a/TTS/tts/models/forward_tts.py b/TTS/tts/models/forward_tts.py
index 9dce36fa..b2c41df5 100644
--- a/TTS/tts/models/forward_tts.py
+++ b/TTS/tts/models/forward_tts.py
@@ -13,8 +13,8 @@ from TTS.tts.layers.generic.pos_encoding import PositionalEncoding
from TTS.tts.layers.glow_tts.duration_predictor import DurationPredictor
from TTS.tts.models.base_tts import BaseTTS
from TTS.tts.utils.helpers import average_over_durations, generate_path, maximum_path, sequence_mask
+from TTS.tts.utils.speakers import SpeakerManager
from TTS.tts.utils.visual import plot_alignment, plot_pitch, plot_spectrogram
-from TTS.utils.audio import AudioProcessor
@dataclass
@@ -32,9 +32,6 @@ class ForwardTTSArgs(Coqpit):
hidden_channels (int):
Number of base hidden channels of the model. Defaults to 512.
- num_speakers (int):
- Number of speakers for the speaker embedding layer. Defaults to 0.
-
use_aligner (bool):
Whether to use aligner network to learn the text to speech alignment or use pre-computed durations.
If set False, durations should be computed by `TTS/bin/compute_attention_masks.py` and path to the
@@ -87,12 +84,6 @@ class ForwardTTSArgs(Coqpit):
decoder_params (str):
Parameters of the decoder module. Defaults to ```{"hidden_channels_ffn": 1024, "num_heads": 1, "num_layers": 6, "dropout_p": 0.1}```
- use_d_vetor (bool):
- Whether to use precomputed d-vectors for multi-speaker training. Defaults to False.
-
- d_vector_dim (int):
- Number of channels of the d-vectors. Defaults to 0.
-
detach_duration_predictor (bool):
Detach the input to the duration predictor from the earlier computation graph so that the duraiton loss
does not pass to the earlier layers. Defaults to True.
@@ -100,12 +91,26 @@ class ForwardTTSArgs(Coqpit):
max_duration (int):
Maximum duration accepted by the model. Defaults to 75.
+ num_speakers (int):
+ Number of speakers for the speaker embedding layer. Defaults to 0.
+
+ speakers_file (str):
+ Path to the speaker mapping file for the Speaker Manager. Defaults to None.
+
+ speaker_embedding_channels (int):
+ Number of speaker embedding channels. Defaults to 256.
+
+ use_d_vector_file (bool):
+ Enable/Disable the use of d-vectors for multi-speaker training. Defaults to False.
+
+ d_vector_dim (int):
+ Number of d-vector channels. Defaults to 0.
+
"""
num_chars: int = None
out_channels: int = 80
hidden_channels: int = 384
- num_speakers: int = 0
use_aligner: bool = True
use_pitch: bool = True
pitch_predictor_hidden_channels: int = 256
@@ -126,10 +131,14 @@ class ForwardTTSArgs(Coqpit):
decoder_params: dict = field(
default_factory=lambda: {"hidden_channels_ffn": 1024, "num_heads": 1, "num_layers": 6, "dropout_p": 0.1}
)
- use_d_vector: bool = False
- d_vector_dim: int = 0
detach_duration_predictor: bool = False
max_duration: int = 75
+ num_speakers: int = 1
+ use_speaker_embedding: bool = False
+ speakers_file: str = None
+ use_d_vector_file: bool = False
+ d_vector_dim: int = None
+ d_vector_file: str = None
class ForwardTTS(BaseTTS):
@@ -151,6 +160,8 @@ class ForwardTTS(BaseTTS):
Args:
config (Coqpit): Model coqpit class.
+ speaker_manager (SpeakerManager): Speaker manager for multi-speaker training. Only used for multi-speaker models.
+ Defaults to None.
Examples:
>>> from TTS.tts.models.fast_pitch import ForwardTTS, ForwardTTSArgs
@@ -159,26 +170,12 @@ class ForwardTTS(BaseTTS):
"""
# pylint: disable=dangerous-default-value
- def __init__(self, config: Coqpit):
+ def __init__(self, config: Coqpit, speaker_manager: SpeakerManager = None):
- super().__init__()
+ super().__init__(config)
- # don't use isintance not to import recursively
- if "Config" in config.__class__.__name__:
- if "characters" in config:
- # loading from FasrPitchConfig
- _, self.config, num_chars = self.get_characters(config)
- config.model_args.num_chars = num_chars
- self.args = self.config.model_args
- else:
- # loading from ForwardTTSArgs
- self.config = config
- self.args = config.model_args
- elif isinstance(config, ForwardTTSArgs):
- self.args = config
- self.config = config
- else:
- raise ValueError("config must be either a *Config or ForwardTTSArgs")
+ self.speaker_manager = speaker_manager
+ self.init_multispeaker(config)
self.max_duration = self.args.max_duration
self.use_aligner = self.args.use_aligner
@@ -196,7 +193,7 @@ class ForwardTTS(BaseTTS):
self.args.hidden_channels,
self.args.encoder_type,
self.args.encoder_params,
- self.args.d_vector_dim,
+ self.embedded_speaker_dim,
)
if self.args.positional_encoding:
@@ -210,7 +207,7 @@ class ForwardTTS(BaseTTS):
)
self.duration_predictor = DurationPredictor(
- self.args.hidden_channels + self.args.d_vector_dim,
+ self.args.hidden_channels + self.embedded_speaker_dim,
self.args.duration_predictor_hidden_channels,
self.args.duration_predictor_kernel_size,
self.args.duration_predictor_dropout_p,
@@ -218,7 +215,7 @@ class ForwardTTS(BaseTTS):
if self.args.use_pitch:
self.pitch_predictor = DurationPredictor(
- self.args.hidden_channels + self.args.d_vector_dim,
+ self.args.hidden_channels + self.embedded_speaker_dim,
self.args.pitch_predictor_hidden_channels,
self.args.pitch_predictor_kernel_size,
self.args.pitch_predictor_dropout_p,
@@ -230,19 +227,37 @@ class ForwardTTS(BaseTTS):
padding=int((self.args.pitch_embedding_kernel_size - 1) / 2),
)
- if self.args.num_speakers > 1 and not self.args.use_d_vector:
- # speaker embedding layer
- self.emb_g = nn.Embedding(self.args.num_speakers, self.args.d_vector_dim)
- nn.init.uniform_(self.emb_g.weight, -0.1, 0.1)
-
- if self.args.d_vector_dim > 0 and self.args.d_vector_dim != self.args.hidden_channels:
- self.proj_g = nn.Conv1d(self.args.d_vector_dim, self.args.hidden_channels, 1)
-
if self.args.use_aligner:
self.aligner = AlignmentNetwork(
in_query_channels=self.args.out_channels, in_key_channels=self.args.hidden_channels
)
+ def init_multispeaker(self, config: Coqpit):
+ """Init for multi-speaker training.
+
+ Args:
+ config (Coqpit): Model configuration.
+ """
+ self.embedded_speaker_dim = 0
+ # init speaker manager
+ if self.speaker_manager is None and (config.use_d_vector_file or config.use_speaker_embedding):
+ raise ValueError(
+ " > SpeakerManager is not provided. You must provide the SpeakerManager before initializing a multi-speaker model."
+ )
+ # set number of speakers
+ if self.speaker_manager is not None:
+ self.num_speakers = self.speaker_manager.num_speakers
+ # init d-vector embedding
+ if config.use_d_vector_file:
+ self.embedded_speaker_dim = config.d_vector_dim
+ if self.args.d_vector_dim != self.args.hidden_channels:
+ self.proj_g = nn.Conv1d(self.args.d_vector_dim, self.args.hidden_channels, 1)
+ # init speaker embedding layer
+ if config.use_speaker_embedding and not config.use_d_vector_file:
+ print(" > Init speaker_embedding layer.")
+ self.emb_g = nn.Embedding(self.args.num_speakers, self.args.hidden_channels)
+ nn.init.uniform_(self.emb_g.weight, -0.1, 0.1)
+
@staticmethod
def generate_attn(dr, x_mask, y_mask=None):
"""Generate an attention mask from the durations.
@@ -307,18 +322,6 @@ class ForwardTTS(BaseTTS):
o_dr = torch.round(o_dr)
return o_dr
- @staticmethod
- def _concat_speaker_embedding(o_en, g):
- g_exp = g.expand(-1, -1, o_en.size(-1)) # [B, C, T_en]
- o_en = torch.cat([o_en, g_exp], 1)
- return o_en
-
- def _sum_speaker_embedding(self, x, g):
- # project g to decoder dim.
- if hasattr(self, "proj_g"):
- g = self.proj_g(g)
- return x + g
-
def _forward_encoder(
self, x: torch.LongTensor, x_mask: torch.FloatTensor, g: torch.FloatTensor = None
) -> Tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]:
@@ -327,7 +330,7 @@ class ForwardTTS(BaseTTS):
1. Embed speaker IDs if multi-speaker mode.
2. Embed character sequences.
3. Run the encoder network.
- 4. Concat speaker embedding to the encoder output for the duration predictor.
+ 4. Sum encoder outputs and speaker embeddings
Args:
x (torch.LongTensor): Input sequence IDs.
@@ -345,19 +348,18 @@ class ForwardTTS(BaseTTS):
- g: :math:`(B, C)`
"""
if hasattr(self, "emb_g"):
- g = nn.functional.normalize(self.emb_g(g)) # [B, C, 1]
+ g = self.emb_g(g) # [B, C, 1]
if g is not None:
g = g.unsqueeze(-1)
# [B, T, C]
x_emb = self.emb(x)
# encoder pass
o_en = self.encoder(torch.transpose(x_emb, 1, -1), x_mask)
- # speaker conditioning for duration predictor
+ # speaker conditioning
+ # TODO: try different ways of conditioning
if g is not None:
- o_en_dp = self._concat_speaker_embedding(o_en, g)
- else:
- o_en_dp = o_en
- return o_en, o_en_dp, x_mask, g, x_emb
+ o_en = o_en + g
+ return o_en, x_mask, g, x_emb
def _forward_decoder(
self,
@@ -391,9 +393,6 @@ class ForwardTTS(BaseTTS):
# positional encoding
if hasattr(self, "pos_encoder"):
o_en_ex = self.pos_encoder(o_en_ex, y_mask)
- # speaker embedding
- if g is not None:
- o_en_ex = self._sum_speaker_embedding(o_en_ex, g)
# decoder pass
o_de = self.decoder(o_en_ex, y_mask, g=g)
return o_de.transpose(1, 2), attn.transpose(1, 2)
@@ -475,6 +474,19 @@ class ForwardTTS(BaseTTS):
alignment_soft = alignment_soft.squeeze(1).transpose(1, 2)
return o_alignment_dur, alignment_soft, alignment_logprob, alignment_mas
+ def _set_speaker_input(self, aux_input: Dict):
+ d_vectors = aux_input.get("d_vectors", None)
+ speaker_ids = aux_input.get("speaker_ids", None)
+
+ if d_vectors is not None and speaker_ids is not None:
+ raise ValueError("[!] Cannot use d-vectors and speaker-ids together.")
+
+ if speaker_ids is not None and not hasattr(self, "emb_g"):
+ raise ValueError("[!] Cannot use speaker-ids without enabling speaker embedding.")
+
+ g = speaker_ids if speaker_ids is not None else d_vectors
+ return g
+
def forward(
self,
x: torch.LongTensor,
@@ -505,17 +517,17 @@ class ForwardTTS(BaseTTS):
- g: :math:`[B, C]`
- pitch: :math:`[B, 1, T]`
"""
- g = aux_input["d_vectors"] if "d_vectors" in aux_input else None
+ g = self._set_speaker_input(aux_input)
# compute sequence masks
y_mask = torch.unsqueeze(sequence_mask(y_lengths, None), 1).float()
x_mask = torch.unsqueeze(sequence_mask(x_lengths, x.shape[1]), 1).float()
# encoder pass
- o_en, o_en_dp, x_mask, g, x_emb = self._forward_encoder(x, x_mask, g)
+ o_en, x_mask, g, x_emb = self._forward_encoder(x, x_mask, g)
# duration predictor pass
if self.args.detach_duration_predictor:
- o_dr_log = self.duration_predictor(o_en_dp.detach(), x_mask)
+ o_dr_log = self.duration_predictor(o_en.detach(), x_mask)
else:
- o_dr_log = self.duration_predictor(o_en_dp, x_mask)
+ o_dr_log = self.duration_predictor(o_en, x_mask)
o_dr = torch.clamp(torch.exp(o_dr_log) - 1, 0, self.max_duration)
# generate attn mask from predicted durations
o_attn = self.generate_attn(o_dr.squeeze(1), x_mask)
@@ -535,10 +547,12 @@ class ForwardTTS(BaseTTS):
o_pitch = None
avg_pitch = None
if self.args.use_pitch:
- o_pitch_emb, o_pitch, avg_pitch = self._forward_pitch_predictor(o_en_dp, x_mask, pitch, dr)
+ o_pitch_emb, o_pitch, avg_pitch = self._forward_pitch_predictor(o_en, x_mask, pitch, dr)
o_en = o_en + o_pitch_emb
# decoder pass
- o_de, attn = self._forward_decoder(o_en, dr, x_mask, y_lengths, g=g)
+ o_de, attn = self._forward_decoder(
+ o_en, dr, x_mask, y_lengths, g=None
+ ) # TODO: maybe pass speaker embedding (g) too
outputs = {
"model_outputs": o_de, # [B, T, C]
"durations_log": o_dr_log.squeeze(1), # [B, T]
@@ -569,22 +583,22 @@ class ForwardTTS(BaseTTS):
- x_lengths: [B]
- g: [B, C]
"""
- g = aux_input["d_vectors"] if "d_vectors" in aux_input else None
+ g = self._set_speaker_input(aux_input)
x_lengths = torch.tensor(x.shape[1:2]).to(x.device)
x_mask = torch.unsqueeze(sequence_mask(x_lengths, x.shape[1]), 1).to(x.dtype).float()
# encoder pass
- o_en, o_en_dp, x_mask, g, _ = self._forward_encoder(x, x_mask, g)
+ o_en, x_mask, g, _ = self._forward_encoder(x, x_mask, g)
# duration predictor pass
- o_dr_log = self.duration_predictor(o_en_dp, x_mask)
+ o_dr_log = self.duration_predictor(o_en, x_mask)
o_dr = self.format_durations(o_dr_log, x_mask).squeeze(1)
y_lengths = o_dr.sum(1)
# pitch predictor pass
o_pitch = None
if self.args.use_pitch:
- o_pitch_emb, o_pitch = self._forward_pitch_predictor(o_en_dp, x_mask)
+ o_pitch_emb, o_pitch = self._forward_pitch_predictor(o_en, x_mask)
o_en = o_en + o_pitch_emb
# decoder pass
- o_de, attn = self._forward_decoder(o_en, o_dr, x_mask, y_lengths, g=g)
+ o_de, attn = self._forward_decoder(o_en, o_dr, x_mask, y_lengths, g=None)
outputs = {
"model_outputs": o_de,
"alignments": attn,
@@ -634,7 +648,8 @@ class ForwardTTS(BaseTTS):
return outputs, loss_dict
- def train_log(self, ap: AudioProcessor, batch: dict, outputs: dict): # pylint: disable=no-self-use
+ def _create_logs(self, batch, outputs, ap):
+ """Create common logger outputs."""
model_outputs = outputs["model_outputs"]
alignments = outputs["alignments"]
mel_input = batch["mel_input"]
@@ -674,11 +689,22 @@ class ForwardTTS(BaseTTS):
train_audio = ap.inv_melspectrogram(pred_spec.T)
return figures, {"audio": train_audio}
+ def train_log(
+ self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
+ ) -> None: # pylint: disable=no-self-use
+ ap = assets["audio_processor"]
+ figures, audios = self._create_logs(batch, outputs, ap)
+ logger.train_figures(steps, figures)
+ logger.train_audios(steps, audios, ap.sample_rate)
+
def eval_step(self, batch: dict, criterion: nn.Module):
return self.train_step(batch, criterion)
- def eval_log(self, ap: AudioProcessor, batch: dict, outputs: dict):
- return self.train_log(ap, batch, outputs)
+ def eval_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None:
+ ap = assets["audio_processor"]
+ figures, audios = self._create_logs(batch, outputs, ap)
+ logger.eval_figures(steps, figures)
+ logger.eval_audios(steps, audios, ap.sample_rate)
def load_checkpoint(
self, config, checkpoint_path, eval=False
diff --git a/TTS/tts/models/glow_tts.py b/TTS/tts/models/glow_tts.py
index 2e94659e..c1e4c2ac 100644
--- a/TTS/tts/models/glow_tts.py
+++ b/TTS/tts/models/glow_tts.py
@@ -1,19 +1,20 @@
import math
+from typing import Dict, Tuple, Union
import torch
+from coqpit import Coqpit
from torch import nn
from torch.cuda.amp.autocast_mode import autocast
from torch.nn import functional as F
-from TTS.tts.configs import GlowTTSConfig
+from TTS.tts.configs.glow_tts_config import GlowTTSConfig
from TTS.tts.layers.glow_tts.decoder import Decoder
from TTS.tts.layers.glow_tts.encoder import Encoder
from TTS.tts.models.base_tts import BaseTTS
from TTS.tts.utils.helpers import generate_path, maximum_path, sequence_mask
-from TTS.tts.utils.speakers import get_speaker_manager
+from TTS.tts.utils.speakers import SpeakerManager
from TTS.tts.utils.synthesis import synthesis
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
-from TTS.utils.audio import AudioProcessor
from TTS.utils.io import load_fsspec
@@ -38,16 +39,18 @@ class GlowTTS(BaseTTS):
Check :class:`TTS.tts.configs.glow_tts_config.GlowTTSConfig` for class arguments.
Examples:
- >>> from TTS.tts.configs import GlowTTSConfig
+ >>> from TTS.tts.configs.glow_tts_config import GlowTTSConfig
>>> from TTS.tts.models.glow_tts import GlowTTS
>>> config = GlowTTSConfig()
>>> model = GlowTTS(config)
"""
- def __init__(self, config: GlowTTSConfig):
+ def __init__(self, config: GlowTTSConfig, speaker_manager: SpeakerManager = None):
- super().__init__()
+ super().__init__(config)
+
+ self.speaker_manager = speaker_manager
# pass all config fields to `self`
# for fewer code change
@@ -58,19 +61,10 @@ class GlowTTS(BaseTTS):
_, self.config, self.num_chars = self.get_characters(config)
self.decoder_output_dim = config.out_channels
+ # init multi-speaker layers if necessary
self.init_multispeaker(config)
- # if is a multispeaker and c_in_channels is 0, set to 256
- self.c_in_channels = 0
- if self.num_speakers > 1:
- if self.d_vector_dim:
- self.c_in_channels = self.d_vector_dim
- elif self.c_in_channels == 0 and not self.d_vector_dim:
- # TODO: make this adjustable
- self.c_in_channels = 256
-
self.run_data_dep_init = config.data_dep_init_steps > 0
-
self.encoder = Encoder(
self.num_chars,
out_channels=self.out_channels,
@@ -98,28 +92,35 @@ class GlowTTS(BaseTTS):
c_in_channels=self.c_in_channels,
)
- def init_multispeaker(self, config: "Coqpit", data: list = None) -> None:
- """Initialize multi-speaker modules of a model. A model can be trained either with a speaker embedding layer
- or with external `d_vectors` computed from a speaker encoder model.
-
- If you need a different behaviour, override this function for your model.
+ def init_multispeaker(self, config: Coqpit):
+ """Init speaker embedding layer if `use_speaker_embedding` is True and set the expected speaker embedding
+ vector dimension in the network. If model uses d-vectors, then it only sets the expected dimension.
Args:
config (Coqpit): Model configuration.
- data (List, optional): Dataset items to infer number of speakers. Defaults to None.
"""
+ self.embedded_speaker_dim = 0
# init speaker manager
- self.speaker_manager = get_speaker_manager(config, data=data)
- self.num_speakers = self.speaker_manager.num_speakers
- if config.use_d_vector_file:
- self.external_d_vector_dim = config.d_vector_dim
- else:
- self.external_d_vector_dim = 0
+ if self.speaker_manager is None and (self.use_speaker_embedding or self.use_d_vector_file):
+ raise ValueError(
+ " > SpeakerManager is not provided. You must provide the SpeakerManager before initializing a multi-speaker model."
+ )
+ # set number of speakers - if num_speakers is set in config, use it, otherwise use speaker_manager
+ if self.speaker_manager is not None:
+ self.num_speakers = self.speaker_manager.num_speakers
+ # set ultimate speaker embedding size
+ if config.use_speaker_embedding or config.use_d_vector_file:
+ self.embedded_speaker_dim = (
+ config.d_vector_dim if "d_vector_dim" in config and config.d_vector_dim is not None else 512
+ )
# init speaker embedding layer
if config.use_speaker_embedding and not config.use_d_vector_file:
- self.embedded_speaker_dim = self.c_in_channels
- self.emb_g = nn.Embedding(self.num_speakers, self.embedded_speaker_dim)
+ print(" > Init speaker_embedding layer.")
+ self.embedded_speaker_dim = self.hidden_channels_enc
+ self.emb_g = nn.Embedding(self.num_speakers, self.hidden_channels_enc)
nn.init.uniform_(self.emb_g.weight, -0.1, 0.1)
+ # set conditioning dimensions
+ self.c_in_channels = self.embedded_speaker_dim
@staticmethod
def compute_outputs(attn, o_mean, o_log_scale, x_mask):
@@ -146,6 +147,35 @@ class GlowTTS(BaseTTS):
if getattr(f, "set_ddi", False):
f.set_ddi(False)
+ def _set_speaker_input(self, aux_input: Dict):
+ if aux_input is None:
+ d_vectors = None
+ speaker_ids = None
+ else:
+ d_vectors = aux_input.get("d_vectors", None)
+ speaker_ids = aux_input.get("speaker_ids", None)
+
+ if d_vectors is not None and speaker_ids is not None:
+ raise ValueError("[!] Cannot use d-vectors and speaker-ids together.")
+
+ if speaker_ids is not None and not hasattr(self, "emb_g"):
+ raise ValueError("[!] Cannot use speaker-ids without enabling speaker embedding.")
+
+ g = speaker_ids if speaker_ids is not None else d_vectors
+ return g
+
+ def _speaker_embedding(self, aux_input: Dict) -> Union[torch.tensor, None]:
+ g = self._set_speaker_input(aux_input)
+ # speaker embedding
+ if g is not None:
+ if hasattr(self, "emb_g"):
+ # use speaker embedding layer
+ g = F.normalize(self.emb_g(g)).unsqueeze(-1) # [b, h, 1]
+ else:
+ # use d-vector
+ g = F.normalize(g).unsqueeze(-1) # [b, h, 1]
+ return g
+
def forward(
self, x, x_lengths, y, y_lengths=None, aux_input={"d_vectors": None, "speaker_ids": None}
): # pylint: disable=dangerous-default-value
@@ -161,12 +191,7 @@ class GlowTTS(BaseTTS):
y = y.transpose(1, 2)
y_max_length = y.size(2)
# norm speaker embeddings
- g = aux_input["d_vectors"] if aux_input is not None and "d_vectors" in aux_input else None
- if self.use_speaker_embedding or self.use_d_vector_file:
- if not self.use_d_vector_file:
- g = F.normalize(g).unsqueeze(-1)
- else:
- g = F.normalize(self.emb_g(g)).unsqueeze(-1) # [b, h, 1]
+ g = self._speaker_embedding(aux_input)
# embedding pass
o_mean, o_log_scale, o_dur_log, x_mask = self.encoder(x, x_lengths, g=g)
# drop redisual frames wrt num_squeeze and set y_lengths.
@@ -217,12 +242,7 @@ class GlowTTS(BaseTTS):
y = y.transpose(1, 2)
y_max_length = y.size(2)
# norm speaker embeddings
- g = aux_input["d_vectors"] if aux_input is not None and "d_vectors" in aux_input else None
- if self.use_speaker_embedding or self.use_d_vector_file:
- if not self.use_d_vector_file:
- g = F.normalize(g).unsqueeze(-1)
- else:
- g = F.normalize(self.emb_g(g)).unsqueeze(-1) # [b, h, 1]
+ g = self._speaker_embedding(aux_input)
# embedding pass
o_mean, o_log_scale, o_dur_log, x_mask = self.encoder(x, x_lengths, g=g)
# drop redisual frames wrt num_squeeze and set y_lengths.
@@ -272,22 +292,12 @@ class GlowTTS(BaseTTS):
"""
y = y.transpose(1, 2)
y_max_length = y.size(2)
- g = aux_input["d_vectors"] if aux_input is not None and "d_vectors" in aux_input else None
- # norm speaker embeddings
- if g is not None:
- if self.external_d_vector_dim:
- g = F.normalize(g).unsqueeze(-1)
- else:
- g = F.normalize(self.emb_g(g)).unsqueeze(-1) # [b, h, 1]
-
+ g = self._speaker_embedding(aux_input)
y_mask = torch.unsqueeze(sequence_mask(y_lengths, y_max_length), 1).to(y.dtype)
-
# decoder pass
z, logdet = self.decoder(y, y_mask, g=g, reverse=False)
-
# reverse decoder and predict
y, logdet = self.decoder(z, y_mask, g=g, reverse=True)
-
outputs = {}
outputs["model_outputs"] = y.transpose(1, 2)
outputs["logdet"] = logdet
@@ -298,19 +308,12 @@ class GlowTTS(BaseTTS):
self, x, aux_input={"x_lengths": None, "d_vectors": None, "speaker_ids": None}
): # pylint: disable=dangerous-default-value
x_lengths = aux_input["x_lengths"]
- g = aux_input["d_vectors"] if aux_input is not None and "d_vectors" in aux_input else None
-
- if g is not None:
- if self.d_vector_dim:
- g = F.normalize(g).unsqueeze(-1)
- else:
- g = F.normalize(self.emb_g(g)).unsqueeze(-1) # [b, h]
-
+ g = self._speaker_embedding(aux_input)
# embedding pass
o_mean, o_log_scale, o_dur_log, x_mask = self.encoder(x, x_lengths, g=g)
# compute output durations
w = (torch.exp(o_dur_log) - 1) * x_mask * self.length_scale
- w_ceil = torch.ceil(w)
+ w_ceil = torch.clamp_min(torch.ceil(w), 1)
y_lengths = torch.clamp_min(torch.sum(w_ceil, [1, 2]), 1).long()
y_max_length = None
# compute masks
@@ -387,17 +390,17 @@ class GlowTTS(BaseTTS):
)
return outputs, loss_dict
- def train_log(self, ap: AudioProcessor, batch: dict, outputs: dict): # pylint: disable=no-self-use
+ def _create_logs(self, batch, outputs, ap):
alignments = outputs["alignments"]
- text_input = batch["text_input"]
+ text_input = batch["text_input"][:1] if batch["text_input"] is not None else None
text_lengths = batch["text_lengths"]
mel_input = batch["mel_input"]
- d_vectors = batch["d_vectors"]
- speaker_ids = batch["speaker_ids"]
+ d_vectors = batch["d_vectors"][:1] if batch["d_vectors"] is not None else None
+ speaker_ids = batch["speaker_ids"][:1] if batch["speaker_ids"] is not None else None
# model runs reverse flow to predict spectrograms
pred_outputs = self.inference(
- text_input[:1],
+ text_input,
aux_input={"x_lengths": text_lengths[:1], "d_vectors": d_vectors, "speaker_ids": speaker_ids},
)
model_outputs = pred_outputs["model_outputs"]
@@ -416,15 +419,26 @@ class GlowTTS(BaseTTS):
train_audio = ap.inv_melspectrogram(pred_spec.T)
return figures, {"audio": train_audio}
+ def train_log(
+ self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
+ ) -> None: # pylint: disable=no-self-use
+ ap = assets["audio_processor"]
+ figures, audios = self._create_logs(batch, outputs, ap)
+ logger.train_figures(steps, figures)
+ logger.train_audios(steps, audios, ap.sample_rate)
+
@torch.no_grad()
def eval_step(self, batch: dict, criterion: nn.Module):
return self.train_step(batch, criterion)
- def eval_log(self, ap: AudioProcessor, batch: dict, outputs: dict):
- return self.train_log(ap, batch, outputs)
+ def eval_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None:
+ ap = assets["audio_processor"]
+ figures, audios = self._create_logs(batch, outputs, ap)
+ logger.eval_figures(steps, figures)
+ logger.eval_audios(steps, audios, ap.sample_rate)
@torch.no_grad()
- def test_run(self, ap):
+ def test_run(self, assets: Dict) -> Tuple[Dict, Dict]:
"""Generic test run for `tts` models used by `Trainer`.
You can override this for a different behaviour.
@@ -432,11 +446,12 @@ class GlowTTS(BaseTTS):
Returns:
Tuple[Dict, Dict]: Test figures and audios to be projected to Tensorboard.
"""
+ ap = assets["audio_processor"]
print(" | > Synthesizing test sentences.")
test_audios = {}
test_figures = {}
test_sentences = self.config.test_sentences
- aux_inputs = self.get_aux_input()
+ aux_inputs = self._get_test_aux_input()
if len(test_sentences) == 0:
print(" | [!] No test sentences provided.")
else:
diff --git a/TTS/tts/models/tacotron.py b/TTS/tts/models/tacotron.py
index 84a256d5..4e46d252 100644
--- a/TTS/tts/models/tacotron.py
+++ b/TTS/tts/models/tacotron.py
@@ -1,29 +1,34 @@
# coding: utf-8
-from typing import Dict, Tuple
-
import torch
from coqpit import Coqpit
from torch import nn
+from torch.cuda.amp.autocast_mode import autocast
from TTS.tts.layers.tacotron.gst_layers import GST
from TTS.tts.layers.tacotron.tacotron import Decoder, Encoder, PostCBHG
from TTS.tts.models.base_tacotron import BaseTacotron
from TTS.tts.utils.measures import alignment_diagonal_score
+from TTS.tts.utils.speakers import SpeakerManager
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
-from TTS.utils.audio import AudioProcessor
class Tacotron(BaseTacotron):
"""Tacotron as in https://arxiv.org/abs/1703.10135
It's an autoregressive encoder-attention-decoder-postnet architecture.
Check `TacotronConfig` for the arguments.
+
+ Args:
+ config (TacotronConfig): Configuration for the Tacotron model.
+ speaker_manager (SpeakerManager): Speaker manager to handle multi-speaker settings. Only use if the model is
+ a multi-speaker model. Defaults to None.
"""
- def __init__(self, config: Coqpit):
+ def __init__(self, config: Coqpit, speaker_manager: SpeakerManager = None):
super().__init__(config)
- chars, self.config = self.get_characters(config)
+ self.speaker_manager = speaker_manager
+ chars, self.config, _ = self.get_characters(config)
config.num_chars = self.num_chars = len(chars)
# pass all config fields to `self`
@@ -243,40 +248,47 @@ class Tacotron(BaseTacotron):
outputs = self.forward(text_input, text_lengths, mel_input, mel_lengths, aux_input)
# compute loss
- loss_dict = criterion(
- outputs["model_outputs"],
- outputs["decoder_outputs"],
- mel_input,
- linear_input,
- outputs["stop_tokens"],
- stop_targets,
- stop_target_lengths,
- mel_lengths,
- outputs["decoder_outputs_backward"],
- outputs["alignments"],
- alignment_lengths,
- outputs["alignments_backward"],
- text_lengths,
- )
+ with autocast(enabled=False): # use float32 for the criterion
+ loss_dict = criterion(
+ outputs["model_outputs"].float(),
+ outputs["decoder_outputs"].float(),
+ mel_input.float(),
+ linear_input.float(),
+ outputs["stop_tokens"].float(),
+ stop_targets.float(),
+ stop_target_lengths,
+ mel_lengths,
+ None if outputs["decoder_outputs_backward"] is None else outputs["decoder_outputs_backward"].float(),
+ outputs["alignments"].float(),
+ alignment_lengths,
+ None if outputs["alignments_backward"] is None else outputs["alignments_backward"].float(),
+ text_lengths,
+ )
# compute alignment error (the lower the better )
align_error = 1 - alignment_diagonal_score(outputs["alignments"])
loss_dict["align_error"] = align_error
return outputs, loss_dict
- def train_log(self, ap: AudioProcessor, batch: dict, outputs: dict) -> Tuple[Dict, Dict]:
+ def _create_logs(self, batch, outputs, ap):
postnet_outputs = outputs["model_outputs"]
+ decoder_outputs = outputs["decoder_outputs"]
alignments = outputs["alignments"]
alignments_backward = outputs["alignments_backward"]
mel_input = batch["mel_input"]
+ linear_input = batch["linear_input"]
- pred_spec = postnet_outputs[0].data.cpu().numpy()
- gt_spec = mel_input[0].data.cpu().numpy()
+ pred_linear_spec = postnet_outputs[0].data.cpu().numpy()
+ pred_mel_spec = decoder_outputs[0].data.cpu().numpy()
+ gt_linear_spec = linear_input[0].data.cpu().numpy()
+ gt_mel_spec = mel_input[0].data.cpu().numpy()
align_img = alignments[0].data.cpu().numpy()
figures = {
- "prediction": plot_spectrogram(pred_spec, ap, output_fig=False),
- "ground_truth": plot_spectrogram(gt_spec, ap, output_fig=False),
+ "pred_linear_spec": plot_spectrogram(pred_linear_spec, ap, output_fig=False),
+ "real_linear_spec": plot_spectrogram(gt_linear_spec, ap, output_fig=False),
+ "pred_mel_spec": plot_spectrogram(pred_mel_spec, ap, output_fig=False),
+ "real_mel_spec": plot_spectrogram(gt_mel_spec, ap, output_fig=False),
"alignment": plot_alignment(align_img, output_fig=False),
}
@@ -284,11 +296,22 @@ class Tacotron(BaseTacotron):
figures["alignment_backward"] = plot_alignment(alignments_backward[0].data.cpu().numpy(), output_fig=False)
# Sample audio
- train_audio = ap.inv_spectrogram(pred_spec.T)
- return figures, {"audio": train_audio}
+ audio = ap.inv_spectrogram(pred_linear_spec.T)
+ return figures, {"audio": audio}
- def eval_step(self, batch, criterion):
+ def train_log(
+ self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
+ ) -> None: # pylint: disable=no-self-use
+ ap = assets["audio_processor"]
+ figures, audios = self._create_logs(batch, outputs, ap)
+ logger.train_figures(steps, figures)
+ logger.train_audios(steps, audios, ap.sample_rate)
+
+ def eval_step(self, batch: dict, criterion: nn.Module):
return self.train_step(batch, criterion)
- def eval_log(self, ap, batch, outputs):
- return self.train_log(ap, batch, outputs)
+ def eval_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None:
+ ap = assets["audio_processor"]
+ figures, audios = self._create_logs(batch, outputs, ap)
+ logger.eval_figures(steps, figures)
+ logger.eval_audios(steps, audios, ap.sample_rate)
diff --git a/TTS/tts/models/tacotron2.py b/TTS/tts/models/tacotron2.py
index 39ef12a8..ead3bf2b 100644
--- a/TTS/tts/models/tacotron2.py
+++ b/TTS/tts/models/tacotron2.py
@@ -1,28 +1,50 @@
# coding: utf-8
-from typing import Dict, Tuple
+from typing import Dict
import torch
from coqpit import Coqpit
from torch import nn
+from torch.cuda.amp.autocast_mode import autocast
from TTS.tts.layers.tacotron.gst_layers import GST
from TTS.tts.layers.tacotron.tacotron2 import Decoder, Encoder, Postnet
from TTS.tts.models.base_tacotron import BaseTacotron
from TTS.tts.utils.measures import alignment_diagonal_score
+from TTS.tts.utils.speakers import SpeakerManager
from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
-from TTS.utils.audio import AudioProcessor
class Tacotron2(BaseTacotron):
- """Tacotron2 as in https://arxiv.org/abs/1712.05884
- Check `TacotronConfig` for the arguments.
+ """Tacotron2 model implementation inherited from :class:`TTS.tts.models.base_tacotron.BaseTacotron`.
+
+ Paper::
+ https://arxiv.org/abs/1712.05884
+
+ Paper abstract::
+ This paper describes Tacotron 2, a neural network architecture for speech synthesis directly from text.
+ The system is composed of a recurrent sequence-to-sequence feature prediction network that maps character
+ embeddings to mel-scale spectrograms, followed by a modified WaveNet model acting as a vocoder to synthesize
+ time-domain waveforms from those spectrograms. Our model achieves a mean opinion score (MOS) of 4.53 comparable
+ to a MOS of 4.58 for professionally recorded speech. To validate our design choices, we present ablation
+ studies of key components of our system and evaluate the impact of using mel spectrograms as the input to
+ WaveNet instead of linguistic, duration, and F0 features. We further demonstrate that using a compact acoustic
+ intermediate representation enables significant simplification of the WaveNet architecture.
+
+ Check :class:`TTS.tts.configs.tacotron2_config.Tacotron2Config` for model arguments.
+
+ Args:
+ config (TacotronConfig):
+ Configuration for the Tacotron2 model.
+ speaker_manager (SpeakerManager):
+ Speaker manager for multi-speaker training. Use only for multi-speaker training. Defaults to None.
"""
- def __init__(self, config: Coqpit):
+ def __init__(self, config: Coqpit, speaker_manager: SpeakerManager = None):
super().__init__(config)
- chars, self.config = self.get_characters(config)
+ self.speaker_manager = speaker_manager
+ chars, self.config, _ = self.get_characters(config)
config.num_chars = len(chars)
self.decoder_output_dim = config.out_channels
@@ -31,9 +53,7 @@ class Tacotron2(BaseTacotron):
for key in config:
setattr(self, key, config[key])
- # set speaker embedding channel size for determining `in_channels` for the connected layers.
- # `init_multispeaker` needs to be called once more in training to initialize the speaker embedding layer based
- # on the number of speakers infered from the dataset.
+ # init multi-speaker layers
if self.use_speaker_embedding or self.use_d_vector_file:
self.init_multispeaker(config)
self.decoder_in_features += self.embedded_speaker_dim # add speaker embedding dim
@@ -103,6 +123,7 @@ class Tacotron2(BaseTacotron):
@staticmethod
def shape_outputs(mel_outputs, mel_outputs_postnet, alignments):
+ """Final reshape of the model output tensors."""
mel_outputs = mel_outputs.transpose(1, 2)
mel_outputs_postnet = mel_outputs_postnet.transpose(1, 2)
return mel_outputs, mel_outputs_postnet, alignments
@@ -110,13 +131,14 @@ class Tacotron2(BaseTacotron):
def forward( # pylint: disable=dangerous-default-value
self, text, text_lengths, mel_specs=None, mel_lengths=None, aux_input={"speaker_ids": None, "d_vectors": None}
):
- """
+ """Forward pass for training with Teacher Forcing.
+
Shapes:
- text: [B, T_in]
- text_lengths: [B]
- mel_specs: [B, T_out, C]
- mel_lengths: [B]
- aux_input: 'speaker_ids': [B, 1] and 'd_vectors':[B, C]
+ text: :math:`[B, T_in]`
+ text_lengths: :math:`[B]`
+ mel_specs: :math:`[B, T_out, C]`
+ mel_lengths: :math:`[B]`
+ aux_input: 'speaker_ids': :math:`[B, 1]` and 'd_vectors': :math:`[B, C]`
"""
aux_input = self._format_aux_input(aux_input)
outputs = {"alignments_backward": None, "decoder_outputs_backward": None}
@@ -177,6 +199,12 @@ class Tacotron2(BaseTacotron):
@torch.no_grad()
def inference(self, text, aux_input=None):
+ """Forward pass for inference with no Teacher-Forcing.
+
+ Shapes:
+ text: :math:`[B, T_in]`
+ text_lengths: :math:`[B]`
+ """
aux_input = self._format_aux_input(aux_input)
embedded_inputs = self.embedding(text).transpose(1, 2)
encoder_outputs = self.encoder.inference(embedded_inputs)
@@ -210,18 +238,17 @@ class Tacotron2(BaseTacotron):
}
return outputs
- def train_step(self, batch, criterion):
- """Perform a single training step by fetching the right set if samples from the batch.
+ def train_step(self, batch: Dict, criterion: torch.nn.Module):
+ """A single training step. Forward pass and loss computation.
Args:
- batch ([type]): [description]
- criterion ([type]): [description]
+ batch ([Dict]): A dictionary of input tensors.
+ criterion ([type]): Callable criterion to compute model loss.
"""
text_input = batch["text_input"]
text_lengths = batch["text_lengths"]
mel_input = batch["mel_input"]
mel_lengths = batch["mel_lengths"]
- linear_input = batch["linear_input"]
stop_targets = batch["stop_targets"]
stop_target_lengths = batch["stop_target_lengths"]
speaker_ids = batch["speaker_ids"]
@@ -248,28 +275,30 @@ class Tacotron2(BaseTacotron):
outputs = self.forward(text_input, text_lengths, mel_input, mel_lengths, aux_input)
# compute loss
- loss_dict = criterion(
- outputs["model_outputs"],
- outputs["decoder_outputs"],
- mel_input,
- linear_input,
- outputs["stop_tokens"],
- stop_targets,
- stop_target_lengths,
- mel_lengths,
- outputs["decoder_outputs_backward"],
- outputs["alignments"],
- alignment_lengths,
- outputs["alignments_backward"],
- text_lengths,
- )
+ with autocast(enabled=False): # use float32 for the criterion
+ loss_dict = criterion(
+ outputs["model_outputs"].float(),
+ outputs["decoder_outputs"].float(),
+ mel_input.float(),
+ None,
+ outputs["stop_tokens"].float(),
+ stop_targets.float(),
+ stop_target_lengths,
+ mel_lengths,
+ None if outputs["decoder_outputs_backward"] is None else outputs["decoder_outputs_backward"].float(),
+ outputs["alignments"].float(),
+ alignment_lengths,
+ None if outputs["alignments_backward"] is None else outputs["alignments_backward"].float(),
+ text_lengths,
+ )
# compute alignment error (the lower the better )
align_error = 1 - alignment_diagonal_score(outputs["alignments"])
loss_dict["align_error"] = align_error
return outputs, loss_dict
- def train_log(self, ap: AudioProcessor, batch: dict, outputs: dict) -> Tuple[Dict, Dict]:
+ def _create_logs(self, batch, outputs, ap):
+ """Create dashboard log information."""
postnet_outputs = outputs["model_outputs"]
alignments = outputs["alignments"]
alignments_backward = outputs["alignments_backward"]
@@ -289,11 +318,23 @@ class Tacotron2(BaseTacotron):
figures["alignment_backward"] = plot_alignment(alignments_backward[0].data.cpu().numpy(), output_fig=False)
# Sample audio
- train_audio = ap.inv_melspectrogram(pred_spec.T)
- return figures, {"audio": train_audio}
+ audio = ap.inv_melspectrogram(pred_spec.T)
+ return figures, {"audio": audio}
- def eval_step(self, batch, criterion):
+ def train_log(
+ self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
+ ) -> None: # pylint: disable=no-self-use
+ """Log training progress."""
+ ap = assets["audio_processor"]
+ figures, audios = self._create_logs(batch, outputs, ap)
+ logger.train_figures(steps, figures)
+ logger.train_audios(steps, audios, ap.sample_rate)
+
+ def eval_step(self, batch: dict, criterion: nn.Module):
return self.train_step(batch, criterion)
- def eval_log(self, ap, batch, outputs):
- return self.train_log(ap, batch, outputs)
+ def eval_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None:
+ ap = assets["audio_processor"]
+ figures, audios = self._create_logs(batch, outputs, ap)
+ logger.eval_figures(steps, figures)
+ logger.eval_audios(steps, audios, ap.sample_rate)
diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py
index 87695774..5e4b408b 100644
--- a/TTS/tts/models/vits.py
+++ b/TTS/tts/models/vits.py
@@ -1,4 +1,5 @@
import math
+import random
from dataclasses import dataclass, field
from itertools import chain
from typing import Dict, List, Tuple
@@ -14,10 +15,9 @@ from TTS.tts.layers.vits.networks import PosteriorEncoder, ResidualCouplingBlock
from TTS.tts.layers.vits.stochastic_duration_predictor import StochasticDurationPredictor
from TTS.tts.models.base_tts import BaseTTS
from TTS.tts.utils.helpers import generate_path, maximum_path, rand_segments, segment, sequence_mask
-from TTS.tts.utils.speakers import get_speaker_manager
+from TTS.tts.utils.speakers import SpeakerManager
from TTS.tts.utils.synthesis import synthesis
from TTS.tts.utils.visual import plot_alignment
-from TTS.utils.audio import AudioProcessor
from TTS.utils.trainer_utils import get_optimizer, get_scheduler
from TTS.vocoder.models.hifigan_generator import HifiganGenerator
from TTS.vocoder.utils.generic_utils import plot_results
@@ -181,6 +181,7 @@ class VitsArgs(Coqpit):
speakers_file: str = None
speaker_embedding_channels: int = 256
use_d_vector_file: bool = False
+ d_vector_file: str = None
d_vector_dim: int = 0
detach_dp_input: bool = True
@@ -207,7 +208,7 @@ class Vits(BaseTTS):
Check :class:`TTS.tts.configs.vits_config.VitsConfig` for class arguments.
Examples:
- >>> from TTS.tts.configs import VitsConfig
+ >>> from TTS.tts.configs.vits_config import VitsConfig
>>> from TTS.tts.models.vits import Vits
>>> config = VitsConfig()
>>> model = Vits(config)
@@ -215,12 +216,13 @@ class Vits(BaseTTS):
# pylint: disable=dangerous-default-value
- def __init__(self, config: Coqpit):
+ def __init__(self, config: Coqpit, speaker_manager: SpeakerManager = None):
- super().__init__()
+ super().__init__(config)
self.END2END = True
+ self.speaker_manager = speaker_manager
if config.__class__.__name__ == "VitsConfig":
# loading from VitsConfig
if "num_chars" not in config:
@@ -312,31 +314,42 @@ class Vits(BaseTTS):
if args.init_discriminator:
self.disc = VitsDiscriminator(use_spectral_norm=args.use_spectral_norm_disriminator)
- def init_multispeaker(self, config: Coqpit, data: List = None):
+ def init_multispeaker(self, config: Coqpit):
"""Initialize multi-speaker modules of a model. A model can be trained either with a speaker embedding layer
or with external `d_vectors` computed from a speaker encoder model.
- If you need a different behaviour, override this function for your model.
-
Args:
config (Coqpit): Model configuration.
data (List, optional): Dataset items to infer number of speakers. Defaults to None.
"""
+ self.embedded_speaker_dim = 0
if hasattr(config, "model_args"):
config = config.model_args
- self.embedded_speaker_dim = 0
- # init speaker manager
- self.speaker_manager = get_speaker_manager(config, data=data)
- if config.num_speakers > 0 and self.speaker_manager.num_speakers == 0:
- self.speaker_manager.num_speakers = config.num_speakers
- self.num_speakers = self.speaker_manager.num_speakers
- # init speaker embedding layer
- if config.use_speaker_embedding and not config.use_d_vector_file:
- self.embedded_speaker_dim = config.speaker_embedding_channels
- self.emb_g = nn.Embedding(config.num_speakers, config.speaker_embedding_channels)
- # init d-vector usage
+
+ self.num_speakers = config.num_speakers
+
+ if config.use_speaker_embedding:
+ self._init_speaker_embedding(config)
+
if config.use_d_vector_file:
- self.embedded_speaker_dim = config.d_vector_dim
+ self._init_d_vector(config)
+
+ def _init_speaker_embedding(self, config):
+ # pylint: disable=attribute-defined-outside-init
+ if config.speakers_file is not None:
+ self.speaker_manager = SpeakerManager(speaker_id_file_path=config.speakers_file)
+
+ if self.num_speakers > 0:
+ print(" > initialization of speaker-embedding layers.")
+ self.embedded_speaker_dim = config.speaker_embedding_channels
+ self.emb_g = nn.Embedding(self.num_speakers, self.embedded_speaker_dim)
+
+ def _init_d_vector(self, config):
+ # pylint: disable=attribute-defined-outside-init
+ if hasattr(self, "emb_g"):
+ raise ValueError("[!] Speaker embedding layer already initialized before d_vector settings.")
+ self.speaker_manager = SpeakerManager(d_vectors_file_path=config.d_vector_file)
+ self.embedded_speaker_dim = config.d_vector_dim
@staticmethod
def _set_cond_input(aux_input: Dict):
@@ -350,6 +363,10 @@ class Vits(BaseTTS):
g = aux_input["d_vectors"]
return sid, g
+ def get_aux_input(self, aux_input: Dict):
+ sid, g = self._set_cond_input(aux_input)
+ return {"speaker_id": sid, "style_wav": None, "d_vector": g}
+
def forward(
self,
x: torch.tensor,
@@ -457,7 +474,7 @@ class Vits(BaseTTS):
x, m_p, logs_p, x_mask = self.text_encoder(x, x_lengths)
- if self.num_speakers > 0 and sid:
+ if self.num_speakers > 0 and sid is not None:
g = self.emb_g(sid).unsqueeze(-1)
if self.args.use_sdp:
@@ -576,22 +593,7 @@ class Vits(BaseTTS):
)
return outputs, loss_dict
- def train_log(
- self, ap: AudioProcessor, batch: Dict, outputs: List, name_prefix="train"
- ): # pylint: disable=no-self-use
- """Create visualizations and waveform examples.
-
- For example, here you can plot spectrograms and generate sample sample waveforms from these spectrograms to
- be projected onto Tensorboard.
-
- Args:
- ap (AudioProcessor): audio processor used at training.
- batch (Dict): Model inputs used at the previous training step.
- outputs (Dict): Model outputs generated at the previoud training step.
-
- Returns:
- Tuple[Dict, np.ndarray]: training plots and output waveform.
- """
+ def _log(self, ap, batch, outputs, name_prefix="train"): # pylint: disable=unused-argument,no-self-use
y_hat = outputs[0]["model_outputs"]
y = outputs[0]["waveform_seg"]
figures = plot_results(y_hat, y, ap, name_prefix)
@@ -609,12 +611,32 @@ class Vits(BaseTTS):
return figures, audios
+ def train_log(
+ self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int
+ ): # pylint: disable=no-self-use
+ """Create visualizations and waveform examples.
+
+ For example, here you can plot spectrograms and generate sample waveforms from these spectrograms to
+ be projected onto Tensorboard.
+
+ Args:
+ ap (AudioProcessor): audio processor used at training.
+ batch (Dict): Model inputs used at the previous training step.
+ outputs (Dict): Model outputs generated at the previous training step.
+
+ Returns:
+ Tuple[Dict, np.ndarray]: training plots and output waveform.
+ """
+ ap = assets["audio_processor"]
+ self._log(ap, batch, outputs, "train")
+
@torch.no_grad()
def eval_step(self, batch: dict, criterion: nn.Module, optimizer_idx: int):
return self.train_step(batch, criterion, optimizer_idx)
- def eval_log(self, ap: AudioProcessor, batch: dict, outputs: dict):
- return self.train_log(ap, batch, outputs, "eval")
+ def eval_log(self, batch: dict, outputs: dict, logger: "Logger", assets: dict, steps: int) -> None:
+ ap = assets["audio_processor"]
+ return self._log(ap, batch, outputs, "eval")
@torch.no_grad()
def test_run(self, ap) -> Tuple[Dict, Dict]:
@@ -629,7 +651,15 @@ class Vits(BaseTTS):
test_audios = {}
test_figures = {}
test_sentences = self.config.test_sentences
- aux_inputs = self.get_aux_input()
+ aux_inputs = {
+ "speaker_id": None
+ if not self.config.use_speaker_embedding
+ else random.sample(sorted(self.speaker_manager.speaker_ids.values()), 1),
+ "d_vector": None
+ if not self.config.use_d_vector_file
+ else random.sample(sorted(self.speaker_manager.d_vectors.values()), 1),
+ "style_wav": None,
+ }
for idx, sen in enumerate(test_sentences):
wav, alignment, _, _ = synthesis(
self,
@@ -666,7 +696,7 @@ class Vits(BaseTTS):
)
# add the speaker embedding layer
if hasattr(self, "emb_g"):
- gen_parameters = chain(gen_parameters, self.emb_g)
+ gen_parameters = chain(gen_parameters, self.emb_g.parameters())
optimizer0 = get_optimizer(
self.config.optimizer, self.config.optimizer_params, self.config.lr_gen, parameters=gen_parameters
)
diff --git a/TTS/tts/utils/monotonic_align/core.c b/TTS/tts/utils/monotonic_align/core.c
deleted file mode 100644
index 5e3d619b..00000000
--- a/TTS/tts/utils/monotonic_align/core.c
+++ /dev/null
@@ -1,23086 +0,0 @@
-/* Generated by Cython 0.29.24 */
-
-/* BEGIN: Cython Metadata
-{
- "distutils": {
- "depends": [],
- "name": "TTS.tts.layers.glow_tts.monotonic_align.core",
- "sources": [
- "TTS/tts/layers/glow_tts/monotonic_align/core.pyx"
- ]
- },
- "module_name": "TTS.tts.layers.glow_tts.monotonic_align.core"
-}
-END: Cython Metadata */
-
-#ifndef PY_SSIZE_T_CLEAN
-#define PY_SSIZE_T_CLEAN
-#endif /* PY_SSIZE_T_CLEAN */
-#include "Python.h"
-#ifndef Py_PYTHON_H
- #error Python headers needed to compile C extensions, please install development version of Python.
-#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000)
- #error Cython requires Python 2.6+ or Python 3.3+.
-#else
-#define CYTHON_ABI "0_29_24"
-#define CYTHON_HEX_VERSION 0x001D18F0
-#define CYTHON_FUTURE_DIVISION 1
-#include
-#ifndef offsetof
- #define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
-#endif
-#if !defined(WIN32) && !defined(MS_WINDOWS)
- #ifndef __stdcall
- #define __stdcall
- #endif
- #ifndef __cdecl
- #define __cdecl
- #endif
- #ifndef __fastcall
- #define __fastcall
- #endif
-#endif
-#ifndef DL_IMPORT
- #define DL_IMPORT(t) t
-#endif
-#ifndef DL_EXPORT
- #define DL_EXPORT(t) t
-#endif
-#define __PYX_COMMA ,
-#ifndef HAVE_LONG_LONG
- #if PY_VERSION_HEX >= 0x02070000
- #define HAVE_LONG_LONG
- #endif
-#endif
-#ifndef PY_LONG_LONG
- #define PY_LONG_LONG LONG_LONG
-#endif
-#ifndef Py_HUGE_VAL
- #define Py_HUGE_VAL HUGE_VAL
-#endif
-#ifdef PYPY_VERSION
- #define CYTHON_COMPILING_IN_PYPY 1
- #define CYTHON_COMPILING_IN_PYSTON 0
- #define CYTHON_COMPILING_IN_CPYTHON 0
- #undef CYTHON_USE_TYPE_SLOTS
- #define CYTHON_USE_TYPE_SLOTS 0
- #undef CYTHON_USE_PYTYPE_LOOKUP
- #define CYTHON_USE_PYTYPE_LOOKUP 0
- #if PY_VERSION_HEX < 0x03050000
- #undef CYTHON_USE_ASYNC_SLOTS
- #define CYTHON_USE_ASYNC_SLOTS 0
- #elif !defined(CYTHON_USE_ASYNC_SLOTS)
- #define CYTHON_USE_ASYNC_SLOTS 1
- #endif
- #undef CYTHON_USE_PYLIST_INTERNALS
- #define CYTHON_USE_PYLIST_INTERNALS 0
- #undef CYTHON_USE_UNICODE_INTERNALS
- #define CYTHON_USE_UNICODE_INTERNALS 0
- #undef CYTHON_USE_UNICODE_WRITER
- #define CYTHON_USE_UNICODE_WRITER 0
- #undef CYTHON_USE_PYLONG_INTERNALS
- #define CYTHON_USE_PYLONG_INTERNALS 0
- #undef CYTHON_AVOID_BORROWED_REFS
- #define CYTHON_AVOID_BORROWED_REFS 1
- #undef CYTHON_ASSUME_SAFE_MACROS
- #define CYTHON_ASSUME_SAFE_MACROS 0
- #undef CYTHON_UNPACK_METHODS
- #define CYTHON_UNPACK_METHODS 0
- #undef CYTHON_FAST_THREAD_STATE
- #define CYTHON_FAST_THREAD_STATE 0
- #undef CYTHON_FAST_PYCALL
- #define CYTHON_FAST_PYCALL 0
- #undef CYTHON_PEP489_MULTI_PHASE_INIT
- #define CYTHON_PEP489_MULTI_PHASE_INIT 0
- #undef CYTHON_USE_TP_FINALIZE
- #define CYTHON_USE_TP_FINALIZE 0
- #undef CYTHON_USE_DICT_VERSIONS
- #define CYTHON_USE_DICT_VERSIONS 0
- #undef CYTHON_USE_EXC_INFO_STACK
- #define CYTHON_USE_EXC_INFO_STACK 0
-#elif defined(PYSTON_VERSION)
- #define CYTHON_COMPILING_IN_PYPY 0
- #define CYTHON_COMPILING_IN_PYSTON 1
- #define CYTHON_COMPILING_IN_CPYTHON 0
- #ifndef CYTHON_USE_TYPE_SLOTS
- #define CYTHON_USE_TYPE_SLOTS 1
- #endif
- #undef CYTHON_USE_PYTYPE_LOOKUP
- #define CYTHON_USE_PYTYPE_LOOKUP 0
- #undef CYTHON_USE_ASYNC_SLOTS
- #define CYTHON_USE_ASYNC_SLOTS 0
- #undef CYTHON_USE_PYLIST_INTERNALS
- #define CYTHON_USE_PYLIST_INTERNALS 0
- #ifndef CYTHON_USE_UNICODE_INTERNALS
- #define CYTHON_USE_UNICODE_INTERNALS 1
- #endif
- #undef CYTHON_USE_UNICODE_WRITER
- #define CYTHON_USE_UNICODE_WRITER 0
- #undef CYTHON_USE_PYLONG_INTERNALS
- #define CYTHON_USE_PYLONG_INTERNALS 0
- #ifndef CYTHON_AVOID_BORROWED_REFS
- #define CYTHON_AVOID_BORROWED_REFS 0
- #endif
- #ifndef CYTHON_ASSUME_SAFE_MACROS
- #define CYTHON_ASSUME_SAFE_MACROS 1
- #endif
- #ifndef CYTHON_UNPACK_METHODS
- #define CYTHON_UNPACK_METHODS 1
- #endif
- #undef CYTHON_FAST_THREAD_STATE
- #define CYTHON_FAST_THREAD_STATE 0
- #undef CYTHON_FAST_PYCALL
- #define CYTHON_FAST_PYCALL 0
- #undef CYTHON_PEP489_MULTI_PHASE_INIT
- #define CYTHON_PEP489_MULTI_PHASE_INIT 0
- #undef CYTHON_USE_TP_FINALIZE
- #define CYTHON_USE_TP_FINALIZE 0
- #undef CYTHON_USE_DICT_VERSIONS
- #define CYTHON_USE_DICT_VERSIONS 0
- #undef CYTHON_USE_EXC_INFO_STACK
- #define CYTHON_USE_EXC_INFO_STACK 0
-#else
- #define CYTHON_COMPILING_IN_PYPY 0
- #define CYTHON_COMPILING_IN_PYSTON 0
- #define CYTHON_COMPILING_IN_CPYTHON 1
- #ifndef CYTHON_USE_TYPE_SLOTS
- #define CYTHON_USE_TYPE_SLOTS 1
- #endif
- #if PY_VERSION_HEX < 0x02070000
- #undef CYTHON_USE_PYTYPE_LOOKUP
- #define CYTHON_USE_PYTYPE_LOOKUP 0
- #elif !defined(CYTHON_USE_PYTYPE_LOOKUP)
- #define CYTHON_USE_PYTYPE_LOOKUP 1
- #endif
- #if PY_MAJOR_VERSION < 3
- #undef CYTHON_USE_ASYNC_SLOTS
- #define CYTHON_USE_ASYNC_SLOTS 0
- #elif !defined(CYTHON_USE_ASYNC_SLOTS)
- #define CYTHON_USE_ASYNC_SLOTS 1
- #endif
- #if PY_VERSION_HEX < 0x02070000
- #undef CYTHON_USE_PYLONG_INTERNALS
- #define CYTHON_USE_PYLONG_INTERNALS 0
- #elif !defined(CYTHON_USE_PYLONG_INTERNALS)
- #define CYTHON_USE_PYLONG_INTERNALS 1
- #endif
- #ifndef CYTHON_USE_PYLIST_INTERNALS
- #define CYTHON_USE_PYLIST_INTERNALS 1
- #endif
- #ifndef CYTHON_USE_UNICODE_INTERNALS
- #define CYTHON_USE_UNICODE_INTERNALS 1
- #endif
- #if PY_VERSION_HEX < 0x030300F0
- #undef CYTHON_USE_UNICODE_WRITER
- #define CYTHON_USE_UNICODE_WRITER 0
- #elif !defined(CYTHON_USE_UNICODE_WRITER)
- #define CYTHON_USE_UNICODE_WRITER 1
- #endif
- #ifndef CYTHON_AVOID_BORROWED_REFS
- #define CYTHON_AVOID_BORROWED_REFS 0
- #endif
- #ifndef CYTHON_ASSUME_SAFE_MACROS
- #define CYTHON_ASSUME_SAFE_MACROS 1
- #endif
- #ifndef CYTHON_UNPACK_METHODS
- #define CYTHON_UNPACK_METHODS 1
- #endif
- #ifndef CYTHON_FAST_THREAD_STATE
- #define CYTHON_FAST_THREAD_STATE 1
- #endif
- #ifndef CYTHON_FAST_PYCALL
- #define CYTHON_FAST_PYCALL 1
- #endif
- #ifndef CYTHON_PEP489_MULTI_PHASE_INIT
- #define CYTHON_PEP489_MULTI_PHASE_INIT (PY_VERSION_HEX >= 0x03050000)
- #endif
- #ifndef CYTHON_USE_TP_FINALIZE
- #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1)
- #endif
- #ifndef CYTHON_USE_DICT_VERSIONS
- #define CYTHON_USE_DICT_VERSIONS (PY_VERSION_HEX >= 0x030600B1)
- #endif
- #ifndef CYTHON_USE_EXC_INFO_STACK
- #define CYTHON_USE_EXC_INFO_STACK (PY_VERSION_HEX >= 0x030700A3)
- #endif
-#endif
-#if !defined(CYTHON_FAST_PYCCALL)
-#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1)
-#endif
-#if CYTHON_USE_PYLONG_INTERNALS
- #include "longintrepr.h"
- #undef SHIFT
- #undef BASE
- #undef MASK
- #ifdef SIZEOF_VOID_P
- enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) };
- #endif
-#endif
-#ifndef __has_attribute
- #define __has_attribute(x) 0
-#endif
-#ifndef __has_cpp_attribute
- #define __has_cpp_attribute(x) 0
-#endif
-#ifndef CYTHON_RESTRICT
- #if defined(__GNUC__)
- #define CYTHON_RESTRICT __restrict__
- #elif defined(_MSC_VER) && _MSC_VER >= 1400
- #define CYTHON_RESTRICT __restrict
- #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
- #define CYTHON_RESTRICT restrict
- #else
- #define CYTHON_RESTRICT
- #endif
-#endif
-#ifndef CYTHON_UNUSED
-# if defined(__GNUC__)
-# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
-# define CYTHON_UNUSED __attribute__ ((__unused__))
-# else
-# define CYTHON_UNUSED
-# endif
-# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
-# define CYTHON_UNUSED __attribute__ ((__unused__))
-# else
-# define CYTHON_UNUSED
-# endif
-#endif
-#ifndef CYTHON_MAYBE_UNUSED_VAR
-# if defined(__cplusplus)
- template void CYTHON_MAYBE_UNUSED_VAR( const T& ) { }
-# else
-# define CYTHON_MAYBE_UNUSED_VAR(x) (void)(x)
-# endif
-#endif
-#ifndef CYTHON_NCP_UNUSED
-# if CYTHON_COMPILING_IN_CPYTHON
-# define CYTHON_NCP_UNUSED
-# else
-# define CYTHON_NCP_UNUSED CYTHON_UNUSED
-# endif
-#endif
-#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None)
-#ifdef _MSC_VER
- #ifndef _MSC_STDINT_H_
- #if _MSC_VER < 1300
- typedef unsigned char uint8_t;
- typedef unsigned int uint32_t;
- #else
- typedef unsigned __int8 uint8_t;
- typedef unsigned __int32 uint32_t;
- #endif
- #endif
-#else
- #include
-#endif
-#ifndef CYTHON_FALLTHROUGH
- #if defined(__cplusplus) && __cplusplus >= 201103L
- #if __has_cpp_attribute(fallthrough)
- #define CYTHON_FALLTHROUGH [[fallthrough]]
- #elif __has_cpp_attribute(clang::fallthrough)
- #define CYTHON_FALLTHROUGH [[clang::fallthrough]]
- #elif __has_cpp_attribute(gnu::fallthrough)
- #define CYTHON_FALLTHROUGH [[gnu::fallthrough]]
- #endif
- #endif
- #ifndef CYTHON_FALLTHROUGH
- #if __has_attribute(fallthrough)
- #define CYTHON_FALLTHROUGH __attribute__((fallthrough))
- #else
- #define CYTHON_FALLTHROUGH
- #endif
- #endif
- #if defined(__clang__ ) && defined(__apple_build_version__)
- #if __apple_build_version__ < 7000000
- #undef CYTHON_FALLTHROUGH
- #define CYTHON_FALLTHROUGH
- #endif
- #endif
-#endif
-
-#ifndef CYTHON_INLINE
- #if defined(__clang__)
- #define CYTHON_INLINE __inline__ __attribute__ ((__unused__))
- #elif defined(__GNUC__)
- #define CYTHON_INLINE __inline__
- #elif defined(_MSC_VER)
- #define CYTHON_INLINE __inline
- #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
- #define CYTHON_INLINE inline
- #else
- #define CYTHON_INLINE
- #endif
-#endif
-
-#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag)
- #define Py_OptimizeFlag 0
-#endif
-#define __PYX_BUILD_PY_SSIZE_T "n"
-#define CYTHON_FORMAT_SSIZE_T "z"
-#if PY_MAJOR_VERSION < 3
- #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
- #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
- PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
- #define __Pyx_DefaultClassType PyClass_Type
-#else
- #define __Pyx_BUILTIN_MODULE_NAME "builtins"
-#if PY_VERSION_HEX >= 0x030800A4 && PY_VERSION_HEX < 0x030800B2
- #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
- PyCode_New(a, 0, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
-#else
- #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
- PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
-#endif
- #define __Pyx_DefaultClassType PyType_Type
-#endif
-#ifndef Py_TPFLAGS_CHECKTYPES
- #define Py_TPFLAGS_CHECKTYPES 0
-#endif
-#ifndef Py_TPFLAGS_HAVE_INDEX
- #define Py_TPFLAGS_HAVE_INDEX 0
-#endif
-#ifndef Py_TPFLAGS_HAVE_NEWBUFFER
- #define Py_TPFLAGS_HAVE_NEWBUFFER 0
-#endif
-#ifndef Py_TPFLAGS_HAVE_FINALIZE
- #define Py_TPFLAGS_HAVE_FINALIZE 0
-#endif
-#ifndef METH_STACKLESS
- #define METH_STACKLESS 0
-#endif
-#if PY_VERSION_HEX <= 0x030700A3 || !defined(METH_FASTCALL)
- #ifndef METH_FASTCALL
- #define METH_FASTCALL 0x80
- #endif
- typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs);
- typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args,
- Py_ssize_t nargs, PyObject *kwnames);
-#else
- #define __Pyx_PyCFunctionFast _PyCFunctionFast
- #define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords
-#endif
-#if CYTHON_FAST_PYCCALL
-#define __Pyx_PyFastCFunction_Check(func)\
- ((PyCFunction_Check(func) && (METH_FASTCALL == (PyCFunction_GET_FLAGS(func) & ~(METH_CLASS | METH_STATIC | METH_COEXIST | METH_KEYWORDS | METH_STACKLESS)))))
-#else
-#define __Pyx_PyFastCFunction_Check(func) 0
-#endif
-#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc)
- #define PyObject_Malloc(s) PyMem_Malloc(s)
- #define PyObject_Free(p) PyMem_Free(p)
- #define PyObject_Realloc(p) PyMem_Realloc(p)
-#endif
-#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030400A1
- #define PyMem_RawMalloc(n) PyMem_Malloc(n)
- #define PyMem_RawRealloc(p, n) PyMem_Realloc(p, n)
- #define PyMem_RawFree(p) PyMem_Free(p)
-#endif
-#if CYTHON_COMPILING_IN_PYSTON
- #define __Pyx_PyCode_HasFreeVars(co) PyCode_HasFreeVars(co)
- #define __Pyx_PyFrame_SetLineNumber(frame, lineno) PyFrame_SetLineNumber(frame, lineno)
-#else
- #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0)
- #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno)
-#endif
-#if !CYTHON_FAST_THREAD_STATE || PY_VERSION_HEX < 0x02070000
- #define __Pyx_PyThreadState_Current PyThreadState_GET()
-#elif PY_VERSION_HEX >= 0x03060000
- #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet()
-#elif PY_VERSION_HEX >= 0x03000000
- #define __Pyx_PyThreadState_Current PyThreadState_GET()
-#else
- #define __Pyx_PyThreadState_Current _PyThreadState_Current
-#endif
-#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT)
-#include "pythread.h"
-#define Py_tss_NEEDS_INIT 0
-typedef int Py_tss_t;
-static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) {
- *key = PyThread_create_key();
- return 0;
-}
-static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) {
- Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t));
- *key = Py_tss_NEEDS_INIT;
- return key;
-}
-static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) {
- PyObject_Free(key);
-}
-static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) {
- return *key != Py_tss_NEEDS_INIT;
-}
-static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) {
- PyThread_delete_key(*key);
- *key = Py_tss_NEEDS_INIT;
-}
-static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) {
- return PyThread_set_key_value(*key, value);
-}
-static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
- return PyThread_get_key_value(*key);
-}
-#endif
-#if CYTHON_COMPILING_IN_CPYTHON || defined(_PyDict_NewPresized)
-#define __Pyx_PyDict_NewPresized(n) ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n))
-#else
-#define __Pyx_PyDict_NewPresized(n) PyDict_New()
-#endif
-#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION
- #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y)
- #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y)
-#else
- #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y)
- #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y)
-#endif
-#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1 && CYTHON_USE_UNICODE_INTERNALS
-#define __Pyx_PyDict_GetItemStr(dict, name) _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) name)->hash)
-#else
-#define __Pyx_PyDict_GetItemStr(dict, name) PyDict_GetItem(dict, name)
-#endif
-#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
- #define CYTHON_PEP393_ENABLED 1
- #if defined(PyUnicode_IS_READY)
- #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\
- 0 : _PyUnicode_Ready((PyObject *)(op)))
- #else
- #define __Pyx_PyUnicode_READY(op) (0)
- #endif
- #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
- #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
- #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u)
- #define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u)
- #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u)
- #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i)
- #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, ch)
- #if defined(PyUnicode_IS_READY) && defined(PyUnicode_GET_SIZE)
- #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000
- #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length))
- #else
- #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u)))
- #endif
- #else
- #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u))
- #endif
-#else
- #define CYTHON_PEP393_ENABLED 0
- #define PyUnicode_1BYTE_KIND 1
- #define PyUnicode_2BYTE_KIND 2
- #define PyUnicode_4BYTE_KIND 4
- #define __Pyx_PyUnicode_READY(op) (0)
- #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u)
- #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
- #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 65535 : 1114111)
- #define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE))
- #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u))
- #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i]))
- #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = ch)
- #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u))
-#endif
-#if CYTHON_COMPILING_IN_PYPY
- #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b)
- #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b)
-#else
- #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b)
- #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\
- PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b))
-#endif
-#if CYTHON_COMPILING_IN_PYPY && !defined(PyUnicode_Contains)
- #define PyUnicode_Contains(u, s) PySequence_Contains(u, s)
-#endif
-#if CYTHON_COMPILING_IN_PYPY && !defined(PyByteArray_Check)
- #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type)
-#endif
-#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Format)
- #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt)
-#endif
-#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b))
-#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b))
-#if PY_MAJOR_VERSION >= 3
- #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b)
-#else
- #define __Pyx_PyString_Format(a, b) PyString_Format(a, b)
-#endif
-#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII)
- #define PyObject_ASCII(o) PyObject_Repr(o)
-#endif
-#if PY_MAJOR_VERSION >= 3
- #define PyBaseString_Type PyUnicode_Type
- #define PyStringObject PyUnicodeObject
- #define PyString_Type PyUnicode_Type
- #define PyString_Check PyUnicode_Check
- #define PyString_CheckExact PyUnicode_CheckExact
-#ifndef PyObject_Unicode
- #define PyObject_Unicode PyObject_Str
-#endif
-#endif
-#if PY_MAJOR_VERSION >= 3
- #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
- #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
-#else
- #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj))
- #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj))
-#endif
-#ifndef PySet_CheckExact
- #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type)
-#endif
-#if PY_VERSION_HEX >= 0x030900A4
- #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt)
- #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size)
-#else
- #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt)
- #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size)
-#endif
-#if CYTHON_ASSUME_SAFE_MACROS
- #define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq)
-#else
- #define __Pyx_PySequence_SIZE(seq) PySequence_Size(seq)
-#endif
-#if PY_MAJOR_VERSION >= 3
- #define PyIntObject PyLongObject
- #define PyInt_Type PyLong_Type
- #define PyInt_Check(op) PyLong_Check(op)
- #define PyInt_CheckExact(op) PyLong_CheckExact(op)
- #define PyInt_FromString PyLong_FromString
- #define PyInt_FromUnicode PyLong_FromUnicode
- #define PyInt_FromLong PyLong_FromLong
- #define PyInt_FromSize_t PyLong_FromSize_t
- #define PyInt_FromSsize_t PyLong_FromSsize_t
- #define PyInt_AsLong PyLong_AsLong
- #define PyInt_AS_LONG PyLong_AS_LONG
- #define PyInt_AsSsize_t PyLong_AsSsize_t
- #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask
- #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
- #define PyNumber_Int PyNumber_Long
-#endif
-#if PY_MAJOR_VERSION >= 3
- #define PyBoolObject PyLongObject
-#endif
-#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY
- #ifndef PyUnicode_InternFromString
- #define PyUnicode_InternFromString(s) PyUnicode_FromString(s)
- #endif
-#endif
-#if PY_VERSION_HEX < 0x030200A4
- typedef long Py_hash_t;
- #define __Pyx_PyInt_FromHash_t PyInt_FromLong
- #define __Pyx_PyInt_AsHash_t PyInt_AsLong
-#else
- #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
- #define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t
-#endif
-#if PY_MAJOR_VERSION >= 3
- #define __Pyx_PyMethod_New(func, self, klass) ((self) ? ((void)(klass), PyMethod_New(func, self)) : __Pyx_NewRef(func))
-#else
- #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass)
-#endif
-#if CYTHON_USE_ASYNC_SLOTS
- #if PY_VERSION_HEX >= 0x030500B1
- #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods
- #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async)
- #else
- #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved))
- #endif
-#else
- #define __Pyx_PyType_AsAsync(obj) NULL
-#endif
-#ifndef __Pyx_PyAsyncMethodsStruct
- typedef struct {
- unaryfunc am_await;
- unaryfunc am_aiter;
- unaryfunc am_anext;
- } __Pyx_PyAsyncMethodsStruct;
-#endif
-
-#if defined(WIN32) || defined(MS_WINDOWS)
- #define _USE_MATH_DEFINES
-#endif
-#include
-#ifdef NAN
-#define __PYX_NAN() ((float) NAN)
-#else
-static CYTHON_INLINE float __PYX_NAN() {
- float value;
- memset(&value, 0xFF, sizeof(value));
- return value;
-}
-#endif
-#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL)
-#define __Pyx_truncl trunc
-#else
-#define __Pyx_truncl truncl
-#endif
-
-#define __PYX_MARK_ERR_POS(f_index, lineno) \
- { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; }
-#define __PYX_ERR(f_index, lineno, Ln_error) \
- { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; }
-
-#ifndef __PYX_EXTERN_C
- #ifdef __cplusplus
- #define __PYX_EXTERN_C extern "C"
- #else
- #define __PYX_EXTERN_C extern
- #endif
-#endif
-
-#define __PYX_HAVE__TTS__tts__layers__glow_tts__monotonic_align__core
-#define __PYX_HAVE_API__TTS__tts__layers__glow_tts__monotonic_align__core
-/* Early includes */
-#include
-#include
-#include "numpy/arrayobject.h"
-#include "numpy/ufuncobject.h"
-
- /* NumPy API declarations from "numpy/__init__.pxd" */
-
-#include "pythread.h"
-#include
-#include "pystate.h"
-#ifdef _OPENMP
-#include
-#endif /* _OPENMP */
-
-#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS)
-#define CYTHON_WITHOUT_ASSERTIONS
-#endif
-
-typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding;
- const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry;
-
-#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0
-#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 0
-#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8)
-#define __PYX_DEFAULT_STRING_ENCODING ""
-#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString
-#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
-#define __Pyx_uchar_cast(c) ((unsigned char)c)
-#define __Pyx_long_cast(x) ((long)x)
-#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\
- (sizeof(type) < sizeof(Py_ssize_t)) ||\
- (sizeof(type) > sizeof(Py_ssize_t) &&\
- likely(v < (type)PY_SSIZE_T_MAX ||\
- v == (type)PY_SSIZE_T_MAX) &&\
- (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\
- v == (type)PY_SSIZE_T_MIN))) ||\
- (sizeof(type) == sizeof(Py_ssize_t) &&\
- (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\
- v == (type)PY_SSIZE_T_MAX))) )
-static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) {
- return (size_t) i < (size_t) limit;
-}
-#if defined (__cplusplus) && __cplusplus >= 201103L
- #include
- #define __Pyx_sst_abs(value) std::abs(value)
-#elif SIZEOF_INT >= SIZEOF_SIZE_T
- #define __Pyx_sst_abs(value) abs(value)
-#elif SIZEOF_LONG >= SIZEOF_SIZE_T
- #define __Pyx_sst_abs(value) labs(value)
-#elif defined (_MSC_VER)
- #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value))
-#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
- #define __Pyx_sst_abs(value) llabs(value)
-#elif defined (__GNUC__)
- #define __Pyx_sst_abs(value) __builtin_llabs(value)
-#else
- #define __Pyx_sst_abs(value) ((value<0) ? -value : value)
-#endif
-static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*);
-static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length);
-#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s))
-#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l)
-#define __Pyx_PyBytes_FromString PyBytes_FromString
-#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
-static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*);
-#if PY_MAJOR_VERSION < 3
- #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString
- #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
-#else
- #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString
- #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize
-#endif
-#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s))
-#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s))
-#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s))
-#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s))
-#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s))
-#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s))
-#define __Pyx_PyObject_AsWritableString(s) ((char*) __Pyx_PyObject_AsString(s))
-#define __Pyx_PyObject_AsWritableSString(s) ((signed char*) __Pyx_PyObject_AsString(s))
-#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s))
-#define __Pyx_PyObject_AsSString(s) ((const signed char*) __Pyx_PyObject_AsString(s))
-#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s))
-#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s)
-#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s)
-#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s)
-#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s)
-#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s)
-static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) {
- const Py_UNICODE *u_end = u;
- while (*u_end++) ;
- return (size_t)(u_end - u - 1);
-}
-#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u))
-#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode
-#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode
-#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj)
-#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None)
-static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b);
-static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
-static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*);
-static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x);
-#define __Pyx_PySequence_Tuple(obj)\
- (likely(PyTuple_CheckExact(obj)) ? __Pyx_NewRef(obj) : PySequence_Tuple(obj))
-static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
-static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
-#if CYTHON_ASSUME_SAFE_MACROS
-#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
-#else
-#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x)
-#endif
-#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
-#if PY_MAJOR_VERSION >= 3
-#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x))
-#else
-#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Int(x))
-#endif
-#define __Pyx_PyNumber_Float(x) (PyFloat_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Float(x))
-#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
-static int __Pyx_sys_getdefaultencoding_not_ascii;
-static int __Pyx_init_sys_getdefaultencoding_params(void) {
- PyObject* sys;
- PyObject* default_encoding = NULL;
- PyObject* ascii_chars_u = NULL;
- PyObject* ascii_chars_b = NULL;
- const char* default_encoding_c;
- sys = PyImport_ImportModule("sys");
- if (!sys) goto bad;
- default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL);
- Py_DECREF(sys);
- if (!default_encoding) goto bad;
- default_encoding_c = PyBytes_AsString(default_encoding);
- if (!default_encoding_c) goto bad;
- if (strcmp(default_encoding_c, "ascii") == 0) {
- __Pyx_sys_getdefaultencoding_not_ascii = 0;
- } else {
- char ascii_chars[128];
- int c;
- for (c = 0; c < 128; c++) {
- ascii_chars[c] = c;
- }
- __Pyx_sys_getdefaultencoding_not_ascii = 1;
- ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL);
- if (!ascii_chars_u) goto bad;
- ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL);
- if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) {
- PyErr_Format(
- PyExc_ValueError,
- "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.",
- default_encoding_c);
- goto bad;
- }
- Py_DECREF(ascii_chars_u);
- Py_DECREF(ascii_chars_b);
- }
- Py_DECREF(default_encoding);
- return 0;
-bad:
- Py_XDECREF(default_encoding);
- Py_XDECREF(ascii_chars_u);
- Py_XDECREF(ascii_chars_b);
- return -1;
-}
-#endif
-#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3
-#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL)
-#else
-#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL)
-#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
-static char* __PYX_DEFAULT_STRING_ENCODING;
-static int __Pyx_init_sys_getdefaultencoding_params(void) {
- PyObject* sys;
- PyObject* default_encoding = NULL;
- char* default_encoding_c;
- sys = PyImport_ImportModule("sys");
- if (!sys) goto bad;
- default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
- Py_DECREF(sys);
- if (!default_encoding) goto bad;
- default_encoding_c = PyBytes_AsString(default_encoding);
- if (!default_encoding_c) goto bad;
- __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1);
- if (!__PYX_DEFAULT_STRING_ENCODING) goto bad;
- strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c);
- Py_DECREF(default_encoding);
- return 0;
-bad:
- Py_XDECREF(default_encoding);
- return -1;
-}
-#endif
-#endif
-
-
-/* Test for GCC > 2.95 */
-#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))
- #define likely(x) __builtin_expect(!!(x), 1)
- #define unlikely(x) __builtin_expect(!!(x), 0)
-#else /* !__GNUC__ or GCC < 2.95 */
- #define likely(x) (x)
- #define unlikely(x) (x)
-#endif /* __GNUC__ */
-static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; }
-
-static PyObject *__pyx_m = NULL;
-static PyObject *__pyx_d;
-static PyObject *__pyx_b;
-static PyObject *__pyx_cython_runtime = NULL;
-static PyObject *__pyx_empty_tuple;
-static PyObject *__pyx_empty_bytes;
-static PyObject *__pyx_empty_unicode;
-static int __pyx_lineno;
-static int __pyx_clineno = 0;
-static const char * __pyx_cfilenm= __FILE__;
-static const char *__pyx_filename;
-
-/* Header.proto */
-#if !defined(CYTHON_CCOMPLEX)
- #if defined(__cplusplus)
- #define CYTHON_CCOMPLEX 1
- #elif defined(_Complex_I)
- #define CYTHON_CCOMPLEX 1
- #else
- #define CYTHON_CCOMPLEX 0
- #endif
-#endif
-#if CYTHON_CCOMPLEX
- #ifdef __cplusplus
- #include
- #else
- #include
- #endif
-#endif
-#if CYTHON_CCOMPLEX && !defined(__cplusplus) && defined(__sun__) && defined(__GNUC__)
- #undef _Complex_I
- #define _Complex_I 1.0fj
-#endif
-
-
-static const char *__pyx_f[] = {
- "TTS/tts/layers/glow_tts/monotonic_align/core.pyx",
- "__init__.pxd",
- "stringsource",
- "type.pxd",
-};
-/* NoFastGil.proto */
-#define __Pyx_PyGILState_Ensure PyGILState_Ensure
-#define __Pyx_PyGILState_Release PyGILState_Release
-#define __Pyx_FastGIL_Remember()
-#define __Pyx_FastGIL_Forget()
-#define __Pyx_FastGilFuncInit()
-
-/* MemviewSliceStruct.proto */
-struct __pyx_memoryview_obj;
-typedef struct {
- struct __pyx_memoryview_obj *memview;
- char *data;
- Py_ssize_t shape[8];
- Py_ssize_t strides[8];
- Py_ssize_t suboffsets[8];
-} __Pyx_memviewslice;
-#define __Pyx_MemoryView_Len(m) (m.shape[0])
-
-/* Atomics.proto */
-#include
-#ifndef CYTHON_ATOMICS
- #define CYTHON_ATOMICS 1
-#endif
-#define __pyx_atomic_int_type int
-#if CYTHON_ATOMICS && __GNUC__ >= 4 && (__GNUC_MINOR__ > 1 ||\
- (__GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL >= 2)) &&\
- !defined(__i386__)
- #define __pyx_atomic_incr_aligned(value, lock) __sync_fetch_and_add(value, 1)
- #define __pyx_atomic_decr_aligned(value, lock) __sync_fetch_and_sub(value, 1)
- #ifdef __PYX_DEBUG_ATOMICS
- #warning "Using GNU atomics"
- #endif
-#elif CYTHON_ATOMICS && defined(_MSC_VER) && 0
- #include
- #undef __pyx_atomic_int_type
- #define __pyx_atomic_int_type LONG
- #define __pyx_atomic_incr_aligned(value, lock) InterlockedIncrement(value)
- #define __pyx_atomic_decr_aligned(value, lock) InterlockedDecrement(value)
- #ifdef __PYX_DEBUG_ATOMICS
- #pragma message ("Using MSVC atomics")
- #endif
-#elif CYTHON_ATOMICS && (defined(__ICC) || defined(__INTEL_COMPILER)) && 0
- #define __pyx_atomic_incr_aligned(value, lock) _InterlockedIncrement(value)
- #define __pyx_atomic_decr_aligned(value, lock) _InterlockedDecrement(value)
- #ifdef __PYX_DEBUG_ATOMICS
- #warning "Using Intel atomics"
- #endif
-#else
- #undef CYTHON_ATOMICS
- #define CYTHON_ATOMICS 0
- #ifdef __PYX_DEBUG_ATOMICS
- #warning "Not using atomics"
- #endif
-#endif
-typedef volatile __pyx_atomic_int_type __pyx_atomic_int;
-#if CYTHON_ATOMICS
- #define __pyx_add_acquisition_count(memview)\
- __pyx_atomic_incr_aligned(__pyx_get_slice_count_pointer(memview), memview->lock)
- #define __pyx_sub_acquisition_count(memview)\
- __pyx_atomic_decr_aligned(__pyx_get_slice_count_pointer(memview), memview->lock)
-#else
- #define __pyx_add_acquisition_count(memview)\
- __pyx_add_acquisition_count_locked(__pyx_get_slice_count_pointer(memview), memview->lock)
- #define __pyx_sub_acquisition_count(memview)\
- __pyx_sub_acquisition_count_locked(__pyx_get_slice_count_pointer(memview), memview->lock)
-#endif
-
-/* ForceInitThreads.proto */
-#ifndef __PYX_FORCE_INIT_THREADS
- #define __PYX_FORCE_INIT_THREADS 0
-#endif
-
-/* BufferFormatStructs.proto */
-#define IS_UNSIGNED(type) (((type) -1) > 0)
-struct __Pyx_StructField_;
-#define __PYX_BUF_FLAGS_PACKED_STRUCT (1 << 0)
-typedef struct {
- const char* name;
- struct __Pyx_StructField_* fields;
- size_t size;
- size_t arraysize[8];
- int ndim;
- char typegroup;
- char is_unsigned;
- int flags;
-} __Pyx_TypeInfo;
-typedef struct __Pyx_StructField_ {
- __Pyx_TypeInfo* type;
- const char* name;
- size_t offset;
-} __Pyx_StructField;
-typedef struct {
- __Pyx_StructField* field;
- size_t parent_offset;
-} __Pyx_BufFmt_StackElem;
-typedef struct {
- __Pyx_StructField root;
- __Pyx_BufFmt_StackElem* head;
- size_t fmt_offset;
- size_t new_count, enc_count;
- size_t struct_alignment;
- int is_complex;
- char enc_type;
- char new_packmode;
- char enc_packmode;
- char is_valid_array;
-} __Pyx_BufFmt_Context;
-
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":689
- * # in Cython to enable them only on the right systems.
- *
- * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<<
- * ctypedef npy_int16 int16_t
- * ctypedef npy_int32 int32_t
- */
-typedef npy_int8 __pyx_t_5numpy_int8_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":690
- *
- * ctypedef npy_int8 int8_t
- * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<<
- * ctypedef npy_int32 int32_t
- * ctypedef npy_int64 int64_t
- */
-typedef npy_int16 __pyx_t_5numpy_int16_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":691
- * ctypedef npy_int8 int8_t
- * ctypedef npy_int16 int16_t
- * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<<
- * ctypedef npy_int64 int64_t
- * #ctypedef npy_int96 int96_t
- */
-typedef npy_int32 __pyx_t_5numpy_int32_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":692
- * ctypedef npy_int16 int16_t
- * ctypedef npy_int32 int32_t
- * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<<
- * #ctypedef npy_int96 int96_t
- * #ctypedef npy_int128 int128_t
- */
-typedef npy_int64 __pyx_t_5numpy_int64_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":696
- * #ctypedef npy_int128 int128_t
- *
- * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<<
- * ctypedef npy_uint16 uint16_t
- * ctypedef npy_uint32 uint32_t
- */
-typedef npy_uint8 __pyx_t_5numpy_uint8_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":697
- *
- * ctypedef npy_uint8 uint8_t
- * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<<
- * ctypedef npy_uint32 uint32_t
- * ctypedef npy_uint64 uint64_t
- */
-typedef npy_uint16 __pyx_t_5numpy_uint16_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":698
- * ctypedef npy_uint8 uint8_t
- * ctypedef npy_uint16 uint16_t
- * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<<
- * ctypedef npy_uint64 uint64_t
- * #ctypedef npy_uint96 uint96_t
- */
-typedef npy_uint32 __pyx_t_5numpy_uint32_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":699
- * ctypedef npy_uint16 uint16_t
- * ctypedef npy_uint32 uint32_t
- * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<<
- * #ctypedef npy_uint96 uint96_t
- * #ctypedef npy_uint128 uint128_t
- */
-typedef npy_uint64 __pyx_t_5numpy_uint64_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":703
- * #ctypedef npy_uint128 uint128_t
- *
- * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<<
- * ctypedef npy_float64 float64_t
- * #ctypedef npy_float80 float80_t
- */
-typedef npy_float32 __pyx_t_5numpy_float32_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":704
- *
- * ctypedef npy_float32 float32_t
- * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<<
- * #ctypedef npy_float80 float80_t
- * #ctypedef npy_float128 float128_t
- */
-typedef npy_float64 __pyx_t_5numpy_float64_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":713
- * # The int types are mapped a bit surprising --
- * # numpy.int corresponds to 'l' and numpy.long to 'q'
- * ctypedef npy_long int_t # <<<<<<<<<<<<<<
- * ctypedef npy_longlong long_t
- * ctypedef npy_longlong longlong_t
- */
-typedef npy_long __pyx_t_5numpy_int_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":714
- * # numpy.int corresponds to 'l' and numpy.long to 'q'
- * ctypedef npy_long int_t
- * ctypedef npy_longlong long_t # <<<<<<<<<<<<<<
- * ctypedef npy_longlong longlong_t
- *
- */
-typedef npy_longlong __pyx_t_5numpy_long_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":715
- * ctypedef npy_long int_t
- * ctypedef npy_longlong long_t
- * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<<
- *
- * ctypedef npy_ulong uint_t
- */
-typedef npy_longlong __pyx_t_5numpy_longlong_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":717
- * ctypedef npy_longlong longlong_t
- *
- * ctypedef npy_ulong uint_t # <<<<<<<<<<<<<<
- * ctypedef npy_ulonglong ulong_t
- * ctypedef npy_ulonglong ulonglong_t
- */
-typedef npy_ulong __pyx_t_5numpy_uint_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":718
- *
- * ctypedef npy_ulong uint_t
- * ctypedef npy_ulonglong ulong_t # <<<<<<<<<<<<<<
- * ctypedef npy_ulonglong ulonglong_t
- *
- */
-typedef npy_ulonglong __pyx_t_5numpy_ulong_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":719
- * ctypedef npy_ulong uint_t
- * ctypedef npy_ulonglong ulong_t
- * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<<
- *
- * ctypedef npy_intp intp_t
- */
-typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":721
- * ctypedef npy_ulonglong ulonglong_t
- *
- * ctypedef npy_intp intp_t # <<<<<<<<<<<<<<
- * ctypedef npy_uintp uintp_t
- *
- */
-typedef npy_intp __pyx_t_5numpy_intp_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":722
- *
- * ctypedef npy_intp intp_t
- * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<<
- *
- * ctypedef npy_double float_t
- */
-typedef npy_uintp __pyx_t_5numpy_uintp_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":724
- * ctypedef npy_uintp uintp_t
- *
- * ctypedef npy_double float_t # <<<<<<<<<<<<<<
- * ctypedef npy_double double_t
- * ctypedef npy_longdouble longdouble_t
- */
-typedef npy_double __pyx_t_5numpy_float_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":725
- *
- * ctypedef npy_double float_t
- * ctypedef npy_double double_t # <<<<<<<<<<<<<<
- * ctypedef npy_longdouble longdouble_t
- *
- */
-typedef npy_double __pyx_t_5numpy_double_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":726
- * ctypedef npy_double float_t
- * ctypedef npy_double double_t
- * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<<
- *
- * ctypedef npy_cfloat cfloat_t
- */
-typedef npy_longdouble __pyx_t_5numpy_longdouble_t;
-/* Declarations.proto */
-#if CYTHON_CCOMPLEX
- #ifdef __cplusplus
- typedef ::std::complex< float > __pyx_t_float_complex;
- #else
- typedef float _Complex __pyx_t_float_complex;
- #endif
-#else
- typedef struct { float real, imag; } __pyx_t_float_complex;
-#endif
-static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float, float);
-
-/* Declarations.proto */
-#if CYTHON_CCOMPLEX
- #ifdef __cplusplus
- typedef ::std::complex< double > __pyx_t_double_complex;
- #else
- typedef double _Complex __pyx_t_double_complex;
- #endif
-#else
- typedef struct { double real, imag; } __pyx_t_double_complex;
-#endif
-static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double, double);
-
-
-/*--- Type declarations ---*/
-struct __pyx_array_obj;
-struct __pyx_MemviewEnum_obj;
-struct __pyx_memoryview_obj;
-struct __pyx_memoryviewslice_obj;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":728
- * ctypedef npy_longdouble longdouble_t
- *
- * ctypedef npy_cfloat cfloat_t # <<<<<<<<<<<<<<
- * ctypedef npy_cdouble cdouble_t
- * ctypedef npy_clongdouble clongdouble_t
- */
-typedef npy_cfloat __pyx_t_5numpy_cfloat_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":729
- *
- * ctypedef npy_cfloat cfloat_t
- * ctypedef npy_cdouble cdouble_t # <<<<<<<<<<<<<<
- * ctypedef npy_clongdouble clongdouble_t
- *
- */
-typedef npy_cdouble __pyx_t_5numpy_cdouble_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":730
- * ctypedef npy_cfloat cfloat_t
- * ctypedef npy_cdouble cdouble_t
- * ctypedef npy_clongdouble clongdouble_t # <<<<<<<<<<<<<<
- *
- * ctypedef npy_cdouble complex_t
- */
-typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t;
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":732
- * ctypedef npy_clongdouble clongdouble_t
- *
- * ctypedef npy_cdouble complex_t # <<<<<<<<<<<<<<
- *
- * cdef inline object PyArray_MultiIterNew1(a):
- */
-typedef npy_cdouble __pyx_t_5numpy_complex_t;
-struct __pyx_opt_args_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_maximum_path_c;
-
-/* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":42
- * @cython.boundscheck(False)
- * @cython.wraparound(False)
- * cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_xs, int[::1] t_ys, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<<
- * cdef int b = values.shape[0]
- *
- */
-struct __pyx_opt_args_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_maximum_path_c {
- int __pyx_n;
- float max_neg_val;
-};
-
-/* "View.MemoryView":105
- *
- * @cname("__pyx_array")
- * cdef class array: # <<<<<<<<<<<<<<
- *
- * cdef:
- */
-struct __pyx_array_obj {
- PyObject_HEAD
- struct __pyx_vtabstruct_array *__pyx_vtab;
- char *data;
- Py_ssize_t len;
- char *format;
- int ndim;
- Py_ssize_t *_shape;
- Py_ssize_t *_strides;
- Py_ssize_t itemsize;
- PyObject *mode;
- PyObject *_format;
- void (*callback_free_data)(void *);
- int free_data;
- int dtype_is_object;
-};
-
-
-/* "View.MemoryView":279
- *
- * @cname('__pyx_MemviewEnum')
- * cdef class Enum(object): # <<<<<<<<<<<<<<
- * cdef object name
- * def __init__(self, name):
- */
-struct __pyx_MemviewEnum_obj {
- PyObject_HEAD
- PyObject *name;
-};
-
-
-/* "View.MemoryView":330
- *
- * @cname('__pyx_memoryview')
- * cdef class memoryview(object): # <<<<<<<<<<<<<<
- *
- * cdef object obj
- */
-struct __pyx_memoryview_obj {
- PyObject_HEAD
- struct __pyx_vtabstruct_memoryview *__pyx_vtab;
- PyObject *obj;
- PyObject *_size;
- PyObject *_array_interface;
- PyThread_type_lock lock;
- __pyx_atomic_int acquisition_count[2];
- __pyx_atomic_int *acquisition_count_aligned_p;
- Py_buffer view;
- int flags;
- int dtype_is_object;
- __Pyx_TypeInfo *typeinfo;
-};
-
-
-/* "View.MemoryView":965
- *
- * @cname('__pyx_memoryviewslice')
- * cdef class _memoryviewslice(memoryview): # <<<<<<<<<<<<<<
- * "Internal class for passing memoryview slices to Python"
- *
- */
-struct __pyx_memoryviewslice_obj {
- struct __pyx_memoryview_obj __pyx_base;
- __Pyx_memviewslice from_slice;
- PyObject *from_object;
- PyObject *(*to_object_func)(char *);
- int (*to_dtype_func)(char *, PyObject *);
-};
-
-
-
-/* "View.MemoryView":105
- *
- * @cname("__pyx_array")
- * cdef class array: # <<<<<<<<<<<<<<
- *
- * cdef:
- */
-
-struct __pyx_vtabstruct_array {
- PyObject *(*get_memview)(struct __pyx_array_obj *);
-};
-static struct __pyx_vtabstruct_array *__pyx_vtabptr_array;
-
-
-/* "View.MemoryView":330
- *
- * @cname('__pyx_memoryview')
- * cdef class memoryview(object): # <<<<<<<<<<<<<<
- *
- * cdef object obj
- */
-
-struct __pyx_vtabstruct_memoryview {
- char *(*get_item_pointer)(struct __pyx_memoryview_obj *, PyObject *);
- PyObject *(*is_slice)(struct __pyx_memoryview_obj *, PyObject *);
- PyObject *(*setitem_slice_assignment)(struct __pyx_memoryview_obj *, PyObject *, PyObject *);
- PyObject *(*setitem_slice_assign_scalar)(struct __pyx_memoryview_obj *, struct __pyx_memoryview_obj *, PyObject *);
- PyObject *(*setitem_indexed)(struct __pyx_memoryview_obj *, PyObject *, PyObject *);
- PyObject *(*convert_item_to_object)(struct __pyx_memoryview_obj *, char *);
- PyObject *(*assign_item_from_object)(struct __pyx_memoryview_obj *, char *, PyObject *);
-};
-static struct __pyx_vtabstruct_memoryview *__pyx_vtabptr_memoryview;
-
-
-/* "View.MemoryView":965
- *
- * @cname('__pyx_memoryviewslice')
- * cdef class _memoryviewslice(memoryview): # <<<<<<<<<<<<<<
- * "Internal class for passing memoryview slices to Python"
- *
- */
-
-struct __pyx_vtabstruct__memoryviewslice {
- struct __pyx_vtabstruct_memoryview __pyx_base;
-};
-static struct __pyx_vtabstruct__memoryviewslice *__pyx_vtabptr__memoryviewslice;
-
-/* --- Runtime support code (head) --- */
-/* Refnanny.proto */
-#ifndef CYTHON_REFNANNY
- #define CYTHON_REFNANNY 0
-#endif
-#if CYTHON_REFNANNY
- typedef struct {
- void (*INCREF)(void*, PyObject*, int);
- void (*DECREF)(void*, PyObject*, int);
- void (*GOTREF)(void*, PyObject*, int);
- void (*GIVEREF)(void*, PyObject*, int);
- void* (*SetupContext)(const char*, int, const char*);
- void (*FinishContext)(void**);
- } __Pyx_RefNannyAPIStruct;
- static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL;
- static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname);
- #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL;
-#ifdef WITH_THREAD
- #define __Pyx_RefNannySetupContext(name, acquire_gil)\
- if (acquire_gil) {\
- PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\
- __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\
- PyGILState_Release(__pyx_gilstate_save);\
- } else {\
- __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\
- }
-#else
- #define __Pyx_RefNannySetupContext(name, acquire_gil)\
- __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__)
-#endif
- #define __Pyx_RefNannyFinishContext()\
- __Pyx_RefNanny->FinishContext(&__pyx_refnanny)
- #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
- #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
- #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
- #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
- #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0)
- #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0)
- #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0)
- #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0)
-#else
- #define __Pyx_RefNannyDeclarations
- #define __Pyx_RefNannySetupContext(name, acquire_gil)
- #define __Pyx_RefNannyFinishContext()
- #define __Pyx_INCREF(r) Py_INCREF(r)
- #define __Pyx_DECREF(r) Py_DECREF(r)
- #define __Pyx_GOTREF(r)
- #define __Pyx_GIVEREF(r)
- #define __Pyx_XINCREF(r) Py_XINCREF(r)
- #define __Pyx_XDECREF(r) Py_XDECREF(r)
- #define __Pyx_XGOTREF(r)
- #define __Pyx_XGIVEREF(r)
-#endif
-#define __Pyx_XDECREF_SET(r, v) do {\
- PyObject *tmp = (PyObject *) r;\
- r = v; __Pyx_XDECREF(tmp);\
- } while (0)
-#define __Pyx_DECREF_SET(r, v) do {\
- PyObject *tmp = (PyObject *) r;\
- r = v; __Pyx_DECREF(tmp);\
- } while (0)
-#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0)
-#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0)
-
-/* PyObjectGetAttrStr.proto */
-#if CYTHON_USE_TYPE_SLOTS
-static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name);
-#else
-#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n)
-#endif
-
-/* GetBuiltinName.proto */
-static PyObject *__Pyx_GetBuiltinName(PyObject *name);
-
-/* MemviewSliceInit.proto */
-#define __Pyx_BUF_MAX_NDIMS %(BUF_MAX_NDIMS)d
-#define __Pyx_MEMVIEW_DIRECT 1
-#define __Pyx_MEMVIEW_PTR 2
-#define __Pyx_MEMVIEW_FULL 4
-#define __Pyx_MEMVIEW_CONTIG 8
-#define __Pyx_MEMVIEW_STRIDED 16
-#define __Pyx_MEMVIEW_FOLLOW 32
-#define __Pyx_IS_C_CONTIG 1
-#define __Pyx_IS_F_CONTIG 2
-static int __Pyx_init_memviewslice(
- struct __pyx_memoryview_obj *memview,
- int ndim,
- __Pyx_memviewslice *memviewslice,
- int memview_is_new_reference);
-static CYTHON_INLINE int __pyx_add_acquisition_count_locked(
- __pyx_atomic_int *acquisition_count, PyThread_type_lock lock);
-static CYTHON_INLINE int __pyx_sub_acquisition_count_locked(
- __pyx_atomic_int *acquisition_count, PyThread_type_lock lock);
-#define __pyx_get_slice_count_pointer(memview) (memview->acquisition_count_aligned_p)
-#define __pyx_get_slice_count(memview) (*__pyx_get_slice_count_pointer(memview))
-#define __PYX_INC_MEMVIEW(slice, have_gil) __Pyx_INC_MEMVIEW(slice, have_gil, __LINE__)
-#define __PYX_XDEC_MEMVIEW(slice, have_gil) __Pyx_XDEC_MEMVIEW(slice, have_gil, __LINE__)
-static CYTHON_INLINE void __Pyx_INC_MEMVIEW(__Pyx_memviewslice *, int, int);
-static CYTHON_INLINE void __Pyx_XDEC_MEMVIEW(__Pyx_memviewslice *, int, int);
-
-/* RaiseArgTupleInvalid.proto */
-static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact,
- Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found);
-
-/* RaiseDoubleKeywords.proto */
-static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name);
-
-/* ParseKeywords.proto */
-static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[],\
- PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,\
- const char* function_name);
-
-/* None.proto */
-static CYTHON_INLINE void __Pyx_RaiseUnboundLocalError(const char *varname);
-
-/* GetTopmostException.proto */
-#if CYTHON_USE_EXC_INFO_STACK
-static _PyErr_StackItem * __Pyx_PyErr_GetTopmostException(PyThreadState *tstate);
-#endif
-
-/* PyThreadStateGet.proto */
-#if CYTHON_FAST_THREAD_STATE
-#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate;
-#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current;
-#define __Pyx_PyErr_Occurred() __pyx_tstate->curexc_type
-#else
-#define __Pyx_PyThreadState_declare
-#define __Pyx_PyThreadState_assign
-#define __Pyx_PyErr_Occurred() PyErr_Occurred()
-#endif
-
-/* SaveResetException.proto */
-#if CYTHON_FAST_THREAD_STATE
-#define __Pyx_ExceptionSave(type, value, tb) __Pyx__ExceptionSave(__pyx_tstate, type, value, tb)
-static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb);
-#define __Pyx_ExceptionReset(type, value, tb) __Pyx__ExceptionReset(__pyx_tstate, type, value, tb)
-static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb);
-#else
-#define __Pyx_ExceptionSave(type, value, tb) PyErr_GetExcInfo(type, value, tb)
-#define __Pyx_ExceptionReset(type, value, tb) PyErr_SetExcInfo(type, value, tb)
-#endif
-
-/* PyErrExceptionMatches.proto */
-#if CYTHON_FAST_THREAD_STATE
-#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err)
-static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err);
-#else
-#define __Pyx_PyErr_ExceptionMatches(err) PyErr_ExceptionMatches(err)
-#endif
-
-/* GetException.proto */
-#if CYTHON_FAST_THREAD_STATE
-#define __Pyx_GetException(type, value, tb) __Pyx__GetException(__pyx_tstate, type, value, tb)
-static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb);
-#else
-static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb);
-#endif
-
-/* PyObjectCall.proto */
-#if CYTHON_COMPILING_IN_CPYTHON
-static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw);
-#else
-#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw)
-#endif
-
-/* PyErrFetchRestore.proto */
-#if CYTHON_FAST_THREAD_STATE
-#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL)
-#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb)
-#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb)
-#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb)
-#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb)
-static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb);
-static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb);
-#if CYTHON_COMPILING_IN_CPYTHON
-#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL))
-#else
-#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc)
-#endif
-#else
-#define __Pyx_PyErr_Clear() PyErr_Clear()
-#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc)
-#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb)
-#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb)
-#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb)
-#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb)
-#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb)
-#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb)
-#endif
-
-/* RaiseException.proto */
-static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause);
-
-/* ArgTypeTest.proto */
-#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\
- ((likely((Py_TYPE(obj) == type) | (none_allowed && (obj == Py_None)))) ? 1 :\
- __Pyx__ArgTypeTest(obj, type, name, exact))
-static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact);
-
-/* PyCFunctionFastCall.proto */
-#if CYTHON_FAST_PYCCALL
-static CYTHON_INLINE PyObject *__Pyx_PyCFunction_FastCall(PyObject *func, PyObject **args, Py_ssize_t nargs);
-#else
-#define __Pyx_PyCFunction_FastCall(func, args, nargs) (assert(0), NULL)
-#endif
-
-/* PyFunctionFastCall.proto */
-#if CYTHON_FAST_PYCALL
-#define __Pyx_PyFunction_FastCall(func, args, nargs)\
- __Pyx_PyFunction_FastCallDict((func), (args), (nargs), NULL)
-#if 1 || PY_VERSION_HEX < 0x030600B1
-static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs);
-#else
-#define __Pyx_PyFunction_FastCallDict(func, args, nargs, kwargs) _PyFunction_FastCallDict(func, args, nargs, kwargs)
-#endif
-#define __Pyx_BUILD_ASSERT_EXPR(cond)\
- (sizeof(char [1 - 2*!(cond)]) - 1)
-#ifndef Py_MEMBER_SIZE
-#define Py_MEMBER_SIZE(type, member) sizeof(((type *)0)->member)
-#endif
- static size_t __pyx_pyframe_localsplus_offset = 0;
- #include "frameobject.h"
- #define __Pxy_PyFrame_Initialize_Offsets()\
- ((void)__Pyx_BUILD_ASSERT_EXPR(sizeof(PyFrameObject) == offsetof(PyFrameObject, f_localsplus) + Py_MEMBER_SIZE(PyFrameObject, f_localsplus)),\
- (void)(__pyx_pyframe_localsplus_offset = ((size_t)PyFrame_Type.tp_basicsize) - Py_MEMBER_SIZE(PyFrameObject, f_localsplus)))
- #define __Pyx_PyFrame_GetLocalsplus(frame)\
- (assert(__pyx_pyframe_localsplus_offset), (PyObject **)(((char *)(frame)) + __pyx_pyframe_localsplus_offset))
-#endif
-
-/* PyObjectCall2Args.proto */
-static CYTHON_UNUSED PyObject* __Pyx_PyObject_Call2Args(PyObject* function, PyObject* arg1, PyObject* arg2);
-
-/* PyObjectCallMethO.proto */
-#if CYTHON_COMPILING_IN_CPYTHON
-static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg);
-#endif
-
-/* PyObjectCallOneArg.proto */
-static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg);
-
-/* IncludeStringH.proto */
-#include
-
-/* BytesEquals.proto */
-static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals);
-
-/* UnicodeEquals.proto */
-static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals);
-
-/* StrEquals.proto */
-#if PY_MAJOR_VERSION >= 3
-#define __Pyx_PyString_Equals __Pyx_PyUnicode_Equals
-#else
-#define __Pyx_PyString_Equals __Pyx_PyBytes_Equals
-#endif
-
-/* None.proto */
-static CYTHON_INLINE Py_ssize_t __Pyx_div_Py_ssize_t(Py_ssize_t, Py_ssize_t);
-
-/* UnaryNegOverflows.proto */
-#define UNARY_NEG_WOULD_OVERFLOW(x)\
- (((x) < 0) & ((unsigned long)(x) == 0-(unsigned long)(x)))
-
-static CYTHON_UNUSED int __pyx_array_getbuffer(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/
-static PyObject *__pyx_array_get_memview(struct __pyx_array_obj *); /*proto*/
-/* GetAttr.proto */
-static CYTHON_INLINE PyObject *__Pyx_GetAttr(PyObject *, PyObject *);
-
-/* GetItemInt.proto */
-#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\
- (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\
- __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) :\
- (is_list ? (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) :\
- __Pyx_GetItemInt_Generic(o, to_py_func(i))))
-#define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\
- (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\
- __Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\
- (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL))
-static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i,
- int wraparound, int boundscheck);
-#define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\
- (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\
- __Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\
- (PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL))
-static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i,
- int wraparound, int boundscheck);
-static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j);
-static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i,
- int is_list, int wraparound, int boundscheck);
-
-/* ObjectGetItem.proto */
-#if CYTHON_USE_TYPE_SLOTS
-static CYTHON_INLINE PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject* key);
-#else
-#define __Pyx_PyObject_GetItem(obj, key) PyObject_GetItem(obj, key)
-#endif
-
-/* decode_c_string_utf16.proto */
-static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16(const char *s, Py_ssize_t size, const char *errors) {
- int byteorder = 0;
- return PyUnicode_DecodeUTF16(s, size, errors, &byteorder);
-}
-static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16LE(const char *s, Py_ssize_t size, const char *errors) {
- int byteorder = -1;
- return PyUnicode_DecodeUTF16(s, size, errors, &byteorder);
-}
-static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16BE(const char *s, Py_ssize_t size, const char *errors) {
- int byteorder = 1;
- return PyUnicode_DecodeUTF16(s, size, errors, &byteorder);
-}
-
-/* decode_c_string.proto */
-static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
- const char* cstring, Py_ssize_t start, Py_ssize_t stop,
- const char* encoding, const char* errors,
- PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors));
-
-/* GetAttr3.proto */
-static CYTHON_INLINE PyObject *__Pyx_GetAttr3(PyObject *, PyObject *, PyObject *);
-
-/* PyDictVersioning.proto */
-#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS
-#define __PYX_DICT_VERSION_INIT ((PY_UINT64_T) -1)
-#define __PYX_GET_DICT_VERSION(dict) (((PyDictObject*)(dict))->ma_version_tag)
-#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\
- (version_var) = __PYX_GET_DICT_VERSION(dict);\
- (cache_var) = (value);
-#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\
- static PY_UINT64_T __pyx_dict_version = 0;\
- static PyObject *__pyx_dict_cached_value = NULL;\
- if (likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\
- (VAR) = __pyx_dict_cached_value;\
- } else {\
- (VAR) = __pyx_dict_cached_value = (LOOKUP);\
- __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\
- }\
-}
-static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj);
-static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj);
-static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version);
-#else
-#define __PYX_GET_DICT_VERSION(dict) (0)
-#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)
-#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) (VAR) = (LOOKUP);
-#endif
-
-/* GetModuleGlobalName.proto */
-#if CYTHON_USE_DICT_VERSIONS
-#define __Pyx_GetModuleGlobalName(var, name) {\
- static PY_UINT64_T __pyx_dict_version = 0;\
- static PyObject *__pyx_dict_cached_value = NULL;\
- (var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\
- (likely(__pyx_dict_cached_value) ? __Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\
- __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\
-}
-#define __Pyx_GetModuleGlobalNameUncached(var, name) {\
- PY_UINT64_T __pyx_dict_version;\
- PyObject *__pyx_dict_cached_value;\
- (var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\
-}
-static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value);
-#else
-#define __Pyx_GetModuleGlobalName(var, name) (var) = __Pyx__GetModuleGlobalName(name)
-#define __Pyx_GetModuleGlobalNameUncached(var, name) (var) = __Pyx__GetModuleGlobalName(name)
-static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name);
-#endif
-
-/* RaiseTooManyValuesToUnpack.proto */
-static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected);
-
-/* RaiseNeedMoreValuesToUnpack.proto */
-static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index);
-
-/* RaiseNoneIterError.proto */
-static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void);
-
-/* ExtTypeTest.proto */
-static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type);
-
-/* SwapException.proto */
-#if CYTHON_FAST_THREAD_STATE
-#define __Pyx_ExceptionSwap(type, value, tb) __Pyx__ExceptionSwap(__pyx_tstate, type, value, tb)
-static CYTHON_INLINE void __Pyx__ExceptionSwap(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb);
-#else
-static CYTHON_INLINE void __Pyx_ExceptionSwap(PyObject **type, PyObject **value, PyObject **tb);
-#endif
-
-/* Import.proto */
-static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level);
-
-/* FastTypeChecks.proto */
-#if CYTHON_COMPILING_IN_CPYTHON
-#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type)
-static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b);
-static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type);
-static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2);
-#else
-#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type)
-#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type)
-#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2))
-#endif
-#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception)
-
-static CYTHON_UNUSED int __pyx_memoryview_getbuffer(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/
-/* ListCompAppend.proto */
-#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS
-static CYTHON_INLINE int __Pyx_ListComp_Append(PyObject* list, PyObject* x) {
- PyListObject* L = (PyListObject*) list;
- Py_ssize_t len = Py_SIZE(list);
- if (likely(L->allocated > len)) {
- Py_INCREF(x);
- PyList_SET_ITEM(list, len, x);
- __Pyx_SET_SIZE(list, len + 1);
- return 0;
- }
- return PyList_Append(list, x);
-}
-#else
-#define __Pyx_ListComp_Append(L,x) PyList_Append(L,x)
-#endif
-
-/* PyIntBinop.proto */
-#if !CYTHON_COMPILING_IN_PYPY
-static PyObject* __Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, long intval, int inplace, int zerodivision_check);
-#else
-#define __Pyx_PyInt_AddObjC(op1, op2, intval, inplace, zerodivision_check)\
- (inplace ? PyNumber_InPlaceAdd(op1, op2) : PyNumber_Add(op1, op2))
-#endif
-
-/* ListExtend.proto */
-static CYTHON_INLINE int __Pyx_PyList_Extend(PyObject* L, PyObject* v) {
-#if CYTHON_COMPILING_IN_CPYTHON
- PyObject* none = _PyList_Extend((PyListObject*)L, v);
- if (unlikely(!none))
- return -1;
- Py_DECREF(none);
- return 0;
-#else
- return PyList_SetSlice(L, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, v);
-#endif
-}
-
-/* ListAppend.proto */
-#if CYTHON_USE_PYLIST_INTERNALS && CYTHON_ASSUME_SAFE_MACROS
-static CYTHON_INLINE int __Pyx_PyList_Append(PyObject* list, PyObject* x) {
- PyListObject* L = (PyListObject*) list;
- Py_ssize_t len = Py_SIZE(list);
- if (likely(L->allocated > len) & likely(len > (L->allocated >> 1))) {
- Py_INCREF(x);
- PyList_SET_ITEM(list, len, x);
- __Pyx_SET_SIZE(list, len + 1);
- return 0;
- }
- return PyList_Append(list, x);
-}
-#else
-#define __Pyx_PyList_Append(L,x) PyList_Append(L,x)
-#endif
-
-/* None.proto */
-static CYTHON_INLINE long __Pyx_div_long(long, long);
-
-/* ImportFrom.proto */
-static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name);
-
-/* HasAttr.proto */
-static CYTHON_INLINE int __Pyx_HasAttr(PyObject *, PyObject *);
-
-/* PyObject_GenericGetAttrNoDict.proto */
-#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000
-static CYTHON_INLINE PyObject* __Pyx_PyObject_GenericGetAttrNoDict(PyObject* obj, PyObject* attr_name);
-#else
-#define __Pyx_PyObject_GenericGetAttrNoDict PyObject_GenericGetAttr
-#endif
-
-/* PyObject_GenericGetAttr.proto */
-#if CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP && PY_VERSION_HEX < 0x03070000
-static PyObject* __Pyx_PyObject_GenericGetAttr(PyObject* obj, PyObject* attr_name);
-#else
-#define __Pyx_PyObject_GenericGetAttr PyObject_GenericGetAttr
-#endif
-
-/* SetVTable.proto */
-static int __Pyx_SetVtable(PyObject *dict, void *vtable);
-
-/* PyObjectGetAttrStrNoError.proto */
-static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name);
-
-/* SetupReduce.proto */
-static int __Pyx_setup_reduce(PyObject* type_obj);
-
-/* TypeImport.proto */
-#ifndef __PYX_HAVE_RT_ImportType_proto
-#define __PYX_HAVE_RT_ImportType_proto
-enum __Pyx_ImportType_CheckSize {
- __Pyx_ImportType_CheckSize_Error = 0,
- __Pyx_ImportType_CheckSize_Warn = 1,
- __Pyx_ImportType_CheckSize_Ignore = 2
-};
-static PyTypeObject *__Pyx_ImportType(PyObject* module, const char *module_name, const char *class_name, size_t size, enum __Pyx_ImportType_CheckSize check_size);
-#endif
-
-/* CLineInTraceback.proto */
-#ifdef CYTHON_CLINE_IN_TRACEBACK
-#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? c_line : 0)
-#else
-static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line);
-#endif
-
-/* CodeObjectCache.proto */
-typedef struct {
- PyCodeObject* code_object;
- int code_line;
-} __Pyx_CodeObjectCacheEntry;
-struct __Pyx_CodeObjectCache {
- int count;
- int max_count;
- __Pyx_CodeObjectCacheEntry* entries;
-};
-static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL};
-static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line);
-static PyCodeObject *__pyx_find_code_object(int code_line);
-static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object);
-
-/* AddTraceback.proto */
-static void __Pyx_AddTraceback(const char *funcname, int c_line,
- int py_line, const char *filename);
-
-#if PY_MAJOR_VERSION < 3
- static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags);
- static void __Pyx_ReleaseBuffer(Py_buffer *view);
-#else
- #define __Pyx_GetBuffer PyObject_GetBuffer
- #define __Pyx_ReleaseBuffer PyBuffer_Release
-#endif
-
-
-/* BufferStructDeclare.proto */
-typedef struct {
- Py_ssize_t shape, strides, suboffsets;
-} __Pyx_Buf_DimInfo;
-typedef struct {
- size_t refcount;
- Py_buffer pybuffer;
-} __Pyx_Buffer;
-typedef struct {
- __Pyx_Buffer *rcbuffer;
- char *data;
- __Pyx_Buf_DimInfo diminfo[8];
-} __Pyx_LocalBuf_ND;
-
-/* MemviewSliceIsContig.proto */
-static int __pyx_memviewslice_is_contig(const __Pyx_memviewslice mvs, char order, int ndim);
-
-/* OverlappingSlices.proto */
-static int __pyx_slices_overlap(__Pyx_memviewslice *slice1,
- __Pyx_memviewslice *slice2,
- int ndim, size_t itemsize);
-
-/* Capsule.proto */
-static CYTHON_INLINE PyObject *__pyx_capsule_create(void *p, const char *sig);
-
-/* IsLittleEndian.proto */
-static CYTHON_INLINE int __Pyx_Is_Little_Endian(void);
-
-/* BufferFormatCheck.proto */
-static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts);
-static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx,
- __Pyx_BufFmt_StackElem* stack,
- __Pyx_TypeInfo* type);
-
-/* TypeInfoCompare.proto */
-static int __pyx_typeinfo_cmp(__Pyx_TypeInfo *a, __Pyx_TypeInfo *b);
-
-/* MemviewSliceValidateAndInit.proto */
-static int __Pyx_ValidateAndInit_memviewslice(
- int *axes_specs,
- int c_or_f_flag,
- int buf_flags,
- int ndim,
- __Pyx_TypeInfo *dtype,
- __Pyx_BufFmt_StackElem stack[],
- __Pyx_memviewslice *memviewslice,
- PyObject *original_obj);
-
-/* ObjectToMemviewSlice.proto */
-static CYTHON_INLINE __Pyx_memviewslice __Pyx_PyObject_to_MemoryviewSlice_d_d_dc_int(PyObject *, int writable_flag);
-
-/* ObjectToMemviewSlice.proto */
-static CYTHON_INLINE __Pyx_memviewslice __Pyx_PyObject_to_MemoryviewSlice_d_d_dc_float(PyObject *, int writable_flag);
-
-/* ObjectToMemviewSlice.proto */
-static CYTHON_INLINE __Pyx_memviewslice __Pyx_PyObject_to_MemoryviewSlice_dc_int(PyObject *, int writable_flag);
-
-/* GCCDiagnostics.proto */
-#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
-#define __Pyx_HAS_GCC_DIAGNOSTIC
-#endif
-
-/* RealImag.proto */
-#if CYTHON_CCOMPLEX
- #ifdef __cplusplus
- #define __Pyx_CREAL(z) ((z).real())
- #define __Pyx_CIMAG(z) ((z).imag())
- #else
- #define __Pyx_CREAL(z) (__real__(z))
- #define __Pyx_CIMAG(z) (__imag__(z))
- #endif
-#else
- #define __Pyx_CREAL(z) ((z).real)
- #define __Pyx_CIMAG(z) ((z).imag)
-#endif
-#if defined(__cplusplus) && CYTHON_CCOMPLEX\
- && (defined(_WIN32) || defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 5 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )) || __cplusplus >= 201103)
- #define __Pyx_SET_CREAL(z,x) ((z).real(x))
- #define __Pyx_SET_CIMAG(z,y) ((z).imag(y))
-#else
- #define __Pyx_SET_CREAL(z,x) __Pyx_CREAL(z) = (x)
- #define __Pyx_SET_CIMAG(z,y) __Pyx_CIMAG(z) = (y)
-#endif
-
-/* Arithmetic.proto */
-#if CYTHON_CCOMPLEX
- #define __Pyx_c_eq_float(a, b) ((a)==(b))
- #define __Pyx_c_sum_float(a, b) ((a)+(b))
- #define __Pyx_c_diff_float(a, b) ((a)-(b))
- #define __Pyx_c_prod_float(a, b) ((a)*(b))
- #define __Pyx_c_quot_float(a, b) ((a)/(b))
- #define __Pyx_c_neg_float(a) (-(a))
- #ifdef __cplusplus
- #define __Pyx_c_is_zero_float(z) ((z)==(float)0)
- #define __Pyx_c_conj_float(z) (::std::conj(z))
- #if 1
- #define __Pyx_c_abs_float(z) (::std::abs(z))
- #define __Pyx_c_pow_float(a, b) (::std::pow(a, b))
- #endif
- #else
- #define __Pyx_c_is_zero_float(z) ((z)==0)
- #define __Pyx_c_conj_float(z) (conjf(z))
- #if 1
- #define __Pyx_c_abs_float(z) (cabsf(z))
- #define __Pyx_c_pow_float(a, b) (cpowf(a, b))
- #endif
- #endif
-#else
- static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex, __pyx_t_float_complex);
- static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex, __pyx_t_float_complex);
- static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex, __pyx_t_float_complex);
- static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex, __pyx_t_float_complex);
- static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex, __pyx_t_float_complex);
- static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex);
- static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex);
- static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex);
- #if 1
- static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex);
- static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex, __pyx_t_float_complex);
- #endif
-#endif
-
-/* Arithmetic.proto */
-#if CYTHON_CCOMPLEX
- #define __Pyx_c_eq_double(a, b) ((a)==(b))
- #define __Pyx_c_sum_double(a, b) ((a)+(b))
- #define __Pyx_c_diff_double(a, b) ((a)-(b))
- #define __Pyx_c_prod_double(a, b) ((a)*(b))
- #define __Pyx_c_quot_double(a, b) ((a)/(b))
- #define __Pyx_c_neg_double(a) (-(a))
- #ifdef __cplusplus
- #define __Pyx_c_is_zero_double(z) ((z)==(double)0)
- #define __Pyx_c_conj_double(z) (::std::conj(z))
- #if 1
- #define __Pyx_c_abs_double(z) (::std::abs(z))
- #define __Pyx_c_pow_double(a, b) (::std::pow(a, b))
- #endif
- #else
- #define __Pyx_c_is_zero_double(z) ((z)==0)
- #define __Pyx_c_conj_double(z) (conj(z))
- #if 1
- #define __Pyx_c_abs_double(z) (cabs(z))
- #define __Pyx_c_pow_double(a, b) (cpow(a, b))
- #endif
- #endif
-#else
- static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex, __pyx_t_double_complex);
- static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex, __pyx_t_double_complex);
- static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex, __pyx_t_double_complex);
- static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex, __pyx_t_double_complex);
- static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex, __pyx_t_double_complex);
- static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex);
- static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex);
- static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex);
- #if 1
- static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex);
- static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex, __pyx_t_double_complex);
- #endif
-#endif
-
-/* MemviewSliceCopyTemplate.proto */
-static __Pyx_memviewslice
-__pyx_memoryview_copy_new_contig(const __Pyx_memviewslice *from_mvs,
- const char *mode, int ndim,
- size_t sizeof_dtype, int contig_flag,
- int dtype_is_object);
-
-/* CIntToPy.proto */
-static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value);
-
-/* CIntFromPy.proto */
-static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *);
-
-/* CIntToPy.proto */
-static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value);
-
-/* CIntFromPy.proto */
-static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *);
-
-/* CIntFromPy.proto */
-static CYTHON_INLINE char __Pyx_PyInt_As_char(PyObject *);
-
-/* CheckBinaryVersion.proto */
-static int __Pyx_check_binary_version(void);
-
-/* InitStrings.proto */
-static int __Pyx_InitStrings(__Pyx_StringTabEntry *t);
-
-static PyObject *__pyx_array_get_memview(struct __pyx_array_obj *__pyx_v_self); /* proto*/
-static char *__pyx_memoryview_get_item_pointer(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index); /* proto*/
-static PyObject *__pyx_memoryview_is_slice(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_obj); /* proto*/
-static PyObject *__pyx_memoryview_setitem_slice_assignment(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_dst, PyObject *__pyx_v_src); /* proto*/
-static PyObject *__pyx_memoryview_setitem_slice_assign_scalar(struct __pyx_memoryview_obj *__pyx_v_self, struct __pyx_memoryview_obj *__pyx_v_dst, PyObject *__pyx_v_value); /* proto*/
-static PyObject *__pyx_memoryview_setitem_indexed(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index, PyObject *__pyx_v_value); /* proto*/
-static PyObject *__pyx_memoryview_convert_item_to_object(struct __pyx_memoryview_obj *__pyx_v_self, char *__pyx_v_itemp); /* proto*/
-static PyObject *__pyx_memoryview_assign_item_from_object(struct __pyx_memoryview_obj *__pyx_v_self, char *__pyx_v_itemp, PyObject *__pyx_v_value); /* proto*/
-static PyObject *__pyx_memoryviewslice_convert_item_to_object(struct __pyx_memoryviewslice_obj *__pyx_v_self, char *__pyx_v_itemp); /* proto*/
-static PyObject *__pyx_memoryviewslice_assign_item_from_object(struct __pyx_memoryviewslice_obj *__pyx_v_self, char *__pyx_v_itemp, PyObject *__pyx_v_value); /* proto*/
-
-/* Module declarations from 'cython.view' */
-
-/* Module declarations from 'cython' */
-
-/* Module declarations from 'cpython.buffer' */
-
-/* Module declarations from 'libc.string' */
-
-/* Module declarations from 'libc.stdio' */
-
-/* Module declarations from '__builtin__' */
-
-/* Module declarations from 'cpython.type' */
-static PyTypeObject *__pyx_ptype_7cpython_4type_type = 0;
-
-/* Module declarations from 'cpython' */
-
-/* Module declarations from 'cpython.object' */
-
-/* Module declarations from 'cpython.ref' */
-
-/* Module declarations from 'cpython.mem' */
-
-/* Module declarations from 'numpy' */
-
-/* Module declarations from 'numpy' */
-static PyTypeObject *__pyx_ptype_5numpy_dtype = 0;
-static PyTypeObject *__pyx_ptype_5numpy_flatiter = 0;
-static PyTypeObject *__pyx_ptype_5numpy_broadcast = 0;
-static PyTypeObject *__pyx_ptype_5numpy_ndarray = 0;
-static PyTypeObject *__pyx_ptype_5numpy_ufunc = 0;
-
-/* Module declarations from 'TTS.tts.layers.glow_tts.monotonic_align.core' */
-static PyTypeObject *__pyx_array_type = 0;
-static PyTypeObject *__pyx_MemviewEnum_type = 0;
-static PyTypeObject *__pyx_memoryview_type = 0;
-static PyTypeObject *__pyx_memoryviewslice_type = 0;
-static PyObject *generic = 0;
-static PyObject *strided = 0;
-static PyObject *indirect = 0;
-static PyObject *contiguous = 0;
-static PyObject *indirect_contiguous = 0;
-static int __pyx_memoryview_thread_locks_used;
-static PyThread_type_lock __pyx_memoryview_thread_locks[8];
-static void __pyx_f_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_maximum_path_each(__Pyx_memviewslice, __Pyx_memviewslice, int, int, float); /*proto*/
-static void __pyx_f_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_maximum_path_c(__Pyx_memviewslice, __Pyx_memviewslice, __Pyx_memviewslice, __Pyx_memviewslice, int __pyx_skip_dispatch, struct __pyx_opt_args_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_maximum_path_c *__pyx_optional_args); /*proto*/
-static struct __pyx_array_obj *__pyx_array_new(PyObject *, Py_ssize_t, char *, char *, char *); /*proto*/
-static void *__pyx_align_pointer(void *, size_t); /*proto*/
-static PyObject *__pyx_memoryview_new(PyObject *, int, int, __Pyx_TypeInfo *); /*proto*/
-static CYTHON_INLINE int __pyx_memoryview_check(PyObject *); /*proto*/
-static PyObject *_unellipsify(PyObject *, int); /*proto*/
-static PyObject *assert_direct_dimensions(Py_ssize_t *, int); /*proto*/
-static struct __pyx_memoryview_obj *__pyx_memview_slice(struct __pyx_memoryview_obj *, PyObject *); /*proto*/
-static int __pyx_memoryview_slice_memviewslice(__Pyx_memviewslice *, Py_ssize_t, Py_ssize_t, Py_ssize_t, int, int, int *, Py_ssize_t, Py_ssize_t, Py_ssize_t, int, int, int, int); /*proto*/
-static char *__pyx_pybuffer_index(Py_buffer *, char *, Py_ssize_t, Py_ssize_t); /*proto*/
-static int __pyx_memslice_transpose(__Pyx_memviewslice *); /*proto*/
-static PyObject *__pyx_memoryview_fromslice(__Pyx_memviewslice, int, PyObject *(*)(char *), int (*)(char *, PyObject *), int); /*proto*/
-static __Pyx_memviewslice *__pyx_memoryview_get_slice_from_memoryview(struct __pyx_memoryview_obj *, __Pyx_memviewslice *); /*proto*/
-static void __pyx_memoryview_slice_copy(struct __pyx_memoryview_obj *, __Pyx_memviewslice *); /*proto*/
-static PyObject *__pyx_memoryview_copy_object(struct __pyx_memoryview_obj *); /*proto*/
-static PyObject *__pyx_memoryview_copy_object_from_slice(struct __pyx_memoryview_obj *, __Pyx_memviewslice *); /*proto*/
-static Py_ssize_t abs_py_ssize_t(Py_ssize_t); /*proto*/
-static char __pyx_get_best_slice_order(__Pyx_memviewslice *, int); /*proto*/
-static void _copy_strided_to_strided(char *, Py_ssize_t *, char *, Py_ssize_t *, Py_ssize_t *, Py_ssize_t *, int, size_t); /*proto*/
-static void copy_strided_to_strided(__Pyx_memviewslice *, __Pyx_memviewslice *, int, size_t); /*proto*/
-static Py_ssize_t __pyx_memoryview_slice_get_size(__Pyx_memviewslice *, int); /*proto*/
-static Py_ssize_t __pyx_fill_contig_strides_array(Py_ssize_t *, Py_ssize_t *, Py_ssize_t, int, char); /*proto*/
-static void *__pyx_memoryview_copy_data_to_temp(__Pyx_memviewslice *, __Pyx_memviewslice *, char, int); /*proto*/
-static int __pyx_memoryview_err_extents(int, Py_ssize_t, Py_ssize_t); /*proto*/
-static int __pyx_memoryview_err_dim(PyObject *, char *, int); /*proto*/
-static int __pyx_memoryview_err(PyObject *, char *); /*proto*/
-static int __pyx_memoryview_copy_contents(__Pyx_memviewslice, __Pyx_memviewslice, int, int, int); /*proto*/
-static void __pyx_memoryview_broadcast_leading(__Pyx_memviewslice *, int, int); /*proto*/
-static void __pyx_memoryview_refcount_copying(__Pyx_memviewslice *, int, int, int); /*proto*/
-static void __pyx_memoryview_refcount_objects_in_slice_with_gil(char *, Py_ssize_t *, Py_ssize_t *, int, int); /*proto*/
-static void __pyx_memoryview_refcount_objects_in_slice(char *, Py_ssize_t *, Py_ssize_t *, int, int); /*proto*/
-static void __pyx_memoryview_slice_assign_scalar(__Pyx_memviewslice *, int, size_t, void *, int); /*proto*/
-static void __pyx_memoryview__slice_assign_scalar(char *, Py_ssize_t *, Py_ssize_t *, int, size_t, void *); /*proto*/
-static PyObject *__pyx_unpickle_Enum__set_state(struct __pyx_MemviewEnum_obj *, PyObject *); /*proto*/
-static __Pyx_TypeInfo __Pyx_TypeInfo_int = { "int", NULL, sizeof(int), { 0 }, 0, IS_UNSIGNED(int) ? 'U' : 'I', IS_UNSIGNED(int), 0 };
-static __Pyx_TypeInfo __Pyx_TypeInfo_float = { "float", NULL, sizeof(float), { 0 }, 0, 'R', 0, 0 };
-#define __Pyx_MODULE_NAME "TTS.tts.layers.glow_tts.monotonic_align.core"
-extern int __pyx_module_is_main_TTS__tts__layers__glow_tts__monotonic_align__core;
-int __pyx_module_is_main_TTS__tts__layers__glow_tts__monotonic_align__core = 0;
-
-/* Implementation of 'TTS.tts.layers.glow_tts.monotonic_align.core' */
-static PyObject *__pyx_builtin_range;
-static PyObject *__pyx_builtin_ImportError;
-static PyObject *__pyx_builtin_ValueError;
-static PyObject *__pyx_builtin_MemoryError;
-static PyObject *__pyx_builtin_enumerate;
-static PyObject *__pyx_builtin_TypeError;
-static PyObject *__pyx_builtin_Ellipsis;
-static PyObject *__pyx_builtin_id;
-static PyObject *__pyx_builtin_IndexError;
-static const char __pyx_k_O[] = "O";
-static const char __pyx_k_c[] = "c";
-static const char __pyx_k_id[] = "id";
-static const char __pyx_k_np[] = "np";
-static const char __pyx_k_new[] = "__new__";
-static const char __pyx_k_obj[] = "obj";
-static const char __pyx_k_base[] = "base";
-static const char __pyx_k_dict[] = "__dict__";
-static const char __pyx_k_main[] = "__main__";
-static const char __pyx_k_mode[] = "mode";
-static const char __pyx_k_name[] = "name";
-static const char __pyx_k_ndim[] = "ndim";
-static const char __pyx_k_pack[] = "pack";
-static const char __pyx_k_size[] = "size";
-static const char __pyx_k_step[] = "step";
-static const char __pyx_k_stop[] = "stop";
-static const char __pyx_k_t_xs[] = "t_xs";
-static const char __pyx_k_t_ys[] = "t_ys";
-static const char __pyx_k_test[] = "__test__";
-static const char __pyx_k_ASCII[] = "ASCII";
-static const char __pyx_k_class[] = "__class__";
-static const char __pyx_k_error[] = "error";
-static const char __pyx_k_flags[] = "flags";
-static const char __pyx_k_numpy[] = "numpy";
-static const char __pyx_k_paths[] = "paths";
-static const char __pyx_k_range[] = "range";
-static const char __pyx_k_shape[] = "shape";
-static const char __pyx_k_start[] = "start";
-static const char __pyx_k_encode[] = "encode";
-static const char __pyx_k_format[] = "format";
-static const char __pyx_k_import[] = "__import__";
-static const char __pyx_k_name_2[] = "__name__";
-static const char __pyx_k_pickle[] = "pickle";
-static const char __pyx_k_reduce[] = "__reduce__";
-static const char __pyx_k_struct[] = "struct";
-static const char __pyx_k_unpack[] = "unpack";
-static const char __pyx_k_update[] = "update";
-static const char __pyx_k_values[] = "values";
-static const char __pyx_k_fortran[] = "fortran";
-static const char __pyx_k_memview[] = "memview";
-static const char __pyx_k_Ellipsis[] = "Ellipsis";
-static const char __pyx_k_getstate[] = "__getstate__";
-static const char __pyx_k_itemsize[] = "itemsize";
-static const char __pyx_k_pyx_type[] = "__pyx_type";
-static const char __pyx_k_setstate[] = "__setstate__";
-static const char __pyx_k_TypeError[] = "TypeError";
-static const char __pyx_k_enumerate[] = "enumerate";
-static const char __pyx_k_pyx_state[] = "__pyx_state";
-static const char __pyx_k_reduce_ex[] = "__reduce_ex__";
-static const char __pyx_k_IndexError[] = "IndexError";
-static const char __pyx_k_ValueError[] = "ValueError";
-static const char __pyx_k_pyx_result[] = "__pyx_result";
-static const char __pyx_k_pyx_vtable[] = "__pyx_vtable__";
-static const char __pyx_k_ImportError[] = "ImportError";
-static const char __pyx_k_MemoryError[] = "MemoryError";
-static const char __pyx_k_PickleError[] = "PickleError";
-static const char __pyx_k_max_neg_val[] = "max_neg_val";
-static const char __pyx_k_pyx_checksum[] = "__pyx_checksum";
-static const char __pyx_k_stringsource[] = "stringsource";
-static const char __pyx_k_pyx_getbuffer[] = "__pyx_getbuffer";
-static const char __pyx_k_reduce_cython[] = "__reduce_cython__";
-static const char __pyx_k_View_MemoryView[] = "View.MemoryView";
-static const char __pyx_k_allocate_buffer[] = "allocate_buffer";
-static const char __pyx_k_dtype_is_object[] = "dtype_is_object";
-static const char __pyx_k_pyx_PickleError[] = "__pyx_PickleError";
-static const char __pyx_k_setstate_cython[] = "__setstate_cython__";
-static const char __pyx_k_pyx_unpickle_Enum[] = "__pyx_unpickle_Enum";
-static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback";
-static const char __pyx_k_strided_and_direct[] = "";
-static const char __pyx_k_strided_and_indirect[] = "";
-static const char __pyx_k_contiguous_and_direct[] = "";
-static const char __pyx_k_MemoryView_of_r_object[] = "";
-static const char __pyx_k_MemoryView_of_r_at_0x_x[] = "";
-static const char __pyx_k_contiguous_and_indirect[] = "";
-static const char __pyx_k_Cannot_index_with_type_s[] = "Cannot index with type '%s'";
-static const char __pyx_k_Invalid_shape_in_axis_d_d[] = "Invalid shape in axis %d: %d.";
-static const char __pyx_k_itemsize_0_for_cython_array[] = "itemsize <= 0 for cython.array";
-static const char __pyx_k_unable_to_allocate_array_data[] = "unable to allocate array data.";
-static const char __pyx_k_strided_and_direct_or_indirect[] = "";
-static const char __pyx_k_numpy_core_multiarray_failed_to[] = "numpy.core.multiarray failed to import";
-static const char __pyx_k_Buffer_view_does_not_expose_stri[] = "Buffer view does not expose strides";
-static const char __pyx_k_Can_only_create_a_buffer_that_is[] = "Can only create a buffer that is contiguous in memory.";
-static const char __pyx_k_Cannot_assign_to_read_only_memor[] = "Cannot assign to read-only memoryview";
-static const char __pyx_k_Cannot_create_writable_memory_vi[] = "Cannot create writable memory view from read-only memoryview";
-static const char __pyx_k_Empty_shape_tuple_for_cython_arr[] = "Empty shape tuple for cython.array";
-static const char __pyx_k_Incompatible_checksums_s_vs_0xb0[] = "Incompatible checksums (%s vs 0xb068931 = (name))";
-static const char __pyx_k_Indirect_dimensions_not_supporte[] = "Indirect dimensions not supported";
-static const char __pyx_k_Invalid_mode_expected_c_or_fortr[] = "Invalid mode, expected 'c' or 'fortran', got %s";
-static const char __pyx_k_Out_of_bounds_on_buffer_access_a[] = "Out of bounds on buffer access (axis %d)";
-static const char __pyx_k_Unable_to_convert_item_to_object[] = "Unable to convert item to object";
-static const char __pyx_k_got_differing_extents_in_dimensi[] = "got differing extents in dimension %d (got %d and %d)";
-static const char __pyx_k_no_default___reduce___due_to_non[] = "no default __reduce__ due to non-trivial __cinit__";
-static const char __pyx_k_numpy_core_umath_failed_to_impor[] = "numpy.core.umath failed to import";
-static const char __pyx_k_unable_to_allocate_shape_and_str[] = "unable to allocate shape and strides.";
-static PyObject *__pyx_n_s_ASCII;
-static PyObject *__pyx_kp_s_Buffer_view_does_not_expose_stri;
-static PyObject *__pyx_kp_s_Can_only_create_a_buffer_that_is;
-static PyObject *__pyx_kp_s_Cannot_assign_to_read_only_memor;
-static PyObject *__pyx_kp_s_Cannot_create_writable_memory_vi;
-static PyObject *__pyx_kp_s_Cannot_index_with_type_s;
-static PyObject *__pyx_n_s_Ellipsis;
-static PyObject *__pyx_kp_s_Empty_shape_tuple_for_cython_arr;
-static PyObject *__pyx_n_s_ImportError;
-static PyObject *__pyx_kp_s_Incompatible_checksums_s_vs_0xb0;
-static PyObject *__pyx_n_s_IndexError;
-static PyObject *__pyx_kp_s_Indirect_dimensions_not_supporte;
-static PyObject *__pyx_kp_s_Invalid_mode_expected_c_or_fortr;
-static PyObject *__pyx_kp_s_Invalid_shape_in_axis_d_d;
-static PyObject *__pyx_n_s_MemoryError;
-static PyObject *__pyx_kp_s_MemoryView_of_r_at_0x_x;
-static PyObject *__pyx_kp_s_MemoryView_of_r_object;
-static PyObject *__pyx_n_b_O;
-static PyObject *__pyx_kp_s_Out_of_bounds_on_buffer_access_a;
-static PyObject *__pyx_n_s_PickleError;
-static PyObject *__pyx_n_s_TypeError;
-static PyObject *__pyx_kp_s_Unable_to_convert_item_to_object;
-static PyObject *__pyx_n_s_ValueError;
-static PyObject *__pyx_n_s_View_MemoryView;
-static PyObject *__pyx_n_s_allocate_buffer;
-static PyObject *__pyx_n_s_base;
-static PyObject *__pyx_n_s_c;
-static PyObject *__pyx_n_u_c;
-static PyObject *__pyx_n_s_class;
-static PyObject *__pyx_n_s_cline_in_traceback;
-static PyObject *__pyx_kp_s_contiguous_and_direct;
-static PyObject *__pyx_kp_s_contiguous_and_indirect;
-static PyObject *__pyx_n_s_dict;
-static PyObject *__pyx_n_s_dtype_is_object;
-static PyObject *__pyx_n_s_encode;
-static PyObject *__pyx_n_s_enumerate;
-static PyObject *__pyx_n_s_error;
-static PyObject *__pyx_n_s_flags;
-static PyObject *__pyx_n_s_format;
-static PyObject *__pyx_n_s_fortran;
-static PyObject *__pyx_n_u_fortran;
-static PyObject *__pyx_n_s_getstate;
-static PyObject *__pyx_kp_s_got_differing_extents_in_dimensi;
-static PyObject *__pyx_n_s_id;
-static PyObject *__pyx_n_s_import;
-static PyObject *__pyx_n_s_itemsize;
-static PyObject *__pyx_kp_s_itemsize_0_for_cython_array;
-static PyObject *__pyx_n_s_main;
-static PyObject *__pyx_n_s_max_neg_val;
-static PyObject *__pyx_n_s_memview;
-static PyObject *__pyx_n_s_mode;
-static PyObject *__pyx_n_s_name;
-static PyObject *__pyx_n_s_name_2;
-static PyObject *__pyx_n_s_ndim;
-static PyObject *__pyx_n_s_new;
-static PyObject *__pyx_kp_s_no_default___reduce___due_to_non;
-static PyObject *__pyx_n_s_np;
-static PyObject *__pyx_n_s_numpy;
-static PyObject *__pyx_kp_u_numpy_core_multiarray_failed_to;
-static PyObject *__pyx_kp_u_numpy_core_umath_failed_to_impor;
-static PyObject *__pyx_n_s_obj;
-static PyObject *__pyx_n_s_pack;
-static PyObject *__pyx_n_s_paths;
-static PyObject *__pyx_n_s_pickle;
-static PyObject *__pyx_n_s_pyx_PickleError;
-static PyObject *__pyx_n_s_pyx_checksum;
-static PyObject *__pyx_n_s_pyx_getbuffer;
-static PyObject *__pyx_n_s_pyx_result;
-static PyObject *__pyx_n_s_pyx_state;
-static PyObject *__pyx_n_s_pyx_type;
-static PyObject *__pyx_n_s_pyx_unpickle_Enum;
-static PyObject *__pyx_n_s_pyx_vtable;
-static PyObject *__pyx_n_s_range;
-static PyObject *__pyx_n_s_reduce;
-static PyObject *__pyx_n_s_reduce_cython;
-static PyObject *__pyx_n_s_reduce_ex;
-static PyObject *__pyx_n_s_setstate;
-static PyObject *__pyx_n_s_setstate_cython;
-static PyObject *__pyx_n_s_shape;
-static PyObject *__pyx_n_s_size;
-static PyObject *__pyx_n_s_start;
-static PyObject *__pyx_n_s_step;
-static PyObject *__pyx_n_s_stop;
-static PyObject *__pyx_kp_s_strided_and_direct;
-static PyObject *__pyx_kp_s_strided_and_direct_or_indirect;
-static PyObject *__pyx_kp_s_strided_and_indirect;
-static PyObject *__pyx_kp_s_stringsource;
-static PyObject *__pyx_n_s_struct;
-static PyObject *__pyx_n_s_t_xs;
-static PyObject *__pyx_n_s_t_ys;
-static PyObject *__pyx_n_s_test;
-static PyObject *__pyx_kp_s_unable_to_allocate_array_data;
-static PyObject *__pyx_kp_s_unable_to_allocate_shape_and_str;
-static PyObject *__pyx_n_s_unpack;
-static PyObject *__pyx_n_s_update;
-static PyObject *__pyx_n_s_values;
-static PyObject *__pyx_pf_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_maximum_path_c(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_xs, __Pyx_memviewslice __pyx_v_t_ys, float __pyx_v_max_neg_val); /* proto */
-static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array___cinit__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_shape, Py_ssize_t __pyx_v_itemsize, PyObject *__pyx_v_format, PyObject *__pyx_v_mode, int __pyx_v_allocate_buffer); /* proto */
-static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_2__getbuffer__(struct __pyx_array_obj *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */
-static void __pyx_array___pyx_pf_15View_dot_MemoryView_5array_4__dealloc__(struct __pyx_array_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_pf_15View_dot_MemoryView_5array_7memview___get__(struct __pyx_array_obj *__pyx_v_self); /* proto */
-static Py_ssize_t __pyx_array___pyx_pf_15View_dot_MemoryView_5array_6__len__(struct __pyx_array_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_8__getattr__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_attr); /* proto */
-static PyObject *__pyx_array___pyx_pf_15View_dot_MemoryView_5array_10__getitem__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_item); /* proto */
-static int __pyx_array___pyx_pf_15View_dot_MemoryView_5array_12__setitem__(struct __pyx_array_obj *__pyx_v_self, PyObject *__pyx_v_item, PyObject *__pyx_v_value); /* proto */
-static PyObject *__pyx_pf___pyx_array___reduce_cython__(CYTHON_UNUSED struct __pyx_array_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_pf___pyx_array_2__setstate_cython__(CYTHON_UNUSED struct __pyx_array_obj *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state); /* proto */
-static int __pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum___init__(struct __pyx_MemviewEnum_obj *__pyx_v_self, PyObject *__pyx_v_name); /* proto */
-static PyObject *__pyx_MemviewEnum___pyx_pf_15View_dot_MemoryView_4Enum_2__repr__(struct __pyx_MemviewEnum_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_pf___pyx_MemviewEnum___reduce_cython__(struct __pyx_MemviewEnum_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_pf___pyx_MemviewEnum_2__setstate_cython__(struct __pyx_MemviewEnum_obj *__pyx_v_self, PyObject *__pyx_v___pyx_state); /* proto */
-static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview___cinit__(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_obj, int __pyx_v_flags, int __pyx_v_dtype_is_object); /* proto */
-static void __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_2__dealloc__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_4__getitem__(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index); /* proto */
-static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_6__setitem__(struct __pyx_memoryview_obj *__pyx_v_self, PyObject *__pyx_v_index, PyObject *__pyx_v_value); /* proto */
-static int __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_8__getbuffer__(struct __pyx_memoryview_obj *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */
-static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_1T___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4base___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_5shape___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_7strides___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_10suboffsets___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4ndim___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_8itemsize___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_6nbytes___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_pf_15View_dot_MemoryView_10memoryview_4size___get__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static Py_ssize_t __pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_10__len__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_12__repr__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_14__str__(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_16is_c_contig(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_18is_f_contig(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_20copy(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_memoryview___pyx_pf_15View_dot_MemoryView_10memoryview_22copy_fortran(struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_pf___pyx_memoryview___reduce_cython__(CYTHON_UNUSED struct __pyx_memoryview_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_pf___pyx_memoryview_2__setstate_cython__(CYTHON_UNUSED struct __pyx_memoryview_obj *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state); /* proto */
-static void __pyx_memoryviewslice___pyx_pf_15View_dot_MemoryView_16_memoryviewslice___dealloc__(struct __pyx_memoryviewslice_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_pf_15View_dot_MemoryView_16_memoryviewslice_4base___get__(struct __pyx_memoryviewslice_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_pf___pyx_memoryviewslice___reduce_cython__(CYTHON_UNUSED struct __pyx_memoryviewslice_obj *__pyx_v_self); /* proto */
-static PyObject *__pyx_pf___pyx_memoryviewslice_2__setstate_cython__(CYTHON_UNUSED struct __pyx_memoryviewslice_obj *__pyx_v_self, CYTHON_UNUSED PyObject *__pyx_v___pyx_state); /* proto */
-static PyObject *__pyx_pf_15View_dot_MemoryView___pyx_unpickle_Enum(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v___pyx_type, long __pyx_v___pyx_checksum, PyObject *__pyx_v___pyx_state); /* proto */
-static PyObject *__pyx_tp_new_array(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
-static PyObject *__pyx_tp_new_Enum(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
-static PyObject *__pyx_tp_new_memoryview(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
-static PyObject *__pyx_tp_new__memoryviewslice(PyTypeObject *t, PyObject *a, PyObject *k); /*proto*/
-static PyObject *__pyx_int_0;
-static PyObject *__pyx_int_1;
-static PyObject *__pyx_int_184977713;
-static PyObject *__pyx_int_neg_1;
-static float __pyx_k_;
-static PyObject *__pyx_tuple__2;
-static PyObject *__pyx_tuple__3;
-static PyObject *__pyx_tuple__4;
-static PyObject *__pyx_tuple__5;
-static PyObject *__pyx_tuple__6;
-static PyObject *__pyx_tuple__7;
-static PyObject *__pyx_tuple__8;
-static PyObject *__pyx_tuple__9;
-static PyObject *__pyx_slice__18;
-static PyObject *__pyx_tuple__10;
-static PyObject *__pyx_tuple__11;
-static PyObject *__pyx_tuple__12;
-static PyObject *__pyx_tuple__13;
-static PyObject *__pyx_tuple__14;
-static PyObject *__pyx_tuple__15;
-static PyObject *__pyx_tuple__16;
-static PyObject *__pyx_tuple__17;
-static PyObject *__pyx_tuple__19;
-static PyObject *__pyx_tuple__20;
-static PyObject *__pyx_tuple__21;
-static PyObject *__pyx_tuple__22;
-static PyObject *__pyx_tuple__23;
-static PyObject *__pyx_tuple__24;
-static PyObject *__pyx_tuple__25;
-static PyObject *__pyx_tuple__26;
-static PyObject *__pyx_tuple__27;
-static PyObject *__pyx_codeobj__28;
-/* Late includes */
-
-/* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":11
- * @cython.boundscheck(False)
- * @cython.wraparound(False)
- * cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_x, int t_y, float max_neg_val) nogil: # <<<<<<<<<<<<<<
- * cdef int x
- * cdef int y
- */
-
-static void __pyx_f_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_maximum_path_each(__Pyx_memviewslice __pyx_v_path, __Pyx_memviewslice __pyx_v_value, int __pyx_v_t_x, int __pyx_v_t_y, float __pyx_v_max_neg_val) {
- int __pyx_v_x;
- int __pyx_v_y;
- float __pyx_v_v_prev;
- float __pyx_v_v_cur;
- int __pyx_v_index;
- int __pyx_t_1;
- int __pyx_t_2;
- int __pyx_t_3;
- long __pyx_t_4;
- int __pyx_t_5;
- long __pyx_t_6;
- long __pyx_t_7;
- int __pyx_t_8;
- Py_ssize_t __pyx_t_9;
- Py_ssize_t __pyx_t_10;
- float __pyx_t_11;
- float __pyx_t_12;
- float __pyx_t_13;
- Py_ssize_t __pyx_t_14;
- Py_ssize_t __pyx_t_15;
- int __pyx_t_16;
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":17
- * cdef float v_cur
- * cdef float tmp
- * cdef int index = t_x - 1 # <<<<<<<<<<<<<<
- *
- * for y in range(t_y):
- */
- __pyx_v_index = (__pyx_v_t_x - 1);
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":19
- * cdef int index = t_x - 1
- *
- * for y in range(t_y): # <<<<<<<<<<<<<<
- * for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
- * if x == y:
- */
- __pyx_t_1 = __pyx_v_t_y;
- __pyx_t_2 = __pyx_t_1;
- for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) {
- __pyx_v_y = __pyx_t_3;
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":20
- *
- * for y in range(t_y):
- * for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)): # <<<<<<<<<<<<<<
- * if x == y:
- * v_cur = max_neg_val
- */
- __pyx_t_4 = (__pyx_v_y + 1);
- __pyx_t_5 = __pyx_v_t_x;
- if (((__pyx_t_4 < __pyx_t_5) != 0)) {
- __pyx_t_6 = __pyx_t_4;
- } else {
- __pyx_t_6 = __pyx_t_5;
- }
- __pyx_t_4 = __pyx_t_6;
- __pyx_t_5 = ((__pyx_v_t_x + __pyx_v_y) - __pyx_v_t_y);
- __pyx_t_6 = 0;
- if (((__pyx_t_5 > __pyx_t_6) != 0)) {
- __pyx_t_7 = __pyx_t_5;
- } else {
- __pyx_t_7 = __pyx_t_6;
- }
- __pyx_t_6 = __pyx_t_4;
- for (__pyx_t_5 = __pyx_t_7; __pyx_t_5 < __pyx_t_6; __pyx_t_5+=1) {
- __pyx_v_x = __pyx_t_5;
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":21
- * for y in range(t_y):
- * for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
- * if x == y: # <<<<<<<<<<<<<<
- * v_cur = max_neg_val
- * else:
- */
- __pyx_t_8 = ((__pyx_v_x == __pyx_v_y) != 0);
- if (__pyx_t_8) {
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":22
- * for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
- * if x == y:
- * v_cur = max_neg_val # <<<<<<<<<<<<<<
- * else:
- * v_cur = value[x, y-1]
- */
- __pyx_v_v_cur = __pyx_v_max_neg_val;
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":21
- * for y in range(t_y):
- * for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
- * if x == y: # <<<<<<<<<<<<<<
- * v_cur = max_neg_val
- * else:
- */
- goto __pyx_L7;
- }
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":24
- * v_cur = max_neg_val
- * else:
- * v_cur = value[x, y-1] # <<<<<<<<<<<<<<
- * if x == 0:
- * if y == 0:
- */
- /*else*/ {
- __pyx_t_9 = __pyx_v_x;
- __pyx_t_10 = (__pyx_v_y - 1);
- __pyx_v_v_cur = (*((float *) ( /* dim=1 */ ((char *) (((float *) ( /* dim=0 */ (__pyx_v_value.data + __pyx_t_9 * __pyx_v_value.strides[0]) )) + __pyx_t_10)) )));
- }
- __pyx_L7:;
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":25
- * else:
- * v_cur = value[x, y-1]
- * if x == 0: # <<<<<<<<<<<<<<
- * if y == 0:
- * v_prev = 0.
- */
- __pyx_t_8 = ((__pyx_v_x == 0) != 0);
- if (__pyx_t_8) {
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":26
- * v_cur = value[x, y-1]
- * if x == 0:
- * if y == 0: # <<<<<<<<<<<<<<
- * v_prev = 0.
- * else:
- */
- __pyx_t_8 = ((__pyx_v_y == 0) != 0);
- if (__pyx_t_8) {
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":27
- * if x == 0:
- * if y == 0:
- * v_prev = 0. # <<<<<<<<<<<<<<
- * else:
- * v_prev = max_neg_val
- */
- __pyx_v_v_prev = 0.;
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":26
- * v_cur = value[x, y-1]
- * if x == 0:
- * if y == 0: # <<<<<<<<<<<<<<
- * v_prev = 0.
- * else:
- */
- goto __pyx_L9;
- }
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":29
- * v_prev = 0.
- * else:
- * v_prev = max_neg_val # <<<<<<<<<<<<<<
- * else:
- * v_prev = value[x-1, y-1]
- */
- /*else*/ {
- __pyx_v_v_prev = __pyx_v_max_neg_val;
- }
- __pyx_L9:;
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":25
- * else:
- * v_cur = value[x, y-1]
- * if x == 0: # <<<<<<<<<<<<<<
- * if y == 0:
- * v_prev = 0.
- */
- goto __pyx_L8;
- }
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":31
- * v_prev = max_neg_val
- * else:
- * v_prev = value[x-1, y-1] # <<<<<<<<<<<<<<
- * value[x, y] = max(v_cur, v_prev) + value[x, y]
- *
- */
- /*else*/ {
- __pyx_t_10 = (__pyx_v_x - 1);
- __pyx_t_9 = (__pyx_v_y - 1);
- __pyx_v_v_prev = (*((float *) ( /* dim=1 */ ((char *) (((float *) ( /* dim=0 */ (__pyx_v_value.data + __pyx_t_10 * __pyx_v_value.strides[0]) )) + __pyx_t_9)) )));
- }
- __pyx_L8:;
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":32
- * else:
- * v_prev = value[x-1, y-1]
- * value[x, y] = max(v_cur, v_prev) + value[x, y] # <<<<<<<<<<<<<<
- *
- * for y in range(t_y - 1, -1, -1):
- */
- __pyx_t_11 = __pyx_v_v_prev;
- __pyx_t_12 = __pyx_v_v_cur;
- if (((__pyx_t_11 > __pyx_t_12) != 0)) {
- __pyx_t_13 = __pyx_t_11;
- } else {
- __pyx_t_13 = __pyx_t_12;
- }
- __pyx_t_9 = __pyx_v_x;
- __pyx_t_10 = __pyx_v_y;
- __pyx_t_14 = __pyx_v_x;
- __pyx_t_15 = __pyx_v_y;
- *((float *) ( /* dim=1 */ ((char *) (((float *) ( /* dim=0 */ (__pyx_v_value.data + __pyx_t_14 * __pyx_v_value.strides[0]) )) + __pyx_t_15)) )) = (__pyx_t_13 + (*((float *) ( /* dim=1 */ ((char *) (((float *) ( /* dim=0 */ (__pyx_v_value.data + __pyx_t_9 * __pyx_v_value.strides[0]) )) + __pyx_t_10)) ))));
- }
- }
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":34
- * value[x, y] = max(v_cur, v_prev) + value[x, y]
- *
- * for y in range(t_y - 1, -1, -1): # <<<<<<<<<<<<<<
- * path[index, y] = 1
- * if index != 0 and (index == y or value[index, y-1] < value[index-1, y-1]):
- */
- for (__pyx_t_1 = (__pyx_v_t_y - 1); __pyx_t_1 > -1; __pyx_t_1-=1) {
- __pyx_v_y = __pyx_t_1;
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":35
- *
- * for y in range(t_y - 1, -1, -1):
- * path[index, y] = 1 # <<<<<<<<<<<<<<
- * if index != 0 and (index == y or value[index, y-1] < value[index-1, y-1]):
- * index = index - 1
- */
- __pyx_t_10 = __pyx_v_index;
- __pyx_t_9 = __pyx_v_y;
- *((int *) ( /* dim=1 */ ((char *) (((int *) ( /* dim=0 */ (__pyx_v_path.data + __pyx_t_10 * __pyx_v_path.strides[0]) )) + __pyx_t_9)) )) = 1;
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":36
- * for y in range(t_y - 1, -1, -1):
- * path[index, y] = 1
- * if index != 0 and (index == y or value[index, y-1] < value[index-1, y-1]): # <<<<<<<<<<<<<<
- * index = index - 1
- *
- */
- __pyx_t_16 = ((__pyx_v_index != 0) != 0);
- if (__pyx_t_16) {
- } else {
- __pyx_t_8 = __pyx_t_16;
- goto __pyx_L13_bool_binop_done;
- }
- __pyx_t_16 = ((__pyx_v_index == __pyx_v_y) != 0);
- if (!__pyx_t_16) {
- } else {
- __pyx_t_8 = __pyx_t_16;
- goto __pyx_L13_bool_binop_done;
- }
- __pyx_t_9 = __pyx_v_index;
- __pyx_t_10 = (__pyx_v_y - 1);
- __pyx_t_15 = (__pyx_v_index - 1);
- __pyx_t_14 = (__pyx_v_y - 1);
- __pyx_t_16 = (((*((float *) ( /* dim=1 */ ((char *) (((float *) ( /* dim=0 */ (__pyx_v_value.data + __pyx_t_9 * __pyx_v_value.strides[0]) )) + __pyx_t_10)) ))) < (*((float *) ( /* dim=1 */ ((char *) (((float *) ( /* dim=0 */ (__pyx_v_value.data + __pyx_t_15 * __pyx_v_value.strides[0]) )) + __pyx_t_14)) )))) != 0);
- __pyx_t_8 = __pyx_t_16;
- __pyx_L13_bool_binop_done:;
- if (__pyx_t_8) {
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":37
- * path[index, y] = 1
- * if index != 0 and (index == y or value[index, y-1] < value[index-1, y-1]):
- * index = index - 1 # <<<<<<<<<<<<<<
- *
- *
- */
- __pyx_v_index = (__pyx_v_index - 1);
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":36
- * for y in range(t_y - 1, -1, -1):
- * path[index, y] = 1
- * if index != 0 and (index == y or value[index, y-1] < value[index-1, y-1]): # <<<<<<<<<<<<<<
- * index = index - 1
- *
- */
- }
- }
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":11
- * @cython.boundscheck(False)
- * @cython.wraparound(False)
- * cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_x, int t_y, float max_neg_val) nogil: # <<<<<<<<<<<<<<
- * cdef int x
- * cdef int y
- */
-
- /* function exit code */
-}
-
-/* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":42
- * @cython.boundscheck(False)
- * @cython.wraparound(False)
- * cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_xs, int[::1] t_ys, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<<
- * cdef int b = values.shape[0]
- *
- */
-
-static PyObject *__pyx_pw_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
-static void __pyx_f_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_maximum_path_c(__Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_xs, __Pyx_memviewslice __pyx_v_t_ys, CYTHON_UNUSED int __pyx_skip_dispatch, struct __pyx_opt_args_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_maximum_path_c *__pyx_optional_args) {
- float __pyx_v_max_neg_val = __pyx_k_;
- CYTHON_UNUSED int __pyx_v_b;
- int __pyx_v_i;
- int __pyx_t_1;
- int __pyx_t_2;
- int __pyx_t_3;
- __Pyx_memviewslice __pyx_t_4 = { 0, 0, { 0 }, { 0 }, { 0 } };
- __Pyx_memviewslice __pyx_t_5 = { 0, 0, { 0 }, { 0 }, { 0 } };
- Py_ssize_t __pyx_t_6;
- Py_ssize_t __pyx_t_7;
- if (__pyx_optional_args) {
- if (__pyx_optional_args->__pyx_n > 0) {
- __pyx_v_max_neg_val = __pyx_optional_args->max_neg_val;
- }
- }
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":43
- * @cython.wraparound(False)
- * cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_xs, int[::1] t_ys, float max_neg_val=-1e9) nogil:
- * cdef int b = values.shape[0] # <<<<<<<<<<<<<<
- *
- * cdef int i
- */
- __pyx_v_b = (__pyx_v_values.shape[0]);
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":46
- *
- * cdef int i
- * for i in prange(b, nogil=True): # <<<<<<<<<<<<<<
- * maximum_path_each(paths[i], values[i], t_xs[i], t_ys[i], max_neg_val)
- */
- {
- #ifdef WITH_THREAD
- PyThreadState *_save;
- Py_UNBLOCK_THREADS
- __Pyx_FastGIL_Remember();
- #endif
- /*try:*/ {
- __pyx_t_1 = __pyx_v_b;
- if ((1 == 0)) abort();
- {
- #if ((defined(__APPLE__) || defined(__OSX__)) && (defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))))
- #undef likely
- #undef unlikely
- #define likely(x) (x)
- #define unlikely(x) (x)
- #endif
- __pyx_t_3 = (__pyx_t_1 - 0 + 1 - 1/abs(1)) / 1;
- if (__pyx_t_3 > 0)
- {
- #ifdef _OPENMP
- #pragma omp parallel private(__pyx_t_6, __pyx_t_7) firstprivate(__pyx_t_4, __pyx_t_5)
- #endif /* _OPENMP */
- {
- #ifdef _OPENMP
- #pragma omp for firstprivate(__pyx_v_i) lastprivate(__pyx_v_i)
- #endif /* _OPENMP */
- for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_3; __pyx_t_2++){
- {
- __pyx_v_i = (int)(0 + 1 * __pyx_t_2);
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":47
- * cdef int i
- * for i in prange(b, nogil=True):
- * maximum_path_each(paths[i], values[i], t_xs[i], t_ys[i], max_neg_val) # <<<<<<<<<<<<<<
- */
- __pyx_t_4.data = __pyx_v_paths.data;
- __pyx_t_4.memview = __pyx_v_paths.memview;
- __PYX_INC_MEMVIEW(&__pyx_t_4, 0);
- {
- Py_ssize_t __pyx_tmp_idx = __pyx_v_i;
- Py_ssize_t __pyx_tmp_stride = __pyx_v_paths.strides[0];
- __pyx_t_4.data += __pyx_tmp_idx * __pyx_tmp_stride;
-}
-
-__pyx_t_4.shape[0] = __pyx_v_paths.shape[1];
-__pyx_t_4.strides[0] = __pyx_v_paths.strides[1];
- __pyx_t_4.suboffsets[0] = -1;
-
-__pyx_t_4.shape[1] = __pyx_v_paths.shape[2];
-__pyx_t_4.strides[1] = __pyx_v_paths.strides[2];
- __pyx_t_4.suboffsets[1] = -1;
-
-__pyx_t_5.data = __pyx_v_values.data;
- __pyx_t_5.memview = __pyx_v_values.memview;
- __PYX_INC_MEMVIEW(&__pyx_t_5, 0);
- {
- Py_ssize_t __pyx_tmp_idx = __pyx_v_i;
- Py_ssize_t __pyx_tmp_stride = __pyx_v_values.strides[0];
- __pyx_t_5.data += __pyx_tmp_idx * __pyx_tmp_stride;
-}
-
-__pyx_t_5.shape[0] = __pyx_v_values.shape[1];
-__pyx_t_5.strides[0] = __pyx_v_values.strides[1];
- __pyx_t_5.suboffsets[0] = -1;
-
-__pyx_t_5.shape[1] = __pyx_v_values.shape[2];
-__pyx_t_5.strides[1] = __pyx_v_values.strides[2];
- __pyx_t_5.suboffsets[1] = -1;
-
-__pyx_t_6 = __pyx_v_i;
- __pyx_t_7 = __pyx_v_i;
- __pyx_f_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_maximum_path_each(__pyx_t_4, __pyx_t_5, (*((int *) ( /* dim=0 */ ((char *) (((int *) __pyx_v_t_xs.data) + __pyx_t_6)) ))), (*((int *) ( /* dim=0 */ ((char *) (((int *) __pyx_v_t_ys.data) + __pyx_t_7)) ))), __pyx_v_max_neg_val);
- __PYX_XDEC_MEMVIEW(&__pyx_t_4, 0);
- __pyx_t_4.memview = NULL;
- __pyx_t_4.data = NULL;
- __PYX_XDEC_MEMVIEW(&__pyx_t_5, 0);
- __pyx_t_5.memview = NULL;
- __pyx_t_5.data = NULL;
- }
- }
- }
- }
- }
- #if ((defined(__APPLE__) || defined(__OSX__)) && (defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))))
- #undef likely
- #undef unlikely
- #define likely(x) __builtin_expect(!!(x), 1)
- #define unlikely(x) __builtin_expect(!!(x), 0)
- #endif
- }
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":46
- *
- * cdef int i
- * for i in prange(b, nogil=True): # <<<<<<<<<<<<<<
- * maximum_path_each(paths[i], values[i], t_xs[i], t_ys[i], max_neg_val)
- */
- /*finally:*/ {
- /*normal exit:*/{
- #ifdef WITH_THREAD
- __Pyx_FastGIL_Forget();
- Py_BLOCK_THREADS
- #endif
- goto __pyx_L5;
- }
- __pyx_L5:;
- }
- }
-
- /* "TTS/tts/layers/glow_tts/monotonic_align/core.pyx":42
- * @cython.boundscheck(False)
- * @cython.wraparound(False)
- * cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_xs, int[::1] t_ys, float max_neg_val=-1e9) nogil: # <<<<<<<<<<<<<<
- * cdef int b = values.shape[0]
- *
- */
-
- /* function exit code */
-}
-
-/* Python wrapper */
-static PyObject *__pyx_pw_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
-static PyObject *__pyx_pw_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_1maximum_path_c(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
- __Pyx_memviewslice __pyx_v_paths = { 0, 0, { 0 }, { 0 }, { 0 } };
- __Pyx_memviewslice __pyx_v_values = { 0, 0, { 0 }, { 0 }, { 0 } };
- __Pyx_memviewslice __pyx_v_t_xs = { 0, 0, { 0 }, { 0 }, { 0 } };
- __Pyx_memviewslice __pyx_v_t_ys = { 0, 0, { 0 }, { 0 }, { 0 } };
- float __pyx_v_max_neg_val;
- int __pyx_lineno = 0;
- const char *__pyx_filename = NULL;
- int __pyx_clineno = 0;
- PyObject *__pyx_r = 0;
- __Pyx_RefNannyDeclarations
- __Pyx_RefNannySetupContext("maximum_path_c (wrapper)", 0);
- {
- static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_paths,&__pyx_n_s_values,&__pyx_n_s_t_xs,&__pyx_n_s_t_ys,&__pyx_n_s_max_neg_val,0};
- PyObject* values[5] = {0,0,0,0,0};
- if (unlikely(__pyx_kwds)) {
- Py_ssize_t kw_args;
- const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
- switch (pos_args) {
- case 5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
- CYTHON_FALLTHROUGH;
- case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
- CYTHON_FALLTHROUGH;
- case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
- CYTHON_FALLTHROUGH;
- case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
- CYTHON_FALLTHROUGH;
- case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
- CYTHON_FALLTHROUGH;
- case 0: break;
- default: goto __pyx_L5_argtuple_error;
- }
- kw_args = PyDict_Size(__pyx_kwds);
- switch (pos_args) {
- case 0:
- if (likely((values[0] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_paths)) != 0)) kw_args--;
- else goto __pyx_L5_argtuple_error;
- CYTHON_FALLTHROUGH;
- case 1:
- if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_values)) != 0)) kw_args--;
- else {
- __Pyx_RaiseArgtupleInvalid("maximum_path_c", 0, 4, 5, 1); __PYX_ERR(0, 42, __pyx_L3_error)
- }
- CYTHON_FALLTHROUGH;
- case 2:
- if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_t_xs)) != 0)) kw_args--;
- else {
- __Pyx_RaiseArgtupleInvalid("maximum_path_c", 0, 4, 5, 2); __PYX_ERR(0, 42, __pyx_L3_error)
- }
- CYTHON_FALLTHROUGH;
- case 3:
- if (likely((values[3] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_t_ys)) != 0)) kw_args--;
- else {
- __Pyx_RaiseArgtupleInvalid("maximum_path_c", 0, 4, 5, 3); __PYX_ERR(0, 42, __pyx_L3_error)
- }
- CYTHON_FALLTHROUGH;
- case 4:
- if (kw_args > 0) {
- PyObject* value = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_max_neg_val);
- if (value) { values[4] = value; kw_args--; }
- }
- }
- if (unlikely(kw_args > 0)) {
- if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "maximum_path_c") < 0)) __PYX_ERR(0, 42, __pyx_L3_error)
- }
- } else {
- switch (PyTuple_GET_SIZE(__pyx_args)) {
- case 5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4);
- CYTHON_FALLTHROUGH;
- case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
- values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
- values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
- values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
- break;
- default: goto __pyx_L5_argtuple_error;
- }
- }
- __pyx_v_paths = __Pyx_PyObject_to_MemoryviewSlice_d_d_dc_int(values[0], PyBUF_WRITABLE); if (unlikely(!__pyx_v_paths.memview)) __PYX_ERR(0, 42, __pyx_L3_error)
- __pyx_v_values = __Pyx_PyObject_to_MemoryviewSlice_d_d_dc_float(values[1], PyBUF_WRITABLE); if (unlikely(!__pyx_v_values.memview)) __PYX_ERR(0, 42, __pyx_L3_error)
- __pyx_v_t_xs = __Pyx_PyObject_to_MemoryviewSlice_dc_int(values[2], PyBUF_WRITABLE); if (unlikely(!__pyx_v_t_xs.memview)) __PYX_ERR(0, 42, __pyx_L3_error)
- __pyx_v_t_ys = __Pyx_PyObject_to_MemoryviewSlice_dc_int(values[3], PyBUF_WRITABLE); if (unlikely(!__pyx_v_t_ys.memview)) __PYX_ERR(0, 42, __pyx_L3_error)
- if (values[4]) {
- __pyx_v_max_neg_val = __pyx_PyFloat_AsFloat(values[4]); if (unlikely((__pyx_v_max_neg_val == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 42, __pyx_L3_error)
- } else {
- __pyx_v_max_neg_val = __pyx_k_;
- }
- }
- goto __pyx_L4_argument_unpacking_done;
- __pyx_L5_argtuple_error:;
- __Pyx_RaiseArgtupleInvalid("maximum_path_c", 0, 4, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 42, __pyx_L3_error)
- __pyx_L3_error:;
- __Pyx_AddTraceback("TTS.tts.layers.glow_tts.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename);
- __Pyx_RefNannyFinishContext();
- return NULL;
- __pyx_L4_argument_unpacking_done:;
- __pyx_r = __pyx_pf_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_maximum_path_c(__pyx_self, __pyx_v_paths, __pyx_v_values, __pyx_v_t_xs, __pyx_v_t_ys, __pyx_v_max_neg_val);
-
- /* function exit code */
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-static PyObject *__pyx_pf_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_maximum_path_c(CYTHON_UNUSED PyObject *__pyx_self, __Pyx_memviewslice __pyx_v_paths, __Pyx_memviewslice __pyx_v_values, __Pyx_memviewslice __pyx_v_t_xs, __Pyx_memviewslice __pyx_v_t_ys, float __pyx_v_max_neg_val) {
- PyObject *__pyx_r = NULL;
- __Pyx_RefNannyDeclarations
- struct __pyx_opt_args_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_maximum_path_c __pyx_t_1;
- PyObject *__pyx_t_2 = NULL;
- int __pyx_lineno = 0;
- const char *__pyx_filename = NULL;
- int __pyx_clineno = 0;
- __Pyx_RefNannySetupContext("maximum_path_c", 0);
- __Pyx_XDECREF(__pyx_r);
- if (unlikely(!__pyx_v_paths.memview)) { __Pyx_RaiseUnboundLocalError("paths"); __PYX_ERR(0, 42, __pyx_L1_error) }
- if (unlikely(!__pyx_v_values.memview)) { __Pyx_RaiseUnboundLocalError("values"); __PYX_ERR(0, 42, __pyx_L1_error) }
- if (unlikely(!__pyx_v_t_xs.memview)) { __Pyx_RaiseUnboundLocalError("t_xs"); __PYX_ERR(0, 42, __pyx_L1_error) }
- if (unlikely(!__pyx_v_t_ys.memview)) { __Pyx_RaiseUnboundLocalError("t_ys"); __PYX_ERR(0, 42, __pyx_L1_error) }
- __pyx_t_1.__pyx_n = 1;
- __pyx_t_1.max_neg_val = __pyx_v_max_neg_val;
- __pyx_f_3TTS_3tts_6layers_8glow_tts_15monotonic_align_4core_maximum_path_c(__pyx_v_paths, __pyx_v_values, __pyx_v_t_xs, __pyx_v_t_ys, 0, &__pyx_t_1);
- __pyx_t_2 = __Pyx_void_to_None(NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 42, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_2);
- __pyx_r = __pyx_t_2;
- __pyx_t_2 = 0;
- goto __pyx_L0;
-
- /* function exit code */
- __pyx_L1_error:;
- __Pyx_XDECREF(__pyx_t_2);
- __Pyx_AddTraceback("TTS.tts.layers.glow_tts.monotonic_align.core.maximum_path_c", __pyx_clineno, __pyx_lineno, __pyx_filename);
- __pyx_r = NULL;
- __pyx_L0:;
- __PYX_XDEC_MEMVIEW(&__pyx_v_paths, 1);
- __PYX_XDEC_MEMVIEW(&__pyx_v_values, 1);
- __PYX_XDEC_MEMVIEW(&__pyx_v_t_xs, 1);
- __PYX_XDEC_MEMVIEW(&__pyx_v_t_ys, 1);
- __Pyx_XGIVEREF(__pyx_r);
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":734
- * ctypedef npy_cdouble complex_t
- *
- * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(1, a)
- *
- */
-
-static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) {
- PyObject *__pyx_r = NULL;
- __Pyx_RefNannyDeclarations
- PyObject *__pyx_t_1 = NULL;
- int __pyx_lineno = 0;
- const char *__pyx_filename = NULL;
- int __pyx_clineno = 0;
- __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0);
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":735
- *
- * cdef inline object PyArray_MultiIterNew1(a):
- * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<<
- *
- * cdef inline object PyArray_MultiIterNew2(a, b):
- */
- __Pyx_XDECREF(__pyx_r);
- __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 735, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __pyx_r = __pyx_t_1;
- __pyx_t_1 = 0;
- goto __pyx_L0;
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":734
- * ctypedef npy_cdouble complex_t
- *
- * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(1, a)
- *
- */
-
- /* function exit code */
- __pyx_L1_error:;
- __Pyx_XDECREF(__pyx_t_1);
- __Pyx_AddTraceback("numpy.PyArray_MultiIterNew1", __pyx_clineno, __pyx_lineno, __pyx_filename);
- __pyx_r = 0;
- __pyx_L0:;
- __Pyx_XGIVEREF(__pyx_r);
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":737
- * return PyArray_MultiIterNew(1, a)
- *
- * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(2, a, b)
- *
- */
-
-static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) {
- PyObject *__pyx_r = NULL;
- __Pyx_RefNannyDeclarations
- PyObject *__pyx_t_1 = NULL;
- int __pyx_lineno = 0;
- const char *__pyx_filename = NULL;
- int __pyx_clineno = 0;
- __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0);
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":738
- *
- * cdef inline object PyArray_MultiIterNew2(a, b):
- * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<<
- *
- * cdef inline object PyArray_MultiIterNew3(a, b, c):
- */
- __Pyx_XDECREF(__pyx_r);
- __pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 738, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __pyx_r = __pyx_t_1;
- __pyx_t_1 = 0;
- goto __pyx_L0;
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":737
- * return PyArray_MultiIterNew(1, a)
- *
- * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(2, a, b)
- *
- */
-
- /* function exit code */
- __pyx_L1_error:;
- __Pyx_XDECREF(__pyx_t_1);
- __Pyx_AddTraceback("numpy.PyArray_MultiIterNew2", __pyx_clineno, __pyx_lineno, __pyx_filename);
- __pyx_r = 0;
- __pyx_L0:;
- __Pyx_XGIVEREF(__pyx_r);
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":740
- * return PyArray_MultiIterNew(2, a, b)
- *
- * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(3, a, b, c)
- *
- */
-
-static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) {
- PyObject *__pyx_r = NULL;
- __Pyx_RefNannyDeclarations
- PyObject *__pyx_t_1 = NULL;
- int __pyx_lineno = 0;
- const char *__pyx_filename = NULL;
- int __pyx_clineno = 0;
- __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0);
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":741
- *
- * cdef inline object PyArray_MultiIterNew3(a, b, c):
- * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<<
- *
- * cdef inline object PyArray_MultiIterNew4(a, b, c, d):
- */
- __Pyx_XDECREF(__pyx_r);
- __pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 741, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __pyx_r = __pyx_t_1;
- __pyx_t_1 = 0;
- goto __pyx_L0;
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":740
- * return PyArray_MultiIterNew(2, a, b)
- *
- * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(3, a, b, c)
- *
- */
-
- /* function exit code */
- __pyx_L1_error:;
- __Pyx_XDECREF(__pyx_t_1);
- __Pyx_AddTraceback("numpy.PyArray_MultiIterNew3", __pyx_clineno, __pyx_lineno, __pyx_filename);
- __pyx_r = 0;
- __pyx_L0:;
- __Pyx_XGIVEREF(__pyx_r);
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":743
- * return PyArray_MultiIterNew(3, a, b, c)
- *
- * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(4, a, b, c, d)
- *
- */
-
-static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d) {
- PyObject *__pyx_r = NULL;
- __Pyx_RefNannyDeclarations
- PyObject *__pyx_t_1 = NULL;
- int __pyx_lineno = 0;
- const char *__pyx_filename = NULL;
- int __pyx_clineno = 0;
- __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0);
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":744
- *
- * cdef inline object PyArray_MultiIterNew4(a, b, c, d):
- * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<<
- *
- * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):
- */
- __Pyx_XDECREF(__pyx_r);
- __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 744, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __pyx_r = __pyx_t_1;
- __pyx_t_1 = 0;
- goto __pyx_L0;
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":743
- * return PyArray_MultiIterNew(3, a, b, c)
- *
- * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(4, a, b, c, d)
- *
- */
-
- /* function exit code */
- __pyx_L1_error:;
- __Pyx_XDECREF(__pyx_t_1);
- __Pyx_AddTraceback("numpy.PyArray_MultiIterNew4", __pyx_clineno, __pyx_lineno, __pyx_filename);
- __pyx_r = 0;
- __pyx_L0:;
- __Pyx_XGIVEREF(__pyx_r);
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":746
- * return PyArray_MultiIterNew(4, a, b, c, d)
- *
- * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(5, a, b, c, d, e)
- *
- */
-
-static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d, PyObject *__pyx_v_e) {
- PyObject *__pyx_r = NULL;
- __Pyx_RefNannyDeclarations
- PyObject *__pyx_t_1 = NULL;
- int __pyx_lineno = 0;
- const char *__pyx_filename = NULL;
- int __pyx_clineno = 0;
- __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0);
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":747
- *
- * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e):
- * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<<
- *
- * cdef inline tuple PyDataType_SHAPE(dtype d):
- */
- __Pyx_XDECREF(__pyx_r);
- __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 747, __pyx_L1_error)
- __Pyx_GOTREF(__pyx_t_1);
- __pyx_r = __pyx_t_1;
- __pyx_t_1 = 0;
- goto __pyx_L0;
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":746
- * return PyArray_MultiIterNew(4, a, b, c, d)
- *
- * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<<
- * return PyArray_MultiIterNew(5, a, b, c, d, e)
- *
- */
-
- /* function exit code */
- __pyx_L1_error:;
- __Pyx_XDECREF(__pyx_t_1);
- __Pyx_AddTraceback("numpy.PyArray_MultiIterNew5", __pyx_clineno, __pyx_lineno, __pyx_filename);
- __pyx_r = 0;
- __pyx_L0:;
- __Pyx_XGIVEREF(__pyx_r);
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":749
- * return PyArray_MultiIterNew(5, a, b, c, d, e)
- *
- * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<<
- * if PyDataType_HASSUBARRAY(d):
- * return d.subarray.shape
- */
-
-static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__pyx_v_d) {
- PyObject *__pyx_r = NULL;
- __Pyx_RefNannyDeclarations
- int __pyx_t_1;
- __Pyx_RefNannySetupContext("PyDataType_SHAPE", 0);
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":750
- *
- * cdef inline tuple PyDataType_SHAPE(dtype d):
- * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<<
- * return d.subarray.shape
- * else:
- */
- __pyx_t_1 = (PyDataType_HASSUBARRAY(__pyx_v_d) != 0);
- if (__pyx_t_1) {
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":751
- * cdef inline tuple PyDataType_SHAPE(dtype d):
- * if PyDataType_HASSUBARRAY(d):
- * return d.subarray.shape # <<<<<<<<<<<<<<
- * else:
- * return ()
- */
- __Pyx_XDECREF(__pyx_r);
- __Pyx_INCREF(((PyObject*)__pyx_v_d->subarray->shape));
- __pyx_r = ((PyObject*)__pyx_v_d->subarray->shape);
- goto __pyx_L0;
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":750
- *
- * cdef inline tuple PyDataType_SHAPE(dtype d):
- * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<<
- * return d.subarray.shape
- * else:
- */
- }
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":753
- * return d.subarray.shape
- * else:
- * return () # <<<<<<<<<<<<<<
- *
- *
- */
- /*else*/ {
- __Pyx_XDECREF(__pyx_r);
- __Pyx_INCREF(__pyx_empty_tuple);
- __pyx_r = __pyx_empty_tuple;
- goto __pyx_L0;
- }
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":749
- * return PyArray_MultiIterNew(5, a, b, c, d, e)
- *
- * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<<
- * if PyDataType_HASSUBARRAY(d):
- * return d.subarray.shape
- */
-
- /* function exit code */
- __pyx_L0:;
- __Pyx_XGIVEREF(__pyx_r);
- __Pyx_RefNannyFinishContext();
- return __pyx_r;
-}
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":868
- * int _import_umath() except -1
- *
- * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<<
- * Py_INCREF(base) # important to do this before stealing the reference below!
- * PyArray_SetBaseObject(arr, base)
- */
-
-static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_arr, PyObject *__pyx_v_base) {
- __Pyx_RefNannyDeclarations
- __Pyx_RefNannySetupContext("set_array_base", 0);
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":869
- *
- * cdef inline void set_array_base(ndarray arr, object base):
- * Py_INCREF(base) # important to do this before stealing the reference below! # <<<<<<<<<<<<<<
- * PyArray_SetBaseObject(arr, base)
- *
- */
- Py_INCREF(__pyx_v_base);
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":870
- * cdef inline void set_array_base(ndarray arr, object base):
- * Py_INCREF(base) # important to do this before stealing the reference below!
- * PyArray_SetBaseObject(arr, base) # <<<<<<<<<<<<<<
- *
- * cdef inline object get_array_base(ndarray arr):
- */
- (void)(PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_base));
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":868
- * int _import_umath() except -1
- *
- * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<<
- * Py_INCREF(base) # important to do this before stealing the reference below!
- * PyArray_SetBaseObject(arr, base)
- */
-
- /* function exit code */
- __Pyx_RefNannyFinishContext();
-}
-
-/* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":872
- * PyArray_SetBaseObject(arr, base)
- *
- * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<<
- * base = PyArray_BASE(arr)
- * if base is NULL:
- */
-
-static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__pyx_v_arr) {
- PyObject *__pyx_v_base;
- PyObject *__pyx_r = NULL;
- __Pyx_RefNannyDeclarations
- int __pyx_t_1;
- __Pyx_RefNannySetupContext("get_array_base", 0);
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":873
- *
- * cdef inline object get_array_base(ndarray arr):
- * base = PyArray_BASE(arr) # <<<<<<<<<<<<<<
- * if base is NULL:
- * return None
- */
- __pyx_v_base = PyArray_BASE(__pyx_v_arr);
-
- /* "../miniconda3/envs/p3.6/lib/python3.6/site-packages/numpy/__init__.pxd":874
- * cdef inline object get_array_base(ndarray arr):
- * base = PyArray_BASE(arr)
- * if base is NULL: # <<<<<<<<<<<<<<
- * return None
- * return