mirror of https://github.com/coqui-ai/TTS.git
style: run pre-commit
Automatic changes from: pre-commit run --all-files
This commit is contained in:
parent
5cf1d41555
commit
ec50006855
|
@ -6,4 +6,4 @@ TTS.egg-info/
|
|||
tests/outputs/*
|
||||
tests/train_outputs/*
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyc
|
||||
|
|
|
@ -15,4 +15,3 @@ markComment: >
|
|||
for your contributions. You might also look our discussion channels.
|
||||
# Comment to post when closing a stale issue. Set to `false` to disable
|
||||
closeComment: false
|
||||
|
||||
|
|
|
@ -169,4 +169,4 @@ wandb
|
|||
depot/*
|
||||
coqui_recipes/*
|
||||
local_scripts/*
|
||||
coqui_demos/*
|
||||
coqui_demos/*
|
||||
|
|
|
@ -17,4 +17,4 @@ keywords:
|
|||
- deep learning
|
||||
- artificial intelligence
|
||||
- text to speech
|
||||
- TTS
|
||||
- TTS
|
||||
|
|
|
@ -119,11 +119,11 @@ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
|||
version 2.0, available at
|
||||
[https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0].
|
||||
|
||||
Community Impact Guidelines were inspired by
|
||||
Community Impact Guidelines were inspired by
|
||||
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
|
||||
|
||||
For answers to common questions about this code of conduct, see the FAQ at
|
||||
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available
|
||||
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available
|
||||
at [https://www.contributor-covenant.org/translations][translations].
|
||||
|
||||
[homepage]: https://www.contributor-covenant.org
|
||||
|
|
|
@ -35,7 +35,7 @@ Mozilla Public License Version 2.0
|
|||
means any form of the work other than Source Code Form.
|
||||
|
||||
1.7. "Larger Work"
|
||||
means a work that combines Covered Software with other material, in
|
||||
means a work that combines Covered Software with other material, in
|
||||
a separate file or files, that is not Covered Software.
|
||||
|
||||
1.8. "License"
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
faster_whisper==0.9.0
|
||||
gradio==4.7.1
|
||||
gradio==4.7.1
|
||||
|
|
|
@ -128,4 +128,4 @@
|
|||
|
||||
</body>
|
||||
|
||||
</html>
|
||||
</html>
|
||||
|
|
|
@ -1 +1 @@
|
|||
{"version":"1.0","truncation":null,"padding":null,"added_tokens":[{"id":0,"special":true,"content":"[STOP]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":1,"special":true,"content":"[UNK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":2,"special":true,"content":"[SPACE]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false}],"normalizer":null,"pre_tokenizer":{"type":"Whitespace"},"post_processor":null,"decoder":null,"model":{"type":"BPE","dropout":null,"unk_token":"[UNK]","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"vocab":{"[STOP]":0,"[UNK]":1,"[SPACE]":2,"!":3,"'":4,"(":5,")":6,",":7,"-":8,".":9,"/":10,":":11,";":12,"?":13,"a":14,"b":15,"c":16,"d":17,"e":18,"f":19,"g":20,"h":21,"i":22,"j":23,"k":24,"l":25,"m":26,"n":27,"o":28,"p":29,"q":30,"r":31,"s":32,"t":33,"u":34,"v":35,"w":36,"x":37,"y":38,"z":39,"th":40,"in":41,"the":42,"an":43,"er":44,"ou":45,"re":46,"on":47,"at":48,"ed":49,"en":50,"to":51,"ing":52,"and":53,"is":54,"as":55,"al":56,"or":57,"of":58,"ar":59,"it":60,"es":61,"he":62,"st":63,"le":64,"om":65,"se":66,"be":67,"ad":68,"ow":69,"ly":70,"ch":71,"wh":72,"that":73,"you":74,"li":75,"ve":76,"ac":77,"ti":78,"ld":79,"me":80,"was":81,"gh":82,"id":83,"ll":84,"wi":85,"ent":86,"for":87,"ay":88,"ro":89,"ver":90,"ic":91,"her":92,"ke":93,"his":94,"no":95,"ut":96,"un":97,"ir":98,"lo":99,"we":100,"ri":101,"ha":102,"with":103,"ght":104,"out":105,"im":106,"ion":107,"all":108,"ab":109,"one":110,"ne":111,"ge":112,"ould":113,"ter":114,"mo":115,"had":116,"ce":117,"she":118,"go":119,"sh":120,"ur":121,"am":122,"so":123,"pe":124,"my":125,"de":126,"are":127,"but":128,"ome":129,"fr":130,"ther":131,"fe":132,"su":133,"do":134,"con":135,"te":136,"ain":137,"ere":138,"po":139,"if":140,"they":141,"us":142,"ag":143,"tr":144,"now":145,"oun":146,"this":147,"have":148,"not":149,"sa":150,"il":151,"up":152,"thing":153,"from":154,"ap":155,"him":156,"ack":157,"ation":158,"ant":159,"our":160,"op":161,"like":162,"ust":163,"ess":164,"bo":165,"ok":166,"ul":167,"ind":168,"ex":169,"com":170,"some":171,"there":172,"ers":173,"co":174,"res":175,"man":176,"ard":177,"pl":178,"wor":179,"way":180,"tion":181,"fo":182,"ca":183,"were":184,"by":185,"ate":186,"pro":187,"ted":188,"ound":189,"own":190,"would":191,"ts":192,"what":193,"qu":194,"ally":195,"ight":196,"ck":197,"gr":198,"when":199,"ven":200,"can":201,"ough":202,"ine":203,"end":204,"per":205,"ous":206,"od":207,"ide":208,"know":209,"ty":210,"very":211,"si":212,"ak":213,"who":214,"about":215,"ill":216,"them":217,"est":218,"red":219,"ye":220,"could":221,"ong":222,"your":223,"their":224,"em":225,"just":226,"other":227,"into":228,"any":229,"whi":230,"um":231,"tw":232,"ast":233,"der":234,"did":235,"ie":236,"been":237,"ace":238,"ink":239,"ity":240,"back":241,"ting":242,"br":243,"more":244,"ake":245,"pp":246,"then":247,"sp":248,"el":249,"use":250,"bl":251,"said":252,"over":253,"get":254},"merges":["t h","i n","th e","a n","e r","o u","r e","o n","a t","e d","e n","t o","in g","an d","i s","a s","a l","o r","o f","a r","i t","e s","h e","s t","l e","o m","s e","b e","a d","o w","l y","c h","w h","th at","y ou","l i","v e","a c","t i","l d","m e","w as","g h","i d","l l","w i","en t","f or","a y","r o","v er","i c","h er","k e","h is","n o","u t","u n","i r","l o","w e","r i","h a","wi th","gh t","ou t","i m","i on","al l","a b","on e","n e","g e","ou ld","t er","m o","h ad","c e","s he","g o","s h","u r","a m","s o","p e","m y","d e","a re","b ut","om e","f r","the r","f e","s u","d o","c on","t e","a in","er e","p o","i f","the y","u s","a g","t r","n ow","ou n","th is","ha ve","no t","s a","i l","u p","th ing","fr om","a p","h im","ac k","at ion","an t","ou r","o p","li ke","u st","es s","b o","o k","u l","in d","e x","c om","s ome","the re","er s","c o","re s","m an","ar d","p l","w or","w ay","ti on","f o","c a","w ere","b y","at e","p ro","t ed","oun d","ow n","w ould","t s","wh at","q u","al ly","i ght","c k","g r","wh en","v en","c an","ou gh","in e","en d","p er","ou s","o d","id e","k now","t y","ver y","s i","a k","wh o","ab out","i ll","the m","es t","re d","y e","c ould","on g","you r","the ir","e m","j ust","o ther","in to","an y","wh i","u m","t w","as t","d er","d id","i e","be en","ac e","in k","it y","b ack","t ing","b r","mo re","a ke","p p","the n","s p","e l","u se","b l","sa id","o ver","ge t"]}}
|
||||
{"version":"1.0","truncation":null,"padding":null,"added_tokens":[{"id":0,"special":true,"content":"[STOP]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":1,"special":true,"content":"[UNK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":2,"special":true,"content":"[SPACE]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false}],"normalizer":null,"pre_tokenizer":{"type":"Whitespace"},"post_processor":null,"decoder":null,"model":{"type":"BPE","dropout":null,"unk_token":"[UNK]","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"vocab":{"[STOP]":0,"[UNK]":1,"[SPACE]":2,"!":3,"'":4,"(":5,")":6,",":7,"-":8,".":9,"/":10,":":11,";":12,"?":13,"a":14,"b":15,"c":16,"d":17,"e":18,"f":19,"g":20,"h":21,"i":22,"j":23,"k":24,"l":25,"m":26,"n":27,"o":28,"p":29,"q":30,"r":31,"s":32,"t":33,"u":34,"v":35,"w":36,"x":37,"y":38,"z":39,"th":40,"in":41,"the":42,"an":43,"er":44,"ou":45,"re":46,"on":47,"at":48,"ed":49,"en":50,"to":51,"ing":52,"and":53,"is":54,"as":55,"al":56,"or":57,"of":58,"ar":59,"it":60,"es":61,"he":62,"st":63,"le":64,"om":65,"se":66,"be":67,"ad":68,"ow":69,"ly":70,"ch":71,"wh":72,"that":73,"you":74,"li":75,"ve":76,"ac":77,"ti":78,"ld":79,"me":80,"was":81,"gh":82,"id":83,"ll":84,"wi":85,"ent":86,"for":87,"ay":88,"ro":89,"ver":90,"ic":91,"her":92,"ke":93,"his":94,"no":95,"ut":96,"un":97,"ir":98,"lo":99,"we":100,"ri":101,"ha":102,"with":103,"ght":104,"out":105,"im":106,"ion":107,"all":108,"ab":109,"one":110,"ne":111,"ge":112,"ould":113,"ter":114,"mo":115,"had":116,"ce":117,"she":118,"go":119,"sh":120,"ur":121,"am":122,"so":123,"pe":124,"my":125,"de":126,"are":127,"but":128,"ome":129,"fr":130,"ther":131,"fe":132,"su":133,"do":134,"con":135,"te":136,"ain":137,"ere":138,"po":139,"if":140,"they":141,"us":142,"ag":143,"tr":144,"now":145,"oun":146,"this":147,"have":148,"not":149,"sa":150,"il":151,"up":152,"thing":153,"from":154,"ap":155,"him":156,"ack":157,"ation":158,"ant":159,"our":160,"op":161,"like":162,"ust":163,"ess":164,"bo":165,"ok":166,"ul":167,"ind":168,"ex":169,"com":170,"some":171,"there":172,"ers":173,"co":174,"res":175,"man":176,"ard":177,"pl":178,"wor":179,"way":180,"tion":181,"fo":182,"ca":183,"were":184,"by":185,"ate":186,"pro":187,"ted":188,"ound":189,"own":190,"would":191,"ts":192,"what":193,"qu":194,"ally":195,"ight":196,"ck":197,"gr":198,"when":199,"ven":200,"can":201,"ough":202,"ine":203,"end":204,"per":205,"ous":206,"od":207,"ide":208,"know":209,"ty":210,"very":211,"si":212,"ak":213,"who":214,"about":215,"ill":216,"them":217,"est":218,"red":219,"ye":220,"could":221,"ong":222,"your":223,"their":224,"em":225,"just":226,"other":227,"into":228,"any":229,"whi":230,"um":231,"tw":232,"ast":233,"der":234,"did":235,"ie":236,"been":237,"ace":238,"ink":239,"ity":240,"back":241,"ting":242,"br":243,"more":244,"ake":245,"pp":246,"then":247,"sp":248,"el":249,"use":250,"bl":251,"said":252,"over":253,"get":254},"merges":["t h","i n","th e","a n","e r","o u","r e","o n","a t","e d","e n","t o","in g","an d","i s","a s","a l","o r","o f","a r","i t","e s","h e","s t","l e","o m","s e","b e","a d","o w","l y","c h","w h","th at","y ou","l i","v e","a c","t i","l d","m e","w as","g h","i d","l l","w i","en t","f or","a y","r o","v er","i c","h er","k e","h is","n o","u t","u n","i r","l o","w e","r i","h a","wi th","gh t","ou t","i m","i on","al l","a b","on e","n e","g e","ou ld","t er","m o","h ad","c e","s he","g o","s h","u r","a m","s o","p e","m y","d e","a re","b ut","om e","f r","the r","f e","s u","d o","c on","t e","a in","er e","p o","i f","the y","u s","a g","t r","n ow","ou n","th is","ha ve","no t","s a","i l","u p","th ing","fr om","a p","h im","ac k","at ion","an t","ou r","o p","li ke","u st","es s","b o","o k","u l","in d","e x","c om","s ome","the re","er s","c o","re s","m an","ar d","p l","w or","w ay","ti on","f o","c a","w ere","b y","at e","p ro","t ed","oun d","ow n","w ould","t s","wh at","q u","al ly","i ght","c k","g r","wh en","v en","c an","ou gh","in e","en d","p er","ou s","o d","id e","k now","t y","ver y","s i","a k","wh o","ab out","i ll","the m","es t","re d","y e","c ould","on g","you r","the ir","e m","j ust","o ther","in to","an y","wh i","u m","t w","as t","d er","d id","i e","be en","ac e","in k","it y","b ack","t ing","b r","mo re","a ke","p p","the n","s p","e l","u se","b l","sa id","o ver","ge t"]}}
|
||||
|
|
|
@ -96,4 +96,4 @@
|
|||
"transformers_version": "4.15.0.dev0",
|
||||
"use_weighted_layer_sum": false,
|
||||
"vocab_size": 32
|
||||
}
|
||||
}
|
||||
|
|
|
@ -41,4 +41,3 @@ COPY . /root
|
|||
|
||||
# Installing the TTS package itself:
|
||||
RUN make install
|
||||
|
||||
|
|
|
@ -3,4 +3,4 @@ myst-parser == 2.0.0
|
|||
sphinx == 7.2.5
|
||||
sphinx_inline_tabs
|
||||
sphinx_copybutton
|
||||
linkify-it-py
|
||||
linkify-it-py
|
||||
|
|
|
@ -14,15 +14,15 @@ import importlib.metadata
|
|||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.abspath('../..'))
|
||||
sys.path.insert(0, os.path.abspath("../.."))
|
||||
|
||||
# mock deps with system level requirements.
|
||||
autodoc_mock_imports = ["soundfile"]
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
project = 'TTS'
|
||||
project = "TTS"
|
||||
copyright = "2021 Coqui GmbH, 2020 TTS authors"
|
||||
author = 'Coqui GmbH'
|
||||
author = "Coqui GmbH"
|
||||
|
||||
# The version info for the project you're documenting, acts as replacement for
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
|
@ -38,32 +38,34 @@ master_doc = "index"
|
|||
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
||||
# ones.
|
||||
extensions = [
|
||||
'sphinx.ext.autodoc',
|
||||
'sphinx.ext.autosummary',
|
||||
'sphinx.ext.doctest',
|
||||
'sphinx.ext.intersphinx',
|
||||
'sphinx.ext.todo',
|
||||
'sphinx.ext.coverage',
|
||||
'sphinx.ext.napoleon',
|
||||
'sphinx.ext.viewcode',
|
||||
'sphinx.ext.autosectionlabel',
|
||||
'myst_parser',
|
||||
"sphinx.ext.autodoc",
|
||||
"sphinx.ext.autosummary",
|
||||
"sphinx.ext.doctest",
|
||||
"sphinx.ext.intersphinx",
|
||||
"sphinx.ext.todo",
|
||||
"sphinx.ext.coverage",
|
||||
"sphinx.ext.napoleon",
|
||||
"sphinx.ext.viewcode",
|
||||
"sphinx.ext.autosectionlabel",
|
||||
"myst_parser",
|
||||
"sphinx_copybutton",
|
||||
"sphinx_inline_tabs",
|
||||
]
|
||||
|
||||
|
||||
# Add any paths that contain templates here, relative to this directory.
|
||||
templates_path = ['_templates']
|
||||
templates_path = ["_templates"]
|
||||
|
||||
# List of patterns, relative to source directory, that match files and
|
||||
# directories to ignore when looking for source files.
|
||||
# This pattern also affects html_static_path and html_extra_path.
|
||||
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'TODO/*']
|
||||
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "TODO/*"]
|
||||
|
||||
source_suffix = [".rst", ".md"]
|
||||
|
||||
myst_enable_extensions = ['linkify',]
|
||||
myst_enable_extensions = [
|
||||
"linkify",
|
||||
]
|
||||
|
||||
# 'sphinxcontrib.katex',
|
||||
# 'sphinx.ext.autosectionlabel',
|
||||
|
@ -74,17 +76,17 @@ myst_enable_extensions = ['linkify',]
|
|||
# duplicated section names that are in different documents.
|
||||
autosectionlabel_prefix_document = True
|
||||
|
||||
language = 'en'
|
||||
language = "en"
|
||||
|
||||
autodoc_inherit_docstrings = False
|
||||
|
||||
# Disable displaying type annotations, these can be very verbose
|
||||
autodoc_typehints = 'none'
|
||||
autodoc_typehints = "none"
|
||||
|
||||
# Enable overriding of function signatures in the first line of the docstring.
|
||||
autodoc_docstring_signature = True
|
||||
|
||||
napoleon_custom_sections = [('Shapes', 'shape')]
|
||||
napoleon_custom_sections = [("Shapes", "shape")]
|
||||
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
|
@ -92,7 +94,7 @@ napoleon_custom_sections = [('Shapes', 'shape')]
|
|||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
#
|
||||
html_theme = 'furo'
|
||||
html_theme = "furo"
|
||||
html_tite = "TTS"
|
||||
html_theme_options = {
|
||||
"light_logo": "logo.png",
|
||||
|
@ -101,18 +103,18 @@ html_theme_options = {
|
|||
}
|
||||
|
||||
html_sidebars = {
|
||||
'**': [
|
||||
"sidebar/scroll-start.html",
|
||||
"sidebar/brand.html",
|
||||
"sidebar/search.html",
|
||||
"sidebar/navigation.html",
|
||||
"sidebar/ethical-ads.html",
|
||||
"sidebar/scroll-end.html",
|
||||
]
|
||||
}
|
||||
"**": [
|
||||
"sidebar/scroll-start.html",
|
||||
"sidebar/brand.html",
|
||||
"sidebar/search.html",
|
||||
"sidebar/navigation.html",
|
||||
"sidebar/ethical-ads.html",
|
||||
"sidebar/scroll-end.html",
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||
html_static_path = ['_static']
|
||||
html_static_path = ["_static"]
|
||||
|
|
|
@ -53,4 +53,4 @@ python3 TTS/server/server.py --list_models #To get the list of available models
|
|||
python3 TTS/server/server.py --model_name tts_models/en/vctk/vits --use_cuda true
|
||||
```
|
||||
|
||||
Click [there](http://[::1]:5002/) and have fun with the server!
|
||||
Click [there](http://[::1]:5002/) and have fun with the server!
|
||||
|
|
|
@ -111,4 +111,3 @@ them and fine-tune it for your own dataset. This will help you in two main ways:
|
|||
--coqpit.run_name "glow-tts-finetune" \
|
||||
--coqpit.lr 0.00001
|
||||
```
|
||||
|
||||
|
|
|
@ -22,4 +22,4 @@ also must inherit or initiate `BaseAudioConfig`.
|
|||
```{eval-rst}
|
||||
.. autoclass:: TTS.config.shared_configs.BaseAudioConfig
|
||||
:members:
|
||||
```
|
||||
```
|
||||
|
|
|
@ -22,4 +22,4 @@
|
|||
```{eval-rst}
|
||||
.. autoclass:: TTS.vocoder.datasets.wavernn_dataset.WaveRNNDataset
|
||||
:members:
|
||||
```
|
||||
```
|
||||
|
|
|
@ -9,4 +9,4 @@ to do its ✨️.
|
|||
```{eval-rst}
|
||||
.. autoclass:: TTS.vocoder.models.gan.GAN
|
||||
:members:
|
||||
```
|
||||
```
|
||||
|
|
|
@ -21,4 +21,4 @@ Model API provides you a set of functions that easily make your model compatible
|
|||
```{eval-rst}
|
||||
.. autoclass:: TTS.vocoder.models.base_vocoder.BaseVocoder
|
||||
:members:
|
||||
```
|
||||
```
|
||||
|
|
|
@ -8,4 +8,4 @@ especially useful for multi-speaker models.
|
|||
```{eval-rst}
|
||||
.. automodule:: TTS.tts.utils.speakers
|
||||
:members:
|
||||
```
|
||||
```
|
||||
|
|
|
@ -61,5 +61,3 @@ Currently we provide the following pre-configured architectures:
|
|||
.. autoclass:: TTS.tts.configs.fast_speech_config.FastSpeechConfig
|
||||
:members:
|
||||
```
|
||||
|
||||
|
||||
|
|
|
@ -33,4 +33,4 @@ are available at https://shivammehta25.github.io/OverFlow/.
|
|||
```{eval-rst}
|
||||
.. autoclass:: TTS.tts.models.overflow.Overflow
|
||||
:members:
|
||||
```
|
||||
```
|
||||
|
|
|
@ -59,5 +59,3 @@ If you have a limited VRAM, then you can try using the Guided Attention Loss or
|
|||
.. autoclass:: TTS.tts.configs.tacotron2_config.Tacotron2Config
|
||||
:members:
|
||||
```
|
||||
|
||||
|
||||
|
|
|
@ -17,4 +17,4 @@ If you like to use a bespoken dataset, you might like to perform a couple of qua
|
|||
* **CheckSpectrograms** is to measure the noise level of the clips and find good audio processing parameters. The noise level might be observed by checking spectrograms. If spectrograms look cluttered, especially in silent parts, this dataset might not be a good candidate for a TTS project. If your voice clips are too noisy in the background, it makes things harder for your model to learn the alignment, and the final result might be different than the voice you are given.
|
||||
If the spectrograms look good, then the next step is to find a good set of audio processing parameters, defined in ```config.json```. In the notebook, you can compare different sets of parameters and see the resynthesis results in relation to the given ground-truth. Find the best parameters that give the best possible synthesis performance.
|
||||
|
||||
Another practical detail is the quantization level of the clips. If your dataset has a very high bit-rate, that might cause slow data-load time and consequently slow training. It is better to reduce the sample-rate of your dataset to around 16000-22050.
|
||||
Another practical detail is the quantization level of the clips. If your dataset has a very high bit-rate, that might cause slow data-load time and consequently slow training. It is better to reduce the sample-rate of your dataset to around 16000-22050.
|
||||
|
|
27
hubconf.py
27
hubconf.py
|
@ -1,15 +1,11 @@
|
|||
dependencies = [
|
||||
'torch', 'gdown', 'pysbd', 'gruut', 'anyascii', 'pypinyin', 'coqpit', 'mecab-python3', 'unidic-lite'
|
||||
]
|
||||
dependencies = ["torch", "gdown", "pysbd", "gruut", "anyascii", "pypinyin", "coqpit", "mecab-python3", "unidic-lite"]
|
||||
import torch
|
||||
|
||||
from TTS.utils.manage import ModelManager
|
||||
from TTS.utils.synthesizer import Synthesizer
|
||||
|
||||
|
||||
def tts(model_name='tts_models/en/ljspeech/tacotron2-DCA',
|
||||
vocoder_name=None,
|
||||
use_cuda=False):
|
||||
def tts(model_name="tts_models/en/ljspeech/tacotron2-DCA", vocoder_name=None, use_cuda=False):
|
||||
"""TTS entry point for PyTorch Hub that provides a Synthesizer object to synthesize speech from a give text.
|
||||
|
||||
Example:
|
||||
|
@ -28,19 +24,20 @@ def tts(model_name='tts_models/en/ljspeech/tacotron2-DCA',
|
|||
manager = ModelManager()
|
||||
|
||||
model_path, config_path, model_item = manager.download_model(model_name)
|
||||
vocoder_name = model_item[
|
||||
'default_vocoder'] if vocoder_name is None else vocoder_name
|
||||
vocoder_name = model_item["default_vocoder"] if vocoder_name is None else vocoder_name
|
||||
vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
|
||||
|
||||
# create synthesizer
|
||||
synt = Synthesizer(tts_checkpoint=model_path,
|
||||
tts_config_path=config_path,
|
||||
vocoder_checkpoint=vocoder_path,
|
||||
vocoder_config=vocoder_config_path,
|
||||
use_cuda=use_cuda)
|
||||
synt = Synthesizer(
|
||||
tts_checkpoint=model_path,
|
||||
tts_config_path=config_path,
|
||||
vocoder_checkpoint=vocoder_path,
|
||||
vocoder_config=vocoder_config_path,
|
||||
use_cuda=use_cuda,
|
||||
)
|
||||
return synt
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
synthesizer = torch.hub.load('coqui-ai/TTS:dev', 'tts', source='github')
|
||||
if __name__ == "__main__":
|
||||
synthesizer = torch.hub.load("coqui-ai/TTS:dev", "tts", source="github")
|
||||
synthesizer.tts("This is a test!")
|
||||
|
|
|
@ -185,4 +185,4 @@
|
|||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
}
|
||||
|
|
|
@ -176,4 +176,4 @@
|
|||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
|
||||
By the use of this notebook, you can easily analyze a brand new dataset, find exceptional cases and define your training set.
|
||||
|
||||
What we are looking in here is reasonable distribution of instances in terms of sequence-length, audio-length and word-coverage.
|
||||
What we are looking in here is reasonable distribution of instances in terms of sequence-length, audio-length and word-coverage.
|
||||
|
||||
This notebook is inspired from https://github.com/MycroftAI/mimic2
|
||||
|
|
|
@ -19,4 +19,4 @@ python TTS/bin/resample.py --input_dir recipes/vctk/VCTK/wav48_silence_trimmed -
|
|||
|
||||
If you train a new model using TTS, feel free to share your training to expand the list of recipes.
|
||||
|
||||
You can also open a new discussion and share your progress with the 🐸 community.
|
||||
You can also open a new discussion and share your progress with the 🐸 community.
|
||||
|
|
|
@ -9,4 +9,4 @@ To get a license and download link for this dataset, you need to visit the [webs
|
|||
You get access to the raw dataset in a couple of days. There are a few preprocessing steps you need to do to be able to use the high fidelity dataset.
|
||||
|
||||
1. Get the forced time alignments for the blizzard dataset from [here](https://github.com/mueller91/tts_alignments).
|
||||
2. Segment the high fidelity audio-book files based on the instructions [here](https://github.com/Tomiinek/Blizzard2013_Segmentation).
|
||||
2. Segment the high fidelity audio-book files based on the instructions [here](https://github.com/Tomiinek/Blizzard2013_Segmentation).
|
||||
|
|
|
@ -20,4 +20,4 @@ CUDA_VISIBLE_DEVICES="0" python TTS/bin/train_tts.py --config_path $RUN_DIR/taco
|
|||
--coqpit.output_path $RUN_DIR \
|
||||
--coqpit.datasets.0.path $RUN_DIR/$CORPUS \
|
||||
--coqpit.audio.stats_path $RUN_DIR/scale_stats.npy \
|
||||
--coqpit.phoneme_cache_path $RUN_DIR/phoneme_cache \
|
||||
--coqpit.phoneme_cache_path $RUN_DIR/phoneme_cache \
|
||||
|
|
|
@ -122,4 +122,4 @@
|
|||
"use_gst": false,
|
||||
"use_external_speaker_embedding_file": false,
|
||||
"external_speaker_embedding_file": "../../speakers-vctk-en.json"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,4 +11,4 @@ shuf LJSpeech-1.1/metadata.csv > LJSpeech-1.1/metadata_shuf.csv
|
|||
head -n 12000 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_train.csv
|
||||
tail -n 1100 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_val.csv
|
||||
mv LJSpeech-1.1 $RUN_DIR/recipes/ljspeech/
|
||||
rm LJSpeech-1.1.tar.bz2
|
||||
rm LJSpeech-1.1.tar.bz2
|
||||
|
|
|
@ -4,4 +4,3 @@ BASEDIR=$(dirname "$0")
|
|||
echo "$BASEDIR"
|
||||
# run training
|
||||
CUDA_VISIBLE_DEVICES="" python TTS/bin/compute_statistics.py --config_path $BASEDIR/../inputs/test_glow_tts.json --out_path $BASEDIR/../outputs/scale_stats.npy
|
||||
|
||||
|
|
|
@ -100222,5 +100222,5 @@
|
|||
0.04999300092458725,
|
||||
-0.12125937640666962
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,4 +6,4 @@ wavs/LJ001-0004.flac|produced the block books, which were the immediate predeces
|
|||
wavs/LJ001-0005.flac|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|ljspeech-2
|
||||
wavs/LJ001-0006.flac|And it is worth mention in passing that, as an example of fine typography,|And it is worth mention in passing that, as an example of fine typography,|ljspeech-2
|
||||
wavs/LJ001-0007.flac|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about 1455,|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about fourteen fifty-five,|ljspeech-3
|
||||
wavs/LJ001-0008.flac|has never been surpassed.|has never been surpassed.|ljspeech-3
|
||||
wavs/LJ001-0008.flac|has never been surpassed.|has never been surpassed.|ljspeech-3
|
||||
|
|
Can't render this file because it contains an unexpected character in line 8 and column 86.
|
|
@ -6,4 +6,4 @@ wavs/LJ001-0004.mp3|produced the block books, which were the immediate predecess
|
|||
wavs/LJ001-0005.mp3|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|ljspeech-2
|
||||
wavs/LJ001-0006.mp3|And it is worth mention in passing that, as an example of fine typography,|And it is worth mention in passing that, as an example of fine typography,|ljspeech-2
|
||||
wavs/LJ001-0007.mp3|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about 1455,|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about fourteen fifty-five,|ljspeech-3
|
||||
wavs/LJ001-0008.mp3|has never been surpassed.|has never been surpassed.|ljspeech-3
|
||||
wavs/LJ001-0008.mp3|has never been surpassed.|has never been surpassed.|ljspeech-3
|
||||
|
|
Can't render this file because it contains an unexpected character in line 8 and column 85.
|
|
@ -6,4 +6,4 @@ wavs/LJ001-0004.wav|produced the block books, which were the immediate predecess
|
|||
wavs/LJ001-0005.wav|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|ljspeech-2
|
||||
wavs/LJ001-0006.wav|And it is worth mention in passing that, as an example of fine typography,|And it is worth mention in passing that, as an example of fine typography,|ljspeech-2
|
||||
wavs/LJ001-0007.wav|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about 1455,|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about fourteen fifty-five,|ljspeech-3
|
||||
wavs/LJ001-0008.wav|has never been surpassed.|has never been surpassed.|ljspeech-3
|
||||
wavs/LJ001-0008.wav|has never been surpassed.|has never been surpassed.|ljspeech-3
|
||||
|
|
Can't render this file because it contains an unexpected character in line 8 and column 85.
|
|
@ -1,6 +1,6 @@
|
|||
client_id path sentence up_votes down_votes age gender accent locale segment
|
||||
95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005954.mp3 The applicants are invited for coffee and visa is given immediately. 3 0 en
|
||||
95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005955.mp3 Developmental robotics is related to, but differs from, evolutionary robotics. 2 0 en
|
||||
95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005956.mp3 The musical was originally directed and choreographed by Alan Lund. 2 0 en
|
||||
954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737073.mp3 He graduated from Columbia High School, in Brown County, South Dakota. 2 0 en
|
||||
954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737074.mp3 Competition for limited resources has also resulted in some local conflicts. 2 0 en
|
||||
95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005954.mp3 The applicants are invited for coffee and visa is given immediately. 3 0 en
|
||||
95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005955.mp3 Developmental robotics is related to, but differs from, evolutionary robotics. 2 0 en
|
||||
95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005956.mp3 The musical was originally directed and choreographed by Alan Lund. 2 0 en
|
||||
954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737073.mp3 He graduated from Columbia High School, in Brown County, South Dakota. 2 0 en
|
||||
954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737074.mp3 Competition for limited resources has also resulted in some local conflicts. 2 0 en
|
||||
|
|
Can't render this file because it has a wrong number of fields in line 2.
|
|
@ -98,5 +98,3 @@
|
|||
"gst_style_tokens": 10
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -2,4 +2,4 @@
|
|||
"en": 0,
|
||||
"fr-fr": 1,
|
||||
"pt-br": 2
|
||||
}
|
||||
}
|
||||
|
|
|
@ -155,4 +155,4 @@
|
|||
"meta_file_attn_mask": null
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
|
@ -58,4 +58,4 @@
|
|||
"storage_size": 15 // the size of the in-memory storage with respect to a single batch
|
||||
},
|
||||
"datasets":null
|
||||
}
|
||||
}
|
||||
|
|
|
@ -152,4 +152,4 @@
|
|||
"meta_file_attn_mask": "tests/data/ljspeech/metadata_attn_mask.txt"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,4 +21,3 @@
|
|||
"do_trim_silence": false
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -163,4 +163,3 @@
|
|||
// PATHS
|
||||
"output_path": "tests/train_outputs/"
|
||||
}
|
||||
|
||||
|
|
|
@ -113,4 +113,3 @@
|
|||
// PATHS
|
||||
"output_path": "tests/train_outputs/"
|
||||
}
|
||||
|
||||
|
|
|
@ -109,4 +109,3 @@
|
|||
// PATHS
|
||||
"output_path": "tests/train_outputs/"
|
||||
}
|
||||
|
||||
|
|
|
@ -12666,4 +12666,4 @@
|
|||
"da kara"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue