mirror of https://github.com/coqui-ai/TTS.git
Merge pull request #22 from eginhard/unique-chars
refactor(bin.find_unique_chars): use existing function
This commit is contained in:
commit
018daa002b
|
@ -30,6 +30,7 @@ jobs:
|
||||||
- name: Install Espeak
|
- name: Install Espeak
|
||||||
if: contains(fromJSON('["inference_tests", "test_text", "test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset)
|
if: contains(fromJSON('["inference_tests", "test_text", "test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset)
|
||||||
run: |
|
run: |
|
||||||
|
sudo apt-get update
|
||||||
sudo apt-get install espeak espeak-ng
|
sudo apt-get install espeak espeak-ng
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
|
|
|
@ -4,7 +4,7 @@ import argparse
|
||||||
from argparse import RawTextHelpFormatter
|
from argparse import RawTextHelpFormatter
|
||||||
|
|
||||||
from TTS.config import load_config
|
from TTS.config import load_config
|
||||||
from TTS.tts.datasets import load_tts_samples
|
from TTS.tts.datasets import find_unique_chars, load_tts_samples
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -29,17 +29,7 @@ def main():
|
||||||
)
|
)
|
||||||
|
|
||||||
items = train_items + eval_items
|
items = train_items + eval_items
|
||||||
|
find_unique_chars(items)
|
||||||
texts = "".join(item["text"] for item in items)
|
|
||||||
chars = set(texts)
|
|
||||||
lower_chars = filter(lambda c: c.islower(), chars)
|
|
||||||
chars_force_lower = [c.lower() for c in chars]
|
|
||||||
chars_force_lower = set(chars_force_lower)
|
|
||||||
|
|
||||||
print(f" > Number of unique characters: {len(chars)}")
|
|
||||||
print(f" > Unique characters: {''.join(sorted(chars))}")
|
|
||||||
print(f" > Unique lower characters: {''.join(sorted(lower_chars))}")
|
|
||||||
print(f" > Unique all forced to lower characters: {''.join(sorted(chars_force_lower))}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -167,7 +167,7 @@ def _get_formatter_by_name(name):
|
||||||
|
|
||||||
|
|
||||||
def find_unique_chars(data_samples, verbose=True):
|
def find_unique_chars(data_samples, verbose=True):
|
||||||
texts = "".join(item[0] for item in data_samples)
|
texts = "".join(item["text"] for item in data_samples)
|
||||||
chars = set(texts)
|
chars = set(texts)
|
||||||
lower_chars = filter(lambda c: c.islower(), chars)
|
lower_chars = filter(lambda c: c.islower(), chars)
|
||||||
chars_force_lower = [c.lower() for c in chars]
|
chars_force_lower = [c.lower() for c in chars]
|
||||||
|
|
Loading…
Reference in New Issue