Use packaging.version for version comparisons (#2310)

* Use packaging.version for version comparisons

The distutils package is deprecated [1] and relies on PEP 386 [2] version
comparisons, which have been superseded by PEP 440 [3], as implemented by
the packaging module.

With more recent distutils versions, provided through setuptools
vendoring, we are seeing the following exception during version
comparisons:

> TypeError: '<' not supported between instances of 'str' and 'int'

This is fixed by this migration.

[1] https://docs.python.org/3/library/distutils.html
[2] https://peps.python.org/pep-0386/
[3] https://peps.python.org/pep-0440/

* Improve espeak version detection robustness

On many modern systems espeak is just a symlink to espeak-ng. In that
case, taking the 3rd word of the version output breaks the version
comparison, because that word is `text-to-speech:` instead of a proper
version.

This will not break during runtime, where espeak-ng would be
prioritized, but the phonemizer and tokenizer tests force the backend
to `espeak`, which exhibits this breakage.

This improves the version detection by simply looking for the version
after the "text-to-speech:" token.

* Replace distutils.copy_tree with shutil.copytree

The distutils module is deprecated and slated for removal in Python
3.12. Its usage should be replaced, in this case by a compatible method
from shutil.
This commit is contained in:
Martin Weinelt 2023-01-29 22:47:00 +00:00 committed by GitHub
parent c59b3f75b8
commit 994be163e1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 27 additions and 17 deletions

View File

@ -2,8 +2,8 @@ import argparse
import glob import glob
import os import os
from argparse import RawTextHelpFormatter from argparse import RawTextHelpFormatter
from distutils.dir_util import copy_tree
from multiprocessing import Pool from multiprocessing import Pool
from shutil import copytree
import librosa import librosa
import soundfile as sf import soundfile as sf
@ -19,7 +19,7 @@ def resample_file(func_args):
def resample_files(input_dir, output_sr, output_dir=None, file_ext="wav", n_jobs=10): def resample_files(input_dir, output_sr, output_dir=None, file_ext="wav", n_jobs=10):
if output_dir: if output_dir:
print("Recursively copying the input folder...") print("Recursively copying the input folder...")
copy_tree(input_dir, output_dir) copytree(input_dir, output_dir)
input_dir = output_dir input_dir = output_dir
print("Resampling the audio files...") print("Resampling the audio files...")

View File

@ -1,6 +1,5 @@
from distutils.version import LooseVersion
import torch import torch
from packaging.version import Version
from torch import nn from torch import nn
from torch.nn import functional as F from torch.nn import functional as F
@ -91,7 +90,7 @@ class InvConvNear(nn.Module):
self.no_jacobian = no_jacobian self.no_jacobian = no_jacobian
self.weight_inv = None self.weight_inv = None
if LooseVersion(torch.__version__) < LooseVersion("1.9"): if Version(torch.__version__) < Version("1.9"):
w_init = torch.qr(torch.FloatTensor(self.num_splits, self.num_splits).normal_())[0] w_init = torch.qr(torch.FloatTensor(self.num_splits, self.num_splits).normal_())[0]
else: else:
w_init = torch.linalg.qr(torch.FloatTensor(self.num_splits, self.num_splits).normal_(), "complete")[0] w_init = torch.linalg.qr(torch.FloatTensor(self.num_splits, self.num_splits).normal_(), "complete")[0]

View File

@ -1,9 +1,10 @@
import logging import logging
import re import re
import subprocess import subprocess
from distutils.version import LooseVersion
from typing import Dict, List from typing import Dict, List
from packaging.version import Version
from TTS.tts.utils.text.phonemizers.base import BasePhonemizer from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
from TTS.tts.utils.text.punctuation import Punctuation from TTS.tts.utils.text.punctuation import Punctuation
@ -14,9 +15,16 @@ def is_tool(name):
return which(name) is not None return which(name) is not None
# Use a regex pattern to match the espeak version, because it may be
# symlinked to espeak-ng, which moves the version bits to another spot.
espeak_version_pattern = re.compile(r"text-to-speech:\s(?P<version>\d+\.\d+(\.\d+)?)")
def get_espeak_version(): def get_espeak_version():
output = subprocess.getoutput("espeak --version") output = subprocess.getoutput("espeak --version")
return output.split()[2] match = espeak_version_pattern.search(output)
return match.group("version")
def get_espeakng_version(): def get_espeakng_version():
@ -168,7 +176,7 @@ class ESpeak(BasePhonemizer):
else: else:
# split with '_' # split with '_'
if self.backend == "espeak": if self.backend == "espeak":
if LooseVersion(self.backend_version) >= LooseVersion("1.48.15"): if Version(self.backend_version) >= Version("1.48.15"):
args.append("--ipa=1") args.append("--ipa=1")
else: else:
args.append("--ipa=3") args.append("--ipa=3")

View File

@ -1,5 +1,5 @@
[build-system] [build-system]
requires = ["setuptools", "wheel", "cython==0.29.28", "numpy==1.21.6"] requires = ["setuptools", "wheel", "cython==0.29.28", "numpy==1.21.6", "packaging"]
[flake8] [flake8]
max-line-length=120 max-line-length=120
@ -30,4 +30,4 @@ exclude = '''
[tool.isort] [tool.isort]
line_length = 120 line_length = 120
profile = "black" profile = "black"
multi_line_output = 3 multi_line_output = 3

View File

@ -14,6 +14,7 @@ tqdm
anyascii anyascii
pyyaml pyyaml
fsspec>=2021.04.0 fsspec>=2021.04.0
packaging
# deps for examples # deps for examples
flask flask
# deps for inference # deps for inference

View File

@ -23,7 +23,7 @@
import os import os
import subprocess import subprocess
import sys import sys
from distutils.version import LooseVersion from packaging.version import Version
import numpy import numpy
import setuptools.command.build_py import setuptools.command.build_py
@ -31,7 +31,8 @@ import setuptools.command.develop
from Cython.Build import cythonize from Cython.Build import cythonize
from setuptools import Extension, find_packages, setup from setuptools import Extension, find_packages, setup
if LooseVersion(sys.version) < LooseVersion("3.7") or LooseVersion(sys.version) >= LooseVersion("3.11"): python_version = sys.version.split()[0]
if Version(python_version) < Version("3.7") or Version(python_version) >= Version("3.11"):
raise RuntimeError("TTS requires python >= 3.7 and < 3.11 " "but your Python version is {}".format(sys.version)) raise RuntimeError("TTS requires python >= 3.7 and < 3.11 " "but your Python version is {}".format(sys.version))

View File

@ -1,5 +1,6 @@
import unittest import unittest
from distutils.version import LooseVersion
from packaging.version import Version
from TTS.tts.utils.text.phonemizers import ESpeak, Gruut, JA_JP_Phonemizer, ZH_CN_Phonemizer from TTS.tts.utils.text.phonemizers import ESpeak, Gruut, JA_JP_Phonemizer, ZH_CN_Phonemizer
from TTS.tts.utils.text.phonemizers.multi_phonemizer import MultiPhonemizer from TTS.tts.utils.text.phonemizers.multi_phonemizer import MultiPhonemizer
@ -40,7 +41,7 @@ class TestEspeakPhonemizer(unittest.TestCase):
def setUp(self): def setUp(self):
self.phonemizer = ESpeak(language="en-us", backend="espeak") self.phonemizer = ESpeak(language="en-us", backend="espeak")
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"): if Version(self.phonemizer.backend_version) >= Version("1.48.15"):
target_phonemes = EXPECTED_ESPEAK_v1_48_15_PHONEMES target_phonemes = EXPECTED_ESPEAK_v1_48_15_PHONEMES
else: else:
target_phonemes = EXPECTED_ESPEAK_PHONEMES target_phonemes = EXPECTED_ESPEAK_PHONEMES
@ -52,7 +53,7 @@ class TestEspeakPhonemizer(unittest.TestCase):
# multiple punctuations # multiple punctuations
text = "Be a voice, not an! echo?" text = "Be a voice, not an! echo?"
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ?" gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ?"
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"): if Version(self.phonemizer.backend_version) >= Version("1.48.15"):
gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ?" gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ?"
output = self.phonemizer.phonemize(text, separator="|") output = self.phonemizer.phonemize(text, separator="|")
output = output.replace("|", "") output = output.replace("|", "")
@ -61,7 +62,7 @@ class TestEspeakPhonemizer(unittest.TestCase):
# not ending with punctuation # not ending with punctuation
text = "Be a voice, not an! echo" text = "Be a voice, not an! echo"
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ" gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ"
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"): if Version(self.phonemizer.backend_version) >= Version("1.48.15"):
gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ" gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ"
output = self.phonemizer.phonemize(text, separator="") output = self.phonemizer.phonemize(text, separator="")
self.assertEqual(output, gt) self.assertEqual(output, gt)
@ -69,7 +70,7 @@ class TestEspeakPhonemizer(unittest.TestCase):
# extra space after the sentence # extra space after the sentence
text = "Be a voice, not an! echo. " text = "Be a voice, not an! echo. "
gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ." gt = "biː ɐ vˈɔɪs, nˈɑːt ɐn! ˈɛkoʊ."
if LooseVersion(self.phonemizer.backend_version) >= LooseVersion("1.48.15"): if Version(self.phonemizer.backend_version) >= Version("1.48.15"):
gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ." gt = "biː ɐ vˈɔɪs, nˈɑːt æn! ˈɛkoʊ."
output = self.phonemizer.phonemize(text, separator="") output = self.phonemizer.phonemize(text, separator="")
self.assertEqual(output, gt) self.assertEqual(output, gt)