Merge pull request #3 from eginhard/remove-pandas

Remove pandas
This commit is contained in:
Enno Hermann 2024-03-06 22:07:32 +01:00 committed by GitHub
commit f24f7c1237
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 34 additions and 28 deletions

View File

@@ -19,13 +19,13 @@
# pylint: disable=too-many-locals, too-many-statements, too-many-arguments, too-many-instance-attributes # pylint: disable=too-many-locals, too-many-statements, too-many-arguments, too-many-instance-attributes
""" voxceleb 1 & 2 """ """ voxceleb 1 & 2 """
import csv
import hashlib import hashlib
import os import os
import subprocess import subprocess
import sys import sys
import zipfile import zipfile
import pandas
import soundfile as sf import soundfile as sf
from absl import logging from absl import logging
@@ -185,8 +185,11 @@ def convert_audio_and_make_label(input_dir, subset, output_dir, output_file):
# Write to CSV file which contains four columns: # Write to CSV file which contains four columns:
# "wav_filename", "wav_length_ms", "speaker_id", "speaker_name". # "wav_filename", "wav_length_ms", "speaker_id", "speaker_name".
csv_file_path = os.path.join(output_dir, output_file) csv_file_path = os.path.join(output_dir, output_file)
df = pandas.DataFrame(data=files, columns=["wav_filename", "wav_length_ms", "speaker_id", "speaker_name"]) with open(csv_file_path, "w", newline="", encoding="utf-8") as f:
df.to_csv(csv_file_path, index=False, sep="\t") writer = csv.writer(f, delimiter="\t")
writer.writerow(["wav_filename", "wav_length_ms", "speaker_id", "speaker_name"])
for wav_file in files:
writer.writerow(wav_file)
logging.info("Successfully generated csv file {}".format(csv_file_path)) logging.info("Successfully generated csv file {}".format(csv_file_path))

View File

@@ -1,3 +1,4 @@
import csv
import os import os
import re import re
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
@@ -5,7 +6,6 @@ from glob import glob
from pathlib import Path from pathlib import Path
from typing import List from typing import List
import pandas as pd
from tqdm import tqdm from tqdm import tqdm
######################## ########################
@@ -25,25 +25,27 @@ def cml_tts(root_path, meta_file, ignored_speakers=None):
if len(line.split("|")) != num_cols: if len(line.split("|")) != num_cols:
print(f" > Missing column in line {idx + 1} -> {line.strip()}") print(f" > Missing column in line {idx + 1} -> {line.strip()}")
# load metadata # load metadata
metadata = pd.read_csv(os.path.join(root_path, meta_file), sep="|") with open(Path(root_path) / meta_file, newline="", encoding="utf-8") as f:
assert all(x in metadata.columns for x in ["wav_filename", "transcript"]) reader = csv.DictReader(f, delimiter="|")
client_id = None if "client_id" in metadata.columns else "default" metadata = list(reader)
emotion_name = None if "emotion_name" in metadata.columns else "neutral" assert all(x in metadata[0] for x in ["wav_filename", "transcript"])
client_id = None if "client_id" in metadata[0] else "default"
emotion_name = None if "emotion_name" in metadata[0] else "neutral"
items = [] items = []
not_found_counter = 0 not_found_counter = 0
for row in metadata.itertuples(): for row in metadata:
if client_id is None and ignored_speakers is not None and row.client_id in ignored_speakers: if client_id is None and ignored_speakers is not None and row["client_id"] in ignored_speakers:
continue continue
audio_path = os.path.join(root_path, row.wav_filename) audio_path = os.path.join(root_path, row["wav_filename"])
if not os.path.exists(audio_path): if not os.path.exists(audio_path):
not_found_counter += 1 not_found_counter += 1
continue continue
items.append( items.append(
{ {
"text": row.transcript, "text": row["transcript"],
"audio_file": audio_path, "audio_file": audio_path,
"speaker_name": client_id if client_id is not None else row.client_id, "speaker_name": client_id if client_id is not None else row["client_id"],
"emotion_name": emotion_name if emotion_name is not None else row.emotion_name, "emotion_name": emotion_name if emotion_name is not None else row["emotion_name"],
"root_path": root_path, "root_path": root_path,
} }
) )
@@ -63,25 +65,27 @@ def coqui(root_path, meta_file, ignored_speakers=None):
if len(line.split("|")) != num_cols: if len(line.split("|")) != num_cols:
print(f" > Missing column in line {idx + 1} -> {line.strip()}") print(f" > Missing column in line {idx + 1} -> {line.strip()}")
# load metadata # load metadata
metadata = pd.read_csv(os.path.join(root_path, meta_file), sep="|") with open(Path(root_path) / meta_file, newline="", encoding="utf-8") as f:
assert all(x in metadata.columns for x in ["audio_file", "text"]) reader = csv.DictReader(f, delimiter="|")
speaker_name = None if "speaker_name" in metadata.columns else "coqui" metadata = list(reader)
emotion_name = None if "emotion_name" in metadata.columns else "neutral" assert all(x in metadata[0] for x in ["audio_file", "text"])
speaker_name = None if "speaker_name" in metadata[0] else "coqui"
emotion_name = None if "emotion_name" in metadata[0] else "neutral"
items = [] items = []
not_found_counter = 0 not_found_counter = 0
for row in metadata.itertuples(): for row in metadata:
if speaker_name is None and ignored_speakers is not None and row.speaker_name in ignored_speakers: if speaker_name is None and ignored_speakers is not None and row["speaker_name"] in ignored_speakers:
continue continue
audio_path = os.path.join(root_path, row.audio_file) audio_path = os.path.join(root_path, row["audio_file"])
if not os.path.exists(audio_path): if not os.path.exists(audio_path):
not_found_counter += 1 not_found_counter += 1
continue continue
items.append( items.append(
{ {
"text": row.text, "text": row["text"],
"audio_file": audio_path, "audio_file": audio_path,
"speaker_name": speaker_name if speaker_name is not None else row.speaker_name, "speaker_name": speaker_name if speaker_name is not None else row["speaker_name"],
"emotion_name": emotion_name if emotion_name is not None else row.emotion_name, "emotion_name": emotion_name if emotion_name is not None else row["emotion_name"],
"root_path": root_path, "root_path": root_path,
} }
) )

View File

@@ -1 +1,2 @@
bokeh==1.4.0 bokeh==1.4.0
pandas>=1.4,<2.0

View File

@@ -8,7 +8,6 @@ torchaudio
soundfile>=0.12.0 soundfile>=0.12.0
librosa>=0.10.0 librosa>=0.10.0
scikit-learn>=1.3.0 scikit-learn>=1.3.0
numba==0.55.1;python_version<"3.9"
numba>=0.57.0;python_version>="3.9" numba>=0.57.0;python_version>="3.9"
inflect>=5.6.0 inflect>=5.6.0
tqdm>=4.64.1 tqdm>=4.64.1
@@ -24,7 +23,6 @@ flask>=2.0.1
pysbd>=0.3.4 pysbd>=0.3.4
# deps for notebooks # deps for notebooks
umap-learn>=0.5.1 umap-learn>=0.5.1
pandas>=1.4,<2.0
# deps for training # deps for training
matplotlib>=3.7.0 matplotlib>=3.7.0
# coqui stack # coqui stack
@@ -54,4 +52,4 @@ encodec>=0.1.1
# deps for XTTS # deps for XTTS
unidecode>=1.3.2 unidecode>=1.3.2
num2words num2words
spacy[ja]>=3 spacy[ja]>=3