From ea56ec041f4abb1e2e014449f7e78dba2bb2d09f Mon Sep 17 00:00:00 2001 From: WeberJulian Date: Wed, 8 Nov 2023 10:16:36 +0100 Subject: [PATCH] update docs --- docs/source/models/xtts.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md index d9e75533..03e44af1 100644 --- a/docs/source/models/xtts.md +++ b/docs/source/models/xtts.md @@ -115,7 +115,7 @@ model.load_checkpoint(config, checkpoint_dir="/path/to/xtts/", use_deepspeed=Tru model.cuda() print("Computing speaker latents...") -gpt_cond_latent, diffusion_conditioning, speaker_embedding = model.get_conditioning_latents(audio_path=["reference.wav"]) +gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=["reference.wav"]) print("Inference...") out = model.inference( @@ -123,7 +123,6 @@ out = model.inference( "en", gpt_cond_latent, speaker_embedding, - diffusion_conditioning, temperature=0.7, # Add custom parameters here ) torchaudio.save("xtts.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000) @@ -152,7 +151,7 @@ model.load_checkpoint(config, checkpoint_dir="/path/to/xtts/", use_deepspeed=Tru model.cuda() print("Computing speaker latents...") -gpt_cond_latent, _, speaker_embedding = model.get_conditioning_latents(audio_path=["reference.wav"]) +gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=["reference.wav"]) print("Inference...") t0 = time.time() @@ -209,7 +208,7 @@ model.load_checkpoint(config, checkpoint_path=XTTS_CHECKPOINT, vocab_path=TOKENI model.cuda() print("Computing speaker latents...") -gpt_cond_latent, diffusion_conditioning, speaker_embedding = model.get_conditioning_latents(audio_path=[SPEAKER_REFERENCE]) +gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[SPEAKER_REFERENCE]) print("Inference...") out = model.inference( @@ -217,7 +216,6 @@ out = model.inference( "en", gpt_cond_latent, speaker_embedding, - diffusion_conditioning, temperature=0.7, # Add custom parameters here ) torchaudio.save(OUTPUT_WAV_PATH, torch.tensor(out["wav"]).unsqueeze(0), 24000)