mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2026-04-29 21:00:42 +08:00
fix: 修复 TTS 音频后处理中的多个缺陷 (#2753)
1. 修复音频超采样时 int16 双重转换导致整数溢出(CRITICAL)
- audio_postprocess 中 `audio = (audio * 32768).astype(np.int16)` 位于
if/else 块之外无条件执行,当 super_sampling=True 时音频已在分支内
转为 int16,再次乘以 32768 导致溢出和音频完全失真
- 同时修复 super_sampling=True 但超分模型不存在时 torch.Tensor 调用
.astype() 的 AttributeError
2. 修复 batched vocoder 推理中 padding_len=0 导致音频丢失(HIGH)
- 当 padding_len 恰好为 0 时,`-0 * upsample_rate == 0`,切片
`audio[x:0]` 返回空张量,导致整段音频丢失
3. 修复文件不存在时错误地抛出 FileExistsError(LOW)
- 应为 FileNotFoundError
Made-with: Cursor
This commit is contained in:
parent
00ce973412
commit
445d18ccce
@@ -499,7 +499,7 @@ class TTS:
|
||||
|
||||
if if_lora_v3 == True and os.path.exists(path_sovits) == False:
|
||||
info = path_sovits + i18n("SoVITS %s 底模缺失,无法加载相应 LoRA 权重" % model_version)
|
||||
-raise FileExistsError(info)
|
||||
+raise FileNotFoundError(info)
|
||||
|
||||
# dict_s2 = torch.load(weights_path, map_location=self.configs.device,weights_only=False)
|
||||
dict_s2 = load_sovits_new(weights_path)
|
||||
@@ -1578,16 +1578,15 @@ class TTS:
|
||||
max_audio = np.abs(audio).max()
|
||||
if max_audio > 1:
|
||||
audio /= max_audio
|
||||
audio = (audio * 32768).astype(np.int16)
|
||||
audio = (audio * 32768).astype(np.int16)
|
||||
else:
|
||||
audio = audio.cpu().numpy()
|
||||
audio = (audio * 32768).astype(np.int16)
|
||||
t2 = time.perf_counter()
|
||||
print(f"超采样用时:{t2 - t1:.3f}s")
|
||||
else:
|
||||
# audio = audio.float() * 32768
|
||||
# audio = audio.to(dtype=torch.int16).clamp(-32768, 32767).cpu().numpy()
|
||||
|
||||
audio = audio.cpu().numpy()
|
||||
|
||||
audio = (audio * 32768).astype(np.int16)
|
||||
audio = (audio * 32768).astype(np.int16)
|
||||
|
||||
|
||||
# try:
|
||||
@@ -1768,7 +1767,10 @@ class TTS:
|
||||
pos += chunk_len * upsample_rate
|
||||
|
||||
audio = self.sola_algorithm(audio_fragments, overlapped_len * upsample_rate)
|
||||
-audio = audio[overlapped_len * upsample_rate : -padding_len * upsample_rate]
|
||||
+if padding_len > 0:
|
||||
+    audio = audio[overlapped_len * upsample_rate : -padding_len * upsample_rate]
|
||||
+else:
|
||||
+    audio = audio[overlapped_len * upsample_rate :]
|
||||
|
||||
audio_fragments = []
|
||||
for feat_len in feat_lens:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user