mirror of
https://github.com/RVC-Boss/GPT-SoVITS.git
synced 2026-07-04 04:58:12 +08:00
- Frontend: add wavesurfer.js v7 waveform visualization with region-based audio trimming - Frontend: add export trimmed audio button, OfflineAudioContext-based client-side trimming - API: add OpenAPI tags, descriptions, and summaries for all endpoints - API: enhance /health endpoint with PID, memory, and GPU info (optional psutil/torch) - API: bump version to 1.1.0, enable /docs and /redoc - Docs: rewrite simple_api.md as comprehensive API reference - Docs: update simple_api_quickstart.md with Swagger/ReDoc links - Docs: update README with endpoint table and feature list - Tests: fix DummyFastAPI mock to accept **kwargs (tags, summary, etc.) - All 7 tests pass, compile check OK
1007 lines
31 KiB
HTML
1007 lines
31 KiB
HTML
<!doctype html>
|
||
<html lang="zh-CN">
|
||
<head>
|
||
<meta charset="utf-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||
<title>GPT-SoVITS API Test</title>
|
||
<style>
|
||
:root {
|
||
color-scheme: light;
|
||
--bg: #f5f7f4;
|
||
--panel: #ffffff;
|
||
--ink: #18201d;
|
||
--muted: #64706b;
|
||
--line: #d8ded9;
|
||
--accent: #19745f;
|
||
--accent-strong: #0f5f4c;
|
||
--warn: #a15d12;
|
||
--bad: #b42318;
|
||
--good: #18794e;
|
||
--shadow: 0 18px 50px rgba(31, 44, 38, 0.12);
|
||
font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
|
||
}
|
||
|
||
* {
|
||
box-sizing: border-box;
|
||
}
|
||
|
||
body {
|
||
margin: 0;
|
||
min-height: 100vh;
|
||
background:
|
||
linear-gradient(135deg, rgba(25, 116, 95, 0.08), rgba(248, 251, 247, 0) 42%),
|
||
var(--bg);
|
||
color: var(--ink);
|
||
}
|
||
|
||
main {
|
||
width: min(1180px, calc(100vw - 32px));
|
||
margin: 0 auto;
|
||
padding: 28px 0 44px;
|
||
}
|
||
|
||
header {
|
||
display: grid;
|
||
grid-template-columns: minmax(0, 1fr) auto;
|
||
align-items: end;
|
||
gap: 20px;
|
||
padding: 8px 0 22px;
|
||
border-bottom: 1px solid var(--line);
|
||
}
|
||
|
||
h1 {
|
||
margin: 0;
|
||
font-size: clamp(28px, 4vw, 54px);
|
||
line-height: 1.02;
|
||
letter-spacing: 0;
|
||
}
|
||
|
||
.sub {
|
||
margin: 12px 0 0;
|
||
color: var(--muted);
|
||
max-width: 760px;
|
||
line-height: 1.6;
|
||
}
|
||
|
||
.status {
|
||
min-width: 172px;
|
||
padding: 12px 14px;
|
||
border: 1px solid var(--line);
|
||
border-radius: 8px;
|
||
background: rgba(255, 255, 255, 0.68);
|
||
color: var(--muted);
|
||
text-align: right;
|
||
font-size: 14px;
|
||
}
|
||
|
||
.status strong {
|
||
display: block;
|
||
color: var(--ink);
|
||
font-size: 16px;
|
||
margin-bottom: 2px;
|
||
}
|
||
|
||
.workspace {
|
||
display: grid;
|
||
grid-template-columns: minmax(0, 1.05fr) minmax(320px, 0.65fr);
|
||
gap: 22px;
|
||
padding-top: 24px;
|
||
align-items: start;
|
||
}
|
||
|
||
section {
|
||
background: var(--panel);
|
||
border: 1px solid var(--line);
|
||
border-radius: 8px;
|
||
box-shadow: var(--shadow);
|
||
}
|
||
|
||
.form {
|
||
padding: 22px;
|
||
}
|
||
|
||
.result {
|
||
padding: 20px;
|
||
position: sticky;
|
||
top: 18px;
|
||
}
|
||
|
||
.grid {
|
||
display: grid;
|
||
grid-template-columns: repeat(2, minmax(0, 1fr));
|
||
gap: 16px;
|
||
}
|
||
|
||
.full {
|
||
grid-column: 1 / -1;
|
||
}
|
||
|
||
label {
|
||
display: block;
|
||
color: var(--ink);
|
||
font-weight: 650;
|
||
font-size: 14px;
|
||
margin-bottom: 8px;
|
||
}
|
||
|
||
input,
|
||
textarea,
|
||
select {
|
||
width: 100%;
|
||
border: 1px solid var(--line);
|
||
border-radius: 8px;
|
||
background: #fbfcfb;
|
||
color: var(--ink);
|
||
font: inherit;
|
||
padding: 12px;
|
||
outline: none;
|
||
transition: border-color 150ms ease, box-shadow 150ms ease, background 150ms ease;
|
||
}
|
||
|
||
input:focus,
|
||
textarea:focus,
|
||
select:focus {
|
||
border-color: rgba(25, 116, 95, 0.72);
|
||
box-shadow: 0 0 0 4px rgba(25, 116, 95, 0.12);
|
||
background: #fff;
|
||
}
|
||
|
||
textarea {
|
||
min-height: 142px;
|
||
resize: vertical;
|
||
line-height: 1.55;
|
||
}
|
||
|
||
.hint {
|
||
margin: 7px 0 0;
|
||
color: var(--muted);
|
||
font-size: 13px;
|
||
line-height: 1.45;
|
||
}
|
||
|
||
.file-line {
|
||
display: flex;
|
||
align-items: center;
|
||
gap: 10px;
|
||
min-height: 24px;
|
||
color: var(--muted);
|
||
font-size: 13px;
|
||
margin-top: 8px;
|
||
}
|
||
|
||
.duration-ok {
|
||
color: var(--good);
|
||
}
|
||
|
||
.duration-warn {
|
||
color: var(--warn);
|
||
}
|
||
|
||
.actions {
|
||
display: flex;
|
||
flex-wrap: wrap;
|
||
align-items: center;
|
||
gap: 12px;
|
||
margin-top: 18px;
|
||
padding-top: 18px;
|
||
border-top: 1px solid var(--line);
|
||
}
|
||
|
||
button,
|
||
.download {
|
||
border: 0;
|
||
border-radius: 8px;
|
||
min-height: 44px;
|
||
padding: 0 16px;
|
||
background: var(--accent);
|
||
color: #fff;
|
||
font-weight: 700;
|
||
font-size: 14px;
|
||
cursor: pointer;
|
||
display: inline-flex;
|
||
align-items: center;
|
||
justify-content: center;
|
||
gap: 8px;
|
||
text-decoration: none;
|
||
transition: transform 140ms ease, background 140ms ease, opacity 140ms ease;
|
||
}
|
||
|
||
button:hover,
|
||
.download:hover {
|
||
background: var(--accent-strong);
|
||
transform: translateY(-1px);
|
||
}
|
||
|
||
button:disabled,
|
||
.download[aria-disabled="true"] {
|
||
opacity: 0.55;
|
||
cursor: not-allowed;
|
||
transform: none;
|
||
}
|
||
|
||
.ghost {
|
||
background: #e9efeb;
|
||
color: var(--ink);
|
||
}
|
||
|
||
.ghost:hover {
|
||
background: #dde7e1;
|
||
}
|
||
|
||
.result h2 {
|
||
margin: 0 0 12px;
|
||
font-size: 20px;
|
||
letter-spacing: 0;
|
||
}
|
||
|
||
.log {
|
||
min-height: 126px;
|
||
border-radius: 8px;
|
||
border: 1px solid var(--line);
|
||
background: #101815;
|
||
color: #d8f3e9;
|
||
padding: 13px;
|
||
font-family: ui-monospace, SFMono-Regular, Consolas, "Liberation Mono", monospace;
|
||
font-size: 12px;
|
||
line-height: 1.6;
|
||
white-space: pre-wrap;
|
||
overflow-wrap: anywhere;
|
||
}
|
||
|
||
audio {
|
||
width: 100%;
|
||
margin-top: 16px;
|
||
}
|
||
|
||
.meta {
|
||
display: grid;
|
||
grid-template-columns: repeat(2, minmax(0, 1fr));
|
||
gap: 10px;
|
||
margin: 16px 0;
|
||
}
|
||
|
||
.metric {
|
||
border: 1px solid var(--line);
|
||
border-radius: 8px;
|
||
padding: 12px;
|
||
background: #fbfcfb;
|
||
}
|
||
|
||
.metric span {
|
||
display: block;
|
||
color: var(--muted);
|
||
font-size: 12px;
|
||
margin-bottom: 3px;
|
||
}
|
||
|
||
.metric strong {
|
||
font-size: 15px;
|
||
}
|
||
|
||
.danger {
|
||
color: var(--bad);
|
||
}
|
||
|
||
.waveform-wrap {
|
||
display: none;
|
||
margin-top: 14px;
|
||
border: 1px solid var(--line);
|
||
border-radius: 8px;
|
||
background: #fbfcfb;
|
||
padding: 12px;
|
||
}
|
||
|
||
.waveform-wrap.visible {
|
||
display: block;
|
||
}
|
||
|
||
.waveform-wrap h3 {
|
||
margin: 0 0 8px;
|
||
font-size: 13px;
|
||
font-weight: 650;
|
||
color: var(--ink);
|
||
}
|
||
|
||
#waveform {
|
||
border-radius: 4px;
|
||
overflow: hidden;
|
||
background: #e9efeb;
|
||
min-height: 50px;
|
||
}
|
||
|
||
.trim-controls {
|
||
display: none;
|
||
align-items: center;
|
||
gap: 12px;
|
||
margin-top: 10px;
|
||
flex-wrap: wrap;
|
||
}
|
||
|
||
.trim-controls.visible {
|
||
display: flex;
|
||
}
|
||
|
||
.trim-controls label {
|
||
margin-bottom: 0;
|
||
font-size: 12px;
|
||
}
|
||
|
||
.trim-controls input[type="number"] {
|
||
width: 80px;
|
||
padding: 6px 8px;
|
||
font-size: 13px;
|
||
text-align: center;
|
||
}
|
||
|
||
.trim-time-group {
|
||
display: flex;
|
||
align-items: center;
|
||
gap: 6px;
|
||
}
|
||
|
||
.trim-time-group span {
|
||
color: var(--muted);
|
||
font-size: 12px;
|
||
}
|
||
|
||
.trim-separator {
|
||
color: var(--muted);
|
||
font-size: 13px;
|
||
padding: 0 2px;
|
||
}
|
||
|
||
.btn-export-trim {
|
||
margin-left: auto;
|
||
font-size: 12px;
|
||
min-height: 32px;
|
||
padding: 0 12px;
|
||
}
|
||
|
||
@media (max-width: 860px) {
|
||
header,
|
||
.workspace,
|
||
.grid {
|
||
grid-template-columns: 1fr;
|
||
}
|
||
|
||
.status {
|
||
text-align: left;
|
||
}
|
||
|
||
.result {
|
||
position: static;
|
||
}
|
||
}
|
||
</style>
|
||
</head>
|
||
<body>
|
||
<main>
|
||
<header>
|
||
<div>
|
||
<h1>GPT-SoVITS 接口测试台</h1>
|
||
<p class="sub">选择 3-10 秒参考音频或视频(视频会自动提取音频),在波形图上拖拽选取裁剪区域,填写后端接口地址和生成文本,直接调用中间层 <code>/api/tts</code>。</p>
|
||
</div>
|
||
<div class="status" id="statusBox">
|
||
<strong>未检测</strong>
|
||
后端连接状态
|
||
</div>
|
||
</header>
|
||
|
||
<div class="workspace">
|
||
<section class="form">
|
||
<form id="ttsForm">
|
||
<div class="grid">
|
||
<div class="full">
|
||
<label for="endpoint">后端接口地址</label>
|
||
<input id="endpoint" name="endpoint" type="url" value="http://127.0.0.1:9881/api/tts" required>
|
||
<p class="hint">如果后端端口或主机变了,在这里改完整地址。页面会把表单直接 POST 到这个地址。</p>
|
||
</div>
|
||
|
||
<div class="full">
|
||
<label for="text">需要生成的文字</label>
|
||
<textarea id="text" name="text" placeholder="输入要生成的文字,后端固定按标点符号切句。" required></textarea>
|
||
</div>
|
||
|
||
<div>
|
||
<label for="refAudio">主参考音频/视频</label>
|
||
<input id="refAudio" name="ref_audio" type="file" accept="audio/*,video/*" required>
|
||
<div class="file-line" id="refInfo">请选择 3-10 秒音频或视频(视频会自动提取音频)</div>
|
||
<div class="file-line" id="extractInfo" style="display:none;"></div>
|
||
<div class="waveform-wrap" id="waveformWrap">
|
||
<h3>波形预览 & 裁剪</h3>
|
||
<div id="waveform"></div>
|
||
<div class="trim-controls" id="trimControls">
|
||
<div class="trim-time-group">
|
||
<label for="trimStart">起始</label>
|
||
<input id="trimStart" type="number" min="0" step="0.01" value="0">
|
||
</div>
|
||
<span class="trim-separator">—</span>
|
||
<div class="trim-time-group">
|
||
<label for="trimEnd">结束</label>
|
||
<input id="trimEnd" type="number" min="0" step="0.01" value="0">
|
||
</div>
|
||
<button type="button" class="ghost btn-export-trim" id="exportTrimBtn">导出裁剪音频</button>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
<div>
|
||
<label for="auxAudio">辅助参考音频</label>
|
||
<input id="auxAudio" name="aux_ref_audio" type="file" accept="audio/*" multiple>
|
||
<div class="file-line" id="auxInfo">可选,可多选</div>
|
||
</div>
|
||
|
||
<div class="full">
|
||
<label for="promptText">参考音频文字</label>
|
||
<textarea id="promptText" name="prompt_text" placeholder="可留空。v2 支持空参考文字;v3/v4 后端会要求填写。"></textarea>
|
||
</div>
|
||
|
||
<div>
|
||
<label for="textLang">生成文字语言</label>
|
||
<select id="textLang" name="text_lang">
|
||
<option value="zh">zh</option>
|
||
<option value="en">en</option>
|
||
<option value="ja">ja</option>
|
||
<option value="ko">ko</option>
|
||
<option value="yue">yue</option>
|
||
<option value="auto">auto</option>
|
||
</select>
|
||
</div>
|
||
|
||
<div>
|
||
<label for="promptLang">参考音频语言</label>
|
||
<select id="promptLang" name="prompt_lang">
|
||
<option value="zh">zh</option>
|
||
<option value="en">en</option>
|
||
<option value="ja">ja</option>
|
||
<option value="ko">ko</option>
|
||
<option value="yue">yue</option>
|
||
<option value="auto">auto</option>
|
||
</select>
|
||
</div>
|
||
|
||
<div>
|
||
<label for="emotion">情绪 preset</label>
|
||
<select id="emotion" name="emotion">
|
||
<option value="neutral">neutral</option>
|
||
<option value="happy">happy</option>
|
||
<option value="calm">calm</option>
|
||
<option value="sad">sad</option>
|
||
<option value="angry">angry</option>
|
||
</select>
|
||
</div>
|
||
|
||
<div>
|
||
<label for="speed">语速</label>
|
||
<input id="speed" name="speed" type="number" min="0.5" max="2" step="0.05" value="1">
|
||
<p class="hint">显式语速会覆盖情绪 preset 中的语速。</p>
|
||
</div>
|
||
|
||
<div>
|
||
<label for="seed">Seed</label>
|
||
<input id="seed" name="seed" type="number" value="-1">
|
||
</div>
|
||
|
||
<div>
|
||
<label for="format">返回格式</label>
|
||
<select id="format" name="format">
|
||
<option value="wav">wav</option>
|
||
<option value="ogg">ogg</option>
|
||
<option value="aac">aac</option>
|
||
<option value="raw">raw</option>
|
||
</select>
|
||
</div>
|
||
</div>
|
||
|
||
<div class="actions">
|
||
<button type="button" class="ghost" id="healthBtn">检测后端</button>
|
||
<button type="submit" id="submitBtn">生成音频</button>
|
||
<button type="button" class="ghost" id="resetBtn">清空结果</button>
|
||
</div>
|
||
</form>
|
||
</section>
|
||
|
||
<section class="result">
|
||
<h2>返回结果</h2>
|
||
<div class="meta">
|
||
<div class="metric"><span>耗时</span><strong id="elapsed">-</strong></div>
|
||
<div class="metric"><span>文件大小</span><strong id="fileSize">-</strong></div>
|
||
</div>
|
||
<div class="log" id="log">等待请求。</div>
|
||
<audio id="player" controls hidden></audio>
|
||
<div class="actions">
|
||
<a class="download" id="downloadLink" aria-disabled="true">下载音频</a>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
</main>
|
||
|
||
<script src="https://unpkg.com/wavesurfer.js@7/dist/wavesurfer.min.js"></script>
|
||
<script src="https://unpkg.com/wavesurfer.js@7/dist/plugins/regions.min.js"></script>
|
||
<script>
|
||
// ---- Cached DOM references (each element is looked up once, by id) ----

// Request form and its inputs.
const form = document.getElementById("ttsForm");
const endpoint = document.getElementById("endpoint");
const refAudio = document.getElementById("refAudio");
const auxAudio = document.getElementById("auxAudio");
const refInfo = document.getElementById("refInfo");
const auxInfo = document.getElementById("auxInfo");

// Result panel.
const logBox = document.getElementById("log");
const player = document.getElementById("player");
const downloadLink = document.getElementById("downloadLink");

// Action buttons and header status box.
const submitBtn = document.getElementById("submitBtn");
const resetBtn = document.getElementById("resetBtn");
const healthBtn = document.getElementById("healthBtn");
const statusBox = document.getElementById("statusBox");

// Result metrics.
const elapsed = document.getElementById("elapsed");
const fileSize = document.getElementById("fileSize");

// Waveform preview and trim controls.
const waveformWrap = document.getElementById("waveformWrap");
const waveformEl = document.getElementById("waveform");
const trimControls = document.getElementById("trimControls");
const trimStartInput = document.getElementById("trimStart");
const trimEndInput = document.getElementById("trimEnd");
const exportTrimBtn = document.getElementById("exportTrimBtn");

// Object URL of the most recently generated audio, or null when there is none.
let resultUrl = null;

// When the page itself is served over HTTP(S), default the endpoint to the
// same origin instead of the hard-coded value in the markup.
if (["http:", "https:"].includes(location.protocol)) {
  endpoint.value = new URL("/api/tts", location.origin).href;
}
|
||
|
||
/**
 * Replace the contents of the result log with a single message.
 *
 * @param {string} message - Text to display (replaces any previous text).
 * @param {boolean} [isError=false] - When true the log gets the "danger" class.
 */
function log(message, isError = false) {
  const { classList } = logBox;
  classList.toggle("danger", isError);
  logBox.textContent = message;
}
|
||
|
||
/**
 * Format a byte count as a short human-readable label.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} "-" for a falsy size; otherwise the value scaled by 1024
 *   into B/KB/MB/GB (whole number for B, two decimals for larger units).
 */
function bytesLabel(bytes) {
  if (!bytes) return "-";

  const unitNames = ["B", "KB", "MB", "GB"];
  const lastUnit = unitNames.length - 1;

  let unit = 0;
  let scaled = bytes;
  for (; scaled >= 1024 && unit < lastUnit; unit += 1) {
    scaled /= 1024;
  }

  const decimals = unit === 0 ? 0 : 2;
  return `${scaled.toFixed(decimals)} ${unitNames[unit]}`;
}
|
||
|
||
/**
 * Derive the backend health-check URL from the endpoint input.
 *
 * A trailing "/api/tts" (with optional slash) is swapped for "/health" so any
 * path prefix in front of it is kept. If the path does not end that way the
 * function falls back to "/health" at the origin root; previously it returned
 * the endpoint unchanged, so the "health check" hit the TTS route itself.
 * Query string and fragment are dropped.
 *
 * @returns {string} Absolute health URL, or "" when the input is not a valid
 *   http(s) URL.
 */
function apiBaseUrl() {
  let url;
  try {
    url = new URL(endpoint.value.trim());
  } catch {
    return "";
  }

  // fetch() can only talk to http(s); treat anything else as a malformed address.
  if (url.protocol !== "http:" && url.protocol !== "https:") return "";

  const ttsSuffix = /\/api\/tts\/?$/;
  url.pathname = ttsSuffix.test(url.pathname)
    ? url.pathname.replace(ttsSuffix, "/health")
    : "/health";
  url.search = "";
  url.hash = "";
  return url.toString();
}
|
||
|
||
/**
 * Reset the result panel to its idle state.
 *
 * Stops and unloads the player, revokes the previous result's object URL,
 * disables the download link, clears the metrics, and resets the log text.
 */
function clearResult() {
  // Removing `src` alone neither stops playback nor releases the media
  // resource; pause first and call load() afterwards so the element is
  // actually reset before it is hidden.
  player.pause();
  player.removeAttribute("src");
  player.load();
  player.hidden = true;

  if (resultUrl) URL.revokeObjectURL(resultUrl);
  resultUrl = null;

  downloadLink.removeAttribute("href");
  downloadLink.setAttribute("aria-disabled", "true");
  elapsed.textContent = "-";
  fileSize.textContent = "-";
  log("等待请求。");
}
|
||
|
||
// ---- Shared state for the reference-audio preview ----

/** Active WaveSurfer instance rendering the reference audio, or null. */
let wavesurfer = null;

/** Regions plugin registered on the active WaveSurfer instance, or null. */
let wsRegions = null;

/** Duration in seconds reported by WaveSurfer once the audio is ready. */
let currentAudioDuration = 0;

/** WAV blob extracted from a selected video file; null for plain audio. */
let extractedAudioBlob = null;
|
||
|
||
/**
 * Tell whether a selected file is a video, judged by its MIME type.
 *
 * Always returns a real boolean; the previous `a && b && c` chain leaked
 * `null`, `undefined`, or `""` to callers for missing files or empty types.
 *
 * @param {{ type?: string } | null | undefined} file - File-like object.
 * @returns {boolean} True when `file.type` starts with "video/".
 */
function isVideoFile(file) {
  return typeof file?.type === "string" && file.type.startsWith("video/");
}
|
||
|
||
/**
 * Serialize decoded audio into a 16-bit PCM WAV blob.
 *
 * Only `numberOfChannels`, `sampleRate`, `length`, and `getChannelData(ch)`
 * are read from the input. Samples are clamped to [-1, 1], scaled to signed
 * 16-bit, and written interleaved, little-endian, after a 44-byte RIFF header.
 *
 * @param {AudioBuffer} buffer - Decoded audio to encode.
 * @returns {Blob} Blob of type "audio/wav".
 */
function audioBufferToWav(buffer) {
  const HEADER_BYTES = 44;
  const PCM_FORMAT_TAG = 1;
  const BITS_PER_SAMPLE = 16;

  const channelCount = buffer.numberOfChannels;
  const rate = buffer.sampleRate;
  const frameCount = buffer.length;
  const bytesPerFrame = channelCount * (BITS_PER_SAMPLE / 8);
  const payloadBytes = frameCount * bytesPerFrame;
  const fileBytes = HEADER_BYTES + payloadBytes;

  const bytes = new ArrayBuffer(fileBytes);
  const view = new DataView(bytes);

  // Write a four-character chunk tag one byte per character.
  const putTag = (at, tag) => {
    [...tag].forEach((char, i) => view.setUint8(at + i, char.charCodeAt(0)));
  };

  // RIFF container header.
  putTag(0, "RIFF");
  view.setUint32(4, fileBytes - 8, true);
  putTag(8, "WAVE");

  // "fmt " chunk describing the PCM layout.
  putTag(12, "fmt ");
  view.setUint32(16, 16, true);
  view.setUint16(20, PCM_FORMAT_TAG, true);
  view.setUint16(22, channelCount, true);
  view.setUint32(24, rate, true);
  view.setUint32(28, rate * bytesPerFrame, true);
  view.setUint16(32, bytesPerFrame, true);
  view.setUint16(34, BITS_PER_SAMPLE, true);

  // "data" chunk header, followed by the interleaved samples.
  putTag(36, "data");
  view.setUint32(40, payloadBytes, true);

  const planes = Array.from({ length: channelCount }, (_, ch) => buffer.getChannelData(ch));

  let cursor = HEADER_BYTES;
  for (let frame = 0; frame < frameCount; frame++) {
    for (const plane of planes) {
      const clamped = Math.max(-1, Math.min(1, plane[frame]));
      // Negative and positive halves use different scales so both -1 and +1 fit.
      const scale = clamped < 0 ? 0x8000 : 0x7FFF;
      view.setInt16(cursor, clamped * scale, true);
      cursor += 2;
    }
  }

  return new Blob([bytes], { type: "audio/wav" });
}
|
||
|
||
/**
 * Copy the [startTime, endTime) slice of an AudioBuffer into a new buffer.
 *
 * The previous implementation assigned `source.buffer` twice on the same
 * AudioBufferSourceNode; the Web Audio spec makes the second assignment throw
 * InvalidStateError, so trimming could never succeed in a compliant browser.
 * The offline render only reproduced the copied samples, so the samples are
 * now copied directly and no audio graph is needed.
 *
 * Both times are converted to sample frames (rounded down) and clamped to the
 * bounds of the source, so a range past the end yields a shorter buffer
 * rather than zero-padding.
 *
 * @param {AudioBuffer} audioBuffer - Decoded source audio.
 * @param {number} startTime - Slice start in seconds.
 * @param {number} endTime - Slice end in seconds (exclusive).
 * @returns {Promise<AudioBuffer>} New buffer with the same channel count and
 *   sample rate as the source.
 * @throws {RangeError} When the clamped range contains no frames (including
 *   NaN bounds or endTime <= startTime).
 */
async function trimAudioBuffer(audioBuffer, startTime, endTime) {
  const { numberOfChannels, sampleRate } = audioBuffer;
  const totalFrames = audioBuffer.length;

  const toFrame = (seconds) =>
    Math.min(totalFrames, Math.max(0, Math.floor(seconds * sampleRate)));

  const startFrame = toFrame(startTime);
  const endFrame = toFrame(endTime);
  const frameCount = endFrame - startFrame;

  // Written as a negated comparison so NaN bounds are rejected as well.
  if (!(frameCount > 0)) {
    throw new RangeError("裁剪区间为空或超出音频范围。");
  }

  const trimmed = new AudioBuffer({ numberOfChannels, length: frameCount, sampleRate });
  for (let ch = 0; ch < numberOfChannels; ch++) {
    const slice = audioBuffer.getChannelData(ch).subarray(startFrame, endFrame);
    trimmed.getChannelData(ch).set(slice);
  }
  return trimmed;
}
|
||
|
||
/**
 * Trim the current reference audio to the range in the trim inputs and
 * download the result as a WAV file.
 *
 * The source is the audio extracted from a video when there is one, otherwise
 * the selected file. Does nothing when no source is available. Failures are
 * reported through the result log rather than thrown.
 *
 * @returns {Promise<void>}
 */
async function exportTrimmedAudio() {
  const sourceBlob = extractedAudioBlob || refAudio.files[0];
  if (!sourceBlob) return;

  // Inputs can hold typed-in values outside their min/max, so clamp them to
  // the audio before using them for both the trim and the file name.
  const start = Math.max(0, Number.parseFloat(trimStartInput.value) || 0);
  let end = Number.parseFloat(trimEndInput.value) || currentAudioDuration;
  if (currentAudioDuration > 0) end = Math.min(end, currentAudioDuration);

  if (end <= start) {
    log("裁剪结束时间必须大于起始时间。", true);
    return;
  }

  exportTrimBtn.disabled = true;
  exportTrimBtn.textContent = "处理中...";

  try {
    const arrayBuffer = await sourceBlob.arrayBuffer();

    // Close the context even when decoding fails so a bad file cannot leak
    // an open AudioContext.
    const audioCtx = new (window.AudioContext || window.webkitAudioContext)();
    let audioBuffer;
    try {
      audioBuffer = await audioCtx.decodeAudioData(arrayBuffer);
    } finally {
      audioCtx.close();
    }

    const trimmedBuffer = await trimAudioBuffer(audioBuffer, start, end);
    const wavBlob = audioBufferToWav(trimmedBuffer);

    const url = URL.createObjectURL(wavBlob);
    const a = document.createElement("a");
    a.href = url;
    a.download = `trimmed_${start.toFixed(2)}-${end.toFixed(2)}s.wav`;
    a.click();
    // Revoke on a later task: revoking synchronously after click() can race
    // with the browser starting the download.
    setTimeout(() => URL.revokeObjectURL(url), 1000);
  } catch (err) {
    log(`导出失败:${err.message}`, true);
  } finally {
    exportTrimBtn.disabled = false;
    exportTrimBtn.textContent = "导出裁剪音频";
  }
}

exportTrimBtn.addEventListener("click", exportTrimmedAudio);
|
||
|
||
/**
 * (Re)build the waveform preview for the given audio bytes.
 *
 * Destroys any existing WaveSurfer instance, creates a new one with the
 * Regions plugin, and loads the audio. Once ready it records the duration,
 * resets the trim inputs, and adds one draggable/resizable region covering
 * the first 10 seconds (or the whole clip when shorter).
 *
 * @param {ArrayBuffer} arrayBuffer - Encoded audio bytes to display.
 */
function initWaveform(arrayBuffer) {
  if (wavesurfer) {
    wavesurfer.destroy();
    wavesurfer = null;
    wsRegions = null;
  }

  waveformWrap.classList.add("visible");
  // Hide the trim inputs until the new audio is ready; they still hold the
  // previous file's values.
  trimControls.classList.remove("visible");
  waveformEl.innerHTML = "";

  // The original also passed `responsive: true`; that appears to be a pre-v7
  // option with no effect in v7, so it is dropped (TODO confirm against the
  // v7 option list).
  const instance = WaveSurfer.create({
    container: waveformEl,
    waveColor: "#b8d4c8",
    progressColor: "#19745f",
    cursorColor: "#0f5f4c",
    height: 60,
    barWidth: 2,
    barGap: 1,
    barRadius: 2,
  });
  const regions = instance.registerPlugin(WaveSurfer.Regions.create());

  wavesurfer = instance;
  wsRegions = regions;

  // Callbacks capture `instance`/`regions` and bail out once a newer waveform
  // has replaced them, so a late event cannot touch another file's state.
  const isCurrent = () => wavesurfer === instance;

  instance.on("ready", () => {
    if (!isCurrent()) return;

    currentAudioDuration = instance.getDuration();
    trimEndInput.max = currentAudioDuration;
    trimStartInput.max = currentAudioDuration;

    const regionEnd = Math.min(10, currentAudioDuration);
    trimStartInput.value = 0;
    trimEndInput.value = regionEnd.toFixed(2);

    regions.addRegion({
      start: 0,
      end: regionEnd,
      color: "rgba(25, 116, 95, 0.18)",
      drag: true,
      resize: true,
    });

    trimControls.classList.add("visible");
  });

  regions.on("region-updated", (region) => {
    trimStartInput.value = region.start.toFixed(2);
    trimEndInput.value = region.end.toFixed(2);
  });

  // The MIME type is only a label on the blob; callers pass non-WAV audio too.
  const blob = new Blob([arrayBuffer], { type: "audio/wav" });
  instance.loadBlob(blob).catch((err) => {
    // Previously a decode failure became an unhandled rejection and left an
    // empty waveform box on screen.
    if (!isCurrent()) return;
    waveformWrap.classList.remove("visible");
    log(`波形加载失败:${err.message}`, true);
  });
}
|
||
|
||
// Mirror manual edits of the trim inputs onto the first waveform region.
// Editing the start never lets it pass the end; editing the end never lets it
// fall below the start.
for (const [input, boundsFor] of [
  [trimStartInput, (from, to) => ({ start: Math.min(from, to), end: to })],
  [trimEndInput, (from, to) => ({ start: from, end: Math.max(from, to) })],
]) {
  input.addEventListener("change", () => {
    if (!wsRegions) return;

    const existing = wsRegions.getRegions();
    if (existing.length === 0) return;

    // An empty or zero value falls back to 0 for the start and to the full
    // duration for the end.
    const from = parseFloat(trimStartInput.value) || 0;
    const to = parseFloat(trimEndInput.value) || currentAudioDuration;
    existing[0].setOptions(boundsFor(from, to));
  });
}
|
||
|
||
/**
 * Decode the audio track of a video file into a WAV blob and preview it.
 *
 * On success the blob is stored in `extractedAudioBlob`, its duration and
 * size are shown in #extractInfo, and the waveform is initialised from it.
 * On failure the error is shown in #extractInfo and the waveform is hidden.
 *
 * If the user picks a different file while this runs, the outcome is dropped
 * so it cannot overwrite the newer selection's state. The object URL, probe
 * <video> element, and AudioContext are released on every path.
 *
 * @param {File} file - Selected video file.
 * @returns {Promise<boolean>} True when extraction succeeded and was applied.
 */
async function extractAudioFromVideo(file) {
  const extractInfo = document.querySelector("#extractInfo");
  extractInfo.style.display = "block";
  extractInfo.textContent = "正在从视频中提取音频...";
  extractInfo.className = "file-line";

  const isStale = () => refAudio.files[0] !== file;
  const video = document.createElement("video");
  const videoUrl = URL.createObjectURL(file);
  let audioCtx = null;

  try {
    // Wait until the browser has loaded video data before decoding.
    video.preload = "auto";
    await new Promise((resolve, reject) => {
      video.onloadeddata = resolve;
      video.onerror = () => reject(new Error("无法加载视频文件"));
      video.src = videoUrl;
    });

    // Read the bytes from the File directly; fetching the object URL
    // returned the same data.
    const arrayBuffer = await file.arrayBuffer();
    audioCtx = new (window.AudioContext || window.webkitAudioContext)();
    const audioBuffer = await audioCtx.decodeAudioData(arrayBuffer);
    const wavBlob = audioBufferToWav(audioBuffer);

    if (isStale()) return false;

    extractedAudioBlob = wavBlob;
    const duration = audioBuffer.duration;
    const ok = Number.isFinite(duration) && duration >= 3 && duration <= 10;
    extractInfo.textContent = `已提取音频 · ${duration.toFixed(2)}s · ${bytesLabel(wavBlob.size)}${ok ? " ✓" : " ⚠ 建议裁剪到 3-10 秒"}`;
    extractInfo.className = `file-line ${ok ? "duration-ok" : "duration-warn"}`;

    const wavArrayBuffer = await wavBlob.arrayBuffer();
    if (isStale()) return false;
    initWaveform(wavArrayBuffer);

    return true;
  } catch (err) {
    if (isStale()) return false;
    extractInfo.textContent = `提取失败:${err.message}`;
    extractInfo.className = "file-line duration-warn";
    extractedAudioBlob = null;
    waveformWrap.classList.remove("visible");
    return false;
  } finally {
    // Detach handlers first so unloading the probe cannot fire onerror late.
    video.onloadeddata = null;
    video.onerror = null;
    video.removeAttribute("src");
    video.load();
    URL.revokeObjectURL(videoUrl);
    if (audioCtx) audioCtx.close();
  }
}
|
||
|
||
/**
 * React to a new main-reference selection: show its name, size, and duration
 * in `target` and rebuild the waveform preview.
 *
 * Video files are handed to extractAudioFromVideo(); audio files are probed
 * with an Audio element for their duration and then loaded into the
 * waveform. The line is styled "duration-ok" for 3-10 seconds and
 * "duration-warn" otherwise.
 *
 * @param {File | undefined} file - Selected file, or undefined when cleared.
 * @param {HTMLElement} target - Element that receives the description line.
 */
function inspectDuration(file, target) {
  extractedAudioBlob = null;
  const extractInfo = document.querySelector("#extractInfo");
  extractInfo.style.display = "none";

  // Tear the previous waveform down on every selection change. Otherwise the
  // old file's region stays live while the new file loads, and a submit in
  // that window would trim the new audio with the old bounds.
  waveformWrap.classList.remove("visible");
  if (wavesurfer) {
    wavesurfer.destroy();
    wavesurfer = null;
    wsRegions = null;
  }

  if (!file) {
    target.textContent = "请选择 3-10 秒音频或视频";
    target.className = "file-line";
    return;
  }

  if (isVideoFile(file)) {
    target.textContent = `${file.name} · ${bytesLabel(file.size)} · 视频文件`;
    target.className = "file-line";
    // Not awaited on purpose: the function handles its own errors and
    // resolves to a boolean.
    extractAudioFromVideo(file);
    return;
  }

  // True once the user has picked something else; late callbacks must then
  // leave the UI alone.
  const isStale = () => refAudio.files[0] !== file;

  const url = URL.createObjectURL(file);
  const audio = new Audio();
  audio.preload = "metadata";

  audio.onloadedmetadata = () => {
    URL.revokeObjectURL(url);
    if (isStale()) return;

    const duration = audio.duration;
    const finite = Number.isFinite(duration);
    const ok = finite && duration >= 3 && duration <= 10;
    // A non-finite duration would otherwise print as "Infinitys".
    const durationLabel = finite ? `${duration.toFixed(2)}s` : "时长未知";
    target.textContent = `${file.name} · ${durationLabel} · ${bytesLabel(file.size)}`;
    target.className = `file-line ${ok ? "duration-ok" : "duration-warn"}`;

    file
      .arrayBuffer()
      .then((buf) => {
        if (!isStale()) initWaveform(buf);
      })
      .catch((err) => {
        if (!isStale()) log(`波形加载失败:${err.message}`, true);
      });
  };

  audio.onerror = () => {
    URL.revokeObjectURL(url);
    if (isStale()) return;
    target.textContent = `${file.name} · 无法读取时长 · ${bytesLabel(file.size)}`;
    target.className = "file-line duration-warn";
  };

  audio.src = url;
}
|
||
|
||
// Re-inspect the main reference whenever its file selection changes.
refAudio.addEventListener("change", () => {
  const [selected] = [refAudio.files[0]];
  inspectDuration(selected, refInfo);
});

// Show how many auxiliary reference files are selected.
auxAudio.addEventListener("change", () => {
  const selectedCount = auxAudio.files.length;
  if (selectedCount) {
    auxInfo.textContent = `已选择 ${selectedCount} 个辅助音频`;
  } else {
    auxInfo.textContent = "可选,可多选";
  }
});
|
||
|
||
// Probe the backend's /health route and show the outcome in the status box
// and the result log.
healthBtn.addEventListener("click", async () => {
  // Build the status box from DOM nodes instead of innerHTML: the detail text
  // comes from whatever server the user pointed the page at, so it must never
  // be parsed as markup.
  const setStatus = (title, detail) => {
    const heading = document.createElement("strong");
    heading.textContent = title;
    statusBox.replaceChildren(heading, detail);
  };

  const healthUrl = apiBaseUrl();
  if (!healthUrl) {
    log("后端地址格式不正确。", true);
    return;
  }

  setStatus("检测中", "正在请求 /health");
  try {
    const response = await fetch(healthUrl);
    if (!response.ok) {
      // Read the body as text so a non-JSON error page still reports the
      // HTTP status instead of a JSON parse error.
      const body = await response.text();
      throw new Error(`HTTP ${response.status} ${body}`.trim());
    }

    const data = await response.json();
    setStatus("可连接", `${data?.version || "unknown"} · ${data?.status || "ok"}`);
    log(JSON.stringify(data, null, 2));
  } catch (error) {
    setStatus("连接失败", "检查后端是否启动");
    log(`检测失败:${error.message}`, true);
  }
});
|
||
|
||
// "Clear result": reset the result panel and remove the waveform preview.
resetBtn.addEventListener("click", () => {
  clearResult();

  for (const panel of [waveformWrap, trimControls]) {
    panel.classList.remove("visible");
  }

  if (!wavesurfer) return;
  wavesurfer.destroy();
  wavesurfer = null;
  wsRegions = null;
});
|
||
|
||
// Submit the form: pick (and, when a sub-range is selected, trim) the
// reference audio, POST everything to the endpoint, and show the returned
// audio in the result panel.
form.addEventListener("submit", async (event) => {
  event.preventDefault();
  clearResult();

  const file = refAudio.files[0];
  if (!file) {
    log("请先选择主参考音频或视频。", true);
    return;
  }

  if (isVideoFile(file) && !extractedAudioBlob) {
    log("视频音频提取尚未完成,请稍候再试。", true);
    return;
  }

  const started = performance.now();
  // Disable before the first await so a second click during trimming cannot
  // start a parallel request.
  submitBtn.disabled = true;

  try {
    // Untrimmed source. The extracted blob always gets an explicit file name;
    // without one FormData uploads it as "blob" with no extension.
    const sourceBlob = extractedAudioBlob || file;
    let refBlob = sourceBlob;
    let refName = extractedAudioBlob ? "extracted_audio.wav" : file.name;
    let trimWarning = "";

    const previewActive = waveformWrap.classList.contains("visible") && wsRegions;
    const region = previewActive ? wsRegions.getRegions()[0] : undefined;

    // Re-encode only when the region is a real sub-range of the clip.
    if (region && region.end - region.start < currentAudioDuration - 0.01) {
      try {
        const arrayBuffer = await sourceBlob.arrayBuffer();
        const audioCtx = new (window.AudioContext || window.webkitAudioContext)();
        let audioBuffer;
        try {
          audioBuffer = await audioCtx.decodeAudioData(arrayBuffer);
        } finally {
          // Close even when decoding fails so the context is not leaked.
          audioCtx.close();
        }

        const trimmedBuffer = await trimAudioBuffer(audioBuffer, region.start, region.end);
        refBlob = audioBufferToWav(trimmedBuffer);
        refName = "trimmed_audio.wav";
      } catch (err) {
        // Fall back to the untrimmed audio and carry the warning into the
        // later log messages, which would otherwise overwrite it.
        trimWarning = `裁剪失败,使用原始音频:${err.message}\n`;
      }
    }

    const data = new FormData();
    data.append("text", document.querySelector("#text").value.trim());
    data.append("ref_audio", refBlob, refName);
    for (const aux of auxAudio.files) data.append("aux_ref_audio", aux);
    data.append("prompt_text", document.querySelector("#promptText").value);
    data.append("text_lang", document.querySelector("#textLang").value);
    data.append("prompt_lang", document.querySelector("#promptLang").value);
    data.append("format", document.querySelector("#format").value);
    data.append("emotion", document.querySelector("#emotion").value);
    data.append("speed", document.querySelector("#speed").value);
    data.append("seed", document.querySelector("#seed").value);

    log(`${trimWarning}正在请求后端,请等待模型生成。`, Boolean(trimWarning));

    const response = await fetch(endpoint.value.trim(), { method: "POST", body: data });
    const contentType = response.headers.get("content-type") || "";
    if (!response.ok) {
      const detail = contentType.includes("application/json") ? await response.json() : await response.text();
      throw new Error(typeof detail === "string" ? detail : JSON.stringify(detail, null, 2));
    }

    const blob = await response.blob();
    resultUrl = URL.createObjectURL(blob);
    player.src = resultUrl;
    player.hidden = false;
    downloadLink.href = resultUrl;
    downloadLink.download = `gpt-sovits-${Date.now()}.${document.querySelector("#format").value}`;
    downloadLink.setAttribute("aria-disabled", "false");

    elapsed.textContent = `${((performance.now() - started) / 1000).toFixed(2)}s`;
    fileSize.textContent = bytesLabel(blob.size);
    log(`${trimWarning}生成成功。\nContent-Type: ${contentType || "unknown"}\nSize: ${bytesLabel(blob.size)}`);
  } catch (error) {
    log(`生成失败:\n${error.message}`, true);
  } finally {
    submitBtn.disabled = false;
  }
});
|
||
</script>
|
||
</body>
|
||
</html>
|