add memory endpoint to the server api code

This commit is contained in:
litongjava 2024-05-21 18:37:49 -10:00
parent 799168a5f7
commit 2f85395675
5 changed files with 68 additions and 64 deletions

@ -1,63 +0,0 @@
# Inference
## Windows
### Inference on the CPU
This document describes how to run inference on the CPU. CPU inference is somewhat slow, but not unbearably so.
#### Install dependencies
```
# Clone the project
git clone --depth=1 https://github.com/RVC-Boss/GPT-SoVITS
cd GPT-SoVITS
# After installing Miniconda, create a virtual environment first:
conda create -n GPTSoVits python=3.9
conda activate GPTSoVits
# Install the dependencies:
pip install -r requirements.txt
# (Optional) If your network is unreliable, consider switching to a mirror (e.g. the Tsinghua mirror)
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt
```
#### Add the pretrained models
```
# Install huggingface-cli for interacting with the Hugging Face Hub
pip install huggingface_hub
# Log in with huggingface-cli
huggingface-cli login
# Download the models; the files are large, so this may take a while
# --local-dir-use-symlinks False works around the macOS alias file issue
# The files are downloaded into the GPT_SoVITS/pretrained_models folder
huggingface-cli download --resume-download lj1995/GPT-SoVITS --local-dir GPT_SoVITS/pretrained_models --local-dir-use-symlinks False
```
#### Add fine-tuned models (optional)
I placed the fine-tuned models in the GPT-SoVITS/trained directory, with the contents shown below; normally openai_alloy-e15.ckpt and openai_alloy_e8_s112.pth are all you need.
If you only want to test the synthesis quality, you can skip the fine-tuned models and use the pretrained models in their place.
```
├── .gitignore
├── openai_alloy
│ ├── infer_config.json
│ ├── openai_alloy-e15.ckpt
│ ├── openai_alloy_e8_s112.pth
│ ├── output-2.txt
│ ├── output-2.wav
```
#### Launch the inference WebUI
```
python.exe GPT_SoVITS/inference_webui.py
```
The configuration looks like this:
![](inference_cpu_files/1.jpg)
### Inference on the GPU
```
pip uninstall torch torchaudio -y
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117
```
Choose the CUDA version that matches your environment.
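As a quick sanity check (a sketch added here, not part of the original steps), you can verify from Python that the CUDA build of torch is installed and that a GPU is visible:
```
# Sketch: confirm the CUDA build of torch and GPU visibility.
import torch

print(torch.__version__)          # wheels from the cuXXX index typically end in "+cuXXX"
print(torch.cuda.is_available())  # True only if a usable GPU and driver are present
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
```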

@ -0,0 +1,18 @@
After the models are loaded, memory usage looks like the following (rss/vms in MB, GPU memory in MiB, percent as a percentage):
```
{
"memory_usage": {
"rss": 1029.30078125,
"vms": 4505.546875,
"percent": 6.5181980027997115
}
}
{
"gpu_memory_usage": {
"used_memory": 2640,
"total_memory": 4096,
"percent": 64.453125
}
}
```
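These numbers are returned by the new /memory-usage and /gpu-memory-usage endpoints. A minimal sketch of querying them, assuming the API server is reachable at http://127.0.0.1:9880 (the host and port are an assumption; use whatever your server actually listens on):
```
# Sketch: fetch the two memory endpoints added in this commit.
# The base URL below is an assumption, not part of the commit.
import requests

base_url = "http://127.0.0.1:9880"

print(requests.get(f"{base_url}/memory-usage").json())
print(requests.get(f"{base_url}/gpu-memory-usage").json())
```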

@ -25,4 +25,4 @@ jieba_fast
jieba
LangSegment>=0.2.0
Faster_Whisper
wordsegment
psutil

@ -1,4 +1,6 @@
from fastapi import APIRouter, Request
from memory_service import get_memory_usage, get_gpu_memory_usage
from pyutils.logs import llog
from tts_service import change_sovits_weights, change_gpt_weights, handle_control, handle_change, handle
@ -70,3 +72,13 @@ async def tts_endpoint(
    cut_punc: str = None,
):
    return handle(refer_wav_path, prompt_text, prompt_language, text, text_language, cut_punc)


@index_router.get("/memory-usage")
def read_memory_usage():
    memory_usage = get_memory_usage()
    return {"memory_usage": memory_usage}


@index_router.get("/gpu-memory-usage")
def read_gpu_memory_usage():
    gpu_memory_usage = get_gpu_memory_usage()
    return {"gpu_memory_usage": gpu_memory_usage}

server/memory_service.py (new file, 37 lines)

@ -0,0 +1,37 @@
import psutil
import subprocess


def get_memory_usage():
    process = psutil.Process()
    mem_info = process.memory_info()
    memory_usage = {
        "rss": mem_info.rss / (1024 ** 2),  # Resident Set Size
        "vms": mem_info.vms / (1024 ** 2),  # Virtual Memory Size
        "percent": process.memory_percent()  # Percentage of memory usage
    }
    return memory_usage


def get_gpu_memory_usage():
    try:
        result = subprocess.run(
            ["nvidia-smi", "--query-gpu=memory.used,memory.total", "--format=csv,nounits,noheader"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
            text=True
        )
        output = result.stdout.strip()
        if output:
            used_memory, total_memory = map(int, output.split(', '))
            gpu_memory_usage = {
                "used_memory": used_memory,  # in MiB
                "total_memory": total_memory,  # in MiB
                "percent": (used_memory / total_memory) * 100  # Percentage of GPU memory usage
            }
            return gpu_memory_usage
        else:
            return {"error": "No GPU found or unable to query GPU memory usage."}
    except Exception as e:
        return {"error": str(e)}
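Note that nvidia-smi prints one "used, total" line per GPU, so get_gpu_memory_usage as written parses single-GPU output only; on a multi-GPU machine the integer conversion fails and the error branch is returned. A hedged sketch of a per-GPU variant (the function name is illustrative, not part of the commit):
```
# Sketch: report memory usage for every GPU, one dict per device.
import subprocess


def get_all_gpu_memory_usage():
    result = subprocess.run(
        ["nvidia-smi", "--query-gpu=memory.used,memory.total", "--format=csv,nounits,noheader"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True
    )
    usage = []
    for line in result.stdout.strip().splitlines():
        used_memory, total_memory = map(int, line.split(', '))
        usage.append({
            "used_memory": used_memory,    # in MiB
            "total_memory": total_memory,  # in MiB
            "percent": (used_memory / total_memory) * 100
        })
    return usage
```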