From 2f85395675d35fabcea00131a70f040a899d0dcc Mon Sep 17 00:00:00 2001
From: litongjava
Date: Tue, 21 May 2024 18:37:49 -1000
Subject: [PATCH] add memory endpoint to the server api code

---
 docs/cn/inference_cpu.md              | 63 ----------------------------
 docs/cn/inference_cpu_files/memory.md | 18 ++++++++
 requirements.txt                      |  3 ++-
 server/handlers.py                    | 12 +++++
 server/memory_service.py              | 37 ++++++++++++++++
 5 files changed, 69 insertions(+), 64 deletions(-)
 delete mode 100644 docs/cn/inference_cpu.md
 create mode 100644 docs/cn/inference_cpu_files/memory.md
 create mode 100644 server/memory_service.py

diff --git a/docs/cn/inference_cpu.md b/docs/cn/inference_cpu.md
deleted file mode 100644
index e72c8b20..00000000
--- a/docs/cn/inference_cpu.md
+++ /dev/null
@@ -1,63 +0,0 @@
-# Inference
-
-## Windows
-
-### CPU inference
-This document describes how to run inference on the CPU. CPU inference is somewhat slow, but not unbearably so.
-
-#### Install dependencies
-```
-# Clone the project code
-git clone --depth=1 https://github.com/RVC-Boss/GPT-SoVITS
-cd GPT-SoVITS
-
-# After installing Miniconda, first create a virtual environment:
-conda create -n GPTSoVits python=3.9
-conda activate GPTSoVits
-
-# Install the dependencies:
-pip install -r requirements.txt
-
-# (Optional) If your network is unreliable, consider switching to a mirror (e.g. the Tsinghua mirror):
-pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt
-```
-
-#### Add the pretrained models
-```
-# Install huggingface-cli to interact with the Hugging Face Hub
-pip install huggingface_hub
-# Log in with huggingface-cli
-huggingface-cli login
-
-# Download the models; the files are large, so this may take a while
-# --local-dir-use-symlinks False works around the macOS alias file issue
-# Files are downloaded into the GPT_SoVITS/pretrained_models folder
-huggingface-cli download --resume-download lj1995/GPT-SoVITS --local-dir GPT_SoVITS/pretrained_models --local-dir-use-symlinks False
-```
-
-#### Add fine-tuned models (optional)
-I placed the fine-tuned models under the GPT-SoVITS/trained directory, with the contents shown below; normally openai_alloy-e15.ckpt and openai_alloy_e8_s112.pth are all that is needed.
-If you only want to test synthesis quality, you can skip the fine-tuned models and use the pretrained models in their place.
-```
-├── .gitignore
-├── openai_alloy
-│   ├── infer_config.json
-│   ├── openai_alloy-e15.ckpt
-│   ├── openai_alloy_e8_s112.pth
-│   ├── output-2.txt
-│   ├── output-2.wav
-```
-
-#### Start the inference WebUI
-```
-python.exe GPT_SoVITS/inference_webui.py
-```
-The configuration looks like this:
-![](inference_cpu_files/1.jpg)
-
-### GPU inference
-```
-pip uninstall torch torchaudio -y
-pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117
-```
-Choose the CUDA version that matches your environment.
diff --git a/docs/cn/inference_cpu_files/memory.md b/docs/cn/inference_cpu_files/memory.md
new file mode 100644
index 00000000..af64077a
--- /dev/null
+++ b/docs/cn/inference_cpu_files/memory.md
@@ -0,0 +1,18 @@
+The memory footprint after the models are loaded is shown below; process memory is in MB and GPU memory is in MiB.
+```
+{
+  "memory_usage": {
+    "rss": 1029.30078125,
+    "vms": 4505.546875,
+    "percent": 6.5181980027997115
+  }
+}
+
+{
+  "gpu_memory_usage": {
+    "used_memory": 2640,
+    "total_memory": 4096,
+    "percent": 64.453125
+  }
+}
+```
diff --git a/requirements.txt b/requirements.txt
index 73912d01..19294040 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -25,4 +25,5 @@ jieba_fast
 jieba
 LangSegment>=0.2.0
 Faster_Whisper
-wordsegment
\ No newline at end of file
+wordsegment
+psutil
diff --git a/server/handlers.py b/server/handlers.py
index bc164745..7bc4f376 100644
--- a/server/handlers.py
+++ b/server/handlers.py
@@ -1,4 +1,6 @@
 from fastapi import APIRouter, Request
+
+from memory_service import get_memory_usage, get_gpu_memory_usage
 from pyutils.logs import llog
 from tts_service import change_sovits_weights, change_gpt_weights, handle_control, handle_change, handle
 
@@ -70,3 +72,13 @@ async def tts_endpoint(
     cut_punc: str = None,
 ):
     return handle(refer_wav_path, prompt_text, prompt_language, text, text_language, cut_punc)
+
+@index_router.get("/memory-usage")
+def read_memory_usage():
+    memory_usage = get_memory_usage()
+    return {"memory_usage": memory_usage}
+
+@index_router.get("/gpu-memory-usage")
+def read_gpu_memory_usage():
+    gpu_memory_usage = get_gpu_memory_usage()
+    return {"gpu_memory_usage": gpu_memory_usage}
\ No newline at end of file
diff --git a/server/memory_service.py b/server/memory_service.py
new file mode 100644
index 00000000..e1474637
--- /dev/null
+++ b/server/memory_service.py
@@ -0,0 +1,37 @@
+import psutil
+import subprocess
+
+
+def get_memory_usage():
+    process = psutil.Process()
+    mem_info = process.memory_info()
+    memory_usage = {
+        "rss": mem_info.rss / (1024 ** 2),  # Resident Set Size, in MB
+        "vms": mem_info.vms / (1024 ** 2),  # Virtual Memory Size, in MB
+        "percent": process.memory_percent()  # Percentage of system memory used by this process
+    }
+    return memory_usage
+
+
+def get_gpu_memory_usage():
+    try:
+        result = subprocess.run(
+            ["nvidia-smi", "--query-gpu=memory.used,memory.total", "--format=csv,nounits,noheader"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            check=True,
+            text=True
+        )
+        output = result.stdout.strip()
+        if output:
+            used_memory, total_memory = map(int, output.split(', '))  # assumes a single GPU
+            gpu_memory_usage = {
+                "used_memory": used_memory,  # in MiB
+                "total_memory": total_memory,  # in MiB
+                "percent": (used_memory / total_memory) * 100  # Percentage of GPU memory usage
+            }
+            return gpu_memory_usage
+        else:
+            return {"error": "No GPU found or unable to query GPU memory usage."}
+    except Exception as e:
+        return {"error": str(e)}
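
A minimal sketch for exercising the new endpoints (illustrative, not part of the patch): it assumes `index_router` is mounted at the application root, that the server is reachable at `http://127.0.0.1:9880` (adjust host and port to your deployment), and that the `requests` package is available.

```python
# Illustrative client for the new memory endpoints (not part of the patch).
# Assumptions: the API server listens on http://127.0.0.1:9880, index_router is
# mounted without a prefix, and `requests` is installed (pip install requests).
import requests

BASE_URL = "http://127.0.0.1:9880"  # adjust to your deployment


def main() -> None:
    # Process memory of the API server: rss/vms in MB, percent of system RAM.
    resp = requests.get(f"{BASE_URL}/memory-usage", timeout=10)
    resp.raise_for_status()
    print(resp.json())

    # GPU memory as reported by nvidia-smi (MiB); on CPU-only hosts the payload
    # carries an "error" message instead of the memory figures.
    resp = requests.get(f"{BASE_URL}/gpu-memory-usage", timeout=10)
    resp.raise_for_status()
    print(resp.json())


if __name__ == "__main__":
    main()
```

The expected payloads match the samples recorded in docs/cn/inference_cpu_files/memory.md above.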