From 40a5ea1f663ec3e6a8438c66d26428d0a33261d6 Mon Sep 17 00:00:00 2001
From: Kaihui-AMD <Kaihui.Tang@amd.com>
Date: Fri, 29 May 2026 16:35:41 +0800
Subject: [PATCH] Add ROCm Docker support for AMD GPUs (RDNA3/RDNA4)

- Add Dockerfile.rocm based on rocm/pytorch:rocm7.2.3
- Add docker-compose-rocm.yaml with /dev/kfd + /dev/dri passthrough
- Add ROCm Docker section to README with quick start and notes
- Use onnxruntime_migraphx from repo.radeon.com for GPU-accelerated ONNX
- Pin starlette<1.0.0 to work around #2762
- Set ROCBLAS_USE_HIPBLASLT=0 for RDNA4 stability

Tested: GPT-SoVITS v2 inference on Radeon AI PRO R9700 (gfx1201)
with ROCm 7.2.3, PyTorch 2.9.1, fp16. WebUI and API both working.
---
 Dockerfile.rocm          | 52 ++++++++++++++++++++++++++++++++++++++++
 README.md                | 52 ++++++++++++++++++++++++++++++++++++++++
 docker-compose-rocm.yaml | 30 +++++++++++++++++++++++
 3 files changed, 134 insertions(+)
 create mode 100644 Dockerfile.rocm
 create mode 100644 docker-compose-rocm.yaml

diff --git a/Dockerfile.rocm b/Dockerfile.rocm
new file mode 100644
index 00000000..2b38445b
--- /dev/null
+++ b/Dockerfile.rocm
@@ -0,0 +1,80 @@
+# GPT-SoVITS image for AMD GPUs, built on the official ROCm PyTorch image.
+#
+# Build args:
+#   ROCM_VERSION   ROCm release; selects the default base-image tag and the
+#                  repo.radeon.com wheel location used for onnxruntime_migraphx.
+#   PYTORCH_IMAGE  Full base-image reference; override to use a different tag.
+#   HF_SOURCE      Base URL the pretrained assets are downloaded from.
+ARG ROCM_VERSION=7.2.3
+ARG PYTORCH_IMAGE=rocm/pytorch:rocm${ROCM_VERSION}_ubuntu24.04_py3.12_pytorch_release_2.9.1
+
+FROM ${PYTORCH_IMAGE}
+
+# An ARG declared before FROM is only visible to FROM lines; redeclare it
+# without a value so the same version is available to RUN steps in this stage.
+ARG ROCM_VERSION
+
+LABEL maintainer="AMD Community"
+LABEL description="GPT-SoVITS with ROCm support (RDNA3/RDNA4)"
+
+SHELL ["/bin/bash", "-c"]
+
+# Build tools and runtime utilities. DEBIAN_FRONTEND is set inline so apt cannot
+# block on an interactive prompt, without persisting it in the image env.
+RUN apt-get update -qq && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        git ffmpeg unzip wget cmake make gcc g++ && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /workspace/GPT-SoVITS
+
+# Copy only the dependency manifests first so the pip layer below is reused
+# from cache when just the application source changes.
+COPY requirements.txt extra-req.txt /workspace/GPT-SoVITS/
+
+# Python dependencies:
+#  - requirements.txt is rewritten so its onnxruntime-gpu entry becomes
+#    onnxruntime_migraphx, which pip finds via the repo.radeon.com -f location
+#    for the selected ROCM_VERSION.
+#  - extra-req.txt is installed with --no-deps.
+#  - starlette is pinned below 1.0.0 to work around issue #2762.
+#  - NLTK taggers and cmudict are downloaded at build time.
+RUN pip install --no-cache-dir soundfile && \
+    sed 's/onnxruntime-gpu.*/onnxruntime_migraphx/' requirements.txt > /tmp/requirements-rocm.txt && \
+    pip install --no-cache-dir -r extra-req.txt --no-deps && \
+    pip install --no-cache-dir -r /tmp/requirements-rocm.txt \
+        -f "https://repo.radeon.com/rocm/manylinux/rocm-rel-${ROCM_VERSION}/" && \
+    pip install --no-cache-dir "starlette>=0.40.0,<1.0.0" && \
+    python -c "import nltk; nltk.download('averaged_perceptron_tagger_eng'); nltk.download('averaged_perceptron_tagger'); nltk.download('cmudict')" && \
+    rm -rf /tmp/* /root/.cache/pip
+
+# Fetch pretrained models, the G2PW model, NLTK data and the OpenJTalk
+# dictionary; each archive is unpacked and deleted within this same layer.
+ARG HF_SOURCE=https://huggingface.co/XXXXRT/GPT-SoVITS-Pretrained/resolve/main
+RUN wget -q "${HF_SOURCE}/pretrained_models.zip" && \
+    unzip -q -o pretrained_models.zip -d GPT_SoVITS && \
+    rm pretrained_models.zip && \
+    wget -q "${HF_SOURCE}/G2PWModel.zip" && \
+    unzip -q -o G2PWModel.zip -d GPT_SoVITS/text && \
+    rm G2PWModel.zip && \
+    wget -q "${HF_SOURCE}/nltk_data.zip" -O nltk_data.zip && \
+    PY_PREFIX=$(python -c "import sys; print(sys.prefix)") && \
+    unzip -q -o nltk_data.zip -d "$PY_PREFIX" && \
+    rm nltk_data.zip && \
+    wget -q "${HF_SOURCE}/open_jtalk_dic_utf_8-1.11.tar.gz" && \
+    PYOPENJTALK_DIR=$(python -c "import os, pyopenjtalk; print(os.path.dirname(pyopenjtalk.__file__))") && \
+    tar -xzf open_jtalk_dic_utf_8-1.11.tar.gz -C "$PYOPENJTALK_DIR" && \
+    rm open_jtalk_dic_utf_8-1.11.tar.gz
+
+# Application source is copied last so code edits keep the layers above cached.
+COPY . /workspace/GPT-SoVITS
+
+# ROCBLAS_USE_HIPBLASLT=0 is set for RDNA4 (gfx1201) stability.
+ENV PYTHONPATH="/workspace/GPT-SoVITS:/workspace/GPT-SoVITS/GPT_SoVITS"
+ENV ROCBLAS_USE_HIPBLASLT=0
+
+# Same ports that docker-compose-rocm.yaml publishes.
+EXPOSE 9871 9872 9873 9874 9880
+
+# Default to an interactive shell; services are started manually from it.
+CMD ["/bin/bash"]
diff --git a/README.md b/README.md
index 2511c73c..df716061 100644
--- a/README.md
+++ b/README.md
@@ -176,6 +176,58 @@ To run a specific service with Docker Compose, use:
 docker compose run --service-ports <GPT-SoVITS-CU126-Lite|GPT-SoVITS-CU128-Lite|GPT-SoVITS-CU126|GPT-SoVITS-CU128>
 ```
 
+#### Running with AMD ROCm (Docker)
+
+GPT-SoVITS supports AMD GPUs via ROCm. Tested on RDNA4 (Radeon AI PRO R9700 / gfx1201) with ROCm 7.2.3.
+
+**Requirements:**
+- AMD GPU with ROCm support (RDNA3 / RDNA4)
+- ROCm drivers installed on the host
+- Docker with GPU passthrough (`/dev/kfd` and `/dev/dri`)
+
+**Quick start:**
+
+```bash
+docker compose -f docker-compose-rocm.yaml run --service-ports GPT-SoVITS-ROCm
+```
+
+**Or build and run manually:**
+
+```bash
+docker build -f Dockerfile.rocm -t gpt-sovits:rocm .
+docker run -it --rm \
+  --device=/dev/kfd --device=/dev/dri \
+  --group-add video --group-add render \
+  -e ROCBLAS_USE_HIPBLASLT=0 \
+  -e is_half=true \
+  --shm-size=16g \
+  -p 9871:9871 -p 9872:9872 -p 9873:9873 -p 9874:9874 -p 9880:9880 \
+  gpt-sovits:rocm
+```
+
+Both methods start a `bash` shell in the container. From that shell, launch the WebUI:
+
+```bash
+python webui.py
+```
+
+**Notes:**
+- `ROCBLAS_USE_HIPBLASLT=0` is required for RDNA4 (gfx1201) stability
+- RDNA3 (gfx1100/gfx1101) users may not need this env var
+- The first inference run will be slower due to MIOpen kernel auto-tuning; subsequent runs are faster
+- `onnxruntime-gpu` is replaced with [`onnxruntime_migraphx`](https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2.3/), which provides `ROCMExecutionProvider` and `MIGraphXExecutionProvider` for GPU-accelerated ONNX inference
+
+**Tested environment:**
+
+| Component | Version |
+|-----------|---------|
+| GPU | AMD Radeon AI PRO R9700 (gfx1201 / RDNA4, 32GB) |
+| ROCm | 7.2.3 |
+| PyTorch | 2.9.1+rocm7.2.3 |
+| Python | 3.12 |
+| Model | GPT-SoVITS v2 pretrained |
+| Inference | ✅ Passed (fp16) |
+
 #### Building the Docker Image Locally
 
 If you want to build the image yourself, use:
diff --git a/docker-compose-rocm.yaml b/docker-compose-rocm.yaml
new file mode 100644
index 00000000..f7dd7bc0
--- /dev/null
+++ b/docker-compose-rocm.yaml
@@ -0,0 +1,43 @@
+# Compose service for running GPT-SoVITS on AMD GPUs with the ROCm image.
+version: "3.8"
+
+services:
+  GPT-SoVITS-ROCm:
+    build:
+      context: .
+      dockerfile: Dockerfile.rocm
+    image: gpt-sovits:rocm7.2.3
+    container_name: GPT-SoVITS-ROCm
+    ports:
+      - "9871:9871"
+      - "9872:9872"
+      - "9873:9873"
+      - "9874:9874"
+      - "9880:9880"
+    volumes:
+      # Live-mount the checkout so code edits and outputs persist on the host.
+      - .:/workspace/GPT-SoVITS
+      # The bind mount above hides everything the image stored under this
+      # path, including the models downloaded at build time. Named volumes over
+      # the model directories are seeded from the image, keeping them visible.
+      # NOTE(review): paths assume the archives unpack to these names - confirm.
+      - rocm-pretrained-models:/workspace/GPT-SoVITS/GPT_SoVITS/pretrained_models
+      - rocm-g2pw-model:/workspace/GPT-SoVITS/GPT_SoVITS/text/G2PWModel
+    environment:
+      - is_half=true
+      - ROCBLAS_USE_HIPBLASLT=0
+    devices:
+      # GPU passthrough: device nodes taken from the host.
+      - /dev/kfd
+      - /dev/dri
+    group_add:
+      - video
+      - render
+    tty: true
+    stdin_open: true
+    shm_size: "16g"
+    restart: unless-stopped
+
+volumes:
+  rocm-pretrained-models:
+  rocm-g2pw-model: