From 824feef38d110370df44b71e1af63007fd689b6f Mon Sep 17 00:00:00 2001
From: glide-the
Date: Wed, 25 Sep 2024 16:51:48 +0800
Subject: [PATCH 1/7] rife: padding and inference fixes in rife_model.py
---
inference/gradio_composite_demo/rife_model.py | 82 +++++++++++++++----
1 file changed, 64 insertions(+), 18 deletions(-)
diff --git a/inference/gradio_composite_demo/rife_model.py b/inference/gradio_composite_demo/rife_model.py
index dbb7d00..901038d 100644
--- a/inference/gradio_composite_demo/rife_model.py
+++ b/inference/gradio_composite_demo/rife_model.py
@@ -8,8 +8,10 @@ import numpy as np
import logging
import skvideo.io
from rife.RIFE_HDv3 import Model
+from huggingface_hub import hf_hub_download, snapshot_download
logger = logging.getLogger(__name__)
+
device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -18,8 +20,9 @@ def pad_image(img, scale):
tmp = max(32, int(32 / scale))
ph = ((h - 1) // tmp + 1) * tmp
pw = ((w - 1) // tmp + 1) * tmp
- padding = (0, 0, pw - w, ph - h)
- return F.pad(img, padding)
+ padding = (0, pw - w, 0, ph - h)
+
+ return F.pad(img, padding), padding
def make_inference(model, I0, I1, upscale_amount, n):
@@ -36,15 +39,23 @@ def make_inference(model, I0, I1, upscale_amount, n):
@torch.inference_mode()
def ssim_interpolation_rife(model, samples, exp=1, upscale_amount=1, output_device="cpu"):
-
+ print(f"samples dtype:{samples.dtype}")
+ print(f"samples shape:{samples.shape}")
output = []
+ pbar = utils.ProgressBar(samples.shape[0], desc="RIFE inference")
# [f, c, h, w]
for b in range(samples.shape[0]):
frame = samples[b : b + 1]
_, _, h, w = frame.shape
+
I0 = samples[b : b + 1]
I1 = samples[b + 1 : b + 2] if b + 2 < samples.shape[0] else samples[-1:]
- I1 = pad_image(I1, upscale_amount)
+
+ I0, padding = pad_image(I0, upscale_amount)
+ I0 = I0.to(torch.float)
+ I1, _ = pad_image(I1, upscale_amount)
+ I1 = I1.to(torch.float)
+
# [c, h, w]
I0_small = F.interpolate(I0, (32, 32), mode="bilinear", align_corners=False)
I1_small = F.interpolate(I1, (32, 32), mode="bilinear", align_corners=False)
@@ -52,15 +63,25 @@ def ssim_interpolation_rife(model, samples, exp=1, upscale_amount=1, output_devi
ssim = ssim_matlab(I0_small[:, :3], I1_small[:, :3])
if ssim > 0.996:
- I1 = I0
- I1 = pad_image(I1, upscale_amount)
+ I1 = samples[b : b + 1]
+ # print(f'upscale_amount:{upscale_amount}')
+ # print(f'ssim:{upscale_amount}')
+ # print(f'I0 shape:{I0.shape}')
+ # print(f'I1 shape:{I1.shape}')
+ I1, padding = pad_image(I1, upscale_amount)
+ # print(f'I0 shape:{I0.shape}')
+ # print(f'I1 shape:{I1.shape}')
I1 = make_inference(model, I0, I1, upscale_amount, 1)
- I1_small = F.interpolate(I1[0], (32, 32), mode="bilinear", align_corners=False)
- ssim = ssim_matlab(I0_small[:, :3], I1_small[:, :3])
- frame = I1[0]
+ # print(f'I0 shape:{I0.shape}')
+ # print(f'I1[0] shape:{I1[0].shape}')
I1 = I1[0]
+ # print(f'I1[0] unpadded shape:{I1.shape}')
+ I1_small = F.interpolate(I1, (32, 32), mode="bilinear", align_corners=False)
+ ssim = ssim_matlab(I0_small[:, :3], I1_small[:, :3])
+ frame = I1[padding[0] :, padding[2] :, : -padding[3], padding[1] :]
+
tmp_output = []
if ssim < 0.2:
for i in range((2**exp) - 1):
@@ -69,10 +90,16 @@ def ssim_interpolation_rife(model, samples, exp=1, upscale_amount=1, output_devi
else:
tmp_output = make_inference(model, I0, I1, upscale_amount, 2**exp - 1) if exp else []
- frame = pad_image(frame, upscale_amount)
- tmp_output = [frame] + tmp_output
- for i, frame in enumerate(tmp_output):
- output.append(frame.to(output_device))
+ frame, _ = pad_image(frame, upscale_amount)
+ # print(f'frame shape:{frame.shape}')
+
+ frame = F.interpolate(frame, size=(h, w))
+ output.append(frame.to(output_device))
+ for i, tmp_frame in enumerate(tmp_output):
+ # tmp_frame, _ = pad_image(tmp_frame, upscale_amount)
+ tmp_frame = F.interpolate(tmp_frame, size=(h, w))
+ output.append(tmp_frame.to(output_device))
+ pbar.update(1)
return output
@@ -94,14 +121,24 @@ def frame_generator(video_capture):
def rife_inference_with_path(model, video_path):
+ # Open the video file
video_capture = cv2.VideoCapture(video_path)
- tot_frame = video_capture.get(cv2.CAP_PROP_FRAME_COUNT)
+ fps = video_capture.get(cv2.CAP_PROP_FPS) # Get the frames per second
+ tot_frame = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT)) # Total frames in the video
pt_frame_data = []
pt_frame = skvideo.io.vreader(video_path)
- for frame in pt_frame:
- pt_frame_data.append(
- torch.from_numpy(np.transpose(frame, (2, 0, 1))).to("cpu", non_blocking=True).float() / 255.0
- )
+ # Read the video frames in a loop
+ while video_capture.isOpened():
+ ret, frame = video_capture.read()
+
+ if not ret:
+ break
+
+ # BGR to RGB
+ frame_rgb = frame[..., ::-1]
+ frame_rgb = frame_rgb.copy()
+ tensor = torch.from_numpy(frame_rgb).float().to("cpu", non_blocking=True).float() / 255.0
+ pt_frame_data.append(tensor.permute(2, 0, 1)) # to [c, h, w,]
pt_frame = torch.from_numpy(np.stack(pt_frame_data))
pt_frame = pt_frame.to(device)
@@ -122,8 +159,17 @@ def rife_inference_with_latents(model, latents):
for i in range(latents.size(0)):
# [f, c, w, h]
latent = latents[i]
+
frames = ssim_interpolation_rife(model, latent)
pt_image = torch.stack([frames[i].squeeze(0) for i in range(len(frames))]) # (to [f, c, w, h])
rife_results.append(pt_image)
return torch.stack(rife_results)
+
+
+if __name__ == "__main__":
+ snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
+ model = load_rife_model("model_rife")
+
+ video_path = rife_inference_with_path(model, "/mnt/ceph/develop/jiawei/CogVideo/output/chunk_3710_1.mp4")
+ print(video_path)
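For reference, F.pad with a 4-tuple pads the last two dimensions in (left, right, top, bottom) order, so the corrected tuple (0, pw - w, 0, ph - h) pads only the right and bottom edges of each frame. A minimal sketch of that behavior (the 30x45 input size is a made-up example, not taken from the patch):

    import torch
    import torch.nn.functional as F

    img = torch.randn(1, 3, 30, 45)  # hypothetical [b, c, h, w] frame
    _, _, h, w = img.shape
    tmp = 32
    ph = ((h - 1) // tmp + 1) * tmp  # 32
    pw = ((w - 1) // tmp + 1) * tmp  # 64
    padding = (0, pw - w, 0, ph - h)  # pads right by 19 and bottom by 2 only
    padded = F.pad(img, padding)
    print(padded.shape)  # torch.Size([1, 3, 32, 64])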
From df3da996d3fcca547997026c226ea0978f39e7f6 Mon Sep 17 00:00:00 2001
From: Alexandre Poisson <13329302+AlexandrePoisson@users.noreply.github.com>
Date: Sun, 29 Sep 2024 12:54:08 +0200
Subject: [PATCH 2/7] Update app.py
The model was not found, so fix it by creating a global variable with the correct path: MODEL = "THUDM/CogVideoX-5b".
Fix typo: 'experiential use' -> 'experimental use'.
---
inference/gradio_composite_demo/app.py | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/inference/gradio_composite_demo/app.py b/inference/gradio_composite_demo/app.py
index 1df2e65..757b31c 100644
--- a/inference/gradio_composite_demo/app.py
+++ b/inference/gradio_composite_demo/app.py
@@ -37,13 +37,15 @@ from huggingface_hub import hf_hub_download, snapshot_download
device = "cuda" if torch.cuda.is_available() else "cpu"
+MODEL = "THUDM/CogVideoX-5b"
+
hf_hub_download(repo_id="ai-forever/Real-ESRGAN", filename="RealESRGAN_x4.pth", local_dir="model_real_esran")
snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
-pipe = CogVideoXPipeline.from_pretrained("/share/official_pretrains/hf_home/CogVideoX-5b", torch_dtype=torch.bfloat16).to(device)
+pipe = CogVideoXPipeline.from_pretrained(MODEL, torch_dtype=torch.bfloat16).to(device)
pipe.scheduler = CogVideoXDPMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
pipe_video = CogVideoXVideoToVideoPipeline.from_pretrained(
- "/share/official_pretrains/hf_home/CogVideoX-5b",
+ MODEL,
transformer=pipe.transformer,
vae=pipe.vae,
scheduler=pipe.scheduler,
@@ -53,9 +55,9 @@ pipe_video = CogVideoXVideoToVideoPipeline.from_pretrained(
).to(device)
pipe_image = CogVideoXImageToVideoPipeline.from_pretrained(
- "/share/official_pretrains/hf_home/CogVideoX-5b-I2V",
+ MODEL,
transformer=CogVideoXTransformer3DModel.from_pretrained(
- "/share/official_pretrains/hf_home/CogVideoX-5b-I2V", subfolder="transformer", torch_dtype=torch.bfloat16
+ MODEL, subfolder="transformer", torch_dtype=torch.bfloat16
),
vae=pipe.vae,
scheduler=pipe.scheduler,
@@ -315,7 +317,7 @@ with gr.Blocks() as demo:
">
- ⚠️ This demo is for academic research and experiential use only.
+ ⚠️ This demo is for academic research and experimental use only.
""")
with gr.Row():
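For context, from_pretrained accepts either a local directory or a Hugging Face Hub repo id; the hard-coded /share/official_pretrains/... paths exist only on the original machine, which is presumably why the model was not found elsewhere. A minimal sketch of the shared-constant pattern, shown for the text-to-video pipeline only:

    import torch
    from diffusers import CogVideoXPipeline

    MODEL = "THUDM/CogVideoX-5b"  # Hub repo id, fetched into the local HF cache on first use

    # The same constant can then be passed to the video-to-video and
    # image-to-video pipelines so that all of them resolve one checkpoint.
    pipe = CogVideoXPipeline.from_pretrained(MODEL, torch_dtype=torch.bfloat16)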
From f36c34aef2740ee7855965e6cd449d0b1e512386 Mon Sep 17 00:00:00 2001
From: feifeibear
Date: Mon, 30 Sep 2024 12:30:42 +0800
Subject: [PATCH 3/7] Add xDiT to friendly links
---
README.md | 2 ++
README_ja.md | 1 +
README_zh.md | 2 ++
3 files changed, 5 insertions(+)
diff --git a/README.md b/README.md
index 4d484bb..a700f24 100644
--- a/README.md
+++ b/README.md
@@ -294,6 +294,8 @@ works have already been adapted for CogVideoX, and we invite everyone to use the
Space image provided by community members.
+ [Interior Design Fine-Tuning Model](https://huggingface.co/collections/bertjiazheng/koolcogvideox-66e4762f53287b7f39f8f3ba):
is a fine-tuned model based on CogVideoX, specifically designed for interior design.
++ [xDiT](https://github.com/xdit-project/xDiT): xDiT is a scalable inference engine for Diffusion Transformers (DiTs)
+ on multiple GPU clusters. xDiT supports real-time image and video generation services.
## Project Structure
diff --git a/README_ja.md b/README_ja.md
index c24aa02..7a66850 100644
--- a/README_ja.md
+++ b/README_ja.md
@@ -271,6 +271,7 @@ pipe.vae.enable_tiling()
+ [AutoDLイメージ](https://www.codewithgpu.com/i/THUDM/CogVideo/CogVideoX-5b-demo): コミュニティメンバーが提供するHuggingface
Spaceイメージのワンクリックデプロイメント。
+ [インテリアデザイン微調整モデル](https://huggingface.co/collections/bertjiazheng/koolcogvideox-66e4762f53287b7f39f8f3ba): は、CogVideoXを基盤にした微調整モデルで、インテリアデザイン専用に設計されています。
++ [xDiT](https://github.com/xdit-project/xDiT): xDiTは、複数のGPUクラスター上でDiTsを並列推論するためのエンジンです。xDiTはリアルタイムの画像およびビデオ生成サービスをサポートしています。
## プロジェクト構造
diff --git a/README_zh.md b/README_zh.md
index d831ec1..f0e218c 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -256,6 +256,8 @@ pipe.vae.enable_tiling()
+ [AutoDL镜像](https://www.codewithgpu.com/i/THUDM/CogVideo/CogVideoX-5b-demo): 由社区成员提供的一键部署Huggingface
Space镜像。
+ [室内设计微调模型](https://huggingface.co/collections/bertjiazheng/koolcogvideox-66e4762f53287b7f39f8f3ba) 基于 CogVideoX的微调模型,它专为室内设计而设计
++ [xDiT](https://github.com/xdit-project/xDiT): xDiT是一个用于在多GPU集群上对DiTs并行推理的引擎。xDiT支持实时图像和视频生成服务。
+
## 完整项目代码结构
From a59ed84b52c77b88231745cf194ddd216df50560 Mon Sep 17 00:00:00 2001
From: LittleNyima
Date: Mon, 30 Sep 2024 20:45:53 +0800
Subject: [PATCH 4/7] fix deprecation of clear_objs_and_retain_memory
---
finetune/train_cogvideox_lora.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/finetune/train_cogvideox_lora.py b/finetune/train_cogvideox_lora.py
index 137f322..a746af8 100644
--- a/finetune/train_cogvideox_lora.py
+++ b/finetune/train_cogvideox_lora.py
@@ -40,7 +40,7 @@ from diffusers.optimization import get_scheduler
from diffusers.pipelines.cogvideo.pipeline_cogvideox import get_resize_crop_region_for_grid
from diffusers.training_utils import (
cast_training_params,
- clear_objs_and_retain_memory,
+ free_memory,
)
from diffusers.utils import check_min_version, convert_unet_state_dict_to_peft, export_to_video, is_wandb_available
from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card
@@ -726,7 +726,7 @@ def log_validation(
}
)
- clear_objs_and_retain_memory([pipe])
+ free_memory()
return videos
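As a hedged migration note: clear_objs_and_retain_memory(objs) deleted the references it was given before releasing cached memory, while free_memory() only runs garbage collection and empties the accelerator cache, so anything that should also be dropped needs an explicit del. A minimal sketch (the explicit del pipe is an assumption, not part of this patch):

    from diffusers.training_utils import free_memory

    # Rough equivalent of the removed clear_objs_and_retain_memory([pipe]):
    del pipe         # assumption: drop the validation pipeline explicitly
    free_memory()    # gc.collect() plus emptying the CUDA (or other backend) cache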
From f0098c06621a9de3f73f7122bf4c9b48b5f37e52 Mon Sep 17 00:00:00 2001
From: glide-the
Date: Thu, 3 Oct 2024 13:25:55 +0800
Subject: [PATCH 5/7] padding fix
---
inference/gradio_composite_demo/rife_model.py | 47 +++++++++++--------
1 file changed, 28 insertions(+), 19 deletions(-)
diff --git a/inference/gradio_composite_demo/rife_model.py b/inference/gradio_composite_demo/rife_model.py
index 901038d..e1783e3 100644
--- a/inference/gradio_composite_demo/rife_model.py
+++ b/inference/gradio_composite_demo/rife_model.py
@@ -9,7 +9,6 @@ import logging
import skvideo.io
from rife.RIFE_HDv3 import Model
from huggingface_hub import hf_hub_download, snapshot_download
-
logger = logging.getLogger(__name__)
device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -20,8 +19,7 @@ def pad_image(img, scale):
tmp = max(32, int(32 / scale))
ph = ((h - 1) // tmp + 1) * tmp
pw = ((w - 1) // tmp + 1) * tmp
- padding = (0, pw - w, 0, ph - h)
-
+ padding = (0, pw - w, 0, ph - h)
return F.pad(img, padding), padding
@@ -47,15 +45,15 @@ def ssim_interpolation_rife(model, samples, exp=1, upscale_amount=1, output_devi
for b in range(samples.shape[0]):
frame = samples[b : b + 1]
_, _, h, w = frame.shape
-
+
I0 = samples[b : b + 1]
I1 = samples[b + 1 : b + 2] if b + 2 < samples.shape[0] else samples[-1:]
-
+
I0, padding = pad_image(I0, upscale_amount)
I0 = I0.to(torch.float)
I1, _ = pad_image(I1, upscale_amount)
I1 = I1.to(torch.float)
-
+
# [c, h, w]
I0_small = F.interpolate(I0, (32, 32), mode="bilinear", align_corners=False)
I1_small = F.interpolate(I1, (32, 32), mode="bilinear", align_corners=False)
@@ -72,15 +70,23 @@ def ssim_interpolation_rife(model, samples, exp=1, upscale_amount=1, output_devi
# print(f'I0 shape:{I0.shape}')
# print(f'I1 shape:{I1.shape}')
I1 = make_inference(model, I0, I1, upscale_amount, 1)
-
+
# print(f'I0 shape:{I0.shape}')
- # print(f'I1[0] shape:{I1[0].shape}')
+ # print(f'I1[0] shape:{I1[0].shape}')
I1 = I1[0]
-
- # print(f'I1[0] unpadded shape:{I1.shape}')
+
+ # print(f'I1[0] unpadded shape:{I1.shape}')
I1_small = F.interpolate(I1, (32, 32), mode="bilinear", align_corners=False)
ssim = ssim_matlab(I0_small[:, :3], I1_small[:, :3])
- frame = I1[padding[0] :, padding[2] :, : -padding[3], padding[1] :]
+ if padding[3] > 0 and padding[1] > 0:
+
+ frame = I1[:, :, : -padding[3], : -padding[1]]
+ elif padding[3] > 0:
+ frame = I1[:, :, : -padding[3], :]
+ elif padding[1] > 0:
+ frame = I1[:, :, :, : -padding[1]]
+ else:
+ frame = I1
tmp_output = []
if ssim < 0.2:
@@ -95,7 +101,8 @@ def ssim_interpolation_rife(model, samples, exp=1, upscale_amount=1, output_devi
frame = F.interpolate(frame, size=(h, w))
output.append(frame.to(output_device))
- for i, tmp_frame in enumerate(tmp_output):
+ for i, tmp_frame in enumerate(tmp_output):
+
# tmp_frame, _ = pad_image(tmp_frame, upscale_amount)
tmp_frame = F.interpolate(tmp_frame, size=(h, w))
output.append(tmp_frame.to(output_device))
@@ -138,7 +145,9 @@ def rife_inference_with_path(model, video_path):
frame_rgb = frame[..., ::-1]
frame_rgb = frame_rgb.copy()
tensor = torch.from_numpy(frame_rgb).float().to("cpu", non_blocking=True).float() / 255.0
- pt_frame_data.append(tensor.permute(2, 0, 1)) # to [c, h, w,]
+ pt_frame_data.append(
+ tensor.permute(2, 0, 1)
+ ) # to [c, h, w,]
pt_frame = torch.from_numpy(np.stack(pt_frame_data))
pt_frame = pt_frame.to(device)
@@ -167,9 +176,9 @@ def rife_inference_with_latents(model, latents):
return torch.stack(rife_results)
-if __name__ == "__main__":
- snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
- model = load_rife_model("model_rife")
-
- video_path = rife_inference_with_path(model, "/mnt/ceph/develop/jiawei/CogVideo/output/chunk_3710_1.mp4")
- print(video_path)
+# if __name__ == "__main__":
+# snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
+# model = load_rife_model("model_rife")
+
+# video_path = rife_inference_with_path(model, "/mnt/ceph/develop/jiawei/CogVideo/output/20241003_130720.mp4")
+# print(video_path)
\ No newline at end of file
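The reason for the branching above, as a small sketch: a slice such as t[:, :, :-0, :] is empty, so cropping with :-padding[...] is only safe on sides that were actually padded (the pad amounts below are made up):

    import torch

    x = torch.randn(1, 3, 480, 720)
    pad_bottom, pad_right = 0, 16  # hypothetical pad amounts; left/top are always 0 here

    # x[:, :, :-0, :] would be an empty tensor, so only crop dimensions that were padded.
    if pad_bottom > 0 and pad_right > 0:
        cropped = x[:, :, :-pad_bottom, :-pad_right]
    elif pad_bottom > 0:
        cropped = x[:, :, :-pad_bottom, :]
    elif pad_right > 0:
        cropped = x[:, :, :, :-pad_right]
    else:
        cropped = x
    print(cropped.shape)  # torch.Size([1, 3, 480, 704])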
From 46906f675cc119cfd83f7a53b052c94b9010eab1 Mon Sep 17 00:00:00 2001
From: glide-the
Date: Wed, 9 Oct 2024 18:30:44 +0800
Subject: [PATCH 6/7] Feishu technical documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
README.md | 8 ++++++++
README_ja.md | 6 +++++-
README_zh.md | 6 ++++++
3 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index a700f24..ec9337e 100644
--- a/README.md
+++ b/README.md
@@ -18,10 +18,18 @@ Experience the CogVideoX-5B model online at
📍 Visit QingYing and API Platform to experience larger-scale commercial video generation models.
+
+We have publicly shared the Feishu technical documentation on CogVideoX fine-tuning scenarios, aiming to further increase distribution flexibility. All examples in the public documentation can be fully reproduced.
+
+CogVideoX fine-tuning is divided into SFT and LoRA fine-tuning. With our publicly available data processing scripts, you can more easily align specific styles in vertical scenarios. We provide guidance for ablation experiments on character image (IP) and scene style, further reducing the difficulty of reproducing fine-tuning tasks.
+
+We look forward to creative explorations and contributions.
## Project Updates
+- 🔥🔥 **News**: ```2024/10/09```: We have publicly released the [technical documentation](https://zhipu-ai.feishu.cn/wiki/DHCjw1TrJiTyeukfc9RceoSRnCh) for CogVideoX fine-tuning on Feishu, further increasing distribution flexibility. All examples in the public documentation can be fully reproduced.
+
- 🔥🔥 **News**: ```2024/9/25```: CogVideoX web demo is available on Replicate. Try the text-to-video model **CogVideoX-5B** here [](https://replicate.com/chenxwh/cogvideox-t2v) and image-to-video model **CogVideoX-5B-I2V** here [](https://replicate.com/chenxwh/cogvideox-i2v).
- 🔥🔥 **News**: ```2024/9/19```: We have open-sourced the CogVideoX series image-to-video model **CogVideoX-5B-I2V**.
This model can take an image as a background input and generate a video combined with prompt words, offering greater
diff --git a/README_ja.md b/README_ja.md
index 7a66850..6d156c4 100644
--- a/README_ja.md
+++ b/README_ja.md
@@ -17,10 +17,14 @@
👋 WeChat と Discord に参加
-📍 清影 と APIプラットフォーム を訪問して、より大規模な商用ビデオ生成モデルを体験
+📍 清影 と APIプラットフォーム を訪問して、より大規模な商用ビデオ生成モデルを体験.
+CogVideoXの動画生成に関連するエコシステムコミュニティをさらに活性化させるためには、生成モデルの最適化が非常に重要な方向性です。私たちは、CogVideoXの微調整シナリオを飛書の技術文書で公開し、分配の自由度をさらに高めるために、公開されている全てのサンプルを完全に再現可能にしています。
+
+CogVideoXの微調整方法は、SFTとLoRA微調整に分かれており、公開されているデータ処理スクリプトを使用することで、特定の分野においてスタイルの一致をより手軽に達成できます。また、キャラクターイメージ(IP)やシーンスタイルのアブレーション実験のガイドも提供しており、微調整タスクの再現の難易度をさらに低減します。 私たちは、さらに創造的な探索が加わることを期待しています。
## 更新とニュース
+- 🔥🔥 **ニュース**: ```2024/10/09```: 飛書の[技術ドキュメント](https://zhipu-ai.feishu.cn/wiki/DHCjw1TrJiTyeukfc9RceoSRnCh)でCogVideoXの微調整ガイドを公開しています。分配の自由度をさらに高めるため、公開されているドキュメント内のすべての例が完全に再現可能です。
- 🔥🔥 **ニュース**: ```2024/9/19```: CogVideoXシリーズの画像生成ビデオモデル **CogVideoX-5B-I2V**
をオープンソース化しました。このモデルは、画像を背景入力として使用し、プロンプトワードと組み合わせてビデオを生成することができ、より高い制御性を提供します。これにより、CogVideoXシリーズのモデルは、テキストからビデオ生成、ビデオの継続、画像からビデオ生成の3つのタスクをサポートするようになりました。オンラインでの[体験](https://huggingface.co/spaces/THUDM/CogVideoX-5B-Space)
diff --git a/README_zh.md b/README_zh.md
index f0e218c..01feaa9 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -19,10 +19,16 @@
📍 前往 清影 和 API平台 体验更大规模的商业版视频生成模型。
+
+我们在飞书技术文档公开CogVideoX微调指导,以进一步增加分发自由度,公开文档中所有示例可以完全复现
+
+CogVideoX微调方式分为SFT和lora微调,在我们公开的数据处理的脚本上,你可以更加便捷的在垂类的场景上完成某些风格对齐,我们提供了人物形象(IP)和场景风格的消融实验指导,进一步减少复现微调任务的难度
+我们期待更多有创意的探索加入。
## 项目更新
+- 🔥🔥 **News**: ```2024/10/09```: 我们在飞书[技术文档](https://zhipu-ai.feishu.cn/wiki/DHCjw1TrJiTyeukfc9RceoSRnCh)公开CogVideoX微调指导,以进一步增加分发自由度,公开文档中所有示例可以完全复现
- 🔥🔥 **News**: ```2024/9/19```: 我们开源 CogVideoX 系列图生视频模型 **CogVideoX-5B-I2V**
。该模型可以将一张图像作为背景输入,结合提示词一起生成视频,具有更强的可控性。
至此,CogVideoX系列模型已经支持文本生成视频,视频续写,图片生成视频三种任务。欢迎前往在线[体验](https://huggingface.co/spaces/THUDM/CogVideoX-5B-Space)。
From da5fc1c1cab3b1451c866b1f9111eeec9d0410e4 Mon Sep 17 00:00:00 2001
From: yangzhuoyi <515221650@qq.com>
Date: Thu, 10 Oct 2024 14:40:39 +0800
Subject: [PATCH 7/7] update technical report
---
README.md | 2 ++
README_ja.md | 3 +++
README_zh.md | 2 ++
3 files changed, 7 insertions(+)
diff --git a/README.md b/README.md
index ec9337e..4cca1e4 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,8 @@ We look forward to creative explorations and contributions.
## Project Updates
+- 🔥🔥 **News**: ```2024/10/10```: We have updated our technical report, including more training details and demos.
+
- 🔥🔥 **News**: ```2024/10/09```: We have publicly released the [technical documentation](https://zhipu-ai.feishu.cn/wiki/DHCjw1TrJiTyeukfc9RceoSRnCh) for CogVideoX fine-tuning on Feishu, further increasing distribution flexibility. All examples in the public documentation can be fully reproduced.
- 🔥🔥 **News**: ```2024/9/25```: CogVideoX web demo is available on Replicate. Try the text-to-video model **CogVideoX-5B** here [](https://replicate.com/chenxwh/cogvideox-t2v) and image-to-video model **CogVideoX-5B-I2V** here [](https://replicate.com/chenxwh/cogvideox-i2v).
diff --git a/README_ja.md b/README_ja.md
index 6d156c4..dc27b76 100644
--- a/README_ja.md
+++ b/README_ja.md
@@ -24,6 +24,9 @@ CogVideoXの微調整方法は、SFTとLoRA微調整に分かれており、公
## 更新とニュース
+
+- 🔥🔥 **ニュース**: ```2024/10/10```: 技術報告書を更新し、より詳細なトレーニング情報とデモを追加しました。
+
- 🔥🔥 **ニュース**: ```2024/10/09```: 飛書の[技術ドキュメント](https://zhipu-ai.feishu.cn/wiki/DHCjw1TrJiTyeukfc9RceoSRnCh)でCogVideoXの微調整ガイドを公開しています。分配の自由度をさらに高めるため、公開されているドキュメント内のすべての例が完全に再現可能です。
- 🔥🔥 **ニュース**: ```2024/9/19```: CogVideoXシリーズの画像生成ビデオモデル **CogVideoX-5B-I2V**
diff --git a/README_zh.md b/README_zh.md
index 01feaa9..bf6a2d0 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -28,6 +28,8 @@ CogVideoX微调方式分为SFT和lora微调,在我们公开的数据处理的
## 项目更新
+- 🔥🔥 **News**: ```2024/10/10```: 我们更新了技术报告,附上了更多的训练细节和 demo
+
- 🔥🔥 **News**: ```2024/10/09```: 我们在飞书[技术文档](https://zhipu-ai.feishu.cn/wiki/DHCjw1TrJiTyeukfc9RceoSRnCh)公开CogVideoX微调指导,以进一步增加分发自由度,公开文档中所有示例可以完全复现
- 🔥🔥 **News**: ```2024/9/19```: 我们开源 CogVideoX 系列图生视频模型 **CogVideoX-5B-I2V**
。该模型可以将一张图像作为背景输入,结合提示词一起生成视频,具有更强的可控性。