diff --git a/GPT_SoVITS/configs/s2v2Pro.json b/GPT_SoVITS/configs/s2v2Pro.json new file mode 100644 index 0000000..4eaee80 --- /dev/null +++ b/GPT_SoVITS/configs/s2v2Pro.json @@ -0,0 +1,91 @@ +{ + "train": { + "log_interval": 100, + "eval_interval": 500, + "seed": 1234, + "epochs": 100, + "learning_rate": 0.0001, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 32, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 20480, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0, + "text_low_lr_rate": 0.4, + "grad_ckpt": false + }, + "data": { + "max_wav_value": 32768.0, + "sampling_rate": 32000, + "filter_length": 2048, + "hop_length": 640, + "win_length": 2048, + "n_mel_channels": 128, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 300, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.0, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 10, + 8, + 2, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 8, + 2, + 2 + ], + "n_layers_q": 3, + "use_spectral_norm": false, + "gin_channels": 1024, + "semantic_frame_rate": "25hz", + "freeze_quantizer": true + }, + "s2_ckpt_dir": "logs/s2/big2k1", + "content_module": "cnhubert" +} \ No newline at end of file diff --git a/GPT_SoVITS/configs/s2v2ProPlus.json b/GPT_SoVITS/configs/s2v2ProPlus.json new file mode 100644 index 0000000..37d8e16 --- /dev/null +++ b/GPT_SoVITS/configs/s2v2ProPlus.json @@ -0,0 +1,91 @@ +{ + "train": { + "log_interval": 100, + "eval_interval": 500, + "seed": 1234, + "epochs": 100, + "learning_rate": 0.0001, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 32, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 20480, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0, + "text_low_lr_rate": 0.4, + "grad_ckpt": false + }, + "data": { + "max_wav_value": 32768.0, + "sampling_rate": 32000, + "filter_length": 2048, + "hop_length": 640, + "win_length": 2048, + "n_mel_channels": 128, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 300, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.0, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 10, + 8, + 2, + 2, + 2 + ], + "upsample_initial_channel": 768, + "upsample_kernel_sizes": [ + 20, + 16, + 8, + 2, + 2 + ], + "n_layers_q": 3, + "use_spectral_norm": false, + "gin_channels": 1024, + "semantic_frame_rate": "25hz", + "freeze_quantizer": true + }, + "s2_ckpt_dir": "logs/s2/big2k1", + "content_module": "cnhubert" +} \ No newline at end of file