diff --git a/GPT_SoVITS/BigVGAN/configs/bigvgan_22khz_80band.json b/GPT_SoVITS/BigVGAN/configs/bigvgan_22khz_80band.json index 8b13789..64bca78 100644 --- a/GPT_SoVITS/BigVGAN/configs/bigvgan_22khz_80band.json +++ b/GPT_SoVITS/BigVGAN/configs/bigvgan_22khz_80band.json @@ -1 +1,45 @@ +{ + "resblock": "1", + "num_gpus": 0, + "batch_size": 32, + "learning_rate": 0.0001, + "adam_b1": 0.8, + "adam_b2": 0.99, + "lr_decay": 0.9999996, + "seed": 1234, + "upsample_rates": [4,4,2,2,2,2], + "upsample_kernel_sizes": [8,8,4,4,4,4], + "upsample_initial_channel": 1536, + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + + "activation": "snakebeta", + "snake_logscale": true, + + "resolutions": [[1024, 120, 600], [2048, 240, 1200], [512, 50, 240]], + "mpd_reshapes": [2, 3, 5, 7, 11], + "use_spectral_norm": false, + "discriminator_channel_mult": 1, + + "segment_size": 8192, + "num_mels": 80, + "num_freq": 1025, + "n_fft": 1024, + "hop_size": 256, + "win_size": 1024, + + "sampling_rate": 22050, + + "fmin": 0, + "fmax": 8000, + "fmax_for_loss": null, + + "num_workers": 4, + + "dist_config": { + "dist_backend": "nccl", + "dist_url": "tcp://localhost:54321", + "world_size": 1 + } +} diff --git a/GPT_SoVITS/BigVGAN/configs/bigvgan_24khz_100band.json b/GPT_SoVITS/BigVGAN/configs/bigvgan_24khz_100band.json new file mode 100644 index 0000000..e7f7ff0 --- /dev/null +++ b/GPT_SoVITS/BigVGAN/configs/bigvgan_24khz_100band.json @@ -0,0 +1,45 @@ +{ + "resblock": "1", + "num_gpus": 0, + "batch_size": 32, + "learning_rate": 0.0001, + "adam_b1": 0.8, + "adam_b2": 0.99, + "lr_decay": 0.9999996, + "seed": 1234, + + "upsample_rates": [4,4,2,2,2,2], + "upsample_kernel_sizes": [8,8,4,4,4,4], + "upsample_initial_channel": 1536, + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + + "activation": "snakebeta", + "snake_logscale": true, + + "resolutions": [[1024, 120, 600], [2048, 240, 1200], [512, 50, 240]], + "mpd_reshapes": [2, 3, 5, 7, 11], + "use_spectral_norm": false, + "discriminator_channel_mult": 1, + + "segment_size": 8192, + "num_mels": 100, + "num_freq": 1025, + "n_fft": 1024, + "hop_size": 256, + "win_size": 1024, + + "sampling_rate": 24000, + + "fmin": 0, + "fmax": 12000, + "fmax_for_loss": null, + + "num_workers": 4, + + "dist_config": { + "dist_backend": "nccl", + "dist_url": "tcp://localhost:54321", + "world_size": 1 + } +} diff --git a/GPT_SoVITS/BigVGAN/configs/bigvgan_base_22khz_80band.json b/GPT_SoVITS/BigVGAN/configs/bigvgan_base_22khz_80band.json new file mode 100644 index 0000000..fd24484 --- /dev/null +++ b/GPT_SoVITS/BigVGAN/configs/bigvgan_base_22khz_80band.json @@ -0,0 +1,45 @@ +{ + "resblock": "1", + "num_gpus": 0, + "batch_size": 32, + "learning_rate": 0.0001, + "adam_b1": 0.8, + "adam_b2": 0.99, + "lr_decay": 0.9999996, + "seed": 1234, + + "upsample_rates": [8,8,2,2], + "upsample_kernel_sizes": [16,16,4,4], + "upsample_initial_channel": 512, + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + + "activation": "snakebeta", + "snake_logscale": true, + + "resolutions": [[1024, 120, 600], [2048, 240, 1200], [512, 50, 240]], + "mpd_reshapes": [2, 3, 5, 7, 11], + "use_spectral_norm": false, + "discriminator_channel_mult": 1, + + "segment_size": 8192, + "num_mels": 80, + "num_freq": 1025, + "n_fft": 1024, + "hop_size": 256, + "win_size": 1024, + + "sampling_rate": 22050, + + "fmin": 0, + "fmax": 8000, + "fmax_for_loss": null, + + "num_workers": 4, + + "dist_config": { + "dist_backend": "nccl", + "dist_url": "tcp://localhost:54321", + "world_size": 1 + } +} diff --git a/GPT_SoVITS/BigVGAN/configs/bigvgan_base_24khz_100band.json b/GPT_SoVITS/BigVGAN/configs/bigvgan_base_24khz_100band.json new file mode 100644 index 0000000..0911508 --- /dev/null +++ b/GPT_SoVITS/BigVGAN/configs/bigvgan_base_24khz_100band.json @@ -0,0 +1,45 @@ +{ + "resblock": "1", + "num_gpus": 0, + "batch_size": 32, + "learning_rate": 0.0001, + "adam_b1": 0.8, + "adam_b2": 0.99, + "lr_decay": 0.9999996, + "seed": 1234, + + "upsample_rates": [8,8,2,2], + "upsample_kernel_sizes": [16,16,4,4], + "upsample_initial_channel": 512, + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + + "activation": "snakebeta", + "snake_logscale": true, + + "resolutions": [[1024, 120, 600], [2048, 240, 1200], [512, 50, 240]], + "mpd_reshapes": [2, 3, 5, 7, 11], + "use_spectral_norm": false, + "discriminator_channel_mult": 1, + + "segment_size": 8192, + "num_mels": 100, + "num_freq": 1025, + "n_fft": 1024, + "hop_size": 256, + "win_size": 1024, + + "sampling_rate": 24000, + + "fmin": 0, + "fmax": 12000, + "fmax_for_loss": null, + + "num_workers": 4, + + "dist_config": { + "dist_backend": "nccl", + "dist_url": "tcp://localhost:54321", + "world_size": 1 + } +} diff --git a/GPT_SoVITS/BigVGAN/configs/bigvgan_v2_22khz_80band_256x.json b/GPT_SoVITS/BigVGAN/configs/bigvgan_v2_22khz_80band_256x.json new file mode 100644 index 0000000..e96bd5f --- /dev/null +++ b/GPT_SoVITS/BigVGAN/configs/bigvgan_v2_22khz_80band_256x.json @@ -0,0 +1,61 @@ +{ + "resblock": "1", + "num_gpus": 0, + "batch_size": 4, + "learning_rate": 0.0001, + "adam_b1": 0.8, + "adam_b2": 0.99, + "lr_decay": 0.9999996, + "seed": 1234, + + "upsample_rates": [4,4,2,2,2,2], + "upsample_kernel_sizes": [8,8,4,4,4,4], + "upsample_initial_channel": 1536, + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + + "use_tanh_at_final": false, + "use_bias_at_final": false, + + "activation": "snakebeta", + "snake_logscale": true, + + "use_cqtd_instead_of_mrd": true, + "cqtd_filters": 128, + "cqtd_max_filters": 1024, + "cqtd_filters_scale": 1, + "cqtd_dilations": [1, 2, 4], + "cqtd_hop_lengths": [512, 256, 256], + "cqtd_n_octaves": [9, 9, 9], + "cqtd_bins_per_octaves": [24, 36, 48], + + "mpd_reshapes": [2, 3, 5, 7, 11], + "use_spectral_norm": false, + "discriminator_channel_mult": 1, + + "use_multiscale_melloss": true, + "lambda_melloss": 15, + + "clip_grad_norm": 500, + + "segment_size": 65536, + "num_mels": 80, + "num_freq": 1025, + "n_fft": 1024, + "hop_size": 256, + "win_size": 1024, + + "sampling_rate": 22050, + + "fmin": 0, + "fmax": null, + "fmax_for_loss": null, + + "num_workers": 4, + + "dist_config": { + "dist_backend": "nccl", + "dist_url": "tcp://localhost:54321", + "world_size": 1 + } +} diff --git a/GPT_SoVITS/BigVGAN/configs/bigvgan_v2_22khz_80band_fmax8k_256x.json b/GPT_SoVITS/BigVGAN/configs/bigvgan_v2_22khz_80band_fmax8k_256x.json new file mode 100644 index 0000000..a3c9699 --- /dev/null +++ b/GPT_SoVITS/BigVGAN/configs/bigvgan_v2_22khz_80band_fmax8k_256x.json @@ -0,0 +1,61 @@ +{ + "resblock": "1", + "num_gpus": 0, + "batch_size": 4, + "learning_rate": 0.0001, + "adam_b1": 0.8, + "adam_b2": 0.99, + "lr_decay": 0.9999996, + "seed": 1234, + + "upsample_rates": [4,4,2,2,2,2], + "upsample_kernel_sizes": [8,8,4,4,4,4], + "upsample_initial_channel": 1536, + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + + "use_tanh_at_final": false, + "use_bias_at_final": false, + + "activation": "snakebeta", + "snake_logscale": true, + + "use_cqtd_instead_of_mrd": true, + "cqtd_filters": 128, + "cqtd_max_filters": 1024, + "cqtd_filters_scale": 1, + "cqtd_dilations": [1, 2, 4], + "cqtd_hop_lengths": [512, 256, 256], + "cqtd_n_octaves": [9, 9, 9], + "cqtd_bins_per_octaves": [24, 36, 48], + + "mpd_reshapes": [2, 3, 5, 7, 11], + "use_spectral_norm": false, + "discriminator_channel_mult": 1, + + "use_multiscale_melloss": true, + "lambda_melloss": 15, + + "clip_grad_norm": 500, + + "segment_size": 65536, + "num_mels": 80, + "num_freq": 1025, + "n_fft": 1024, + "hop_size": 256, + "win_size": 1024, + + "sampling_rate": 22050, + + "fmin": 0, + "fmax": 8000, + "fmax_for_loss": null, + + "num_workers": 4, + + "dist_config": { + "dist_backend": "nccl", + "dist_url": "tcp://localhost:54321", + "world_size": 1 + } +} diff --git a/GPT_SoVITS/BigVGAN/configs/bigvgan_v2_24khz_100band_256x.json b/GPT_SoVITS/BigVGAN/configs/bigvgan_v2_24khz_100band_256x.json new file mode 100644 index 0000000..8057ee2 --- /dev/null +++ b/GPT_SoVITS/BigVGAN/configs/bigvgan_v2_24khz_100band_256x.json @@ -0,0 +1,61 @@ +{ + "resblock": "1", + "num_gpus": 0, + "batch_size": 4, + "learning_rate": 0.0001, + "adam_b1": 0.8, + "adam_b2": 0.99, + "lr_decay": 0.9999996, + "seed": 1234, + + "upsample_rates": [4,4,2,2,2,2], + "upsample_kernel_sizes": [8,8,4,4,4,4], + "upsample_initial_channel": 1536, + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + + "use_tanh_at_final": false, + "use_bias_at_final": false, + + "activation": "snakebeta", + "snake_logscale": true, + + "use_cqtd_instead_of_mrd": true, + "cqtd_filters": 128, + "cqtd_max_filters": 1024, + "cqtd_filters_scale": 1, + "cqtd_dilations": [1, 2, 4], + "cqtd_hop_lengths": [512, 256, 256], + "cqtd_n_octaves": [9, 9, 9], + "cqtd_bins_per_octaves": [24, 36, 48], + + "mpd_reshapes": [2, 3, 5, 7, 11], + "use_spectral_norm": false, + "discriminator_channel_mult": 1, + + "use_multiscale_melloss": true, + "lambda_melloss": 15, + + "clip_grad_norm": 500, + + "segment_size": 65536, + "num_mels": 100, + "num_freq": 1025, + "n_fft": 1024, + "hop_size": 256, + "win_size": 1024, + + "sampling_rate": 24000, + + "fmin": 0, + "fmax": null, + "fmax_for_loss": null, + + "num_workers": 4, + + "dist_config": { + "dist_backend": "nccl", + "dist_url": "tcp://localhost:54321", + "world_size": 1 + } +} diff --git a/GPT_SoVITS/BigVGAN/configs/bigvgan_v2_44khz_128band_256x.json b/GPT_SoVITS/BigVGAN/configs/bigvgan_v2_44khz_128band_256x.json new file mode 100644 index 0000000..b6999d3 --- /dev/null +++ b/GPT_SoVITS/BigVGAN/configs/bigvgan_v2_44khz_128band_256x.json @@ -0,0 +1,61 @@ +{ + "resblock": "1", + "num_gpus": 0, + "batch_size": 4, + "learning_rate": 0.0001, + "adam_b1": 0.8, + "adam_b2": 0.99, + "lr_decay": 0.9999996, + "seed": 1234, + + "upsample_rates": [4,4,2,2,2,2], + "upsample_kernel_sizes": [8,8,4,4,4,4], + "upsample_initial_channel": 1536, + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + + "use_tanh_at_final": false, + "use_bias_at_final": false, + + "activation": "snakebeta", + "snake_logscale": true, + + "use_cqtd_instead_of_mrd": true, + "cqtd_filters": 128, + "cqtd_max_filters": 1024, + "cqtd_filters_scale": 1, + "cqtd_dilations": [1, 2, 4], + "cqtd_hop_lengths": [512, 256, 256], + "cqtd_n_octaves": [9, 9, 9], + "cqtd_bins_per_octaves": [24, 36, 48], + + "mpd_reshapes": [2, 3, 5, 7, 11], + "use_spectral_norm": false, + "discriminator_channel_mult": 1, + + "use_multiscale_melloss": true, + "lambda_melloss": 15, + + "clip_grad_norm": 500, + + "segment_size": 65536, + "num_mels": 128, + "num_freq": 1025, + "n_fft": 1024, + "hop_size": 256, + "win_size": 1024, + + "sampling_rate": 44100, + + "fmin": 0, + "fmax": null, + "fmax_for_loss": null, + + "num_workers": 4, + + "dist_config": { + "dist_backend": "nccl", + "dist_url": "tcp://localhost:54321", + "world_size": 1 + } +} diff --git a/GPT_SoVITS/BigVGAN/configs/bigvgan_v2_44khz_128band_512x.json b/GPT_SoVITS/BigVGAN/configs/bigvgan_v2_44khz_128band_512x.json new file mode 100644 index 0000000..2d7176c --- /dev/null +++ b/GPT_SoVITS/BigVGAN/configs/bigvgan_v2_44khz_128band_512x.json @@ -0,0 +1,61 @@ +{ + "resblock": "1", + "num_gpus": 0, + "batch_size": 4, + "learning_rate": 0.0001, + "adam_b1": 0.8, + "adam_b2": 0.99, + "lr_decay": 0.9999996, + "seed": 1234, + + "upsample_rates": [8,4,2,2,2,2], + "upsample_kernel_sizes": [16,8,4,4,4,4], + "upsample_initial_channel": 1536, + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + + "use_tanh_at_final": false, + "use_bias_at_final": false, + + "activation": "snakebeta", + "snake_logscale": true, + + "use_cqtd_instead_of_mrd": true, + "cqtd_filters": 128, + "cqtd_max_filters": 1024, + "cqtd_filters_scale": 1, + "cqtd_dilations": [1, 2, 4], + "cqtd_hop_lengths": [512, 256, 256], + "cqtd_n_octaves": [9, 9, 9], + "cqtd_bins_per_octaves": [24, 36, 48], + + "mpd_reshapes": [2, 3, 5, 7, 11], + "use_spectral_norm": false, + "discriminator_channel_mult": 1, + + "use_multiscale_melloss": true, + "lambda_melloss": 15, + + "clip_grad_norm": 500, + + "segment_size": 65536, + "num_mels": 128, + "num_freq": 2049, + "n_fft": 2048, + "hop_size": 512, + "win_size": 2048, + + "sampling_rate": 44100, + + "fmin": 0, + "fmax": null, + "fmax_for_loss": null, + + "num_workers": 4, + + "dist_config": { + "dist_backend": "nccl", + "dist_url": "tcp://localhost:54321", + "world_size": 1 + } +}