Mirror of https://github.com/RVC-Boss/GPT-SoVITS.git (synced 2025-09-29 00:30:15 +08:00)
feat: text_bert and audio_hubert exports are ready and fully tested; todo: solve dependency in playground runs
This commit is contained in:
parent
4e42a28f9c
commit
aef9d26580
3	.gitignore (vendored)
@@ -194,4 +194,5 @@ cython_debug/
 # PyPI configuration file
 .pypirc
 onnx/
 *.onnx
+tokenizer.json
156	playground/export_bert.py (new file)
@@ -0,0 +1,156 @@
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForMaskedLM
import onnxruntime as ort
import argparse
import os
import shutil
import numpy as np


class CombinedBERTModel(nn.Module):
    """Wrapper that pairs the BERT tokenizer with masked-LM inference."""

    def __init__(self, model_name: str):
        super().__init__()
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForMaskedLM.from_pretrained(model_name)

    def forward(self, text_input: torch.Tensor):
        """Forward pass over pre-tokenized input_ids."""
        # For ONNX export we work with pre-tokenized input_ids;
        # text tokenization itself must happen outside the ONNX graph.
        input_ids = text_input.long()

        outputs = self.model(input_ids=input_ids, output_hidden_states=True)
        # Take the third-from-last hidden state and strip the [CLS]/[SEP]
        # positions, matching GPT-SoVITS's BERT feature extraction.
        return torch.cat(outputs["hidden_states"][-3:-2], -1)[0].cpu()[1:-1]


def export_bert_to_onnx(
    model_name: str = "bert-base-uncased",
    output_dir: str = "bert_exported",
    max_seq_length: int = 512,
):
    """Export the BERT model to ONNX format and copy tokenizer files."""

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    print(f"Loading model: {model_name}")
    combined_model = CombinedBERTModel(model_name)
    combined_model.eval()

    # Create dummy inputs for ONNX export (pre-tokenized input_ids)
    batch_size = 1
    dummy_input_ids = torch.randint(0, combined_model.tokenizer.vocab_size, (batch_size, max_seq_length))

    # Export to ONNX
    onnx_path = os.path.join(output_dir, "chinese-roberta-wwm-ext-large.onnx")
    print(f"Exporting to ONNX: {onnx_path}")
    torch.onnx.export(
        combined_model,
        dummy_input_ids,
        onnx_path,
        export_params=True,
        opset_version=14,
        do_constant_folding=True,
        input_names=['input_ids'],
        output_names=['logits'],  # actually the selected hidden-state features
        dynamic_axes={
            'input_ids': {0: 'batch_size', 1: 'sequence_length'},
            'logits': {0: 'batch_size', 1: 'sequence_length'},
        },
    )

    # Copy tokenizer.json if it exists
    tokenizer_cache_dir = combined_model.tokenizer.name_or_path
    if os.path.isdir(tokenizer_cache_dir):
        tokenizer_json_path = os.path.join(tokenizer_cache_dir, "tokenizer.json")
    else:
        # For models resolved through the HuggingFace cache
        try:
            tokenizer_json_path = combined_model.tokenizer._tokenizer.model_path
        except Exception:
            # Alternative approach: search the cache for tokenizer.json
            cache_dir = os.path.expanduser("~/.cache/huggingface/transformers")
            tokenizer_json_path = None
            for root, dirs, files in os.walk(cache_dir):
                if "tokenizer.json" in files and model_name.replace("/", "--") in root:
                    tokenizer_json_path = os.path.join(root, "tokenizer.json")
                    break

    if tokenizer_json_path and os.path.exists(tokenizer_json_path):
        dest_tokenizer_path = os.path.join(output_dir, "tokenizer.json")
        shutil.copy2(tokenizer_json_path, dest_tokenizer_path)
        print(f"Copied tokenizer.json to: {dest_tokenizer_path}")
    else:
        print("Warning: tokenizer.json not found")

    print(f"Model exported successfully to: {output_dir}")
    return combined_model, onnx_path


def test_model_equivalence(original_model, onnx_path: str, max_seq_length: int = 512, tolerance: float = 1e-5):
    """Test whether the PyTorch model and the ONNX model produce the same outputs."""

    print("Testing model equivalence...")

    # Create test input
    batch_size = 1
    test_input_ids = torch.randint(0, original_model.tokenizer.vocab_size, (batch_size, max_seq_length))

    # Get PyTorch output
    original_model.eval()
    with torch.no_grad():
        pytorch_output = original_model(test_input_ids).numpy()

    # Get ONNX output
    ort_session = ort.InferenceSession(onnx_path)
    onnx_output = ort_session.run(None, {"input_ids": test_input_ids.numpy()})[0]

    # Compare outputs
    max_diff = np.max(np.abs(pytorch_output - onnx_output))
    mean_diff = np.mean(np.abs(pytorch_output - onnx_output))

    print(f"Maximum absolute difference: {max_diff}")
    print(f"Mean absolute difference: {mean_diff}")

    if max_diff < tolerance:
        print("✅ Models are numerically equivalent!")
        return True
    else:
        print("❌ Models have significant differences!")
        return False


def main():
    parser = argparse.ArgumentParser(description="Export BERT model to ONNX")
    parser.add_argument("--model_name", type=str, default="GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large",
                        help="Pretrained BERT model name")
    parser.add_argument("--output_dir", type=str, default="playground/bert",
                        help="Output directory path")
    parser.add_argument("--max_seq_length", type=int, default=512,
                        help="Maximum sequence length")
    parser.add_argument("--tolerance", type=float, default=1e-3,
                        help="Tolerance for numerical comparison")

    args = parser.parse_args()

    os.makedirs(args.output_dir, exist_ok=True)

    # Export model
    original_model, onnx_path = export_bert_to_onnx(
        model_name=args.model_name,
        output_dir=args.output_dir,
        max_seq_length=args.max_seq_length,
    )

    # Test equivalence
    test_model_equivalence(
        original_model=original_model,
        onnx_path=onnx_path,
        max_seq_length=args.max_seq_length,
        tolerance=args.tolerance,
    )


if __name__ == "__main__":
    main()
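For the "solve dependency in playground runs" todo in the commit message, a runtime-only consumption sketch may be the direction this is heading. The following is a hypothetical example, not part of this commit: it assumes the exported playground/bert/chinese-roberta-wwm-ext-large.onnx plus the copied tokenizer.json, and needs only the tokenizers and onnxruntime packages (no torch or transformers at inference time).

# Hypothetical playground usage: run the exported BERT graph without transformers.
import numpy as np
import onnxruntime as ort
from tokenizers import Tokenizer

tokenizer = Tokenizer.from_file("playground/bert/tokenizer.json")
ids = tokenizer.encode("但是 BERT 的特征是逐字提取的。").ids  # includes [CLS]/[SEP]

session = ort.InferenceSession("playground/bert/chinese-roberta-wwm-ext-large.onnx")
(features,) = session.run(None, {"input_ids": np.array([ids], dtype=np.int64)})
print(features.shape)  # (len(ids) - 2, 1024): per-token features, specials stripped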
170	playground/export_hubert.py (new file)
@@ -0,0 +1,170 @@
import os
import torch
import torchaudio
import onnxruntime as ort
import numpy as np
import argparse
from transformers import HubertModel, HubertConfig


class HubertONNXExporter:
    """Export and test the HuBERT model in ONNX format"""

    def __init__(self, model_path="GPT_SoVITS/pretrained_models/chinese-hubert-base", output_path="playground/hubert/chinese-hubert-base.onnx"):
        self.model_path = model_path
        self.onnx_path = output_path
        self.model = None
        self.config = None

    def setup_model(self):
        """Configure and load the HuBERT model for ONNX export"""
        # Configure for better ONNX compatibility
        self.config = HubertConfig.from_pretrained(self.model_path)
        self.config._attn_implementation = "eager"  # Use standard attention
        self.config.apply_spec_augment = False  # Disable masking for inference
        self.config.layerdrop = 0.0  # Disable layer dropout

        # Load the model
        self.model = HubertModel.from_pretrained(
            self.model_path,
            config=self.config,
            local_files_only=True,
        )
        self.model.eval()

    def export_to_onnx(self, dummy_length=16000):
        """Export the model to ONNX format"""
        if self.model is None:
            raise ValueError("Model not initialized. Call setup_model() first.")

        # Create zero-centered dummy input (1 second at 16kHz)
        dummy_input = torch.rand(1, dummy_length, dtype=torch.float32) - 0.5

        # Export to ONNX
        torch.onnx.export(
            self.model,
            dummy_input,
            self.onnx_path,
            export_params=True,
            opset_version=11,
            do_constant_folding=True,
            input_names=['audio16k'],
            output_names=['last_hidden_state'],
            dynamic_axes={
                'audio16k': {0: 'batch_size', 1: 'sequence_length'},
                'last_hidden_state': {0: 'batch_size', 1: 'sequence_length'},
            },
        )
        print(f"[Success] Model exported to {self.onnx_path}")

    def test_onnx_export_exists(self):
        """Test that the ONNX model file was created"""
        if os.path.exists(self.onnx_path):
            print(f"[Success] ONNX model file exists at {self.onnx_path}")
            return True
        else:
            print(f"[Error] ONNX model not found at {self.onnx_path}")
            return False

    def _load_and_preprocess_audio(self, audio_path, max_length=32000):
        """Load and preprocess an audio file"""
        waveform, sample_rate = torchaudio.load(audio_path)

        # Resample to 16kHz if needed
        if sample_rate != 16000:
            resampler = torchaudio.transforms.Resample(sample_rate, 16000)
            waveform = resampler(waveform)

        # Take the first channel
        if waveform.shape[0] > 1:
            waveform = waveform[0:1]

        # Limit length for testing (2 seconds at 16kHz)
        if waveform.shape[1] > max_length:
            waveform = waveform[:, :max_length]

        return waveform

    def test_torch_vs_onnx(self, audio_path="playground/ref/audio.wav"):
        """Test that ONNX model outputs match PyTorch model outputs"""
        if not os.path.exists(audio_path):
            print(f"[Skip] Test audio file not found at {audio_path}")
            return False

        if self.model is None:
            raise ValueError("Model not initialized. Call setup_model() first.")

        # Load and preprocess audio
        waveform = self._load_and_preprocess_audio(audio_path)

        # PyTorch inference
        with torch.no_grad():
            torch_output = self.model(waveform)
            torch_hidden_states = torch_output.last_hidden_state

        # ONNX inference
        ort_session = ort.InferenceSession(self.onnx_path)
        input_values = waveform.numpy().astype(np.float32)
        ort_inputs = {ort_session.get_inputs()[0].name: input_values}
        ort_outputs = ort_session.run(None, ort_inputs)
        onnx_hidden_states = ort_outputs[0]

        # Compare outputs
        torch_numpy = torch_hidden_states.numpy()
        diff = np.abs(torch_numpy - onnx_hidden_states).mean()

        success = diff <= 1e-5
        status = "[Success]" if success else "[Fail]"

        print(f"{status} ONNX vs PyTorch comparison")
        print(f"  > mean_difference={diff}")
        print(f"  > torch_shape={torch_numpy.shape}")
        print(f"  > onnx_shape={onnx_hidden_states.shape}")

        return success

    def run_full_export_and_test(self):
        """Run the complete export and testing pipeline"""
        print("Starting HuBERT ONNX export and testing...")

        # Create the output directory if it doesn't exist
        os.makedirs(os.path.dirname(self.onnx_path), exist_ok=True)

        # Setup model
        self.setup_model()

        # Export to ONNX
        self.export_to_onnx()

        # Test export
        self.test_onnx_export_exists()
        self.test_torch_vs_onnx()

        print("Export and testing complete!")


def main():
    """Main execution function"""
    parser = argparse.ArgumentParser(description="Export HuBERT model to ONNX format")
    parser.add_argument(
        "--model_path",
        type=str,
        default="GPT_SoVITS/pretrained_models/chinese-hubert-base",
        help="Path to the HuBERT model directory (default: GPT_SoVITS/pretrained_models/chinese-hubert-base)",
    )
    parser.add_argument(
        "--output_path",
        type=str,
        default="playground/hubert/chinese-hubert-base.onnx",
        help="Output path for the ONNX model (default: playground/hubert/chinese-hubert-base.onnx)",
    )

    args = parser.parse_args()

    exporter = HubertONNXExporter(model_path=args.model_path, output_path=args.output_path)
    exporter.run_full_export_and_test()


if __name__ == "__main__":
    main()
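A matching runtime-only sketch for the HuBERT side, again hypothetical and not part of this commit: it assumes the exported playground/hubert/chinese-hubert-base.onnx, a 16 kHz reference wav, and only the soundfile and onnxruntime packages.

# Hypothetical playground usage: extract HuBERT features without torch/torchaudio.
import numpy as np
import soundfile as sf
import onnxruntime as ort

# Assumes a 16 kHz file; resampling would have to happen elsewhere.
audio16k, sr = sf.read("playground/ref/audio.wav", dtype="float32")
assert sr == 16000, "the exported graph expects 16 kHz input"
if audio16k.ndim > 1:
    audio16k = audio16k[:, 0]  # keep the first channel, mirroring the exporter

session = ort.InferenceSession("playground/hubert/chinese-hubert-base.onnx")
(hidden,) = session.run(None, {"audio16k": audio16k[np.newaxis, :]})
print(hidden.shape)  # (1, frames, 768): one 768-dim vector per ~20 ms frame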