Gamahea committed
Commit d5ccfff · Parent: b166366

Fix ZeroGPU compatibility - Dynamic device allocation


- Changed device initialization to always start on CPU
- Device detection now happens inside @spaces.GPU-decorated functions
- Models are moved to the GPU dynamically when ZeroGPU allocates resources
- Fixes the 'CUDA driver initialization failed' error

Changes:
- DiffRhythmService: dynamic device detection in _generate_with_diffrhythm2()
- LyricMindService: dynamic device detection in _generate_with_model()
- _tokenize_lyrics() now accepts a device parameter
- Added hf_oauth: true to README for HF authentication
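
For context: on ZeroGPU Spaces a GPU is attached only while a function decorated with @spaces.GPU is executing, so torch.cuda.is_available() returns False at startup. Below is a minimal sketch of the pattern this commit applies; build_model() and generate() are illustrative stand-ins, not the actual app code.

```python
import spaces
import torch

# Load on CPU at startup; on ZeroGPU no CUDA device exists yet.
model = build_model()  # hypothetical loader, stands in for the real services
model.to(torch.device("cpu"))

@spaces.GPU  # ZeroGPU attaches a GPU only for the duration of this call
def generate(prompt: str):
    # Detect the device inside the decorated function, where CUDA is visible
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    with torch.no_grad():
        return model(prompt)
```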

README.md CHANGED
@@ -8,6 +8,7 @@ sdk_version: "4.44.1"
 app_file: app.py
 pinned: false
 license: mit
+hf_oauth: true
 ---
 
 # LEMM - Let Everyone Make Music
backend/services/diffrhythm_service.py CHANGED
@@ -63,18 +63,10 @@ class DiffRhythmService:
         logger.info(f"Using device: {self.device}")
 
     def _get_device(self):
-        """Get compute device (CUDA or CPU)"""
-        # Try CUDA first (NVIDIA)
-        if torch.cuda.is_available():
-            logger.info("Using CUDA (NVIDIA GPU)")
-            return torch.device("cuda")
-
-        # Note: DirectML support disabled due to version conflicts with DiffRhythm2
-        # DiffRhythm2 requires torch>=2.4, but torch-directml requires torch==2.4.1
-        # For AMD GPU acceleration, consider using ROCm with compatible PyTorch build
-
-        # Fallback to CPU
-        logger.info("Using CPU (no GPU acceleration)")
+        """Get compute device - for ZeroGPU, always start with CPU"""
+        # For ZeroGPU Spaces, device allocation happens dynamically inside @spaces.GPU functions
+        # Always return CPU here - GPU allocation is handled by the decorator
+        logger.info("Using CPU for initialization (GPU allocated by @spaces.GPU decorator)")
         return torch.device("cpu")
 
     def _initialize_model(self):
@@ -278,19 +270,21 @@
         try:
             logger.info("Generating with DiffRhythm 2 model...")
 
-            # Move models to GPU (for ZeroGPU compatibility)
-            # This ensures models are on GPU only within the decorated function
-            if self.device.type != 'cpu':
-                self.model = self.model.to(self.device)
-                self.mulan = self.mulan.to(self.device)
-                self.decoder = self.decoder.to(self.device)
+            # For ZeroGPU, dynamically detect device inside GPU-decorated function
+            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+            logger.info(f"GPU-decorated function using device: {device}")
+
+            # Move models to detected device (GPU if available via ZeroGPU)
+            self.model = self.model.to(device)
+            self.mulan = self.mulan.to(device)
+            self.decoder = self.decoder.to(device)
 
             # Prepare lyrics tokens
             if lyrics:
-                lyrics_token = self._tokenize_lyrics(lyrics)
+                lyrics_token = self._tokenize_lyrics(lyrics, device)
             else:
                 # For instrumental, use empty structure
-                lyrics_token = torch.tensor([500, 511], dtype=torch.long, device=self.device)  # [start][stop]
+                lyrics_token = torch.tensor([500, 511], dtype=torch.long, device=device)  # [start][stop]
 
             # Encode style prompt with optional reference audio blending
             with torch.no_grad():
@@ -303,7 +297,7 @@
                     ref_waveform = torchaudio.functional.resample(ref_waveform, ref_sr, 24000)
 
                     # Encode reference audio with MuLan
-                    ref_waveform = ref_waveform.to(self.device)
+                    ref_waveform = ref_waveform.to(device)
                     audio_style_embed = self.mulan(audios=ref_waveform.unsqueeze(0))
                     text_style_embed = self.mulan(texts=[prompt])
 
@@ -316,10 +310,10 @@
                 else:
                     style_prompt_embed = self.mulan(texts=[prompt])
 
-            style_prompt_embed = style_prompt_embed.to(self.device).squeeze(0)
+            style_prompt_embed = style_prompt_embed.to(device).squeeze(0)
 
             # Use FP16 if on GPU
-            if self.device.type != 'cpu':
+            if device.type != 'cpu':
                 self.model = self.model.half()
                 self.decoder = self.decoder.half()
                 style_prompt_embed = style_prompt_embed.half()
@@ -361,16 +355,20 @@
             logger.error(f"DiffRhythm 2 generation failed: {str(e)}")
             return self._generate_placeholder(duration, sample_rate)
 
-    def _tokenize_lyrics(self, lyrics: str) -> torch.Tensor:
+    def _tokenize_lyrics(self, lyrics: str, device: torch.device = None) -> torch.Tensor:
         """
         Tokenize lyrics for DiffRhythm 2
 
         Args:
             lyrics: Lyrics text
+            device: Target device for tensor
 
         Returns:
             Tokenized lyrics tensor
         """
+        if device is None:
+            device = torch.device("cpu")
+
         try:
             # Structure tags
             STRUCT_INFO = {
@@ -396,12 +394,12 @@
             # Add structure: [start] + lyrics + [stop]
             lyrics_tokens = [STRUCT_INFO['[start]']] + tokens + [STRUCT_INFO['[stop]']]
 
-            return torch.tensor(lyrics_tokens, dtype=torch.long, device=self.device)
+            return torch.tensor(lyrics_tokens, dtype=torch.long, device=device)
 
         except Exception as e:
             logger.error(f"Lyrics tokenization failed: {str(e)}")
             # Return minimal structure
-            return torch.tensor([500, 511], dtype=torch.long, device=self.device)
+            return torch.tensor([500, 511], dtype=torch.long, device=device)
 
     def _generate_placeholder(self, duration: int, sample_rate: int) -> np.ndarray:
         """
backend/services/lyricmind_service.py CHANGED
@@ -27,12 +27,11 @@ class LyricMindService:
         logger.info(f"Using device: {self.device}")
 
     def _get_device(self):
-        """Get compute device (AMD GPU via DirectML or CPU)"""
-        try:
-            from utils.amd_gpu import DEFAULT_DEVICE
-            return DEFAULT_DEVICE
-        except:
-            return torch.device("cpu")
+        """Get compute device - for ZeroGPU, always start with CPU"""
+        # For ZeroGPU Spaces, device allocation happens dynamically inside @spaces.GPU functions
+        # Always return CPU here - GPU allocation is handled by the decorator
+        logger.info("Using CPU for initialization (GPU allocated by @spaces.GPU decorator)")
+        return torch.device("cpu")
 
     def _initialize_model(self):
         """Lazy load the model when first needed"""
@@ -54,10 +53,10 @@
             self.model = AutoModelForCausalLM.from_pretrained(
                 fallback_path,
                 trust_remote_code=True,
-                torch_dtype=torch.float32  # Use FP32 for AMD GPU compatibility
+                torch_dtype=torch.float32  # Use FP32 for compatibility
            )
-            self.model.to(self.device)
-            logger.info("✅ Text generation model loaded successfully")
+            # Model stays on CPU initially - moved to GPU inside @spaces.GPU function
+            logger.info("✅ Text generation model loaded successfully (on CPU)")
        else:
            logger.warning("Text generation model not found, using placeholder")
 
@@ -148,6 +147,14 @@
         try:
             logger.info("Generating lyrics with AI model...")
 
+            # Dynamically detect device (for ZeroGPU compatibility)
+            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+            logger.info(f"Using device for lyrics generation: {device}")
+
+            # Move model to device if not already there
+            if self.model.device != device:
+                self.model = self.model.to(device)
+
             # Create structured prompt with analysis context
             mood = analysis.get('mood', 'neutral')
             bpm = analysis.get('bpm', 120)
@@ -157,7 +164,7 @@
 
             # Tokenize
             inputs = self.tokenizer(full_prompt, return_tensors="pt")
-            inputs = {k: v.to(self.device) for k, v in inputs.items()}
+            inputs = {k: v.to(device) for k, v in inputs.items()}
 
             # Calculate max length based on duration
             max_length = min(200 + inputs["input_ids"].shape[1], 512)