""" Stem Enhancement Service Separates audio into stems (vocals, drums, bass, other) and enhances each independently """ import os import logging import numpy as np import soundfile as sf from typing import Optional, Dict from pathlib import Path logger = logging.getLogger(__name__) class StemEnhancementService: """Service for stem separation and per-stem enhancement""" def __init__(self): """Initialize stem enhancement service""" self.separator = None self.model_loaded = False logger.info("Stem enhancement service initialized") def _load_model(self): """Lazy load Demucs model (1.3GB download on first use)""" if self.model_loaded: return try: logger.info("Loading Demucs model (htdemucs_ft)...") import demucs.api # Use htdemucs_ft - best quality for music self.separator = demucs.api.Separator(model="htdemucs_ft") self.model_loaded = True logger.info("Demucs model loaded successfully") except Exception as e: logger.error(f"Failed to load Demucs model: {e}", exc_info=True) raise def enhance_clip( self, audio_path: str, output_path: Optional[str] = None, enhancement_level: str = "balanced" ) -> str: """ Enhance audio quality through stem separation and processing Args: audio_path: Input audio file path output_path: Output audio file path (optional) enhancement_level: 'fast', 'balanced', or 'maximum' Returns: Path to enhanced audio file """ try: logger.info(f"Starting stem enhancement: {audio_path} (level: {enhancement_level})") # Load model if not already loaded self._load_model() # Generate output path if not provided if output_path is None: base, ext = os.path.splitext(audio_path) output_path = f"{base}_enhanced{ext}" # Load audio logger.info(f"Loading audio from: {audio_path}") audio, sr = sf.read(audio_path) # Ensure audio is in correct format for Demucs (2D array, stereo) if audio.ndim == 1: # Mono to stereo audio = np.stack([audio, audio], axis=1) # Separate stems logger.info("Separating stems with Demucs...") origin, stems = self.separator.separate_audio_file(audio_path) # stems is a dict: {'vocals': ndarray, 'drums': ndarray, 'bass': ndarray, 'other': ndarray} logger.info(f"Stems separated: {list(stems.keys())}") # Process each stem based on enhancement level if enhancement_level == "fast": # Fast mode: minimal processing, just denoise vocals vocals_enhanced = self._denoise_stem(stems['vocals'], sr, intensity=0.5) drums_enhanced = stems['drums'] bass_enhanced = stems['bass'] other_enhanced = stems['other'] elif enhancement_level == "balanced": # Balanced mode: denoise + basic processing vocals_enhanced = self._enhance_vocals(stems['vocals'], sr, aggressive=False) drums_enhanced = self._enhance_drums(stems['drums'], sr, aggressive=False) bass_enhanced = self._enhance_bass(stems['bass'], sr, aggressive=False) other_enhanced = self._denoise_stem(stems['other'], sr, intensity=0.5) else: # maximum # Maximum mode: full processing vocals_enhanced = self._enhance_vocals(stems['vocals'], sr, aggressive=True) drums_enhanced = self._enhance_drums(stems['drums'], sr, aggressive=True) bass_enhanced = self._enhance_bass(stems['bass'], sr, aggressive=True) other_enhanced = self._enhance_other(stems['other'], sr) # Reassemble stems logger.info("Reassembling enhanced stems...") enhanced_audio = ( vocals_enhanced + drums_enhanced + bass_enhanced + other_enhanced ) # Normalize to prevent clipping max_val = np.abs(enhanced_audio).max() if max_val > 0: enhanced_audio = enhanced_audio / max_val * 0.95 # Save enhanced audio logger.info(f"Saving enhanced audio to: {output_path}") sf.write(output_path, enhanced_audio, sr) logger.info(f"Stem enhancement complete: {output_path}") return output_path except Exception as e: logger.error(f"Stem enhancement failed: {e}", exc_info=True) # Return original if enhancement fails return audio_path def _denoise_stem(self, stem: np.ndarray, sr: int, intensity: float = 1.0) -> np.ndarray: """ Apply noise reduction to a stem Args: stem: Audio stem (ndarray) sr: Sample rate intensity: Denoising intensity (0-1) Returns: Denoised stem """ try: import noisereduce as nr # Handle stereo if stem.ndim == 2: # Process each channel denoised = np.zeros_like(stem) for ch in range(stem.shape[1]): denoised[:, ch] = nr.reduce_noise( y=stem[:, ch], sr=sr, stationary=True, prop_decrease=intensity, freq_mask_smooth_hz=500, time_mask_smooth_ms=50 ) return denoised else: # Mono return nr.reduce_noise( y=stem, sr=sr, stationary=True, prop_decrease=intensity ) except Exception as e: logger.warning(f"Denoising failed: {e}, returning original stem") return stem def _enhance_vocals(self, vocals: np.ndarray, sr: int, aggressive: bool = False) -> np.ndarray: """ Enhance vocal stem (critical for LyricMind AI vocals) Args: vocals: Vocal stem sr: Sample rate aggressive: Use more aggressive processing Returns: Enhanced vocals """ try: # 1. Denoise (reduce AI artifacts) intensity = 1.0 if aggressive else 0.7 vocals_clean = self._denoise_stem(vocals, sr, intensity=intensity) # 2. Apply subtle compression and EQ with Pedalboard try: from pedalboard import Pedalboard, Compressor, HighShelfFilter, LowpassFilter board = Pedalboard([ # Remove very high frequencies (often artifacts) LowpassFilter(cutoff_frequency_hz=16000), # Subtle compression for consistency Compressor( threshold_db=-20, ratio=3 if aggressive else 2, attack_ms=5, release_ms=50 ), # Add air HighShelfFilter(cutoff_frequency_hz=8000, gain_db=2 if aggressive else 1) ]) vocals_processed = board(vocals_clean, sr) logger.info(f"Vocals enhanced (aggressive={aggressive})") return vocals_processed except Exception as e: logger.warning(f"Pedalboard processing failed: {e}, using denoised only") return vocals_clean except Exception as e: logger.error(f"Vocal enhancement failed: {e}", exc_info=True) return vocals def _enhance_drums(self, drums: np.ndarray, sr: int, aggressive: bool = False) -> np.ndarray: """ Enhance drum stem Args: drums: Drum stem sr: Sample rate aggressive: Use more aggressive processing Returns: Enhanced drums """ try: from pedalboard import Pedalboard, NoiseGate, Compressor board = Pedalboard([ # Gate to clean up between hits NoiseGate( threshold_db=-40 if aggressive else -35, ratio=10, attack_ms=1, release_ms=100 ), # Compression for punch Compressor( threshold_db=-15, ratio=4 if aggressive else 3, attack_ms=10, release_ms=100 ) ]) drums_processed = board(drums, sr) logger.info(f"Drums enhanced (aggressive={aggressive})") return drums_processed except Exception as e: logger.warning(f"Drum enhancement failed: {e}, returning original") return drums def _enhance_bass(self, bass: np.ndarray, sr: int, aggressive: bool = False) -> np.ndarray: """ Enhance bass stem Args: bass: Bass stem sr: Sample rate aggressive: Use more aggressive processing Returns: Enhanced bass """ try: from pedalboard import Pedalboard, HighpassFilter, Compressor board = Pedalboard([ # Remove sub-bass rumble HighpassFilter(cutoff_frequency_hz=30), # Compression for consistency Compressor( threshold_db=-18, ratio=3 if aggressive else 2.5, attack_ms=30, release_ms=200 ) ]) bass_processed = board(bass, sr) logger.info(f"Bass enhanced (aggressive={aggressive})") return bass_processed except Exception as e: logger.warning(f"Bass enhancement failed: {e}, returning original") return bass def _enhance_other(self, other: np.ndarray, sr: int) -> np.ndarray: """ Enhance other instruments stem Args: other: Other instruments stem sr: Sample rate Returns: Enhanced other stem """ try: # Spectral cleanup with moderate denoising other_clean = self._denoise_stem(other, sr, intensity=0.5) logger.info("Other instruments enhanced") return other_clean except Exception as e: logger.warning(f"Other enhancement failed: {e}, returning original") return other