""" Music Generation Studio - HuggingFace Spaces Deployment Main application file for Gradio interface """ import os import sys import gradio as gr import logging from pathlib import Path import shutil import subprocess # Run DiffRhythm2 source setup if needed setup_script = Path(__file__).parent / "setup_diffrhythm2_src.sh" if setup_script.exists(): try: subprocess.run(["bash", str(setup_script)], check=True) except Exception as e: print(f"Warning: Failed to run setup script: {e}") # Configure environment for HuggingFace Spaces (espeak-ng paths, etc.) import hf_config # Setup paths for HuggingFace Spaces SPACE_DIR = Path(__file__).parent sys.path.insert(0, str(SPACE_DIR / 'backend')) # Configure logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) logger = logging.getLogger(__name__) # Import services try: from services.diffrhythm_service import DiffRhythmService from services.lyricmind_service import LyricMindService from services.timeline_service import TimelineService from services.export_service import ExportService from config.settings import Config from utils.prompt_analyzer import PromptAnalyzer except ImportError as e: logger.error(f"Import error: {e}") raise # Initialize configuration config = Config() # Create necessary directories os.makedirs("outputs", exist_ok=True) os.makedirs("outputs/music", exist_ok=True) os.makedirs("outputs/mixed", exist_ok=True) os.makedirs("models", exist_ok=True) os.makedirs("logs", exist_ok=True) # Initialize services timeline_service = TimelineService() export_service = ExportService() # Lazy-load AI services (heavy models) diffrhythm_service = None lyricmind_service = None def get_diffrhythm_service(): """Lazy load DiffRhythm service""" global diffrhythm_service if diffrhythm_service is None: logger.info("Loading DiffRhythm2 model...") diffrhythm_service = DiffRhythmService(model_path=config.DIFFRHYTHM_MODEL_PATH) logger.info("DiffRhythm2 model loaded") return diffrhythm_service def get_lyricmind_service(): """Lazy load LyricMind service""" global lyricmind_service if lyricmind_service is None: logger.info("Loading LyricMind model...") lyricmind_service = LyricMindService(model_path=config.LYRICMIND_MODEL_PATH) logger.info("LyricMind model loaded") return lyricmind_service def generate_lyrics(prompt: str, duration: int, progress=gr.Progress()): """Generate lyrics from prompt using analysis""" try: if not prompt or not prompt.strip(): return "❌ Please enter a prompt" progress(0, desc="🔍 Analyzing prompt...") logger.info(f"Generating lyrics for: {prompt}") # Analyze prompt analysis = PromptAnalyzer.analyze(prompt) genre = analysis.get('genres', ['general'])[0] if analysis.get('genres') else 'general' mood = analysis.get('mood', 'unknown') logger.info(f"Analysis - Genre: {genre}, Mood: {mood}") progress(0.3, desc=f"âœī¸ Generating {genre} lyrics...") service = get_lyricmind_service() lyrics = service.generate( prompt=prompt, duration=duration, prompt_analysis=analysis ) progress(1.0, desc="✅ Lyrics generated!") return lyrics except Exception as e: logger.error(f"Error generating lyrics: {e}", exc_info=True) return f"❌ Error: {str(e)}" def generate_music(prompt: str, lyrics: str, lyrics_mode: str, duration: int, position: str, progress=gr.Progress()): """Generate music clip and add to timeline""" try: if not prompt or not prompt.strip(): return "❌ Please enter a music prompt", get_timeline_display(), None # Estimate time (CPU on HF Spaces) est_time = int(duration * 4) # Conservative estimate for CPU progress(0, desc=f"🔍 Analyzing prompt... (Est. {est_time}s)") logger.info(f"Generating music: {prompt}, mode={lyrics_mode}, duration={duration}s") # Analyze prompt analysis = PromptAnalyzer.analyze(prompt) genre = analysis.get('genres', ['general'])[0] if analysis.get('genres') else 'general' bpm = analysis.get('bpm', 120) mood = analysis.get('mood', 'neutral') logger.info(f"Analysis - Genre: {genre}, BPM: {bpm}, Mood: {mood}") # Determine lyrics based on mode lyrics_to_use = None if lyrics_mode == "Instrumental": logger.info("Generating instrumental (no vocals)") progress(0.1, desc=f"🎹 Preparing instrumental generation... ({est_time}s)") elif lyrics_mode == "User Lyrics": if not lyrics or not lyrics.strip(): return "❌ Please enter lyrics or switch mode", get_timeline_display(), None lyrics_to_use = lyrics.strip() logger.info("Using user-provided lyrics") progress(0.1, desc=f"🎤 Preparing vocal generation... ({est_time}s)") elif lyrics_mode == "Auto Lyrics": if lyrics and lyrics.strip(): lyrics_to_use = lyrics.strip() logger.info("Using existing lyrics from textbox") progress(0.1, desc=f"🎤 Using provided lyrics... ({est_time}s)") else: progress(0.1, desc="âœī¸ Generating lyrics...") logger.info("Auto-generating lyrics...") lyric_service = get_lyricmind_service() lyrics_to_use = lyric_service.generate( prompt=prompt, duration=duration, prompt_analysis=analysis ) logger.info(f"Generated {len(lyrics_to_use)} characters of lyrics") progress(0.25, desc=f"đŸŽĩ Lyrics ready, generating music... ({est_time}s)") # Generate music progress(0.3, desc=f"đŸŽŧ Generating {genre} at {bpm} BPM... ({est_time}s)") service = get_diffrhythm_service() final_path = service.generate( prompt=prompt, duration=duration, lyrics=lyrics_to_use ) # Add to timeline progress(0.9, desc="📊 Adding to timeline...") clip_id = os.path.basename(final_path).split('.')[0] from models.schemas import ClipPosition clip_info = timeline_service.add_clip( clip_id=clip_id, file_path=final_path, duration=float(duration), position=ClipPosition(position) ) logger.info(f"Music added to timeline at position {clip_info['timeline_position']}") # Build status message progress(1.0, desc="✅ Complete!") status_msg = f"✅ Music generated successfully!\n" status_msg += f"🎸 Genre: {genre} | đŸĨ BPM: {bpm} | 🎭 Mood: {mood}\n" status_msg += f"🎤 Mode: {lyrics_mode} | 📍 Position: {position}\n" if lyrics_mode == "Auto Lyrics" and lyrics_to_use and not lyrics: status_msg += "âœī¸ (Lyrics auto-generated)" return status_msg, get_timeline_display(), final_path except Exception as e: logger.error(f"Error generating music: {e}", exc_info=True) return f"❌ Error: {str(e)}", get_timeline_display(), None def get_timeline_display(): """Get timeline clips as formatted text""" clips = timeline_service.get_all_clips() if not clips: return "📭 Timeline is empty. Generate clips to get started!" total_duration = timeline_service.get_total_duration() display = f"**📊 Timeline ({len(clips)} clips, {format_duration(total_duration)} total)**\n\n" for i, clip in enumerate(clips, 1): display += f"**{i}.** `{clip['clip_id'][:12]}...` | " display += f"âąī¸ {format_duration(clip['duration'])} | " display += f"â–ļī¸ {format_duration(clip['start_time'])}\n" return display def remove_clip(clip_number: int): """Remove a clip from timeline""" try: clips = timeline_service.get_all_clips() if not clips: return "📭 Timeline is empty", get_timeline_display() if clip_number < 1 or clip_number > len(clips): return f"❌ Invalid clip number. Choose 1-{len(clips)}", get_timeline_display() clip_id = clips[clip_number - 1]['clip_id'] timeline_service.remove_clip(clip_id) return f"✅ Clip {clip_number} removed", get_timeline_display() except Exception as e: logger.error(f"Error removing clip: {e}", exc_info=True) return f"❌ Error: {str(e)}", get_timeline_display() def clear_timeline(): """Clear all clips from timeline""" try: timeline_service.clear() return "✅ Timeline cleared", get_timeline_display() except Exception as e: logger.error(f"Error clearing timeline: {e}", exc_info=True) return f"❌ Error: {str(e)}", get_timeline_display() def export_timeline(filename: str, export_format: str, progress=gr.Progress()): """Export timeline to audio file""" try: clips = timeline_service.get_all_clips() if not clips: return "❌ No clips to export", None if not filename or not filename.strip(): filename = "output" progress(0, desc="🔄 Merging clips...") logger.info(f"Exporting timeline: {filename}.{export_format}") export_service.timeline_service = timeline_service progress(0.5, desc="💾 Encoding audio...") output_path = export_service.merge_clips( filename=filename, export_format=export_format ) if output_path: progress(1.0, desc="✅ Export complete!") return f"✅ Exported: {os.path.basename(output_path)}", output_path else: return "❌ Export failed", None except Exception as e: logger.error(f"Error exporting: {e}", exc_info=True) return f"❌ Error: {str(e)}", None def format_duration(seconds: float) -> str: """Format duration as MM:SS""" mins = int(seconds // 60) secs = int(seconds % 60) return f"{mins}:{secs:02d}" # Create Gradio interface with gr.Blocks( title="đŸŽĩ Music Generation Studio", theme=gr.themes.Soft(primary_hue="purple", secondary_hue="pink") ) as app: gr.Markdown( """ # đŸŽĩ Music Generation Studio Create AI-powered music with DiffRhythm2 and LyricMind AI 💡 **Tip**: Start with 10-20 second clips for faster generation on HuggingFace Spaces """ ) with gr.Row(): # Left Column - Generation with gr.Column(scale=2): gr.Markdown("### đŸŽŧ Music Generation") prompt_input = gr.Textbox( label="đŸŽ¯ Music Prompt", placeholder="energetic rock song with electric guitar at 140 BPM", lines=3, info="Describe the music style, instruments, tempo, and mood" ) lyrics_mode = gr.Radio( choices=["Instrumental", "User Lyrics", "Auto Lyrics"], value="Instrumental", label="🎤 Vocal Mode", info="Instrumental: no vocals | User: provide lyrics | Auto: AI-generated" ) with gr.Row(): auto_gen_btn = gr.Button("âœī¸ Generate Lyrics", size="sm") lyrics_input = gr.Textbox( label="📝 Lyrics", placeholder="Enter lyrics or click 'Generate Lyrics'...", lines=6 ) with gr.Row(): duration_input = gr.Slider( minimum=10, maximum=60, value=20, step=5, label="âąī¸ Duration (seconds)", info="Shorter = faster generation" ) position_input = gr.Radio( choices=["intro", "previous", "next", "outro"], value="next", label="📍 Position" ) generate_btn = gr.Button( "✨ Generate Music Clip", variant="primary", size="lg" ) gen_status = gr.Textbox(label="📊 Status", lines=3, interactive=False) audio_output = gr.Audio(label="🎧 Preview", type="filepath") # Right Column - Timeline with gr.Column(scale=1): gr.Markdown("### 📊 Timeline") timeline_display = gr.Textbox( label="Clips", value=get_timeline_display(), lines=12, interactive=False ) with gr.Row(): clip_number_input = gr.Number( label="Clip #", precision=0, minimum=1, scale=1 ) remove_btn = gr.Button("đŸ—‘ī¸ Remove", size="sm", scale=1) clear_btn = gr.Button("đŸ—‘ī¸ Clear All", variant="stop") timeline_status = gr.Textbox(label="Status", lines=1, interactive=False) # Export Section gr.Markdown("---") gr.Markdown("### 💾 Export") with gr.Row(): export_filename = gr.Textbox( label="Filename", value="my_song", scale=2 ) export_format = gr.Dropdown( choices=["wav", "mp3"], value="wav", label="Format", scale=1 ) export_btn = gr.Button("💾 Export", variant="primary", scale=1) export_status = gr.Textbox(label="Status", lines=1, interactive=False) export_audio = gr.Audio(label="đŸ“Ĩ Download", type="filepath") # Event handlers auto_gen_btn.click( fn=generate_lyrics, inputs=[prompt_input, duration_input], outputs=lyrics_input ) generate_btn.click( fn=generate_music, inputs=[prompt_input, lyrics_input, lyrics_mode, duration_input, position_input], outputs=[gen_status, timeline_display, audio_output] ) remove_btn.click( fn=remove_clip, inputs=clip_number_input, outputs=[timeline_status, timeline_display] ) clear_btn.click( fn=clear_timeline, outputs=[timeline_status, timeline_display] ) export_btn.click( fn=export_timeline, inputs=[export_filename, export_format], outputs=[export_status, export_audio] ) # Help section with gr.Accordion("â„šī¸ Help & Tips", open=False): gr.Markdown( """ ## 🚀 Quick Start 1. **Enter a prompt**: "upbeat pop song with synth at 128 BPM" 2. **Choose mode**: Instrumental (fastest) or with vocals 3. **Set duration**: Start with 10-20s for quick results 4. **Generate**: Click the button and wait ~2-4 minutes 5. **Export**: Download your complete song ## ⚡ Performance Tips - **Shorter clips = faster**: 10-20s clips generate in ~1-2 minutes - **Instrumental mode**: ~30% faster than with vocals - **HF Spaces uses CPU**: Expect 2-4 minutes per 30s clip - **Build incrementally**: Generate short clips, then combine ## đŸŽ¯ Prompt Tips - **Be specific**: "energetic rock with distorted guitar" > "rock song" - **Include BPM**: "at 140 BPM" helps set tempo - **Mention instruments**: "with piano and drums" - **Describe mood**: "melancholic", "upbeat", "aggressive" ## 🎤 Vocal Modes - **Instrumental**: Pure music, no vocals (fastest) - **User Lyrics**: Provide your own lyrics - **Auto Lyrics**: AI generates lyrics based on prompt ## 📊 Timeline - Clips are arranged sequentially - Remove or clear clips as needed - Export combines all clips into one file --- âąī¸ **Average Generation Time**: 2-4 minutes per 30-second clip on CPU đŸŽĩ **Models**: DiffRhythm2 + MuQ-MuLan + LyricMind AI """ ) # Configure and launch if __name__ == "__main__": logger.info("đŸŽĩ Starting Music Generation Studio on HuggingFace Spaces...") app.queue( default_concurrency_limit=1, max_size=5 ) app.launch()