Spaces: Running on Zero
"""
Music Generation Studio - HuggingFace Spaces Deployment
Main application file for Gradio interface
"""
import os
import sys
import gradio as gr
import logging
from pathlib import Path
import shutil
import subprocess

# Run DiffRhythm2 source setup if needed.
# Best-effort: a failure is reported but does not abort startup.
setup_script = Path(__file__).parent / "setup_diffrhythm2_src.sh"
if setup_script.exists():
    try:
        subprocess.run(["bash", str(setup_script)], check=True)
    except Exception as e:
        print(f"Warning: Failed to run setup script: {e}")

# Configure environment for HuggingFace Spaces (espeak-ng paths, etc.)
# NOTE: imported purely for its side effects.
import hf_config

# Setup paths for HuggingFace Spaces: make the backend/ package importable.
SPACE_DIR = Path(__file__).parent
sys.path.insert(0, str(SPACE_DIR / 'backend'))

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Import services (resolved via the backend/ path inserted above).
try:
    from services.diffrhythm_service import DiffRhythmService
    from services.lyricmind_service import LyricMindService
    from services.timeline_service import TimelineService
    from services.export_service import ExportService
    from config.settings import Config
    from utils.prompt_analyzer import PromptAnalyzer
except ImportError as e:
    # Fail fast: the app cannot run without its backend services.
    logger.error(f"Import error: {e}")
    raise

# Initialize configuration
config = Config()

# Create necessary directories (idempotent across restarts).
os.makedirs("outputs", exist_ok=True)
os.makedirs("outputs/music", exist_ok=True)
os.makedirs("outputs/mixed", exist_ok=True)
os.makedirs("models", exist_ok=True)
os.makedirs("logs", exist_ok=True)

# Initialize lightweight services eagerly.
timeline_service = TimelineService()
export_service = ExportService()

# Lazy-load AI services (heavy models) on first request instead of at boot.
diffrhythm_service = None
lyricmind_service = None
def get_diffrhythm_service():
    """Return the shared DiffRhythmService, creating it on first use.

    The model is heavy, so instantiation is deferred until a generation
    request actually needs it; subsequent calls reuse the cached instance.
    """
    global diffrhythm_service
    if diffrhythm_service is not None:
        return diffrhythm_service
    logger.info("Loading DiffRhythm2 model...")
    diffrhythm_service = DiffRhythmService(model_path=config.DIFFRHYTHM_MODEL_PATH)
    logger.info("DiffRhythm2 model loaded")
    return diffrhythm_service
def get_lyricmind_service():
    """Return the shared LyricMindService, instantiating it lazily.

    Mirrors get_diffrhythm_service: load once, then serve the cached object.
    """
    global lyricmind_service
    if lyricmind_service is not None:
        return lyricmind_service
    logger.info("Loading LyricMind model...")
    lyricmind_service = LyricMindService(model_path=config.LYRICMIND_MODEL_PATH)
    logger.info("LyricMind model loaded")
    return lyricmind_service
def generate_lyrics(prompt: str, duration: int, progress=gr.Progress()):
    """Produce lyrics for *prompt* using prompt analysis plus the LyricMind model.

    Returns the generated lyrics text, or a user-facing error string on
    invalid input / failure (the UI shows the return value either way).
    """
    try:
        if not prompt or not prompt.strip():
            return "β Please enter a prompt"
        progress(0, desc="π Analyzing prompt...")
        logger.info(f"Generating lyrics for: {prompt}")
        # Derive genre/mood hints from the free-text prompt.
        analysis = PromptAnalyzer.analyze(prompt)
        detected_genres = analysis.get('genres')
        genre = detected_genres[0] if detected_genres else 'general'
        mood = analysis.get('mood', 'unknown')
        logger.info(f"Analysis - Genre: {genre}, Mood: {mood}")
        progress(0.3, desc=f"βοΈ Generating {genre} lyrics...")
        result = get_lyricmind_service().generate(
            prompt=prompt,
            duration=duration,
            prompt_analysis=analysis,
        )
        progress(1.0, desc="β Lyrics generated!")
        return result
    except Exception as e:
        logger.error(f"Error generating lyrics: {e}", exc_info=True)
        return f"β Error: {str(e)}"
def generate_music(prompt: str, lyrics: str, lyrics_mode: str, duration: int, position: str, progress=gr.Progress()):
    """Generate music clip and add to timeline.

    Args:
        prompt: Free-text description of the desired music.
        lyrics: Text currently in the lyrics box (may be empty).
        lyrics_mode: "Instrumental", "User Lyrics", or "Auto Lyrics".
        duration: Clip length in seconds.
        position: Timeline placement name, converted to ClipPosition below.
        progress: Gradio progress reporter (injected by the UI).

    Returns:
        Tuple of (status message, timeline display text, audio path or None).
        Errors are caught and returned as a "β Error: ..." status string.
    """
    try:
        if not prompt or not prompt.strip():
            return "β Please enter a music prompt", get_timeline_display(), None
        # Estimate time (CPU on HF Spaces)
        est_time = int(duration * 4)  # Conservative estimate for CPU
        progress(0, desc=f"π Analyzing prompt... (Est. {est_time}s)")
        logger.info(f"Generating music: {prompt}, mode={lyrics_mode}, duration={duration}s")
        # Analyze prompt for genre/BPM/mood hints.
        analysis = PromptAnalyzer.analyze(prompt)
        genre = analysis.get('genres', ['general'])[0] if analysis.get('genres') else 'general'
        bpm = analysis.get('bpm', 120)
        mood = analysis.get('mood', 'neutral')
        logger.info(f"Analysis - Genre: {genre}, BPM: {bpm}, Mood: {mood}")
        # Determine lyrics based on mode; None means instrumental.
        lyrics_to_use = None
        if lyrics_mode == "Instrumental":
            logger.info("Generating instrumental (no vocals)")
            progress(0.1, desc=f"πΉ Preparing instrumental generation... ({est_time}s)")
        elif lyrics_mode == "User Lyrics":
            if not lyrics or not lyrics.strip():
                return "β Please enter lyrics or switch mode", get_timeline_display(), None
            lyrics_to_use = lyrics.strip()
            logger.info("Using user-provided lyrics")
            progress(0.1, desc=f"π€ Preparing vocal generation... ({est_time}s)")
        elif lyrics_mode == "Auto Lyrics":
            # Prefer lyrics the user already has in the textbox over regenerating.
            if lyrics and lyrics.strip():
                lyrics_to_use = lyrics.strip()
                logger.info("Using existing lyrics from textbox")
                progress(0.1, desc=f"π€ Using provided lyrics... ({est_time}s)")
            else:
                progress(0.1, desc="βοΈ Generating lyrics...")
                logger.info("Auto-generating lyrics...")
                lyric_service = get_lyricmind_service()
                lyrics_to_use = lyric_service.generate(
                    prompt=prompt,
                    duration=duration,
                    prompt_analysis=analysis
                )
                logger.info(f"Generated {len(lyrics_to_use)} characters of lyrics")
                progress(0.25, desc=f"π΅ Lyrics ready, generating music... ({est_time}s)")
        # Generate music (the slow step; runs the DiffRhythm2 model).
        progress(0.3, desc=f"πΌ Generating {genre} at {bpm} BPM... ({est_time}s)")
        service = get_diffrhythm_service()
        final_path = service.generate(
            prompt=prompt,
            duration=duration,
            lyrics=lyrics_to_use
        )
        # Add to timeline; the clip id is the output filename without extension.
        progress(0.9, desc="π Adding to timeline...")
        clip_id = os.path.basename(final_path).split('.')[0]
        from models.schemas import ClipPosition
        clip_info = timeline_service.add_clip(
            clip_id=clip_id,
            file_path=final_path,
            duration=float(duration),
            position=ClipPosition(position)
        )
        logger.info(f"Music added to timeline at position {clip_info['timeline_position']}")
        # Build status message shown in the UI.
        progress(1.0, desc="β Complete!")
        status_msg = f"β Music generated successfully!\n"
        status_msg += f"πΈ Genre: {genre} | π₯ BPM: {bpm} | π Mood: {mood}\n"
        status_msg += f"π€ Mode: {lyrics_mode} | π Position: {position}\n"
        # Only note auto-generation when the textbox was empty (lyrics were synthesized).
        if lyrics_mode == "Auto Lyrics" and lyrics_to_use and not lyrics:
            status_msg += "βοΈ (Lyrics auto-generated)"
        return status_msg, get_timeline_display(), final_path
    except Exception as e:
        logger.error(f"Error generating music: {e}", exc_info=True)
        return f"β Error: {str(e)}", get_timeline_display(), None
def get_timeline_display():
    """Render the current timeline as a formatted text summary for the UI."""
    clips = timeline_service.get_all_clips()
    if not clips:
        return "π Timeline is empty. Generate clips to get started!"
    total_duration = timeline_service.get_total_duration()
    # Header line, then one line per clip (id prefix, length, start offset).
    parts = [f"**π Timeline ({len(clips)} clips, {format_duration(total_duration)} total)**\n\n"]
    for idx, clip in enumerate(clips, 1):
        parts.append(
            f"**{idx}.** `{clip['clip_id'][:12]}...` | "
            f"β±οΈ {format_duration(clip['duration'])} | "
            f"βΆοΈ {format_duration(clip['start_time'])}\n"
        )
    return "".join(parts)
def remove_clip(clip_number: int):
    """Remove a clip from the timeline by its 1-based display number.

    Args:
        clip_number: Position shown in the timeline display (1-based).
            May arrive as None (empty gr.Number field) or as a float.

    Returns:
        Tuple of (status message, refreshed timeline display text).
    """
    try:
        clips = timeline_service.get_all_clips()
        if not clips:
            return "π Timeline is empty", get_timeline_display()
        # BUG FIX: gr.Number yields None when the field is empty; comparing
        # None < 1 raised TypeError and surfaced as a generic error message.
        # Also coerce to int since Gradio may deliver a float.
        if clip_number is None:
            return f"β Invalid clip number. Choose 1-{len(clips)}", get_timeline_display()
        clip_number = int(clip_number)
        if clip_number < 1 or clip_number > len(clips):
            return f"β Invalid clip number. Choose 1-{len(clips)}", get_timeline_display()
        clip_id = clips[clip_number - 1]['clip_id']
        timeline_service.remove_clip(clip_id)
        return f"β Clip {clip_number} removed", get_timeline_display()
    except Exception as e:
        logger.error(f"Error removing clip: {e}", exc_info=True)
        return f"β Error: {str(e)}", get_timeline_display()
def clear_timeline():
    """Delete every clip from the timeline and refresh the display."""
    try:
        timeline_service.clear()
        return "β Timeline cleared", get_timeline_display()
    except Exception as exc:
        # Report the failure but still return a display so the UI stays usable.
        logger.error(f"Error clearing timeline: {exc}", exc_info=True)
        return f"β Error: {str(exc)}", get_timeline_display()
def export_timeline(filename: str, export_format: str, progress=gr.Progress()):
    """Export the merged timeline to a single audio file.

    Args:
        filename: Base name for the output file (defaults to "output" if blank).
        export_format: "wav" or "mp3" (as offered by the UI dropdown).
        progress: Gradio progress reporter (injected by the UI).

    Returns:
        Tuple of (status message, output file path or None).
    """
    try:
        clips = timeline_service.get_all_clips()
        if not clips:
            return "β No clips to export", None
        # Fall back to a default name; also trim stray whitespace the user typed.
        filename = (filename or "").strip() or "output"
        progress(0, desc="π Merging clips...")
        # BUG FIX: this log line previously printed the literal "(unknown)"
        # instead of interpolating the actual filename.
        logger.info(f"Exporting timeline: {filename}.{export_format}")
        # Hand the current timeline to the export service before merging.
        export_service.timeline_service = timeline_service
        progress(0.5, desc="πΎ Encoding audio...")
        output_path = export_service.merge_clips(
            filename=filename,
            export_format=export_format
        )
        if output_path:
            progress(1.0, desc="β Export complete!")
            return f"β Exported: {os.path.basename(output_path)}", output_path
        return "β Export failed", None
    except Exception as e:
        logger.error(f"Error exporting: {e}", exc_info=True)
        return f"β Error: {str(e)}", None
def format_duration(seconds: float) -> str:
    """Render a second count as M:SS (seconds zero-padded to two digits)."""
    mins, secs = divmod(seconds, 60)
    return f"{int(mins)}:{int(secs):02d}"
# Create Gradio interface.
# Layout: header, a two-column row (generation controls | timeline),
# an export section, then event wiring and a collapsible help accordion.
with gr.Blocks(
    title="π΅ Music Generation Studio",
    theme=gr.themes.Soft(primary_hue="purple", secondary_hue="pink")
) as app:
    # Header banner
    gr.Markdown(
        """
        # π΅ Music Generation Studio
        Create AI-powered music with DiffRhythm2 and LyricMind AI
        π‘ **Tip**: Start with 10-20 second clips for faster generation on HuggingFace Spaces
        """
    )
    with gr.Row():
        # Left Column - Generation
        with gr.Column(scale=2):
            gr.Markdown("### πΌ Music Generation")
            prompt_input = gr.Textbox(
                label="π― Music Prompt",
                placeholder="energetic rock song with electric guitar at 140 BPM",
                lines=3,
                info="Describe the music style, instruments, tempo, and mood"
            )
            lyrics_mode = gr.Radio(
                choices=["Instrumental", "User Lyrics", "Auto Lyrics"],
                value="Instrumental",
                label="π€ Vocal Mode",
                info="Instrumental: no vocals | User: provide lyrics | Auto: AI-generated"
            )
            with gr.Row():
                auto_gen_btn = gr.Button("βοΈ Generate Lyrics", size="sm")
            lyrics_input = gr.Textbox(
                label="π Lyrics",
                placeholder="Enter lyrics or click 'Generate Lyrics'...",
                lines=6
            )
            with gr.Row():
                duration_input = gr.Slider(
                    minimum=10,
                    maximum=60,
                    value=20,
                    step=5,
                    label="β±οΈ Duration (seconds)",
                    info="Shorter = faster generation"
                )
                position_input = gr.Radio(
                    choices=["intro", "previous", "next", "outro"],
                    value="next",
                    label="π Position"
                )
            generate_btn = gr.Button(
                "β¨ Generate Music Clip",
                variant="primary",
                size="lg"
            )
            gen_status = gr.Textbox(label="π Status", lines=3, interactive=False)
            audio_output = gr.Audio(label="π§ Preview", type="filepath")
        # Right Column - Timeline
        with gr.Column(scale=1):
            gr.Markdown("### π Timeline")
            timeline_display = gr.Textbox(
                label="Clips",
                value=get_timeline_display(),
                lines=12,
                interactive=False
            )
            with gr.Row():
                clip_number_input = gr.Number(
                    label="Clip #",
                    precision=0,
                    minimum=1,
                    scale=1
                )
                remove_btn = gr.Button("ποΈ Remove", size="sm", scale=1)
            clear_btn = gr.Button("ποΈ Clear All", variant="stop")
            timeline_status = gr.Textbox(label="Status", lines=1, interactive=False)
    # Export Section
    gr.Markdown("---")
    gr.Markdown("### πΎ Export")
    with gr.Row():
        export_filename = gr.Textbox(
            label="Filename",
            value="my_song",
            scale=2
        )
        export_format = gr.Dropdown(
            choices=["wav", "mp3"],
            value="wav",
            label="Format",
            scale=1
        )
        export_btn = gr.Button("πΎ Export", variant="primary", scale=1)
    export_status = gr.Textbox(label="Status", lines=1, interactive=False)
    export_audio = gr.Audio(label="π₯ Download", type="filepath")
    # Event handlers: wire each button to its backend function.
    auto_gen_btn.click(
        fn=generate_lyrics,
        inputs=[prompt_input, duration_input],
        outputs=lyrics_input
    )
    generate_btn.click(
        fn=generate_music,
        inputs=[prompt_input, lyrics_input, lyrics_mode, duration_input, position_input],
        outputs=[gen_status, timeline_display, audio_output]
    )
    remove_btn.click(
        fn=remove_clip,
        inputs=clip_number_input,
        outputs=[timeline_status, timeline_display]
    )
    clear_btn.click(
        fn=clear_timeline,
        outputs=[timeline_status, timeline_display]
    )
    export_btn.click(
        fn=export_timeline,
        inputs=[export_filename, export_format],
        outputs=[export_status, export_audio]
    )
    # Help section (collapsed by default)
    with gr.Accordion("βΉοΈ Help & Tips", open=False):
        gr.Markdown(
            """
            ## π Quick Start
            1. **Enter a prompt**: "upbeat pop song with synth at 128 BPM"
            2. **Choose mode**: Instrumental (fastest) or with vocals
            3. **Set duration**: Start with 10-20s for quick results
            4. **Generate**: Click the button and wait ~2-4 minutes
            5. **Export**: Download your complete song
            ## β‘ Performance Tips
            - **Shorter clips = faster**: 10-20s clips generate in ~1-2 minutes
            - **Instrumental mode**: ~30% faster than with vocals
            - **HF Spaces uses CPU**: Expect 2-4 minutes per 30s clip
            - **Build incrementally**: Generate short clips, then combine
            ## π― Prompt Tips
            - **Be specific**: "energetic rock with distorted guitar" > "rock song"
            - **Include BPM**: "at 140 BPM" helps set tempo
            - **Mention instruments**: "with piano and drums"
            - **Describe mood**: "melancholic", "upbeat", "aggressive"
            ## π€ Vocal Modes
            - **Instrumental**: Pure music, no vocals (fastest)
            - **User Lyrics**: Provide your own lyrics
            - **Auto Lyrics**: AI generates lyrics based on prompt
            ## π Timeline
            - Clips are arranged sequentially
            - Remove or clear clips as needed
            - Export combines all clips into one file
            ---
            β±οΈ **Average Generation Time**: 2-4 minutes per 30-second clip on CPU
            π΅ **Models**: DiffRhythm2 + MuQ-MuLan + LyricMind AI
            """
        )
# Configure and launch
if __name__ == "__main__":
    logger.info("π΅ Starting Music Generation Studio on HuggingFace Spaces...")
    # Single-worker queue: generation is heavy and CPU-bound on Spaces,
    # so requests are serialized; at most 5 may wait in line.
    app.queue(
        default_concurrency_limit=1,
        max_size=5
    )
    app.launch()