# lemm-test-100 / app.py — Gamahea
# Deploy Music Generation Studio - 2025-12-12 16:01 (commit aad9d66)
"""
Music Generation Studio - HuggingFace Spaces Deployment
Main application file for Gradio interface
"""
import os
import sys
import gradio as gr
import logging
from pathlib import Path
import shutil
import subprocess
# Run DiffRhythm2 source setup if needed (best-effort: a failure here is
# logged but does not abort startup, since the sources may already exist).
setup_script = Path(__file__).parent / "setup_diffrhythm2_src.sh"
if setup_script.exists():
    try:
        subprocess.run(["bash", str(setup_script)], check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # Narrowed from a blanket `except Exception`: only a non-zero exit
        # (CalledProcessError, via check=True) or an exec/OS failure can
        # occur here; anything else should surface.
        print(f"Warning: Failed to run setup script: {e}")
# Configure environment for HuggingFace Spaces (espeak-ng paths, etc.)
# NOTE: hf_config applies its configuration on import (side-effect import).
import hf_config

# Setup paths for HuggingFace Spaces: make backend/ importable so the
# `services`, `config`, `utils`, and `models` packages resolve below.
SPACE_DIR = Path(__file__).parent
sys.path.insert(0, str(SPACE_DIR / 'backend'))

# Configure logging for the whole app; services use getLogger(__name__) too.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Import services (resolved via the backend/ path inserted into sys.path).
try:
    from services.diffrhythm_service import DiffRhythmService
    from services.lyricmind_service import LyricMindService
    from services.timeline_service import TimelineService
    from services.export_service import ExportService
    from config.settings import Config
    from utils.prompt_analyzer import PromptAnalyzer
except ImportError as e:
    # Fail fast: the app cannot function without its backend modules,
    # so log for Space diagnostics and re-raise.
    logger.error(f"Import error: {e}")
    raise
# Initialize configuration
config = Config()

# Create necessary directories (idempotent) — replaces five repeated
# os.makedirs calls with a single loop.
for _dir in ("outputs", "outputs/music", "outputs/mixed", "models", "logs"):
    os.makedirs(_dir, exist_ok=True)

# Initialize lightweight services eagerly.
timeline_service = TimelineService()
export_service = ExportService()

# Lazy-load AI services (heavy models) — created on first use by the
# get_*_service() accessors below to keep startup fast on CPU Spaces.
diffrhythm_service = None
lyricmind_service = None
def get_diffrhythm_service():
    """Return the shared DiffRhythm2 service, instantiating it on first call."""
    global diffrhythm_service
    # Fast path: the heavy model is already resident.
    if diffrhythm_service is not None:
        return diffrhythm_service
    logger.info("Loading DiffRhythm2 model...")
    diffrhythm_service = DiffRhythmService(model_path=config.DIFFRHYTHM_MODEL_PATH)
    logger.info("DiffRhythm2 model loaded")
    return diffrhythm_service
def get_lyricmind_service():
    """Return the shared LyricMind service, instantiating it on first call."""
    global lyricmind_service
    # Fast path: already loaded.
    if lyricmind_service is not None:
        return lyricmind_service
    logger.info("Loading LyricMind model...")
    lyricmind_service = LyricMindService(model_path=config.LYRICMIND_MODEL_PATH)
    logger.info("LyricMind model loaded")
    return lyricmind_service
def generate_lyrics(prompt: str, duration: int, progress=gr.Progress()):
    """Generate lyrics for *prompt*, steering style via prompt analysis.

    Returns the lyric text on success, or a user-facing error string.
    """
    try:
        # Guard: reject empty / whitespace-only prompts up front.
        if not (prompt and prompt.strip()):
            return "❌ Please enter a prompt"
        progress(0, desc="πŸ” Analyzing prompt...")
        logger.info(f"Generating lyrics for: {prompt}")

        # Derive genre/mood hints to shape the lyric generation.
        analysis = PromptAnalyzer.analyze(prompt)
        genres = analysis.get('genres')
        genre = genres[0] if genres else 'general'
        mood = analysis.get('mood', 'unknown')
        logger.info(f"Analysis - Genre: {genre}, Mood: {mood}")

        progress(0.3, desc=f"✍️ Generating {genre} lyrics...")
        lyrics_text = get_lyricmind_service().generate(
            prompt=prompt,
            duration=duration,
            prompt_analysis=analysis,
        )
        progress(1.0, desc="βœ… Lyrics generated!")
        return lyrics_text
    except Exception as e:
        logger.error(f"Error generating lyrics: {e}", exc_info=True)
        return f"❌ Error: {str(e)}"
def generate_music(prompt: str, lyrics: str, lyrics_mode: str, duration: int, position: str, progress=gr.Progress()):
    """Generate a music clip and add it to the timeline.

    Args:
        prompt: Free-text description of the desired music.
        lyrics: Lyrics text from the UI textbox (may be empty).
        lyrics_mode: One of "Instrumental", "User Lyrics", "Auto Lyrics".
        duration: Clip length in seconds.
        position: Timeline insertion point ("intro"/"previous"/"next"/"outro").
        progress: Gradio progress callback.

    Returns:
        Tuple of (status message, timeline display text, audio file path
        or None on validation failure / error).
    """
    try:
        # Guard: a music prompt is mandatory in every mode.
        if not prompt or not prompt.strip():
            return "❌ Please enter a music prompt", get_timeline_display(), None
        # Estimate time (CPU on HF Spaces) — rough 4x-realtime heuristic,
        # shown in progress messages so users know what to expect.
        est_time = int(duration * 4)  # Conservative estimate for CPU
        progress(0, desc=f"πŸ” Analyzing prompt... (Est. {est_time}s)")
        logger.info(f"Generating music: {prompt}, mode={lyrics_mode}, duration={duration}s")
        # Analyze prompt for genre/BPM/mood; reused for status text and
        # passed through to the lyric generator in Auto mode.
        analysis = PromptAnalyzer.analyze(prompt)
        genre = analysis.get('genres', ['general'])[0] if analysis.get('genres') else 'general'
        bpm = analysis.get('bpm', 120)
        mood = analysis.get('mood', 'neutral')
        logger.info(f"Analysis - Genre: {genre}, BPM: {bpm}, Mood: {mood}")
        # Determine lyrics based on mode; None means instrumental.
        lyrics_to_use = None
        if lyrics_mode == "Instrumental":
            logger.info("Generating instrumental (no vocals)")
            progress(0.1, desc=f"🎹 Preparing instrumental generation... ({est_time}s)")
        elif lyrics_mode == "User Lyrics":
            # User mode requires non-empty lyrics from the textbox.
            if not lyrics or not lyrics.strip():
                return "❌ Please enter lyrics or switch mode", get_timeline_display(), None
            lyrics_to_use = lyrics.strip()
            logger.info("Using user-provided lyrics")
            progress(0.1, desc=f"🎀 Preparing vocal generation... ({est_time}s)")
        elif lyrics_mode == "Auto Lyrics":
            # Prefer lyrics already typed in the textbox; otherwise
            # auto-generate them from the prompt analysis.
            if lyrics and lyrics.strip():
                lyrics_to_use = lyrics.strip()
                logger.info("Using existing lyrics from textbox")
                progress(0.1, desc=f"🎀 Using provided lyrics... ({est_time}s)")
            else:
                progress(0.1, desc="✍️ Generating lyrics...")
                logger.info("Auto-generating lyrics...")
                lyric_service = get_lyricmind_service()
                lyrics_to_use = lyric_service.generate(
                    prompt=prompt,
                    duration=duration,
                    prompt_analysis=analysis
                )
                logger.info(f"Generated {len(lyrics_to_use)} characters of lyrics")
                progress(0.25, desc=f"🎡 Lyrics ready, generating music... ({est_time}s)")
        # Generate music (lyrics_to_use is None for instrumental clips).
        progress(0.3, desc=f"🎼 Generating {genre} at {bpm} BPM... ({est_time}s)")
        service = get_diffrhythm_service()
        final_path = service.generate(
            prompt=prompt,
            duration=duration,
            lyrics=lyrics_to_use
        )
        # Add to timeline; clip id is derived from the output filename stem.
        progress(0.9, desc="πŸ“Š Adding to timeline...")
        clip_id = os.path.basename(final_path).split('.')[0]
        from models.schemas import ClipPosition
        clip_info = timeline_service.add_clip(
            clip_id=clip_id,
            file_path=final_path,
            duration=float(duration),
            position=ClipPosition(position)
        )
        logger.info(f"Music added to timeline at position {clip_info['timeline_position']}")
        # Build status message summarizing what was generated.
        progress(1.0, desc="βœ… Complete!")
        status_msg = f"βœ… Music generated successfully!\n"
        status_msg += f"🎸 Genre: {genre} | πŸ₯ BPM: {bpm} | 🎭 Mood: {mood}\n"
        status_msg += f"🎀 Mode: {lyrics_mode} | πŸ“ Position: {position}\n"
        # Flag auto-generated lyrics (Auto mode with an empty textbox).
        if lyrics_mode == "Auto Lyrics" and lyrics_to_use and not lyrics:
            status_msg += "✍️ (Lyrics auto-generated)"
        return status_msg, get_timeline_display(), final_path
    except Exception as e:
        logger.error(f"Error generating music: {e}", exc_info=True)
        return f"❌ Error: {str(e)}", get_timeline_display(), None
def get_timeline_display():
    """Render the current timeline as a Markdown summary string."""
    clips = timeline_service.get_all_clips()
    if not clips:
        return "πŸ“­ Timeline is empty. Generate clips to get started!"
    total = timeline_service.get_total_duration()
    # Assemble header + one line per clip, then join in a single pass.
    parts = [f"**πŸ“Š Timeline ({len(clips)} clips, {format_duration(total)} total)**\n\n"]
    for idx, clip in enumerate(clips, start=1):
        parts.append(
            f"**{idx}.** `{clip['clip_id'][:12]}...` | "
            f"⏱️ {format_duration(clip['duration'])} | "
            f"▢️ {format_duration(clip['start_time'])}\n"
        )
    return "".join(parts)
def remove_clip(clip_number: int):
    """Delete the clip at the given 1-based position from the timeline.

    Returns a (status message, timeline display) pair.
    """
    try:
        clips = timeline_service.get_all_clips()
        # Nothing to remove.
        if not clips:
            return "πŸ“­ Timeline is empty", get_timeline_display()
        # Validate the 1-based index against the current clip count.
        if clip_number < 1 or clip_number > len(clips):
            return f"❌ Invalid clip number. Choose 1-{len(clips)}", get_timeline_display()
        target = clips[clip_number - 1]
        timeline_service.remove_clip(target['clip_id'])
        return f"βœ… Clip {clip_number} removed", get_timeline_display()
    except Exception as e:
        logger.error(f"Error removing clip: {e}", exc_info=True)
        return f"❌ Error: {str(e)}", get_timeline_display()
def clear_timeline():
    """Remove every clip from the timeline.

    Returns a (status message, timeline display) pair.
    """
    try:
        timeline_service.clear()
    except Exception as e:
        logger.error(f"Error clearing timeline: {e}", exc_info=True)
        return f"❌ Error: {str(e)}", get_timeline_display()
    return "βœ… Timeline cleared", get_timeline_display()
def export_timeline(filename: str, export_format: str, progress=gr.Progress()):
    """Merge all timeline clips into a single exported audio file.

    Args:
        filename: Base name for the exported file; falls back to "output"
            when blank.
        export_format: Target format, e.g. "wav" or "mp3".
        progress: Gradio progress callback.

    Returns:
        Tuple of (status message, exported file path or None).
    """
    try:
        clips = timeline_service.get_all_clips()
        if not clips:
            return "❌ No clips to export", None
        if not filename or not filename.strip():
            filename = "output"
        progress(0, desc="πŸ”„ Merging clips...")
        # FIX: previously logged a literal "(unknown)" placeholder instead
        # of the actual filename.
        logger.info(f"Exporting timeline: {filename}.{export_format}")
        export_service.timeline_service = timeline_service
        progress(0.5, desc="πŸ’Ύ Encoding audio...")
        output_path = export_service.merge_clips(
            filename=filename,
            export_format=export_format
        )
        if output_path:
            progress(1.0, desc="βœ… Export complete!")
            return f"βœ… Exported: {os.path.basename(output_path)}", output_path
        return "❌ Export failed", None
    except Exception as e:
        logger.error(f"Error exporting: {e}", exc_info=True)
        return f"❌ Error: {str(e)}", None
def format_duration(seconds: float) -> str:
    """Format a duration in seconds as M:SS (minutes are not zero-padded)."""
    return f"{int(seconds // 60)}:{int(seconds % 60):02d}"
# Create Gradio interface: two-column layout (generation | timeline),
# followed by an export row, event wiring, and a collapsible help section.
with gr.Blocks(
    title="🎡 Music Generation Studio",
    theme=gr.themes.Soft(primary_hue="purple", secondary_hue="pink")
) as app:
    # Header banner.
    gr.Markdown(
        """
# 🎡 Music Generation Studio
Create AI-powered music with DiffRhythm2 and LyricMind AI
πŸ’‘ **Tip**: Start with 10-20 second clips for faster generation on HuggingFace Spaces
"""
    )
    with gr.Row():
        # Left Column - Generation controls.
        with gr.Column(scale=2):
            gr.Markdown("### 🎼 Music Generation")
            prompt_input = gr.Textbox(
                label="🎯 Music Prompt",
                placeholder="energetic rock song with electric guitar at 140 BPM",
                lines=3,
                info="Describe the music style, instruments, tempo, and mood"
            )
            lyrics_mode = gr.Radio(
                choices=["Instrumental", "User Lyrics", "Auto Lyrics"],
                value="Instrumental",
                label="🎀 Vocal Mode",
                info="Instrumental: no vocals | User: provide lyrics | Auto: AI-generated"
            )
            with gr.Row():
                auto_gen_btn = gr.Button("✍️ Generate Lyrics", size="sm")
            lyrics_input = gr.Textbox(
                label="πŸ“ Lyrics",
                placeholder="Enter lyrics or click 'Generate Lyrics'...",
                lines=6
            )
            with gr.Row():
                duration_input = gr.Slider(
                    minimum=10,
                    maximum=60,
                    value=20,
                    step=5,
                    label="⏱️ Duration (seconds)",
                    info="Shorter = faster generation"
                )
                position_input = gr.Radio(
                    choices=["intro", "previous", "next", "outro"],
                    value="next",
                    label="πŸ“ Position"
                )
            generate_btn = gr.Button(
                "✨ Generate Music Clip",
                variant="primary",
                size="lg"
            )
            gen_status = gr.Textbox(label="πŸ“Š Status", lines=3, interactive=False)
            audio_output = gr.Audio(label="🎧 Preview", type="filepath")
        # Right Column - Timeline view and clip management.
        with gr.Column(scale=1):
            gr.Markdown("### πŸ“Š Timeline")
            timeline_display = gr.Textbox(
                label="Clips",
                value=get_timeline_display(),
                lines=12,
                interactive=False
            )
            with gr.Row():
                clip_number_input = gr.Number(
                    label="Clip #",
                    precision=0,
                    minimum=1,
                    scale=1
                )
                remove_btn = gr.Button("πŸ—‘οΈ Remove", size="sm", scale=1)
            clear_btn = gr.Button("πŸ—‘οΈ Clear All", variant="stop")
            timeline_status = gr.Textbox(label="Status", lines=1, interactive=False)
    # Export Section: merge the timeline into one downloadable file.
    gr.Markdown("---")
    gr.Markdown("### πŸ’Ύ Export")
    with gr.Row():
        export_filename = gr.Textbox(
            label="Filename",
            value="my_song",
            scale=2
        )
        export_format = gr.Dropdown(
            choices=["wav", "mp3"],
            value="wav",
            label="Format",
            scale=1
        )
        export_btn = gr.Button("πŸ’Ύ Export", variant="primary", scale=1)
    export_status = gr.Textbox(label="Status", lines=1, interactive=False)
    export_audio = gr.Audio(label="πŸ“₯ Download", type="filepath")
    # Event handlers: wire each button to its handler function above.
    auto_gen_btn.click(
        fn=generate_lyrics,
        inputs=[prompt_input, duration_input],
        outputs=lyrics_input
    )
    generate_btn.click(
        fn=generate_music,
        inputs=[prompt_input, lyrics_input, lyrics_mode, duration_input, position_input],
        outputs=[gen_status, timeline_display, audio_output]
    )
    remove_btn.click(
        fn=remove_clip,
        inputs=clip_number_input,
        outputs=[timeline_status, timeline_display]
    )
    clear_btn.click(
        fn=clear_timeline,
        outputs=[timeline_status, timeline_display]
    )
    export_btn.click(
        fn=export_timeline,
        inputs=[export_filename, export_format],
        outputs=[export_status, export_audio]
    )
    # Help section (collapsed by default).
    with gr.Accordion("ℹ️ Help & Tips", open=False):
        gr.Markdown(
            """
## πŸš€ Quick Start
1. **Enter a prompt**: "upbeat pop song with synth at 128 BPM"
2. **Choose mode**: Instrumental (fastest) or with vocals
3. **Set duration**: Start with 10-20s for quick results
4. **Generate**: Click the button and wait ~2-4 minutes
5. **Export**: Download your complete song
## ⚑ Performance Tips
- **Shorter clips = faster**: 10-20s clips generate in ~1-2 minutes
- **Instrumental mode**: ~30% faster than with vocals
- **HF Spaces uses CPU**: Expect 2-4 minutes per 30s clip
- **Build incrementally**: Generate short clips, then combine
## 🎯 Prompt Tips
- **Be specific**: "energetic rock with distorted guitar" > "rock song"
- **Include BPM**: "at 140 BPM" helps set tempo
- **Mention instruments**: "with piano and drums"
- **Describe mood**: "melancholic", "upbeat", "aggressive"
## 🎀 Vocal Modes
- **Instrumental**: Pure music, no vocals (fastest)
- **User Lyrics**: Provide your own lyrics
- **Auto Lyrics**: AI generates lyrics based on prompt
## πŸ“Š Timeline
- Clips are arranged sequentially
- Remove or clear clips as needed
- Export combines all clips into one file
---
⏱️ **Average Generation Time**: 2-4 minutes per 30-second clip on CPU
🎡 **Models**: DiffRhythm2 + MuQ-MuLan + LyricMind AI
"""
        )
# Configure and launch the app when run as a script.
if __name__ == "__main__":
    logger.info("🎡 Starting Music Generation Studio on HuggingFace Spaces...")
    # Single concurrent job: generation is CPU-heavy on Spaces; queue up
    # to 5 pending requests before rejecting new ones.
    app.queue(
        default_concurrency_limit=1,
        max_size=5
    )
    app.launch()