# lemm-test-100 / app.py — Gamahea
# Deploy Music Generation Studio - 2025-12-12 16:01 (commit aad9d66)
"""
Music Generation Studio - HuggingFace Spaces Deployment
Main application file for Gradio interface
"""
import os
import sys
import gradio as gr
import logging
from pathlib import Path
import shutil
import subprocess
# Run DiffRhythm2 source setup if needed (best-effort: a failure here is
# logged but does not abort startup, since the sources may already exist).
setup_script = Path(__file__).parent / "setup_diffrhythm2_src.sh"
if setup_script.exists():
    try:
        subprocess.run(["bash", str(setup_script)], check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # Narrowed from a blanket `except Exception`: only a non-zero exit
        # (CalledProcessError, via check=True) or an exec/OS failure can
        # occur here; anything else should surface.
        print(f"Warning: Failed to run setup script: {e}")
# Configure environment for HuggingFace Spaces (espeak-ng paths, etc.)
# NOTE: hf_config applies its configuration on import (side-effect import).
import hf_config

# Setup paths for HuggingFace Spaces: make backend/ importable so the
# `services`, `config`, `utils`, and `models` packages resolve below.
SPACE_DIR = Path(__file__).parent
sys.path.insert(0, str(SPACE_DIR / 'backend'))

# Configure logging for the whole app; services use getLogger(__name__) too.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Import services (resolved via the backend/ path inserted into sys.path).
try:
    from services.diffrhythm_service import DiffRhythmService
    from services.lyricmind_service import LyricMindService
    from services.timeline_service import TimelineService
    from services.export_service import ExportService
    from config.settings import Config
    from utils.prompt_analyzer import PromptAnalyzer
except ImportError as e:
    # Fail fast: the app cannot function without its backend modules,
    # so log for Space diagnostics and re-raise.
    logger.error(f"Import error: {e}")
    raise
# Initialize configuration
config = Config()

# Create necessary directories (idempotent) — replaces five repeated
# os.makedirs calls with a single loop.
for _dir in ("outputs", "outputs/music", "outputs/mixed", "models", "logs"):
    os.makedirs(_dir, exist_ok=True)

# Initialize lightweight services eagerly.
timeline_service = TimelineService()
export_service = ExportService()

# Lazy-load AI services (heavy models) — created on first use by the
# get_*_service() accessors below to keep startup fast on CPU Spaces.
diffrhythm_service = None
lyricmind_service = None
def get_diffrhythm_service():
    """Return the shared DiffRhythm2 service, instantiating it on first call."""
    global diffrhythm_service
    # Fast path: the heavy model is already resident.
    if diffrhythm_service is not None:
        return diffrhythm_service
    logger.info("Loading DiffRhythm2 model...")
    diffrhythm_service = DiffRhythmService(model_path=config.DIFFRHYTHM_MODEL_PATH)
    logger.info("DiffRhythm2 model loaded")
    return diffrhythm_service
def get_lyricmind_service():
    """Return the shared LyricMind service, instantiating it on first call."""
    global lyricmind_service
    # Fast path: already loaded.
    if lyricmind_service is not None:
        return lyricmind_service
    logger.info("Loading LyricMind model...")
    lyricmind_service = LyricMindService(model_path=config.LYRICMIND_MODEL_PATH)
    logger.info("LyricMind model loaded")
    return lyricmind_service
def generate_lyrics(prompt: str, duration: int, progress=gr.Progress()):
    """Generate lyrics for *prompt*, steering style via prompt analysis.

    Returns the lyric text on success, or a user-facing error string.
    """
    try:
        # Guard: reject empty / whitespace-only prompts up front.
        if not (prompt and prompt.strip()):
            return "❌ Please enter a prompt"
        progress(0, desc="πŸ” Analyzing prompt...")
        logger.info(f"Generating lyrics for: {prompt}")

        # Derive genre/mood hints to shape the lyric generation.
        analysis = PromptAnalyzer.analyze(prompt)
        genres = analysis.get('genres')
        genre = genres[0] if genres else 'general'
        mood = analysis.get('mood', 'unknown')
        logger.info(f"Analysis - Genre: {genre}, Mood: {mood}")

        progress(0.3, desc=f"✍️ Generating {genre} lyrics...")
        lyrics_text = get_lyricmind_service().generate(
            prompt=prompt,
            duration=duration,
            prompt_analysis=analysis,
        )
        progress(1.0, desc="βœ… Lyrics generated!")
        return lyrics_text
    except Exception as e:
        logger.error(f"Error generating lyrics: {e}", exc_info=True)
        return f"❌ Error: {str(e)}"
def generate_music(prompt: str, lyrics: str, lyrics_mode: str, duration: int, position: str, progress=gr.Progress()):
    """Generate a music clip and add it to the timeline.

    Args:
        prompt: Free-text description of the desired music.
        lyrics: Lyrics text from the UI textbox (may be empty).
        lyrics_mode: One of "Instrumental", "User Lyrics", "Auto Lyrics".
        duration: Clip length in seconds.
        position: Timeline insertion point ("intro"/"previous"/"next"/"outro").
        progress: Gradio progress callback.

    Returns:
        Tuple of (status message, timeline display text, audio file path
        or None on validation failure / error).
    """
    try:
        # Guard: a music prompt is mandatory in every mode.
        if not prompt or not prompt.strip():
            return "❌ Please enter a music prompt", get_timeline_display(), None
        # Estimate time (CPU on HF Spaces) — rough 4x-realtime heuristic,
        # shown in progress messages so users know what to expect.
        est_time = int(duration * 4)  # Conservative estimate for CPU
        progress(0, desc=f"πŸ” Analyzing prompt... (Est. {est_time}s)")
        logger.info(f"Generating music: {prompt}, mode={lyrics_mode}, duration={duration}s")
        # Analyze prompt for genre/BPM/mood; reused for status text and
        # passed through to the lyric generator in Auto mode.
        analysis = PromptAnalyzer.analyze(prompt)
        genre = analysis.get('genres', ['general'])[0] if analysis.get('genres') else 'general'
        bpm = analysis.get('bpm', 120)
        mood = analysis.get('mood', 'neutral')
        logger.info(f"Analysis - Genre: {genre}, BPM: {bpm}, Mood: {mood}")
        # Determine lyrics based on mode; None means instrumental.
        lyrics_to_use = None
        if lyrics_mode == "Instrumental":
            logger.info("Generating instrumental (no vocals)")
            progress(0.1, desc=f"🎹 Preparing instrumental generation... ({est_time}s)")
        elif lyrics_mode == "User Lyrics":
            # User mode requires non-empty lyrics from the textbox.
            if not lyrics or not lyrics.strip():
                return "❌ Please enter lyrics or switch mode", get_timeline_display(), None
            lyrics_to_use = lyrics.strip()
            logger.info("Using user-provided lyrics")
            progress(0.1, desc=f"🎀 Preparing vocal generation... ({est_time}s)")
        elif lyrics_mode == "Auto Lyrics":
            # Prefer lyrics already typed in the textbox; otherwise
            # auto-generate them from the prompt analysis.
            if lyrics and lyrics.strip():
                lyrics_to_use = lyrics.strip()
                logger.info("Using existing lyrics from textbox")
                progress(0.1, desc=f"🎀 Using provided lyrics... ({est_time}s)")
            else:
                progress(0.1, desc="✍️ Generating lyrics...")
                logger.info("Auto-generating lyrics...")
                lyric_service = get_lyricmind_service()
                lyrics_to_use = lyric_service.generate(
                    prompt=prompt,
                    duration=duration,
                    prompt_analysis=analysis
                )
                logger.info(f"Generated {len(lyrics_to_use)} characters of lyrics")
                progress(0.25, desc=f"🎡 Lyrics ready, generating music... ({est_time}s)")
        # Generate music (lyrics_to_use is None for instrumental clips).
        progress(0.3, desc=f"🎼 Generating {genre} at {bpm} BPM... ({est_time}s)")
        service = get_diffrhythm_service()
        final_path = service.generate(
            prompt=prompt,
            duration=duration,
            lyrics=lyrics_to_use
        )
        # Add to timeline; clip id is derived from the output filename stem.
        progress(0.9, desc="πŸ“Š Adding to timeline...")
        clip_id = os.path.basename(final_path).split('.')[0]
        from models.schemas import ClipPosition
        clip_info = timeline_service.add_clip(
            clip_id=clip_id,
            file_path=final_path,
            duration=float(duration),
            position=ClipPosition(position)
        )
        logger.info(f"Music added to timeline at position {clip_info['timeline_position']}")
        # Build status message summarizing what was generated.
        progress(1.0, desc="βœ… Complete!")
        status_msg = f"βœ… Music generated successfully!\n"
        status_msg += f"🎸 Genre: {genre} | πŸ₯ BPM: {bpm} | 🎭 Mood: {mood}\n"
        status_msg += f"🎀 Mode: {lyrics_mode} | πŸ“ Position: {position}\n"
        # Flag auto-generated lyrics (Auto mode with an empty textbox).
        if lyrics_mode == "Auto Lyrics" and lyrics_to_use and not lyrics:
            status_msg += "✍️ (Lyrics auto-generated)"
        return status_msg, get_timeline_display(), final_path
    except Exception as e:
        logger.error(f"Error generating music: {e}", exc_info=True)
        return f"❌ Error: {str(e)}", get_timeline_display(), None
def get_timeline_display():
    """Render the current timeline as a Markdown summary string."""
    clips = timeline_service.get_all_clips()
    if not clips:
        return "πŸ“­ Timeline is empty. Generate clips to get started!"
    total = timeline_service.get_total_duration()
    # Assemble header + one line per clip, then join in a single pass.
    parts = [f"**πŸ“Š Timeline ({len(clips)} clips, {format_duration(total)} total)**\n\n"]
    for idx, clip in enumerate(clips, start=1):
        parts.append(
            f"**{idx}.** `{clip['clip_id'][:12]}...` | "
            f"⏱️ {format_duration(clip['duration'])} | "
            f"▢️ {format_duration(clip['start_time'])}\n"
        )
    return "".join(parts)
def remove_clip(clip_number: int):
    """Delete the clip at the given 1-based position from the timeline.

    Returns a (status message, timeline display) pair.
    """
    try:
        clips = timeline_service.get_all_clips()
        # Nothing to remove.
        if not clips:
            return "πŸ“­ Timeline is empty", get_timeline_display()
        # Validate the 1-based index against the current clip count.
        if clip_number < 1 or clip_number > len(clips):
            return f"❌ Invalid clip number. Choose 1-{len(clips)}", get_timeline_display()
        target = clips[clip_number - 1]
        timeline_service.remove_clip(target['clip_id'])
        return f"βœ… Clip {clip_number} removed", get_timeline_display()
    except Exception as e:
        logger.error(f"Error removing clip: {e}", exc_info=True)
        return f"❌ Error: {str(e)}", get_timeline_display()
def clear_timeline():
    """Remove every clip from the timeline.

    Returns a (status message, timeline display) pair.
    """
    try:
        timeline_service.clear()
    except Exception as e:
        logger.error(f"Error clearing timeline: {e}", exc_info=True)
        return f"❌ Error: {str(e)}", get_timeline_display()
    return "βœ… Timeline cleared", get_timeline_display()
def export_timeline(filename: str, export_format: str, progress=gr.Progress()):
    """Merge all timeline clips into a single exported audio file.

    Args:
        filename: Base name for the exported file; falls back to "output"
            when blank.
        export_format: Target format, e.g. "wav" or "mp3".
        progress: Gradio progress callback.

    Returns:
        Tuple of (status message, exported file path or None).
    """
    try:
        clips = timeline_service.get_all_clips()
        if not clips:
            return "❌ No clips to export", None
        if not filename or not filename.strip():
            filename = "output"
        progress(0, desc="πŸ”„ Merging clips...")
        # FIX: previously logged a literal "(unknown)" placeholder instead
        # of the actual filename.
        logger.info(f"Exporting timeline: {filename}.{export_format}")
        export_service.timeline_service = timeline_service
        progress(0.5, desc="πŸ’Ύ Encoding audio...")
        output_path = export_service.merge_clips(
            filename=filename,
            export_format=export_format
        )
        if output_path:
            progress(1.0, desc="βœ… Export complete!")
            return f"βœ… Exported: {os.path.basename(output_path)}", output_path
        return "❌ Export failed", None
    except Exception as e:
        logger.error(f"Error exporting: {e}", exc_info=True)
        return f"❌ Error: {str(e)}", None
def format_duration(seconds: float) -> str:
    """Format a duration in seconds as M:SS (minutes are not zero-padded)."""
    return f"{int(seconds // 60)}:{int(seconds % 60):02d}"
# Create Gradio interface: two-column layout (generation | timeline),
# followed by an export row, event wiring, and a collapsible help section.
with gr.Blocks(
    title="🎡 Music Generation Studio",
    theme=gr.themes.Soft(primary_hue="purple", secondary_hue="pink")
) as app:
    # Header banner.
    gr.Markdown(
        """
# 🎡 Music Generation Studio
Create AI-powered music with DiffRhythm2 and LyricMind AI
πŸ’‘ **Tip**: Start with 10-20 second clips for faster generation on HuggingFace Spaces
"""
    )
    with gr.Row():
        # Left Column - Generation controls.
        with gr.Column(scale=2):
            gr.Markdown("### 🎼 Music Generation")
            prompt_input = gr.Textbox(
                label="🎯 Music Prompt",
                placeholder="energetic rock song with electric guitar at 140 BPM",
                lines=3,
                info="Describe the music style, instruments, tempo, and mood"
            )
            lyrics_mode = gr.Radio(
                choices=["Instrumental", "User Lyrics", "Auto Lyrics"],
                value="Instrumental",
                label="🎀 Vocal Mode",
                info="Instrumental: no vocals | User: provide lyrics | Auto: AI-generated"
            )
            with gr.Row():
                auto_gen_btn = gr.Button("✍️ Generate Lyrics", size="sm")
            lyrics_input = gr.Textbox(
                label="πŸ“ Lyrics",
                placeholder="Enter lyrics or click 'Generate Lyrics'...",
                lines=6
            )
            with gr.Row():
                duration_input = gr.Slider(
                    minimum=10,
                    maximum=60,
                    value=20,
                    step=5,
                    label="⏱️ Duration (seconds)",
                    info="Shorter = faster generation"
                )
                position_input = gr.Radio(
                    choices=["intro", "previous", "next", "outro"],
                    value="next",
                    label="πŸ“ Position"
                )
            generate_btn = gr.Button(
                "✨ Generate Music Clip",
                variant="primary",
                size="lg"
            )
            gen_status = gr.Textbox(label="πŸ“Š Status", lines=3, interactive=False)
            audio_output = gr.Audio(label="🎧 Preview", type="filepath")
        # Right Column - Timeline view and clip management.
        with gr.Column(scale=1):
            gr.Markdown("### πŸ“Š Timeline")
            timeline_display = gr.Textbox(
                label="Clips",
                value=get_timeline_display(),
                lines=12,
                interactive=False
            )
            with gr.Row():
                clip_number_input = gr.Number(
                    label="Clip #",
                    precision=0,
                    minimum=1,
                    scale=1
                )
                remove_btn = gr.Button("πŸ—‘οΈ Remove", size="sm", scale=1)
            clear_btn = gr.Button("πŸ—‘οΈ Clear All", variant="stop")
            timeline_status = gr.Textbox(label="Status", lines=1, interactive=False)
    # Export Section: merge the timeline into one downloadable file.
    gr.Markdown("---")
    gr.Markdown("### πŸ’Ύ Export")
    with gr.Row():
        export_filename = gr.Textbox(
            label="Filename",
            value="my_song",
            scale=2
        )
        export_format = gr.Dropdown(
            choices=["wav", "mp3"],
            value="wav",
            label="Format",
            scale=1
        )
        export_btn = gr.Button("πŸ’Ύ Export", variant="primary", scale=1)
    export_status = gr.Textbox(label="Status", lines=1, interactive=False)
    export_audio = gr.Audio(label="πŸ“₯ Download", type="filepath")
    # Event handlers: wire each button to its handler function above.
    auto_gen_btn.click(
        fn=generate_lyrics,
        inputs=[prompt_input, duration_input],
        outputs=lyrics_input
    )
    generate_btn.click(
        fn=generate_music,
        inputs=[prompt_input, lyrics_input, lyrics_mode, duration_input, position_input],
        outputs=[gen_status, timeline_display, audio_output]
    )
    remove_btn.click(
        fn=remove_clip,
        inputs=clip_number_input,
        outputs=[timeline_status, timeline_display]
    )
    clear_btn.click(
        fn=clear_timeline,
        outputs=[timeline_status, timeline_display]
    )
    export_btn.click(
        fn=export_timeline,
        inputs=[export_filename, export_format],
        outputs=[export_status, export_audio]
    )
    # Help section (collapsed by default).
    with gr.Accordion("ℹ️ Help & Tips", open=False):
        gr.Markdown(
            """
## πŸš€ Quick Start
1. **Enter a prompt**: "upbeat pop song with synth at 128 BPM"
2. **Choose mode**: Instrumental (fastest) or with vocals
3. **Set duration**: Start with 10-20s for quick results
4. **Generate**: Click the button and wait ~2-4 minutes
5. **Export**: Download your complete song
## ⚑ Performance Tips
- **Shorter clips = faster**: 10-20s clips generate in ~1-2 minutes
- **Instrumental mode**: ~30% faster than with vocals
- **HF Spaces uses CPU**: Expect 2-4 minutes per 30s clip
- **Build incrementally**: Generate short clips, then combine
## 🎯 Prompt Tips
- **Be specific**: "energetic rock with distorted guitar" > "rock song"
- **Include BPM**: "at 140 BPM" helps set tempo
- **Mention instruments**: "with piano and drums"
- **Describe mood**: "melancholic", "upbeat", "aggressive"
## 🎀 Vocal Modes
- **Instrumental**: Pure music, no vocals (fastest)
- **User Lyrics**: Provide your own lyrics
- **Auto Lyrics**: AI generates lyrics based on prompt
## πŸ“Š Timeline
- Clips are arranged sequentially
- Remove or clear clips as needed
- Export combines all clips into one file
---
⏱️ **Average Generation Time**: 2-4 minutes per 30-second clip on CPU
🎡 **Models**: DiffRhythm2 + MuQ-MuLan + LyricMind AI
"""
        )
# Configure and launch the app when run as a script.
if __name__ == "__main__":
    logger.info("🎡 Starting Music Generation Studio on HuggingFace Spaces...")
    # Single concurrent job: generation is CPU-heavy on Spaces; queue up
    # to 5 pending requests before rejecting new ones.
    app.queue(
        default_concurrency_limit=1,
        max_size=5
    )
    app.launch()