Spaces:

hamza2923
/

faster-whisper-transcription-fcm-api5

Sleeping

App Files Files Community

faster-whisper-transcription-fcm-api5 / app.py

hamza2923

Upload 2 files

1b11103 verified 6 months ago

raw

history blame

7.29 kB

	from flask import Flask, request, jsonify, Response
	from faster_whisper import WhisperModel
	import torch
	import io
	import time
	import datetime
	from threading import Semaphore
	import os
	from werkzeug.utils import secure_filename
	import tempfile
	from moviepy.editor import VideoFileClip
	import firebase_admin
	from firebase_admin import credentials, messaging # Added for FCM

	# Flask application object; the @app.route decorators further down register their handlers on it.
	app = Flask(__name__)

	# Service limits and the upload types this API accepts
	MAX_CONCURRENT_REQUESTS = 2
	MAX_FILE_DURATION = 60 * 30  # seconds, i.e. 30 minutes
	TEMPORARY_FOLDER = tempfile.gettempdir()
	ALLOWED_AUDIO_EXTENSIONS = {
	    'mp3', 'wav', 'ogg', 'm4a', 'flac', 'aac', 'wma', 'opus', 'aiff',
	}
	ALLOWED_VIDEO_EXTENSIONS = {
	    'mp4', 'avi', 'mov', 'mkv', 'webm', 'flv', 'wmv', 'mpeg', 'mpg', '3gp',
	}
	# Every extension accepted by the upload endpoint, audio or video.
	ALLOWED_EXTENSIONS = ALLOWED_AUDIO_EXTENSIONS | ALLOWED_VIDEO_EXTENSIONS


	# Initialize Firebase Admin SDK using environment variables
	_firebase_private_key = os.getenv("FIREBASE_PRIVATE_KEY")
	if not _firebase_private_key:
	    # Fail with an actionable message instead of the AttributeError that
	    # calling .replace() on a missing (None) value would raise.
	    raise RuntimeError(
	        "FIREBASE_PRIVATE_KEY environment variable is not set; "
	        "cannot initialize the Firebase Admin SDK"
	    )

	firebase_credentials = {
	    "type": "service_account",
	    "project_id": os.getenv("FIREBASE_PROJECT_ID"),
	    "private_key_id": os.getenv("FIREBASE_PRIVATE_KEY_ID"),
	    # Turn literal backslash-n sequences in the env value into real newlines.
	    "private_key": _firebase_private_key.replace("\\n", "\n"),
	    "client_email": os.getenv("FIREBASE_CLIENT_EMAIL"),
	    "client_id": os.getenv("FIREBASE_CLIENT_ID"),
	    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
	    "token_uri": "https://oauth2.googleapis.com/token",
	    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
	    "client_x509_cert_url": f"https://www.googleapis.com/robot/v1/metadata/x509/{os.getenv('FIREBASE_CLIENT_EMAIL')}",
	}
	cred = credentials.Certificate(firebase_credentials)
	firebase_admin.initialize_app(cred)

	# Pick the inference device for faster-whisper and a compute type to go with it
	if torch.cuda.is_available():
	    device, compute_type = "cuda", "float16"
	else:
	    device, compute_type = "cpu", "int8"
	print(f"Using device: {device} with compute_type: {compute_type}")

	# Load the Whisper model once at import time; the transcription endpoint reuses it
	beamsize = 2  # passed as beam_size on every transcribe() call
	wmodel = WhisperModel(
	    "guillaumekln/faster-whisper-small",
	    download_root="./model_cache",
	    compute_type=compute_type,
	    device=device,
	)

	# Admission control: the counter is reported by the status endpoints, the
	# semaphore caps how many transcriptions may run at the same time.
	active_requests = 0
	request_semaphore = Semaphore(MAX_CONCURRENT_REQUESTS)

	def allowed_file(filename):
	    """Return True when *filename* ends in one of ALLOWED_EXTENSIONS.

	    The text after the last dot is compared case-insensitively; a name
	    without any dot is rejected.
	    """
	    _, dot, extension = filename.rpartition('.')
	    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS

	def cleanup_temp_files(*file_paths):
	    """Delete each given path if it exists, on a best-effort basis.

	    Falsy entries (None, empty string) are skipped. A failure on one path
	    is printed and does not stop the remaining paths from being processed.
	    """
	    for candidate in file_paths:
	        if not candidate:
	            continue
	        try:
	            if os.path.exists(candidate):
	                os.remove(candidate)
	        except Exception as exc:
	            print(f"Error cleaning up temp file {candidate}: {str(exc)}")

	def extract_audio_from_video(video_path, output_audio_path):
	    """Write the audio track of a video file to *output_audio_path*.

	    Args:
	        video_path: Path of the video file to read.
	        output_audio_path: Destination path for the extracted audio.

	    Returns:
	        *output_audio_path*, once the audio has been written.

	    Raises:
	        Exception: If the clip cannot be opened, is longer than
	            MAX_FILE_DURATION seconds, has no audio track, or the audio
	            cannot be written. The underlying error is chained as the cause.
	    """
	    try:
	        video = VideoFileClip(video_path)
	        try:
	            if video.duration > MAX_FILE_DURATION:
	                raise ValueError(f"Video duration exceeds {MAX_FILE_DURATION} seconds")
	            # Give a clear message rather than an attribute error on None
	            # when the clip carries no audio.
	            if video.audio is None:
	                raise ValueError("Video has no audio track")
	            video.audio.write_audiofile(output_audio_path)
	        finally:
	            # Close the clip on every path, including a failed write.
	            video.close()
	        return output_audio_path
	    except Exception as e:
	        raise Exception(f"Failed to extract audio from video: {str(e)}") from e

	def send_fcm_data_message(fcm_token, transcription, file_type, created_date, transcription_name):
	    """Push a finished transcription to one device through FCM.

	    The message carries a visible notification (titled with
	    *transcription_name*) and a data payload holding the transcription
	    text, file type, creation date and name.

	    Returns:
	        True when the send call completed, False when anything raised
	        (the error is printed, never propagated).
	    """
	    try:
	        banner = messaging.Notification(
	            title=transcription_name,
	            body="Successfully downloaded",
	        )
	        payload = {
	            'transcription': transcription,
	            'file_type': file_type,
	            'created_date': created_date,
	            'transcription_name': transcription_name,
	        }
	        outgoing = messaging.Message(notification=banner, data=payload, token=fcm_token)
	        response = messaging.send(outgoing)
	        print(f"FCM message sent: {response}")
	        return True
	    except Exception as exc:
	        print(f"Error sending FCM message: {str(exc)}")
	        return False

	@app.route("/health", methods=["GET"])
	def health_check():
	    """Report that the API is up, along with its runtime configuration."""
	    snapshot = {
	        'status': 'API is running',
	        'timestamp': datetime.datetime.now().isoformat(),
	        'device': device,
	        'compute_type': compute_type,
	        'active_requests': active_requests,
	        'max_duration_supported': MAX_FILE_DURATION,
	        'supported_formats': list(ALLOWED_EXTENSIONS),
	    }
	    return jsonify(snapshot)

	@app.route("/status/busy", methods=["GET"])
	def server_busy():
	    """Tell the caller whether every transcription slot is currently taken."""
	    at_capacity = active_requests >= MAX_CONCURRENT_REQUESTS
	    return jsonify(
	        is_busy=at_capacity,
	        active_requests=active_requests,
	        max_capacity=MAX_CONCURRENT_REQUESTS,
	    )

	@app.route("/whisper_transcribe", methods=["POST"])
	def transcribe():
	    """Transcribe an uploaded audio/video file and push the text to the client via FCM.

	    Expects a multipart form with a ``file`` upload and the ``fcm_token``,
	    ``created_date`` and ``transcription_name`` fields. The transcription is
	    delivered through FCM, not in the HTTP response body.

	    Returns:
	        ``{}`` with 200 once the FCM message has been sent;
	        400 for a missing field or unsupported file extension;
	        503 when MAX_CONCURRENT_REQUESTS transcriptions are already running;
	        502 when transcription succeeded but the FCM send failed;
	        500 with the error text for any other failure.
	    """
	    global active_requests

	    # Non-blocking acquire: reject immediately instead of queueing the request.
	    if not request_semaphore.acquire(blocking=False):
	        return jsonify({'error': 'Server busy'}), 503

	    active_requests += 1
	    start_time = time.time()
	    temp_file_path = None
	    temp_audio_path = None

	    try:
	        if 'file' not in request.files or 'fcm_token' not in request.form:
	            return jsonify({'error': 'Missing file or FCM token'}), 400

	        # Check these up front so an absent field is reported as a 400
	        # rather than surfacing as a key error in the generic 500 handler.
	        missing_fields = [
	            field for field in ('created_date', 'transcription_name')
	            if field not in request.form
	        ]
	        if missing_fields:
	            return jsonify({'error': f'Missing form fields: {", ".join(missing_fields)}'}), 400

	        upload = request.files['file']
	        fcm_token = request.form['fcm_token']
	        created_date = request.form['created_date']
	        transcription_name = request.form['transcription_name']
	        if not (upload and allowed_file(upload.filename)):
	            return jsonify({'error': f'Invalid file format. Supported: {", ".join(ALLOWED_EXTENSIONS)}'}), 400

	        file_extension = upload.filename.rsplit('.', 1)[1].lower()
	        is_video = file_extension in ALLOWED_VIDEO_EXTENSIONS

	        # Save the upload under a unique name: two concurrent requests with the
	        # same client filename must not overwrite or delete each other's file.
	        fd, temp_file_path = tempfile.mkstemp(
	            prefix="upload_", suffix=f".{file_extension}", dir=TEMPORARY_FOLDER
	        )
	        os.close(fd)
	        upload.save(temp_file_path)

	        # Handle video/audio
	        if is_video:
	            # Unique name here too; a timestamp in whole seconds can collide.
	            fd, temp_audio_path = tempfile.mkstemp(
	                prefix="temp_audio_", suffix=".wav", dir=TEMPORARY_FOLDER
	            )
	            os.close(fd)
	            extract_audio_from_video(temp_file_path, temp_audio_path)
	            transcription_file = temp_audio_path
	        else:
	            transcription_file = temp_file_path

	        # Transcribe
	        segments, _ = wmodel.transcribe(
	            transcription_file,
	            beam_size=beamsize,
	            vad_filter=True,
	            without_timestamps=True,
	            compression_ratio_threshold=2.4,
	            word_timestamps=False
	        )

	        full_text = " ".join(segment.text for segment in segments)
	        file_type = 'video' if is_video else 'audio'

	        # Send FCM data message. FCM is the only delivery channel, so a failed
	        # send must not be reported to the caller as success.
	        delivered = send_fcm_data_message(
	            fcm_token, full_text, file_type, created_date, transcription_name
	        )
	        if not delivered:
	            return jsonify({'error': 'Transcription succeeded but FCM delivery failed'}), 502

	        return jsonify({}), 200

	    except Exception as e:
	        return jsonify({'error': str(e)}), 500

	    finally:
	        # Runs for every return above: remove temp files and free the slot.
	        cleanup_temp_files(temp_file_path, temp_audio_path)
	        active_requests -= 1
	        request_semaphore.release()
	        print(f"Processed in {time.time()-start_time:.2f}s (Active: {active_requests})")

	if __name__ == "__main__":
	    # Create the temp directory only when it is not already there.
	    temp_dir_missing = not os.path.exists(TEMPORARY_FOLDER)
	    if temp_dir_missing:
	        os.makedirs(TEMPORARY_FOLDER)

	    # Serve on all interfaces, port 7860, with threaded request handling.
	    app.run(host="0.0.0.0", port=7860, threaded=True)