|
|
from flask import Flask, request, jsonify, Response
|
|
|
from faster_whisper import WhisperModel
|
|
|
import torch
|
|
|
import io
|
|
|
import time
|
|
|
import datetime
|
|
|
from threading import Semaphore
|
|
|
import os
|
|
|
from werkzeug.utils import secure_filename
|
|
|
import tempfile
|
|
|
from moviepy.editor import VideoFileClip
|
|
|
import firebase_admin
|
|
|
from firebase_admin import credentials, messaging
|
|
|
|
|
|
# WSGI application object; the route decorators further down register on it.
app = Flask(import_name=__name__)
|
|
|
|
|
|
|
|
|
# Number of transcriptions allowed to run at the same time.
MAX_CONCURRENT_REQUESTS = 2

# Longest accepted media duration, in seconds (30 minutes).
MAX_FILE_DURATION = 30 * 60

# Directory that receives uploads and extracted audio tracks.
TEMPORARY_FOLDER = tempfile.gettempdir()

# Accepted upload extensions (lower-case, without the leading dot).
ALLOWED_AUDIO_EXTENSIONS = {
    'mp3', 'wav', 'ogg', 'm4a', 'flac',
    'aac', 'wma', 'opus', 'aiff',
}
ALLOWED_VIDEO_EXTENSIONS = {
    'mp4', 'avi', 'mov', 'mkv', 'webm',
    'flv', 'wmv', 'mpeg', 'mpg', '3gp',
}
ALLOWED_EXTENSIONS = ALLOWED_AUDIO_EXTENSIONS | ALLOWED_VIDEO_EXTENSIONS
|
|
|
|
|
|
|
|
|
|
|
|
# Firebase Admin is configured entirely from environment variables so that no
# service-account key material has to live in the source tree.
_firebase_private_key = os.getenv("FIREBASE_PRIVATE_KEY")
if not _firebase_private_key:
    # Fail with an actionable message instead of the AttributeError that
    # calling .replace() on None would raise further down.
    raise RuntimeError(
        "FIREBASE_PRIVATE_KEY environment variable is not set; "
        "cannot initialise Firebase Admin credentials"
    )

firebase_credentials = {
    "type": "service_account",
    "project_id": os.getenv("FIREBASE_PROJECT_ID"),
    "private_key_id": os.getenv("FIREBASE_PRIVATE_KEY_ID"),
    # Literal backslash-n sequences in the variable are turned back into
    # real newlines before the key is handed to the credentials loader.
    "private_key": _firebase_private_key.replace("\\n", "\n"),
    "client_email": os.getenv("FIREBASE_CLIENT_EMAIL"),
    "client_id": os.getenv("FIREBASE_CLIENT_ID"),
    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
    "token_uri": "https://oauth2.googleapis.com/token",
    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
    "client_x509_cert_url": f"https://www.googleapis.com/robot/v1/metadata/x509/{os.getenv('FIREBASE_CLIENT_EMAIL')}",
}

cred = credentials.Certificate(firebase_credentials)

# Registers the default Firebase app used by messaging.send() below.
firebase_admin.initialize_app(cred)
|
|
|
|
|
|
|
|
|
# Pick the inference backend once at import time: half precision on a CUDA
# GPU when one is available, otherwise int8 on the CPU.
if torch.cuda.is_available():
    device, compute_type = "cuda", "float16"
else:
    device, compute_type = "cpu", "int8"

print(f"Using device: {device} with compute_type: {compute_type}")
|
|
|
|
|
|
|
|
|
# Beam width passed to every wmodel.transcribe() call.
beamsize = 2

# Single shared model instance, loaded at import time; weights are stored
# under ./model_cache (relative to the working directory).
wmodel = WhisperModel(
    "guillaumekln/faster-whisper-small",
    download_root="./model_cache",
    compute_type=compute_type,
    device=device,
)
|
|
|
|
|
|
|
|
|
# Counter of in-flight transcriptions, reported by the status endpoints.
active_requests = 0

# Admission control: one permit per transcription allowed to run concurrently.
request_semaphore = Semaphore(value=MAX_CONCURRENT_REQUESTS)
|
|
|
|
|
|
def allowed_file(filename):
    """Tell whether ``filename`` ends in a supported media extension.

    The text after the last dot is compared case-insensitively against
    ALLOWED_EXTENSIONS. A name without any dot is rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
|
|
|
|
|
|
def cleanup_temp_files(*file_paths):
    """Delete the given temporary files on a best-effort basis.

    Args:
        *file_paths: Paths to remove. Falsy entries (``None``, ``""``) are
            skipped so callers can pass placeholders that were never set.

    Never raises: a file that is already gone counts as cleaned up, and any
    other failure is printed and the remaining paths are still processed.
    """
    for file_path in file_paths:
        if not file_path:
            continue
        try:
            # Remove directly instead of checking os.path.exists() first:
            # the file may disappear between the check and the removal.
            os.remove(file_path)
        except FileNotFoundError:
            # Already gone, which is the state we wanted anyway.
            pass
        except Exception as e:
            print(f"Error cleaning up temp file {file_path}: {str(e)}")
|
|
|
|
|
|
def extract_audio_from_video(video_path, output_audio_path):
    """Write the audio track of a video file to ``output_audio_path``.

    Args:
        video_path: Path of the video file to read.
        output_audio_path: Destination path for the extracted audio.

    Returns:
        ``output_audio_path``, once the audio file has been written.

    Raises:
        Exception: On any failure (unreadable video, duration above
            MAX_FILE_DURATION, no audio track, write error). The message is
            prefixed with "Failed to extract audio from video: " and the
            original error is attached as ``__cause__``.
    """
    try:
        video = VideoFileClip(video_path)
        try:
            if video.duration > MAX_FILE_DURATION:
                raise ValueError(f"Video duration exceeds {MAX_FILE_DURATION} seconds")
            if video.audio is None:
                # Without this check the next line fails with an opaque
                # AttributeError on None.
                raise ValueError("Video has no audio track")
            video.audio.write_audiofile(output_audio_path)
        finally:
            # Release the clip on every path, including failures.
            video.close()
        return output_audio_path
    except Exception as e:
        raise Exception(f"Failed to extract audio from video: {str(e)}") from e
|
|
|
|
|
|
def send_fcm_data_message(fcm_token, transcription, file_type, created_date, transcription_name):
    """Push a finished transcription to one device through FCM.

    The message carries a visible notification (titled with
    ``transcription_name``) and a data payload holding the transcription
    text, file type, creation date and name.

    Returns:
        True when ``messaging.send`` completed, False when building or
        sending the message raised; the error is printed, not propagated.
    """
    try:
        banner = messaging.Notification(
            title=transcription_name,
            body="Successfully downloaded",
        )
        payload = {
            'transcription': transcription,
            'file_type': file_type,
            'created_date': created_date,
            'transcription_name': transcription_name,
        }
        outgoing = messaging.Message(notification=banner, data=payload, token=fcm_token)
        send_result = messaging.send(outgoing)
        print(f"FCM message sent: {send_result}")
    except Exception as exc:
        print(f"Error sending FCM message: {str(exc)}")
        return False
    return True
|
|
|
|
|
|
@app.route("/health", methods=["GET"])
def health_check():
    """Report liveness together with the current runtime configuration."""
    report = {
        'status': 'API is running',
        'timestamp': datetime.datetime.now().isoformat(),
        'device': device,
        'compute_type': compute_type,
        'active_requests': active_requests,
        'max_duration_supported': MAX_FILE_DURATION,
        'supported_formats': list(ALLOWED_EXTENSIONS),
    }
    return jsonify(report)
|
|
|
|
|
|
@app.route("/status/busy", methods=["GET"])
def server_busy():
    """Report whether every transcription slot is currently taken."""
    return jsonify(
        is_busy=active_requests >= MAX_CONCURRENT_REQUESTS,
        active_requests=active_requests,
        max_capacity=MAX_CONCURRENT_REQUESTS,
    )
|
|
|
|
|
|
@app.route("/whisper_transcribe", methods=["POST"])
def transcribe():
    """Transcribe an uploaded audio/video file and push the text via FCM.

    Expects multipart form data with a ``file`` part and the form fields
    ``fcm_token``, ``created_date`` and ``transcription_name``.

    Returns:
        200 with an empty JSON object once the FCM message has been sent;
        400 for a missing field or an unsupported file extension;
        502 when the transcription could not be delivered through FCM;
        503 when MAX_CONCURRENT_REQUESTS transcriptions are already running;
        500 for any other failure.
    """
    global active_requests

    # Non-blocking acquire: reject immediately rather than queueing the
    # caller behind long-running transcriptions.
    if not request_semaphore.acquire(blocking=False):
        return jsonify({'error': 'Server busy'}), 503

    active_requests += 1
    start_time = time.time()
    temp_file_path = None
    temp_audio_path = None

    try:
        if 'file' not in request.files or 'fcm_token' not in request.form:
            return jsonify({'error': 'Missing file or FCM token'}), 400

        # Validate the remaining fields up front; indexing request.form
        # directly would raise inside this try and surface as a 500.
        missing_fields = [
            field for field in ('created_date', 'transcription_name')
            if field not in request.form
        ]
        if missing_fields:
            return jsonify({'error': f'Missing form field(s): {", ".join(missing_fields)}'}), 400

        upload = request.files['file']
        fcm_token = request.form['fcm_token']
        created_date = request.form['created_date']
        transcription_name = request.form['transcription_name']

        if not (upload and allowed_file(upload.filename)):
            return jsonify({'error': f'Invalid file format. Supported: {", ".join(ALLOWED_EXTENSIONS)}'}), 400

        file_extension = upload.filename.rsplit('.', 1)[1].lower()
        is_video = file_extension in ALLOWED_VIDEO_EXTENSIONS

        # Store the upload under a unique server-chosen name. Using the
        # client's filename let two concurrent uploads with the same name
        # overwrite (and later delete) each other's data. The suffix is safe
        # because the extension was just checked against the allow-list.
        upload_fd, temp_file_path = tempfile.mkstemp(
            suffix=f".{file_extension}", dir=TEMPORARY_FOLDER
        )
        os.close(upload_fd)
        upload.save(temp_file_path)

        if is_video:
            # Unique name as well: a timestamp with one-second resolution
            # collides when two videos arrive within the same second.
            audio_fd, temp_audio_path = tempfile.mkstemp(
                prefix="temp_audio_", suffix=".wav", dir=TEMPORARY_FOLDER
            )
            os.close(audio_fd)
            extract_audio_from_video(temp_file_path, temp_audio_path)
            transcription_file = temp_audio_path
        else:
            transcription_file = temp_file_path

        segments, _ = wmodel.transcribe(
            transcription_file,
            beam_size=beamsize,
            vad_filter=True,
            without_timestamps=True,
            compression_ratio_threshold=2.4,
            word_timestamps=False
        )

        full_text = " ".join(segment.text for segment in segments)
        file_type = 'video' if is_video else 'audio'

        # The transcription is only delivered through FCM, so a failed push
        # must not be reported to the caller as success.
        delivered = send_fcm_data_message(
            fcm_token, full_text, file_type, created_date, transcription_name
        )
        if not delivered:
            return jsonify({'error': 'Failed to deliver transcription via FCM'}), 502

        return jsonify({}), 200

    except Exception as e:
        return jsonify({'error': str(e)}), 500

    finally:
        # Runs for every exit path above, including the early 400 returns.
        cleanup_temp_files(temp_file_path, temp_audio_path)
        active_requests -= 1
        request_semaphore.release()
        print(f"Processed in {time.time()-start_time:.2f}s (Active: {active_requests})")
|
|
|
|
|
|
if __name__ == "__main__":
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first.
    os.makedirs(TEMPORARY_FOLDER, exist_ok=True)

    # threaded=True lets several requests run at once; the transcription
    # route limits its own concurrency with request_semaphore.
    app.run(host="0.0.0.0", port=7860, threaded=True)