"""Extract face, voice and scene embeddings from a video and save them as JSON.

Running this module processes the hard-coded ``video_path`` below and writes
face crops, audio clips, scene keyframes and an ``analysis.json`` summary under
``/home/acasado/bsc/proyecto_bsc/<video_concreto>/``.
"""
import json
import logging
import os
from pathlib import Path

import cv2

from libs.vision_tools_salamandra_2 import FaceOfImageEmbedding_video_nuevo
from libs.audio_tools_ana_2 import extract_audio_ffmpeg, diarize_audio, embed_voice_segments
from libs.vision_tools_salamandra_2 import ImageEmbedding, keyframe_conditional_extraction_ana

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def faces_embedding_extraction(video_path: str, output_dir_caras: Path, extract_every: float = 1.0):
    """Sample frames from a video and collect an embedding per detected face.

    Roughly one frame every ``extract_every`` seconds is written to a temporary
    JPEG and passed to ``FaceOfImageEmbedding_video_nuevo.encode_image``. Each
    returned result contributes one face crop saved as ``frame_NNNN.jpg`` in
    ``output_dir_caras``.

    Args:
        video_path: Path of the video file to read.
        output_dir_caras: Existing directory that receives the face crops (and
            the temporary frame file while processing).
        extract_every: Sampling period in seconds. Defaults to 1.0.

    Returns:
        A list of ``{"embeddings": ..., "path": ...}`` dicts, one per saved
        face crop. Empty if the video cannot be opened or has no frames.
    """
    embedder = FaceOfImageEmbedding_video_nuevo()
    video = cv2.VideoCapture(video_path)
    embeddings_caras = []

    try:
        if not video.isOpened():
            logger.warning("Could not open video: %s", video_path)
            return embeddings_caras

        # Keep the FPS as a float: truncating 29.97 to 29 before multiplying
        # skews the sampling period.
        fps = video.get(cv2.CAP_PROP_FPS)
        frame_interval = int(round(fps * extract_every)) if fps > 0 else 0
        if frame_interval < 1:
            # Some containers report an FPS of 0; a zero interval would make
            # the modulo below raise ZeroDivisionError.
            logger.warning(
                "Frame interval resolved to %d (fps=%s, extract_every=%s); sampling every frame",
                frame_interval, fps, extract_every,
            )
            frame_interval = 1

        temp_path = output_dir_caras / "temp_frame.jpg"
        frame_count = 0
        saved_count = 0

        while True:
            ret, frame = video.read()
            if not ret:
                break

            if frame_count % frame_interval == 0:
                cv2.imwrite(str(temp_path), frame)
                try:
                    resultados = embedder.encode_image(temp_path)
                    if resultados:
                        for r in resultados:
                            embedding = r['embedding']
                            cara = r['face_crop']
                            save_path = output_dir_caras / f"frame_{saved_count:04d}.jpg"
                            # The crop is converted RGB -> BGR for imwrite;
                            # presumably the embedder returns RGB crops.
                            cv2.imwrite(str(save_path), cv2.cvtColor(cara, cv2.COLOR_RGB2BGR))
                            embeddings_caras.append({"embeddings": embedding, "path": str(save_path)})
                            saved_count += 1
                finally:
                    # Always drop the temporary frame, even if encoding failed.
                    if temp_path.exists():
                        os.remove(temp_path)

            frame_count += 1
    finally:
        video.release()

    return embeddings_caras


def voices_embedding_extraction(video_path: str, output_dir_audio: Path):
    """Extract the audio track, diarize it and embed each resulting clip.

    Args:
        video_path: Path of the video file whose audio is extracted.
        output_dir_audio: Directory that receives the WAV file and the clips.

    Returns:
        A list of ``{"embeddings": ..., "path": ...}`` dicts pairing each
        embedding returned by ``embed_voice_segments`` with its clip path.
    """
    sr = 16000
    fmt = "wav"
    wav_path = extract_audio_ffmpeg(
        video_path,
        output_dir_audio / f"{Path(video_path).stem}.{fmt}",
        sr=sr,
    )

    min_dur = 0.5
    max_dur = 10.0
    # The diarization segments are not used here; only the clip paths are.
    clip_paths, _diar_segs = diarize_audio(
        wav_path, output_dir_audio, "clips", min_dur, max_dur
    )

    embeddings = embed_voice_segments(clip_paths)
    return [
        {"embeddings": emb, "path": str(clip_path)}
        for clip_path, emb in zip(clip_paths, embeddings)
    ]


def scenes_embedding_extraction(video_path: str, output_dir_scenes: Path):
    """Extract scene keyframes from a video and embed each one.

    Args:
        video_path: Path of the video file to analyse.
        output_dir_scenes: Directory passed to the keyframe extractor as its
            output directory.

    Returns:
        A list of ``{"embeddings": ..., "path": ...}`` dicts, one per keyframe
        returned by ``keyframe_conditional_extraction_ana``.
    """
    keyframes_final = keyframe_conditional_extraction_ana(
        video_path=video_path,
        output_dir=output_dir_scenes,
        threshold=30.0,
    )

    image_embedder = ImageEmbedding()
    embeddings_escenas = []
    for keyframe in keyframes_final:
        frame_path = keyframe["path"]
        embedding = image_embedder.encode_image(frame_path)
        embeddings_escenas.append({"embeddings": embedding, "path": str(frame_path)})

    return embeddings_escenas


def _json_default(obj):
    """Convert array-like objects exposing ``tolist()`` into plain lists.

    NOTE(review): the embedders' return types are not visible in this file;
    this fallback only matters if they return array-like objects (e.g. numpy
    arrays) rather than plain lists.
    """
    tolist = getattr(obj, "tolist", None)
    if callable(tolist):
        return tolist()
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


video_path = "/home/acasado/bsc/proyecto_bsc/base_datos_dif_catala_1/dif_catala_1.mp4"
video_concreto = "dif_catala_1_2"

output_dir_caras = Path(f"/home/acasado/bsc/proyecto_bsc/{video_concreto}/frames")
output_dir_caras.mkdir(parents=True, exist_ok=True)

output_dir_audio = Path(f"/home/acasado/bsc/proyecto_bsc/{video_concreto}/audio")
output_dir_audio.mkdir(parents=True, exist_ok=True)

output_dir_escenas = Path(f"/home/acasado/bsc/proyecto_bsc/{video_concreto}/escenas")
output_dir_escenas.mkdir(parents=True, exist_ok=True)

embeddings_caras = faces_embedding_extraction(video_path, output_dir_caras)
embeddings_voices = voices_embedding_extraction(video_path, output_dir_audio)
embeddings_escenas = scenes_embedding_extraction(video_path, output_dir_escenas)

embeddings_finales = {
    "caras": embeddings_caras,
    "voices": embeddings_voices,
    "escenas": embeddings_escenas,
}

analysis_path = f"/home/acasado/bsc/proyecto_bsc/{video_concreto}/analysis.json"
try:
    # Serialize fully before opening the file so a serialization error does
    # not leave a truncated analysis.json behind.
    payload = json.dumps(
        embeddings_finales, indent=2, ensure_ascii=False, default=_json_default
    )
    with open(analysis_path, "w", encoding="utf-8") as f:
        f.write(payload)
    logger.info("Analysis JSON saved: %s", analysis_path)
except Exception as e:
    # Best-effort: the pipeline outputs on disk remain valid without the JSON.
    logger.warning("Failed to write analysis JSON: %s", e)