engine / generacion_video_nuevo.py
VeuReu's picture
Upload 3 files
c705b37 verified
import cv2
import os
import json
import logging
from pathlib import Path
from libs.vision_tools_salamandra_2 import FaceOfImageEmbedding_video_nuevo
from libs.audio_tools_ana_2 import extract_audio_ffmpeg, diarize_audio, embed_voice_segments
from libs.vision_tools_salamandra_2 import ImageEmbedding, keyframe_conditional_extraction_ana
# Configure the root logger at INFO level and create the module-level logger
# used by the script section at the bottom of this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def faces_embedding_extraction(video_path: str, output_dir_caras: Path):
    """Sample frames from a video and collect an embedding for every face found.

    One frame is taken every ``extract_every`` seconds (based on the FPS that
    OpenCV reports). Each sampled frame is written to a temporary JPEG inside
    ``output_dir_caras`` and handed to
    ``FaceOfImageEmbedding_video_nuevo.encode_image``. For every result the
    face crop is saved as ``frame_NNNN.jpg`` and its embedding is recorded.

    Args:
        video_path: Path of the video file opened with ``cv2.VideoCapture``.
        output_dir_caras: Directory that receives the face crops; it is also
            used for the temporary frame file, so it must already exist.

    Returns:
        A list of dicts, one per saved face crop, with the keys
        ``"embeddings"`` (the value of the result's ``'embedding'`` entry) and
        ``"path"`` (string path of the saved crop). The list is empty when no
        frame could be read.
    """
    extract_every = 1.0  # seconds between sampled frames
    embedder = FaceOfImageEmbedding_video_nuevo()
    temp_path = output_dir_caras / "temp_frame.jpg"
    embeddings_caras = []
    saved_count = 0
    frame_count = 0

    video = cv2.VideoCapture(video_path)
    try:
        fps = int(video.get(cv2.CAP_PROP_FPS))
        # A reported FPS below 1 (or 0 when the container has no FPS metadata)
        # would give an interval of 0 and make the modulo below raise
        # ZeroDivisionError; fall back to sampling every frame in that case.
        frame_interval = max(1, int(fps * extract_every))

        while True:
            ret, frame = video.read()
            if not ret:
                break

            if frame_count % frame_interval == 0:
                cv2.imwrite(str(temp_path), frame)
                try:
                    resultados = embedder.encode_image(temp_path)
                    if resultados:
                        for r in resultados:
                            save_path = output_dir_caras / f"frame_{saved_count:04d}.jpg"
                            # The crop is converted RGB -> BGR before writing
                            # (presumably the embedder returns RGB crops while
                            # cv2.imwrite expects BGR -- confirm in the embedder).
                            cv2.imwrite(
                                str(save_path),
                                cv2.cvtColor(r['face_crop'], cv2.COLOR_RGB2BGR),
                            )
                            embeddings_caras.append(
                                {"embeddings": r['embedding'], "path": str(save_path)}
                            )
                            saved_count += 1
                finally:
                    # Always drop the temporary frame, even when encoding or
                    # saving fails; it may be missing if the write itself failed.
                    if temp_path.exists():
                        os.remove(temp_path)

            frame_count += 1
    finally:
        # Release the capture on every exit path, including exceptions.
        video.release()

    return embeddings_caras
def voices_embedding_extraction(video_path: str, output_dir_audio: Path):
    """Extract the audio track of a video, split it into clips and embed each clip.

    The audio is extracted with ``extract_audio_ffmpeg`` into
    ``output_dir_audio/<video stem>.wav`` at ``sr=16000``, passed to
    ``diarize_audio`` and the resulting clips are embedded with
    ``embed_voice_segments``.

    Args:
        video_path: Path of the source video.
        output_dir_audio: Directory passed to the audio helpers as the output
            location for the extracted audio and the diarization clips.

    Returns:
        A list of dicts with the keys ``"embeddings"`` (one item of the
        ``embed_voice_segments`` result) and ``"path"`` (string form of the
        clip path at the same index).
    """
    sample_rate = 16000
    audio_ext = "wav"
    target_audio = output_dir_audio / f"{Path(video_path).stem}.{audio_ext}"
    wav_path = extract_audio_ffmpeg(video_path, target_audio, sr=sample_rate)

    # Presumably the minimum and maximum clip duration in seconds -- confirm
    # against diarize_audio's signature.
    shortest, longest = 0.5, 10.0
    # The second return value (diarization segments) is not used here.
    clip_paths, _segments = diarize_audio(
        wav_path, output_dir_audio, "clips", shortest, longest
    )

    voice_vectors = embed_voice_segments(clip_paths)
    return [
        {"embeddings": vector, "path": str(clip_paths[idx])}
        for idx, vector in enumerate(voice_vectors)
    ]
def scenes_embedding_extraction(video_path: str, output_dir_scenes: Path):
    """Extract keyframes from a video and compute an image embedding for each.

    Keyframes come from ``keyframe_conditional_extraction_ana`` (called with
    ``threshold=30.0``); each returned item's ``"path"`` entry is encoded with
    ``ImageEmbedding.encode_image``.

    Args:
        video_path: Path of the source video.
        output_dir_scenes: Directory handed to the keyframe extractor as its
            ``output_dir``.

    Returns:
        A list of dicts with the keys ``"embeddings"`` (the encoder output for
        the keyframe) and ``"path"`` (string form of the keyframe path).
    """
    selected_keyframes = keyframe_conditional_extraction_ana(
        video_path=video_path,
        output_dir=output_dir_scenes,
        threshold=30.0,
    )
    encoder = ImageEmbedding()

    def _describe(keyframe):
        # Build the output record for a single keyframe.
        location = keyframe["path"]
        return {"embeddings": encoder.encode_image(location), "path": str(location)}

    return [_describe(keyframe) for keyframe in selected_keyframes]
# ---------------------------------------------------------------------------
# Script section: run the three extractors on one hard-coded video and store
# the combined result as <base_dir>/analysis.json.
# ---------------------------------------------------------------------------
video_path = "/home/acasado/bsc/proyecto_bsc/base_datos_dif_catala_1/dif_catala_1.mp4"
video_concreto = "dif_catala_1_2"

# All outputs of this run live under a single per-video directory.
base_dir = Path(f"/home/acasado/bsc/proyecto_bsc/{video_concreto}")
output_dir_caras = base_dir / "frames"
output_dir_audio = base_dir / "audio"
output_dir_escenas = base_dir / "escenas"
for _out_dir in (output_dir_caras, output_dir_audio, output_dir_escenas):
    _out_dir.mkdir(parents=True, exist_ok=True)

embeddings_caras = faces_embedding_extraction(video_path, output_dir_caras)
embeddings_voices = voices_embedding_extraction(video_path, output_dir_audio)
embeddings_escenas = scenes_embedding_extraction(video_path, output_dir_escenas)

embeddings_finales = {
    "caras": embeddings_caras,
    "voices": embeddings_voices,
    "escenas": embeddings_escenas,
}

analysis_path = f"/home/acasado/bsc/proyecto_bsc/{video_concreto}/analysis.json"
# Write to a sibling temp file first and rename on success, so a failure in
# the middle of json.dump (e.g. a non-serializable embedding) never leaves a
# truncated analysis.json behind.
_analysis_tmp_path = f"{analysis_path}.tmp"
try:
    with open(_analysis_tmp_path, "w", encoding="utf-8") as f:
        json.dump(embeddings_finales, f, indent=2, ensure_ascii=False)
    os.replace(_analysis_tmp_path, analysis_path)
except Exception:
    # Best effort: the script still finishes, but the failure is logged with
    # its traceback instead of a one-line warning.
    logger.exception("Failed to write analysis JSON: %s", analysis_path)
    if os.path.exists(_analysis_tmp_path):
        os.remove(_analysis_tmp_path)
else:
    logger.info("Analysis JSON saved: %s", analysis_path)