"""Cluster face, voice and scene embeddings of a video analysis with DBSCAN."""

import json
from pathlib import Path

import numpy as np
from sklearn.cluster import DBSCAN


class DataHub:
    """Load a video-analysis JSON file and keep its parsed content.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, video_analysis_json_path: str):
        """Read and parse the JSON file at *video_analysis_json_path*.

        Args:
            video_analysis_json_path: Path of a UTF-8 encoded JSON file.

        Raises:
            OSError: If the file cannot be read.
            json.JSONDecodeError: If the file content is not valid JSON.
        """
        print("DataHub inicializando con JSON:", video_analysis_json_path)
        # Parsed JSON document; the clusterers below call ``.get`` on it, so
        # the top-level value is expected to be a JSON object (dict).
        self.video = json.loads(
            Path(video_analysis_json_path).read_text(encoding='utf-8')
        )


def _cluster_embeddings(entries, eps: float, min_samples: int) -> np.ndarray:
    """Run DBSCAN over the ``'embeddings'`` value of every entry.

    Args:
        entries: Iterable of mappings, each holding an ``'embeddings'`` key.
        eps: DBSCAN neighbourhood radius (euclidean metric).
        min_samples: DBSCAN ``min_samples`` parameter.

    Returns:
        One integer label per entry, in input order. When *entries* is empty
        an empty integer array is returned.

    Raises:
        KeyError: If an entry has no ``'embeddings'`` key.
    """
    embeddings = [entry['embeddings'] for entry in entries]

    if not embeddings:
        # np.array([]) is 1-D and DBSCAN.fit() rejects it with a ValueError;
        # "nothing to cluster" is reported as an empty label array instead.
        labels = np.empty(0, dtype=np.intp)
    else:
        X = np.array(embeddings)
        labels = DBSCAN(
            eps=eps, min_samples=min_samples, metric='euclidean'
        ).fit(X).labels_

    print(labels)
    return labels


class _EmbeddingClusterer:
    """Shared implementation for the per-modality clusterers below.

    Subclasses only set ``_entries_key``, the key of ``DataHub.video`` whose
    value is the list of entries to cluster.
    """

    _entries_key = ""

    def __init__(self, data: DataHub):
        """Keep a reference to the *data* hub that holds the parsed JSON."""
        self.data = data

    def get_clusters(self, eps: float, min_samples: int) -> np.ndarray:
        """Cluster the embeddings stored under ``_entries_key``.

        Args:
            eps: DBSCAN neighbourhood radius (euclidean metric).
            min_samples: DBSCAN ``min_samples`` parameter.

        Returns:
            One integer label per entry; empty when the key is missing, null
            or holds no entries. The labels are also printed.
        """
        # ``or []`` covers both a missing key and an explicit JSON null.
        entries = self.data.video.get(self._entries_key) or []
        return _cluster_embeddings(entries, eps, min_samples)


class get_face_clusters(_EmbeddingClusterer):
    """Cluster the entries stored under the ``"caras"`` key."""

    _entries_key = "caras"


class get_voices_clusters(_EmbeddingClusterer):
    """Cluster the entries stored under the ``"voices"`` key."""

    _entries_key = "voices"


class get_scene_clusters(_EmbeddingClusterer):
    """Cluster the entries stored under the ``"escenas"`` key."""

    _entries_key = "escenas"


# NOTE(review): the statements below run at import time and read a hard-coded
# absolute path; consider moving them under ``if __name__ == "__main__":`` once
# it is confirmed that nothing imports the resulting module-level names.
video = "dif_catala_1_2"
analysis_path = f"/home/acasado/bsc/proyecto_bsc/{video}/analysis.json"

datahub = DataHub(analysis_path)

face_clusterer = get_face_clusters(datahub)
voice_clusterer = get_voices_clusters(datahub)
scene_clusterer = get_scene_clusters(datahub)

labels_caras = face_clusterer.get_clusters(eps=0.4, min_samples=2)
labels_voces = voice_clusterer.get_clusters(eps=1.3, min_samples=1)
labels_escenas = scene_clusterer.get_clusters(eps=1.3, min_samples=2)