Upload 3 files
Browse files
- audio_tools.py +1 -4
- identity_manager.py +4 -3
- vision_tools.py +1 -1
audio_tools.py
CHANGED
|
@@ -28,10 +28,7 @@ try:
|
|
| 28 |
import torchaudio as ta
|
| 29 |
import torchaudio.transforms as T
|
| 30 |
HAS_TORCHAUDIO = True
|
| 31 |
-
|
| 32 |
-
ta.set_audio_backend("soundfile")
|
| 33 |
-
except Exception:
|
| 34 |
-
pass
|
| 35 |
except Exception:
|
| 36 |
HAS_TORCHAUDIO = False
|
| 37 |
ta = None # type: ignore
|
|
|
|
| 28 |
import torchaudio as ta
|
| 29 |
import torchaudio.transforms as T
|
| 30 |
HAS_TORCHAUDIO = True
|
| 31 |
+
# Note: ta.set_audio_backend is deprecated in newer torchaudio versions
|
|
|
|
|
|
|
|
|
|
| 32 |
except Exception:
|
| 33 |
HAS_TORCHAUDIO = False
|
| 34 |
ta = None # type: ignore
|
identity_manager.py
CHANGED
|
@@ -2,9 +2,10 @@
|
|
| 2 |
# File: identity_manager.py
|
| 3 |
# =========================
|
| 4 |
from __future__ import annotations
|
| 5 |
-
from typing import Any, Dict, List, Optional, Tuple
|
| 6 |
|
| 7 |
-
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
class IdentityManager:
|
|
@@ -13,7 +14,7 @@ class IdentityManager:
|
|
| 13 |
y su proyección sobre frames, clips y SRT.
|
| 14 |
"""
|
| 15 |
|
| 16 |
-
def __init__(self, face_collection: Optional[Collection] = None, voice_collection: Optional[Collection] = None):
|
| 17 |
self.face_collection = face_collection
|
| 18 |
self.voice_collection = voice_collection
|
| 19 |
|
|
|
|
| 2 |
# File: identity_manager.py
|
| 3 |
# =========================
|
| 4 |
from __future__ import annotations
|
| 5 |
+
from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
|
| 6 |
|
| 7 |
+
if TYPE_CHECKING:
|
| 8 |
+
from chromadb.api.models.Collection import Collection
|
| 9 |
|
| 10 |
|
| 11 |
class IdentityManager:
|
|
|
|
| 14 |
y su proyección sobre frames, clips y SRT.
|
| 15 |
"""
|
| 16 |
|
| 17 |
+
def __init__(self, face_collection: Optional["Collection"] = None, voice_collection: Optional["Collection"] = None):
|
| 18 |
self.face_collection = face_collection
|
| 19 |
self.voice_collection = voice_collection
|
| 20 |
|
vision_tools.py
CHANGED
|
@@ -30,7 +30,7 @@ import torchaudio
|
|
| 30 |
import torchaudio.transforms as T
|
| 31 |
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
| 32 |
from pyannote.audio import Pipeline as PyannotePipeline
|
| 33 |
-
from speechbrain.
|
| 34 |
from pydub import AudioSegment
|
| 35 |
from sklearn.cluster import KMeans
|
| 36 |
from sklearn.metrics import silhouette_score
|
|
|
|
| 30 |
import torchaudio.transforms as T
|
| 31 |
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
| 32 |
from pyannote.audio import Pipeline as PyannotePipeline
|
| 33 |
+
from speechbrain.inference.speaker import SpeakerRecognition
|
| 34 |
from pydub import AudioSegment
|
| 35 |
from sklearn.cluster import KMeans
|
| 36 |
from sklearn.metrics import silhouette_score
|