Spaces:
Sleeping
Sleeping
| # models/asr_whisper.py | |
| from __future__ import annotations | |
| import os | |
| from typing import Optional, Dict, Any | |
| from faster_whisper import WhisperModel | |
| from utils.config import get_settings | |
| _asr_singleton: Optional["WhisperASR"] = None | |
| def _norm_device(req: str) -> str: | |
| """faster-whisper supports only 'cpu' or 'cuda'.""" | |
| r = (req or "cpu").strip().lower() | |
| if r == "mps": | |
| print("[ASR] 'mps' is not supported by faster-whisper; falling back to CPU.") | |
| return "cpu" | |
| return r if r in ("cpu", "cuda") else "cpu" | |
| def _compute_type_for(device: str) -> str: | |
| # Keep it simple and stable for HF/macOS: | |
| # - CPU: int8 (fast, small) | |
| # - CUDA: float16 (good default on GPUs) | |
| return "float16" if device == "cuda" else "int8" | |
class WhisperASR:
    """Speech-to-text wrapper around a faster-whisper ``WhisperModel``.

    Configuration is read once, at construction time:

    * ``WHISPER_SIZE`` (env): model size, e.g. tiny|base|small|medium|large-v3.
      Unset, empty, or whitespace-only values fall back to "tiny".
    * ``WHISPER_LANG`` (env): language code such as "en"; empty or unset
      means ``None`` (auto-detect).
    * ``ASR_DEVICE`` (settings attribute): requested device, normalized by
      ``_norm_device``; defaults to "cpu" when the attribute is missing.
    """

    def __init__(self):
        settings = get_settings()
        # An empty WHISPER_SIZE must not reach WhisperModel as an empty model
        # name, so treat it the same as "unset" (mirrors WHISPER_LANG below).
        self.model_size = os.getenv("WHISPER_SIZE", "").strip() or "tiny"
        self.language = os.getenv("WHISPER_LANG", "").strip() or None
        self.device = _norm_device(getattr(settings, "ASR_DEVICE", "cpu"))
        self.compute_type = _compute_type_for(self.device)
        print(f"[ASR] Loading faster-whisper: size={self.model_size} device={self.device} compute_type={self.compute_type}")
        self.model = WhisperModel(self.model_size, device=self.device, compute_type=self.compute_type)

    def transcribe(self, path: str) -> Dict[str, Any]:
        """Transcribe the audio file at *path*.

        Returns:
            {"text": str, "language": str|None, "segments": [...]}

            "text" is the stripped text of every non-empty segment joined by
            single spaces. "language" is taken from the ``language``
            attribute of the info object returned by the model (``None`` if
            absent). "segments" is always an empty list to keep the payload
            lightweight for the UI; timings can be exposed later.
        """
        # language=None lets faster-whisper auto-detect. You can force via WHISPER_LANG.
        segments, info = self.model.transcribe(
            path,
            beam_size=1,
            language=self.language or None,
        )
        # Drop segments that are empty after stripping; joining them would
        # leave runs of consecutive spaces in the transcript.
        pieces = ((seg.text or "").strip() for seg in segments)
        text = " ".join(piece for piece in pieces if piece)
        return {
            "text": text,
            "language": getattr(info, "language", None),
            "segments": [],
        }
def get_asr() -> WhisperASR:
    """Return the shared WhisperASR, constructing it on the first call.

    The instance is cached in the module-level ``_asr_singleton``, so later
    calls return the same object without building a new WhisperASR.
    """
    global _asr_singleton
    instance = _asr_singleton
    if instance is None:
        instance = WhisperASR()
        _asr_singleton = instance
    return instance