carlex3321 committed
Commit cc1f91b · verified · 1 Parent(s): 3629c3e

Upload 2 files

Files changed (2):
  1. builder.sh +74 -47
  2. info.sh +154 -110
builder.sh CHANGED
@@ -1,9 +1,9 @@
  #!/usr/bin/env bash
  set -euo pipefail

- echo "🚀 Builder (Apex + Q8) — runtime, visible GPU, persistent cache"

- # ===== Persistence and caches =====
  if [ -d /data ]; then
    export HF_HOME="${HF_HOME:-/data/.cache/huggingface}"
    export TORCH_HOME="${TORCH_HOME:-/data/.cache/torch}"
@@ -15,59 +15,56 @@ export HF_HUB_CACHE="${HF_HUB_CACHE:-$HF_HOME/hub}"
  mkdir -p "$HF_HOME" "$HF_HUB_CACHE" "$TORCH_HOME"
  mkdir -p /app/.cache && ln -sf "$HF_HOME" /app/.cache/huggingface

- # ===== Wheel repository on the Hub =====
  export SELF_HF_REPO_ID="${SELF_HF_REPO_ID:-carlex3321/aduc-sdr}"

- # ===== Transfer acceleration =====
  export HF_HUB_ENABLE_HF_TRANSFER="${HF_HUB_ENABLE_HF_TRANSFER:-1}"
  export HF_HUB_DOWNLOAD_TIMEOUT="${HF_HUB_DOWNLOAD_TIMEOUT:-60}"

- # ===== Working directories =====
  mkdir -p /app/wheels /app/cuda_cache /app/wheels/src
  chmod -R 777 /app/wheels || true
  export CUDA_CACHE_PATH="/app/cuda_cache"
-
- # NGC license if present
  [ -f "/NGC-DL-CONTAINER-LICENSE" ] && cp -f /NGC-DL-CONTAINER-LICENSE /app/wheels/NGC-DL-CONTAINER-LICENSE || true

- # ===== Minimal build dependencies =====
  python -m pip install -U pip build setuptools wheel hatchling hatch-vcs scikit-build-core cmake ninja packaging "huggingface_hub[hf_transfer]" || true

- # ===== Tags =====
  PY_TAG="$(python -c 'import sys; print(f"cp{sys.version_info[0]}{sys.version_info[1]}")' 2>/dev/null || echo cp310)"
  TORCH_VER="$(python - <<'PY'
  try:
-     import torch, re
-     v = torch.__version__
-     print(re.sub(r'\+.*$', '', v))
  except Exception:
-     print("unknown")
  PY
  )"
  CU_TAG="$(python - <<'PY'
  try:
-     import torch
-     cu = getattr(torch.version, "cuda", None)
-     print("cu"+cu.replace(".","")) if cu else print("")
  except Exception:
-     print("")
  PY
  )"
  echo "[env] PY_TAG=${PY_TAG} TORCH_VER=${TORCH_VER} CU_TAG=${CU_TAG}"

- # ===== Checkers =====
  check_apex() {
    python - <<'PY'
  try:
-     from apex.normalization import FusedLayerNorm, FusedRMSNorm
-     import importlib; importlib.import_module("fused_layer_norm_cuda")
-     ok = True
  except Exception:
-     ok = False
  raise SystemExit(0 if ok else 1)
  PY
  }
-
  check_q8() {
    python - <<'PY'
  import importlib.util
@@ -75,8 +72,23 @@ spec = importlib.util.find_spec("ltx_q8_kernels") or importlib.util.find_spec("q
  raise SystemExit(0 if spec else 1)
  PY
  }

- # ===== Download wheels from the Hub =====
  install_from_hf () {
    local PKG="$1"
    python - "$PKG" "$PY_TAG" "$CU_TAG" <<'PY' || exit 0
@@ -86,9 +98,9 @@ pkg, py_tag, cu_tag = sys.argv[1], sys.argv[2], sys.argv[3]
  repo = os.environ.get("SELF_HF_REPO_ID","carlex3321/aduc-sdr")
  api = HfApi(token=os.getenv("HF_TOKEN") or HfFolder.get_token())
  try:
-     files = api.list_repo_files(repo_id=repo, repo_type="model")
  except Exception:
-     raise SystemExit(0)
  cands = [f for f in files if f.endswith(".whl") and f.rsplit("/",1)[-1].startswith(pkg+"-") and py_tag in f]
  pref = [f for f in cands if cu_tag and cu_tag in f] or cands
  if not pref: raise SystemExit(0)
@@ -99,7 +111,7 @@ print(path)
  PY
  }

- # ===== Builders =====
  build_apex () {
    local SRC="/app/wheels/src/apex"
    if [ -d "$SRC/.git" ]; then
@@ -114,12 +126,11 @@ build_apex () {
    python -m pip wheel --no-build-isolation --no-deps "$SRC" -w /app/wheels || true
    local W="$(ls -t /app/wheels/apex-*.whl 2>/dev/null | head -n1 || true)"
    if [ -n "${W}" ]; then
-     python -m pip install -U --no-deps "${W}" || true
    else
      python -m pip install --no-build-isolation "$SRC" || true
    fi
  }
-
  Q8_REPO="${Q8_REPO:-https://github.com/Lightricks/LTX-Video-Q8-Kernels.git}"
  Q8_COMMIT="${Q8_COMMIT:-f3066edea210082799ca5a2bbf9ef0321c5dd8fc}"
  build_q8 () {
@@ -131,19 +142,34 @@ build_q8 () {
    python -m pip wheel --no-build-isolation "$SRC" -w /app/wheels || true
    local W="$(ls -t /app/wheels/q8_kernels-*.whl 2>/dev/null | head -n1 || true)"
    if [ -n "${W}" ]; then
-     python -m pip install -U --no-deps "${W}" || true
    else
-     python -m pip install --no-build-isolation "$SRC" || true
    fi
  }

  ensure_pkg () {
-   local PKG="$1"       # apex | q8_kernels
-   local CHECK_FN="$2"  # check_apex | check_q8
-   local BUILD_FN="$3"  # build_apex | build_q8
    if ${CHECK_FN}; then
-     echo "[flow] ${PKG}: already installed"
-     return 0
    fi
    echo "[flow] ${PKG}: trying wheel from the Hub (${SELF_HF_REPO_ID})"
    HF_OUT="$(install_from_hf "$PKG" || true)"
@@ -151,8 +177,7 @@ ensure_pkg () {
      WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
      python -m pip install -U --no-build-isolation "${WHEEL_PATH}" || true
      if ${CHECK_FN}; then
-       echo "[flow] ${PKG}: success via Hub (${WHEEL_PATH})"
-       return 0
      fi
    fi
    echo "[flow] ${PKG}: compiling (fallback)"
@@ -160,24 +185,26 @@ ensure_pkg () {
    ${CHECK_FN} || echo "[flow] ${PKG}: failed after build; continuing"
  }

- ensure_pkg "apex" check_apex build_apex || true
- ensure_pkg "q8_kernels" check_q8 build_q8 || true

- # Optional upload of generated wheels
  python - <<'PY'
  import os
  from huggingface_hub import HfApi, HfFolder
  repo=os.environ.get("SELF_HF_REPO_ID","carlex3321/aduc-sdr")
  token=os.getenv("HF_TOKEN") or HfFolder.get_token()
  if not token:
-     raise SystemExit(0)
  api=HfApi(token=token)
  api.upload_folder(
-     folder_path="/app/wheels",
-     repo_id=repo,
-     repo_type="model",
-     allow_patterns=["*.whl","NGC-DL-CONTAINER-LICENSE"],
-     ignore_patterns=["**/src/**","**/*.log","**/logs/**",".git/**"],
  )
  print("Wheel upload complete.")
  PY

  #!/usr/bin/env bash
  set -euo pipefail

+ echo "🚀 Builder (Apex + Q8 + FlashAttention) — runtime, visible GPU, persistent cache"

+ # Persistence and caches
  if [ -d /data ]; then
    export HF_HOME="${HF_HOME:-/data/.cache/huggingface}"
    export TORCH_HOME="${TORCH_HOME:-/data/.cache/torch}"

  mkdir -p "$HF_HOME" "$HF_HUB_CACHE" "$TORCH_HOME"
  mkdir -p /app/.cache && ln -sf "$HF_HOME" /app/.cache/huggingface

+ # Wheels repo
  export SELF_HF_REPO_ID="${SELF_HF_REPO_ID:-carlex3321/aduc-sdr}"

+ # Transfer accel
  export HF_HUB_ENABLE_HF_TRANSFER="${HF_HUB_ENABLE_HF_TRANSFER:-1}"
  export HF_HUB_DOWNLOAD_TIMEOUT="${HF_HUB_DOWNLOAD_TIMEOUT:-60}"

+ # Work dirs
  mkdir -p /app/wheels /app/cuda_cache /app/wheels/src
  chmod -R 777 /app/wheels || true
  export CUDA_CACHE_PATH="/app/cuda_cache"
  [ -f "/NGC-DL-CONTAINER-LICENSE" ] && cp -f /NGC-DL-CONTAINER-LICENSE /app/wheels/NGC-DL-CONTAINER-LICENSE || true

+ # Build deps
  python -m pip install -U pip build setuptools wheel hatchling hatch-vcs scikit-build-core cmake ninja packaging "huggingface_hub[hf_transfer]" || true

+ # Environment tags
  PY_TAG="$(python -c 'import sys; print(f"cp{sys.version_info[0]}{sys.version_info[1]}")' 2>/dev/null || echo cp310)"
  TORCH_VER="$(python - <<'PY'
  try:
+     import torch, re
+     v = torch.__version__
+     print(re.sub(r'\+.*$', '', v))
  except Exception:
+     print("unknown")
  PY
  )"
  CU_TAG="$(python - <<'PY'
  try:
+     import torch
+     cu = getattr(torch.version, "cuda", None)
+     print("cu"+cu.replace(".","")) if cu else print("")
  except Exception:
+     print("")
  PY
  )"
  echo "[env] PY_TAG=${PY_TAG} TORCH_VER=${TORCH_VER} CU_TAG=${CU_TAG}"

+ # Checkers
  check_apex() {
    python - <<'PY'
  try:
+     from apex.normalization import FusedLayerNorm, FusedRMSNorm
+     import importlib; importlib.import_module("fused_layer_norm_cuda")
+     ok = True
  except Exception:
+     ok = False
  raise SystemExit(0 if ok else 1)
  PY
  }
  check_q8() {
    python - <<'PY'
  import importlib.util

  raise SystemExit(0 if spec else 1)
  PY
  }
+ check_flash() {
+   python - <<'PY'
+ ok = False
+ try:
+     import importlib
+     for name in ("flash_attn_2_cuda","flash_attn.ops.layer_norm","flash_attn.layers.layer_norm","flash_attn"):
+         try:
+             importlib.import_module(name); ok=True; break
+         except Exception:
+             pass
+ except Exception:
+     ok = False
+ raise SystemExit(0 if ok else 1)
+ PY
+ }

+ # Download wheel from the Hub
  install_from_hf () {
    local PKG="$1"
    python - "$PKG" "$PY_TAG" "$CU_TAG" <<'PY' || exit 0

  repo = os.environ.get("SELF_HF_REPO_ID","carlex3321/aduc-sdr")
  api = HfApi(token=os.getenv("HF_TOKEN") or HfFolder.get_token())
  try:
+     files = api.list_repo_files(repo_id=repo, repo_type="model")
  except Exception:
+     raise SystemExit(0)
  cands = [f for f in files if f.endswith(".whl") and f.rsplit("/",1)[-1].startswith(pkg+"-") and py_tag in f]
  pref = [f for f in cands if cu_tag and cu_tag in f] or cands
  if not pref: raise SystemExit(0)

  PY
  }

+ # Builders
  build_apex () {
    local SRC="/app/wheels/src/apex"
    if [ -d "$SRC/.git" ]; then

    python -m pip wheel --no-build-isolation --no-deps "$SRC" -w /app/wheels || true
    local W="$(ls -t /app/wheels/apex-*.whl 2>/dev/null | head -n1 || true)"
    if [ -n "${W}" ]; then
+     python -m pip install -U --no-deps "${W}" || true
    else
      python -m pip install --no-build-isolation "$SRC" || true
    fi
  }
  Q8_REPO="${Q8_REPO:-https://github.com/Lightricks/LTX-Video-Q8-Kernels.git}"
  Q8_COMMIT="${Q8_COMMIT:-f3066edea210082799ca5a2bbf9ef0321c5dd8fc}"
  build_q8 () {

    python -m pip wheel --no-build-isolation "$SRC" -w /app/wheels || true
    local W="$(ls -t /app/wheels/q8_kernels-*.whl 2>/dev/null | head -n1 || true)"
    if [ -n "${W}" ]; then
+     python -m pip install -U --no-deps "${W}" || true
    else
+     python -m pip install --no-build-isolation "$SRC" || true
+   fi
+ }
+ FLASH_ATTENTION_TAG="${FLASH_ATTENTION_TAG:-v2.8.3}"
+ build_flash () {
+   set -e
+   local SRC="/app/wheels/src/flash-attn"
+   rm -rf "$SRC"
+   git clone --depth 1 --branch "$FLASH_ATTENTION_TAG" https://github.com/Dao-AILab/flash-attention.git "$SRC"
+   export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-8.9}"
+   export MAX_JOBS="${MAX_JOBS:-$(nproc)}"
+   export CUDA_HOME="${CUDA_HOME:-/usr/local/cuda}"
+   python -m pip wheel --no-build-isolation --no-deps "$SRC" -w /app/wheels || true
+   local W="$(ls -t /app/wheels/flash_attn-*.whl 2>/dev/null | head -n1 || true)"
+   if [ -n "${W}" ]; then
+     python -m pip install -U --no-deps "${W}" || true
+   else
+     python -m pip install --no-build-isolation "$SRC" || true
    fi
  }

+ # Orchestrator
  ensure_pkg () {
+   local PKG="$1"; local CHECK_FN="$2"; local BUILD_FN="$3"
    if ${CHECK_FN}; then
+     echo "[flow] ${PKG}: already installed"; return 0
    fi
    echo "[flow] ${PKG}: trying wheel from the Hub (${SELF_HF_REPO_ID})"
    HF_OUT="$(install_from_hf "$PKG" || true)"

      WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
      python -m pip install -U --no-build-isolation "${WHEEL_PATH}" || true
      if ${CHECK_FN}; then
+       echo "[flow] ${PKG}: success via Hub (${WHEEL_PATH})"; return 0
      fi
    fi
    echo "[flow] ${PKG}: compiling (fallback)"

    ${CHECK_FN} || echo "[flow] ${PKG}: failed after build; continuing"
  }

+ # Execution
+ ensure_pkg "apex" check_apex build_apex || true
+ ensure_pkg "q8_kernels" check_q8 build_q8 || true
+ ensure_pkg "flash_attn" check_flash build_flash || true

+ # Upload wheels
  python - <<'PY'
  import os
  from huggingface_hub import HfApi, HfFolder
  repo=os.environ.get("SELF_HF_REPO_ID","carlex3321/aduc-sdr")
  token=os.getenv("HF_TOKEN") or HfFolder.get_token()
  if not token:
+     raise SystemExit(0)
  api=HfApi(token=token)
  api.upload_folder(
+     folder_path="/app/wheels",
+     repo_id=repo,
+     repo_type="model",
+     allow_patterns=["*.whl","NGC-DL-CONTAINER-LICENSE"],
+     ignore_patterns=["**/src/**","**/*.log","**/logs/**",".git/**"],
  )
  print("Wheel upload complete.")
  PY
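
Usage note: the updated builder.sh is driven entirely by environment variables that all carry defaults, so a bare `bash builder.sh` already works. The sketch below is an explicit invocation under stated assumptions: the token value is a placeholder, and 8.9 simply mirrors the script's own TORCH_CUDA_ARCH_LIST default (Ada-class GPUs such as L4/L40).

    export HF_TOKEN="hf_xxx"                      # placeholder; only needed for the optional wheel upload
    export SELF_HF_REPO_ID="carlex3321/aduc-sdr"  # Hub repo used as the wheel cache
    export FLASH_ATTENTION_TAG="v2.8.3"           # tag cloned by build_flash
    export TORCH_CUDA_ARCH_LIST="8.9"             # CUDA arch for source builds (script default)
    export MAX_JOBS="$(nproc)"                    # parallel compile jobs
    bash builder.sh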
info.sh CHANGED
@@ -1,123 +1,167 @@
- #!/usr/bin/env python3
- import os, sys, json, subprocess
- from pathlib import Path
- from typing import List, Optional
- from time import time, sleep
- from huggingface_hub import hf_hub_download

- class VincieService:
-     def __init__(
-         self,
-         repo_dir: str = "/app/VINCIE",
-         ckpt_dir: str = "/app/ckpt/VINCIE-3B",
-         python_bin: str = "python3",
-         repo_id: str = "ByteDance-Seed/VINCIE-3B",
-     ):
-         self.repo_dir = Path(repo_dir)
-         self.ckpt_dir = Path(ckpt_dir)
-         self.python = python_bin
-         self.repo_id = repo_id
-         self.generate_yaml = self.repo_dir / "configs" / "generate.yaml"
-         self.output_root = Path("/app/outputs")
-         self.output_root.mkdir(parents=True, exist_ok=True)
-         (self.repo_dir / "ckpt").mkdir(parents=True, exist_ok=True)

-     def ensure_repo(self, git_url: str = "https://github.com/ByteDance-Seed/VINCIE") -> None:
-         if not self.repo_dir.exists():
-             subprocess.run(["git", "clone", "--depth", "1", git_url, str(self.repo_dir)], check=True)

-     def ensure_model(self, hf_token: Optional[str] = None) -> None:
-         self.ckpt_dir.mkdir(parents=True, exist_ok=True)
-         token = hf_token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
-
-         def _need(p: Path) -> bool:
              try:
-                 return not (p.exists() and p.stat().st_size > 1_000_000)
-             except FileNotFoundError:
-                 return True

-         for fname in ["dit.pth", "vae.pth"]:
-             dst = self.ckpt_dir / fname
-             if _need(dst):
-                 print(f"[vince] downloading {fname} from {self.repo_id} ...")
-                 hf_hub_download(
-                     repo_id=self.repo_id,
-                     filename=fname,
-                     local_dir=str(self.ckpt_dir),
-                     token=token,
-                     force_download=False,
-                     local_files_only=False,
-                 )

-         link = self.repo_dir / "ckpt" / "VINCIE-3B"
          try:
-             if link.is_symlink() or link.exists():
-                 try:
-                     link.unlink()
-                 except IsADirectoryError:
-                     pass
-             if not link.exists():
-                 link.symlink_to(self.ckpt_dir, target_is_directory=True)
          except Exception as e:
-             print("[vince] symlink warning:", e)

-     def ready(self) -> bool:
-         have_repo = self.repo_dir.exists() and self.generate_yaml.exists()
-         dit_ok = (self.ckpt_dir / "dit.pth").exists()
-         vae_ok = (self.ckpt_dir / "vae.pth").exists()
-         return bool(have_repo and dit_ok and vae_ok)

-     def _wait_until_outputs(self, out_dir: Path, timeout_s: int = 300) -> None:
-         exts = (".png", ".jpg", ".jpeg", ".gif", ".mp4")
-         deadline = time() + timeout_s
-         while time() < deadline:
-             if any(p.is_file() and p.suffix.lower() in exts for p in out_dir.rglob("*")):
-                 print(f"[vince] outputs detected in {out_dir}")
-                 return
-             sleep(1)
-         print(f"[vince] warning: no outputs detected in {out_dir} within {timeout_s}s")

-     def _run_vincie(self, overrides: List[str], work_output: Path, wait_outputs: bool = True) -> None:
-         work_output.mkdir(parents=True, exist_ok=True)
-         cmd = [
-             self.python,
-             "main.py",
-             str(self.generate_yaml),
-             *overrides,
-             f"generation.output.dir={str(work_output)}",
-         ]
-         print("[vince] CWD=", self.repo_dir)
-         print("[vince] CMD=", " ".join(cmd))
-         subprocess.run(cmd, cwd=self.repo_dir, check=True, env=os.environ.copy())
-         if wait_outputs:
-             self._wait_until_outputs(work_output, timeout_s=int(os.getenv("VINCIE_WAIT_OUTPUTS_SEC", "300")))

-     def multi_turn_edit(self, input_image: str, turns: List[str], **kwargs) -> Path:
-         out_dir = self.output_root / f"multi_turn_{Path(input_image).stem}"
-         overrides = [
-             f'generation.positive_prompt.image_path="{str(input_image)}"',
-             f"generation.positive_prompt.prompts={json.dumps(turns)}",
-             f"generation.seed={int(kwargs.get('seed', 1))}",
-             f"diffusion.timesteps.sampling.steps={int(kwargs.get('steps', 50))}",
-             f"diffusion.cfg.scale={float(kwargs.get('cfg_scale', 7.5))}",
-             f'generation.negative_prompt="{kwargs.get("negative_prompt","")}"',
-             f"generation.resolution={int(kwargs.get('resolution', 512))}",
-             f"generation.batch_size={int(kwargs.get('batch_size', 1))}",
-         ]
-         self._run_vincie(overrides, out_dir, wait_outputs=True)
-         return out_dir

-     def multi_concept_compose(self, files: List[str], descs: List[str], final_prompt: str, **kwargs) -> Path:
-         out_dir = self.output_root / f"multi_concept_{len(files)}"
-         overrides = [
-             f"generation.concepts.files={json.dumps(files)}",
-             f"generation.concepts.descs={json.dumps(descs)}",
-             f'generation.final_prompt="{final_prompt}"',
-             f"generation.seed={int(kwargs.get('seed', 1))}",
-             f"diffusion.timesteps.sampling.steps={int(kwargs.get('steps', 50))}",
-             f"diffusion.cfg.scale={float(kwargs.get('cfg_scale', 7.5))}",
-             f"generation.resolution={int(kwargs.get('resolution', 512))}",
-             f"generation.batch_size={int(kwargs.get('batch_size', 1))}",
-         ]
-         self._run_vincie(overrides, out_dir, wait_outputs=True)
-         return out_dir

+ #!/usr/bin/env bash
+ set -euo pipefail

+ echo "================= RUNTIME CAPABILITIES ================="
+ date
+ if command -v nvidia-smi >/dev/null 2>&1; then
+   nvidia-smi
+ else
+   echo "nvidia-smi: not available"
+ fi

+ echo
+ echo "CUDA_HOME: ${CUDA_HOME:-/usr/local/cuda}"
+ if command -v nvcc >/dev/null 2>&1; then
+   nvcc --version || true
+ else
+   echo "nvcc: not available"
+ fi

+ echo
+ echo "[PyTorch / CUDA backend]"
+ python - <<'PY'
+ import json, os, torch, inspect
+ def to_bool(x):
+     try:
+         if callable(x):
              try:
+                 sig = inspect.signature(x)
+                 if len(sig.parameters)==0:
+                     return bool(x())
+             except Exception:
+                 pass
+             return True
+         return bool(x)
+     except Exception:
+         return None
+
+ info = {
+     "torch": getattr(torch, "__version__", None),
+     "cuda_available": torch.cuda.is_available(),
+     "cuda_device_count": torch.cuda.device_count(),
+     "cuda_runtime_version": getattr(torch.version, "cuda", None),
+     "cudnn_version": torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else None,
+     "tf32": (torch.backends.cuda.matmul.allow_tf32 if torch.cuda.is_available() else None),
+     "flash_sdp": (to_bool(getattr(torch.backends.cuda, "enable_flash_sdp", None)) if torch.cuda.is_available() else None),
+     "mem_efficient_sdp": (to_bool(getattr(torch.backends.cuda, "enable_mem_efficient_sdp", None)) if torch.cuda.is_available() else None),
+     "math_sdp": (to_bool(getattr(torch.backends.cuda, "enable_math_sdp", None)) if torch.cuda.is_available() else None),
+ }
+ print(json.dumps(info, indent=2))
+ for i in range(min(torch.cuda.device_count(), 8)):
+     print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
+ PY

+ echo
+ echo "[Apex]"
+ python - <<'PY'
+ try:
+     from apex.normalization import FusedLayerNorm, FusedRMSNorm
+     import importlib; importlib.import_module("fused_layer_norm_cuda")
+     print("apex.normalization: OK")
+ except Exception as e:
+     print("apex.normalization: FAIL ->", e)
+ PY

+ echo
+ echo "[FlashAttention]"
+ python - <<'PY'
+ import importlib
+ for m in ("flash_attn","flash_attn_2_cuda"):
+     try:
+         importlib.import_module(m); print(f"{m}: OK")
+     except Exception as e:
+         print(f"{m}: FAIL -> {e}")
+ PY
+
+ echo
+ echo "[FlashAttention LN test]"
+ python - <<'PY'
+ import os, warnings, importlib
+ warnings.filterwarnings("ignore", category=FutureWarning)
+ def ok_import(names):
+     for n in names:
          try:
+             importlib.import_module(n)
+             print(f" [+] import '{n}' OK")
+             return True
          except Exception as e:
+             print(f" [-] import '{n}' fail: {e}")
+     return False
+ fa_ver = None
+ try:
+     import flash_attn
+     fa_ver = getattr(flash_attn, "__version__", None)
+ except Exception:
+     pass
+ try:
+     import torch
+     tv = torch.__version__
+     cu = getattr(torch.version, "cuda", None)
+ except Exception:
+     tv, cu = "unknown", "unknown"
+ print(f" flash_attn version: {fa_ver}")
+ print(f" torch: {tv} | cuda: {cu} | TORCH_CUDA_ARCH_LIST={os.getenv('TORCH_CUDA_ARCH_LIST')}")
+ names_to_try = [
+     "flash_attn_2_cuda",
+     "flash_attn.ops.layer_norm",
+     "flash_attn.layers.layer_norm",
+ ]
+ ok = ok_import(names_to_try)
+ if not ok:
+     print(" Hint: FlashAttention LN/RMSNorm kernels are missing (reduced performance).")
+     print(" Use builder.sh to compile flash_attn and reuse the wheel.")
+ PY
+
+ echo
+ echo "[Triton]"
+ python - <<'PY'
+ try:
+     import triton
+     print("triton:", triton.__version__)
+     try:
+         import triton.ops as _; print("triton.ops: OK")
+     except Exception:
+         print("triton.ops: not present (ok on Triton>=3.x)")
+ except Exception as e:
+     print("triton: FAIL ->", e)
+ PY

+ echo
+ echo "[BitsAndBytes (Q8/Q4)]"
+ python - <<'PY'
+ try:
+     import bitsandbytes as bnb
+     print("bitsandbytes:", bnb.__version__)
+     try:
+         from bitsandbytes.triton import _custom_ops as _; print("bnb.triton.int8_matmul_mixed_dequantize: OK")
+     except Exception as e:
+         print("bnb.triton: partial ->", e)
+ except Exception as e:
+     print("bitsandbytes: FAIL ->", e)
+ PY

+ echo
+ echo "[Transformers / Diffusers / XFormers]"
+ python - <<'PY'
+ def _v(m):
+     try:
+         mod = __import__(m)
+         print(f"{m}:", getattr(mod, "__version__", "unknown"))
+     except Exception as e:
+         print(f"{m}: FAIL -> {e}")
+ for m in ("transformers","diffusers","xformers"):
+     _v(m)
+ PY

+ echo
+ echo "[Distributed / NCCL Env]"
+ env | grep -E '^(CUDA_VISIBLE_DEVICES|NCCL_|TORCH_|ENABLE_.*SDP|HF_HUB_.*|CUDA_|NV_.*NCCL.*|PYTORCH_CUDA_ALLOC_CONF)=' | sort

+ echo
+ echo "[Output paths and permissions]"
+ OUT="/app/outputs"
+ echo "OUT dir: $OUT"
+ mkdir -p "$OUT"
+ ls -la "$OUT" || true

+ echo "================= END CAPABILITIES ================="