#!/usr/bin/env bash
# Print a summary of the GPU/CUDA runtime and the availability of key ML libraries.
set -euo pipefail

echo "================= RUNTIME CAPABILITIES ================="
nvidia-smi || true
echo "CUDA_HOME: ${CUDA_HOME:-/usr/local/cuda}"
echo "NVCC: $(nvcc --version 2>/dev/null | tail -n1 || echo 'N/A')"

echo
echo "[PyTorch / CUDA backend]"
python3 - <<'PY'
import json

try:
    import torch
    # SDP backend flags are probed with hasattr so the report also works on older torch versions.
    info = {
        "torch": torch.__version__,
        "cuda_available": torch.cuda.is_available(),
        "cuda_device_count": torch.cuda.device_count(),
        "cuda_runtime_version": getattr(torch.version, "cuda", None),
        "cudnn_version": (torch.backends.cudnn.version() if torch.cuda.is_available() else None),
        "tf32": (torch.backends.cuda.matmul.allow_tf32 if torch.cuda.is_available() else None),
        "flash_sdp": (torch.backends.cuda.flash_sdp_enabled()
                      if hasattr(torch.backends.cuda, "flash_sdp_enabled") else None),
        "mem_efficient_sdp": (torch.backends.cuda.mem_efficient_sdp_enabled()
                              if hasattr(torch.backends.cuda, "mem_efficient_sdp_enabled") else None),
        "math_sdp": (torch.backends.cuda.math_sdp_enabled()
                     if hasattr(torch.backends.cuda, "math_sdp_enabled") else None),
    }
    print(json.dumps(info, indent=2))
    if torch.cuda.is_available():
        for i in range(torch.cuda.device_count()):
            print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
except Exception as e:
    print(f"[ERR torch] {type(e).__name__}: {e}")
PY

echo
echo "[Apex]"
python3 - <<'PY'
try:
    import importlib
    importlib.import_module("apex.normalization")
    print("apex.normalization: OK")
except Exception as e:
    print(f"Apex: ERR {type(e).__name__}: {e}")
PY

echo
echo "[FlashAttention]"
python3 - <<'PY'
try:
    import flash_attn
    print(f"flash_attn: OK (version={getattr(flash_attn, '__version__', 'unknown')})")
    try:
        import flash_attn_2_cuda
        print("flash_attn_2_cuda: OK")
    except Exception as e:
        print(f"flash_attn_2_cuda: ERR {type(e).__name__}: {e}")
except Exception as e:
    print(f"flash_attn: ERR {type(e).__name__}: {e}")
PY

echo
echo "[Triton]"
python3 - <<'PY'
try:
    import triton
    print(f"triton: OK (version={getattr(triton, '__version__', 'unknown')})")
    try:
        import triton.ops
        print("triton.ops: legacy module present")
    except ModuleNotFoundError:
        print("triton.ops: not present (ok on Triton>=3.x)")
    except Exception as e:
        print(f"triton.ops: WARN {type(e).__name__}: {e}")
except Exception as e:
    print(f"triton: ERR {type(e).__name__}: {e}")
PY

echo
echo "[BitsAndBytes (Q8/Q4)]"
python3 - <<'PY'
try:
    import bitsandbytes as bnb
    v = getattr(bnb, "__version__", "unknown")
    print(f"bitsandbytes: OK (version={v})")
    try:
        # Import only checks availability of the int8 Triton kernel; the module itself is unused.
        import bitsandbytes.triton.int8_matmul_mixed_dequantize as q8
        print("bnb.triton.int8_matmul_mixed_dequantize: OK")
    except ModuleNotFoundError:
        print("bnb.q8.triton: not present (disabled or no GPU build)")
    except Exception as e:
        print(f"bnb.q8.triton: WARN {type(e).__name__}: {e}")
except Exception as e:
    print(f"bitsandbytes: ERR {type(e).__name__}: {e}")
PY

echo
echo "[Transformers / Diffusers / XFormers]"
python3 - <<'PY'
import importlib

def ver(name):
    try:
        m = importlib.import_module(name)
        return getattr(m, "__version__", "unknown")
    except Exception as e:
        return f"ERR:{type(e).__name__}"

print("transformers:", ver("transformers"))
print("diffusers:", ver("diffusers"))
print("xformers:", ver("xformers"))
PY

echo
echo "[Distributed / NCCL Env]"
# '|| true' keeps 'set -e -o pipefail' from aborting the script when no matching variables are set.
env | grep -E 'MASTER_|NCCL|CUDA_VISIBLE_DEVICES|TORCH_|ENABLE_' | sort || true

echo "================= END CAPABILITIES ================="