#!/usr/bin/env bash
# Aduc-sdr-2_5 / info.sh
set -euo pipefail
echo "================= RUNTIME CAPABILITIES ================="
date
if command -v nvidia-smi >/dev/null 2>&1; then
  nvidia-smi || true
else
  echo "nvidia-smi: not available"
fi
echo
echo "CUDA_HOME: ${CUDA_HOME:-/usr/local/cuda}"
if command -v nvcc >/dev/null 2>&1; then
  nvcc --version || true
else
  echo "nvcc: not available"
fi
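# Added probe (sketch): report GPU compute capability via nvidia-smi.
# The `compute_cap` query field needs a reasonably recent driver (roughly 510+);
# on older drivers the fallback message is printed instead.
if command -v nvidia-smi >/dev/null 2>&1; then
  echo
  echo "[GPU compute capability]"
  nvidia-smi --query-gpu=name,compute_cap --format=csv,noheader 2>/dev/null || \
    echo "compute_cap query not supported by this driver"
fi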
echo
echo "[PyTorch / CUDA backend]"
python - <<'PY'
import json, inspect, torch

def to_bool(x):
    # Best-effort coercion: zero-arg callables are invoked; other callables
    # count as "present" (True); everything else goes through bool().
    try:
        if callable(x):
            try:
                sig = inspect.signature(x)
                if len(sig.parameters) == 0:
                    return bool(x())
            except Exception:
                pass
            return True
        return bool(x)
    except Exception:
        return None

info = {
    "torch": getattr(torch, "__version__", None),
    "cuda_available": torch.cuda.is_available(),
    "cuda_device_count": torch.cuda.device_count(),
    "cuda_runtime_version": getattr(torch.version, "cuda", None),
    "cudnn_version": torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else None,
    "tf32": (torch.backends.cuda.matmul.allow_tf32 if torch.cuda.is_available() else None),
    "flash_sdp": (to_bool(getattr(torch.backends.cuda, "enable_flash_sdp", None)) if torch.cuda.is_available() else None),
    "mem_efficient_sdp": (to_bool(getattr(torch.backends.cuda, "enable_mem_efficient_sdp", None)) if torch.cuda.is_available() else None),
    "math_sdp": (to_bool(getattr(torch.backends.cuda, "enable_math_sdp", None)) if torch.cuda.is_available() else None),
}
print(json.dumps(info, indent=2))

for i in range(min(torch.cuda.device_count(), 8)):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
PY
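# Added probe (sketch): on PyTorch 2.x the SDPA backend toggles can be read
# directly via torch.backends.cuda.*_sdp_enabled(); the getattr guard keeps
# this safe on older builds where those helpers do not exist.
echo
echo "[SDPA backend flags (PyTorch 2.x, if available)]"
python - <<'PY'
import torch
be = torch.backends.cuda
for name in ("flash_sdp_enabled", "mem_efficient_sdp_enabled", "math_sdp_enabled"):
    fn = getattr(be, name, None)
    if callable(fn):
        try:
            print(f"{name}: {fn()}")
        except Exception as e:
            print(f"{name}: error -> {e}")
    else:
        print(f"{name}: not available on this torch build")
PY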
echo
echo "[Apex]"
python - <<'PY'
try:
    from apex.normalization import FusedLayerNorm, FusedRMSNorm
    import importlib
    importlib.import_module("fused_layer_norm_cuda")
    print("apex.normalization: OK")
except Exception as e:
    print("apex.normalization: FAIL ->", e)
PY
echo
echo "[FlashAttention]"
python - <<'PY'
import importlib
for m in ("flash_attn", "flash_attn_2_cuda"):
    try:
        importlib.import_module(m)
        print(f"{m}: OK")
    except Exception as e:
        print(f"{m}: FAIL -> {e}")
PY
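# Added probe (sketch): one tiny flash_attn_func forward pass as a functional
# smoke test. Assumes flash-attn 2.x ("from flash_attn import flash_attn_func")
# and a CUDA device with fp16 support; any failure is reported, not fatal.
echo
echo "[FlashAttention smoke test]"
python - <<'PY'
import torch
try:
    from flash_attn import flash_attn_func
    if not torch.cuda.is_available():
        print("flash_attn_func: skipped (no CUDA device)")
    else:
        # (batch, seqlen, nheads, headdim), fp16, on GPU
        q = torch.randn(1, 16, 2, 64, device="cuda", dtype=torch.float16)
        k = torch.randn_like(q)
        v = torch.randn_like(q)
        out = flash_attn_func(q, k, v, causal=True)
        print("flash_attn_func: OK, output shape", tuple(out.shape))
except Exception as e:
    print("flash_attn_func: FAIL ->", e)
PY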
echo
echo "[FlashAttention LN test]"
python - <<'PY'
import os, warnings, importlib
warnings.filterwarnings("ignore", category=FutureWarning)

def ok_import(names):
    # Return True on the first module in `names` that imports cleanly.
    for n in names:
        try:
            importlib.import_module(n)
            print(f" [+] import '{n}' OK")
            return True
        except Exception as e:
            print(f" [-] import '{n}' fail: {e}")
    return False

fa_ver = None
try:
    import flash_attn
    fa_ver = getattr(flash_attn, "__version__", None)
except Exception:
    pass

try:
    import torch
    tv = torch.__version__
    cu = getattr(torch.version, "cuda", None)
except Exception:
    tv, cu = "unknown", "unknown"

print(f" flash_attn version: {fa_ver}")
print(f" torch: {tv} | cuda: {cu} | TORCH_CUDA_ARCH_LIST={os.getenv('TORCH_CUDA_ARCH_LIST')}")

names_to_try = [
    "flash_attn_2_cuda",
    "flash_attn.ops.layer_norm",
    "flash_attn.layers.layer_norm",
]
ok = ok_import(names_to_try)
if not ok:
    print(" Hint: FlashAttention LN/RMSNorm kernels are missing (reduced performance).")
    print(" Use builder.sh to compile flash_attn and reuse the wheel.")
PY
echo
echo "[Triton]"
python - <<'PY'
try:
    import triton
    print("triton:", triton.__version__)
    try:
        import triton.ops as _
        print("triton.ops: OK")
    except Exception:
        print("triton.ops: not present (ok on Triton>=3.x)")
except Exception as e:
    print("triton: FAIL ->", e)
PY
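# Added probe (sketch): a minimal @triton.jit vector-add kernel compiled and
# launched once, to confirm Triton can actually JIT for the local GPU.
# Skipped when CUDA is unavailable; failures are reported, not fatal.
echo
echo "[Triton JIT smoke test]"
python - <<'PY'
import torch
try:
    import triton
    import triton.language as tl

    @triton.jit
    def _add_kernel(x_ptr, y_ptr, out_ptr, n, BLOCK: tl.constexpr):
        pid = tl.program_id(0)
        offs = pid * BLOCK + tl.arange(0, BLOCK)
        mask = offs < n
        x = tl.load(x_ptr + offs, mask=mask)
        y = tl.load(y_ptr + offs, mask=mask)
        tl.store(out_ptr + offs, x + y, mask=mask)

    if not torch.cuda.is_available():
        print("triton jit: skipped (no CUDA device)")
    else:
        n = 1024
        x = torch.randn(n, device="cuda")
        y = torch.randn(n, device="cuda")
        out = torch.empty_like(x)
        _add_kernel[(triton.cdiv(n, 256),)](x, y, out, n, BLOCK=256)
        print("triton jit:", "OK" if torch.allclose(out, x + y) else "MISMATCH")
except Exception as e:
    print("triton jit: FAIL ->", e)
PY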
echo
echo "[BitsAndBytes (Q8/Q4)]"
python - <<'PY'
try:
    import bitsandbytes as bnb
    print("bitsandbytes:", bnb.__version__)
    try:
        from bitsandbytes.triton import _custom_ops as _
        print("bnb.triton.int8_matmul_mixed_dequantize: OK")
    except Exception as e:
        print("bnb.triton: partial ->", e)
except Exception as e:
    print("bitsandbytes: FAIL ->", e)
PY
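# Added probe (sketch): instantiate bnb.nn.Linear8bitLt and run one fp16
# forward pass, following the documented int8 inference pattern
# (has_fp16_weights=False, module moved to CUDA before use). Skipped without
# CUDA; any failure is reported rather than aborting the script.
echo
echo "[BitsAndBytes Linear8bitLt smoke test]"
python - <<'PY'
import torch
try:
    import bitsandbytes as bnb
    if not torch.cuda.is_available():
        print("bnb.nn.Linear8bitLt: skipped (no CUDA device)")
    else:
        lin = bnb.nn.Linear8bitLt(64, 64, bias=False, has_fp16_weights=False).cuda()
        x = torch.randn(2, 64, device="cuda", dtype=torch.float16)
        y = lin(x)
        print("bnb.nn.Linear8bitLt: OK, output shape", tuple(y.shape))
except Exception as e:
    print("bnb.nn.Linear8bitLt: FAIL ->", e)
PY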
echo
echo "[Transformers / Diffusers / XFormers]"
python - <<'PY'
def _v(m):
    try:
        mod = __import__(m)
        print(f"{m}:", getattr(mod, "__version__", "unknown"))
    except Exception as e:
        print(f"{m}: FAIL -> {e}")

for m in ("transformers", "diffusers", "xformers"):
    _v(m)
PY
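# Added probe (sketch): one call to xformers.ops.memory_efficient_attention on
# tiny fp16 tensors, to confirm the fused attention kernels dispatch on this
# GPU. Assumes xformers is installed; skipped without CUDA.
echo
echo "[xformers attention smoke test]"
python - <<'PY'
import torch
try:
    import xformers.ops as xops
    if not torch.cuda.is_available():
        print("memory_efficient_attention: skipped (no CUDA device)")
    else:
        # (batch, seqlen, nheads, headdim), fp16, on GPU
        q = torch.randn(1, 16, 2, 64, device="cuda", dtype=torch.float16)
        k = torch.randn_like(q)
        v = torch.randn_like(q)
        out = xops.memory_efficient_attention(q, k, v)
        print("memory_efficient_attention: OK, output shape", tuple(out.shape))
except Exception as e:
    print("memory_efficient_attention: FAIL ->", e)
PY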
echo
echo "[Distribuído / NCCL Env]"
env | grep -E '^(CUDA_VISIBLE_DEVICES|NCCL_|TORCH_|ENABLE_.*SDP|HF_HUB_.*|CUDA_|NV_.*NCCL.*|PYTORCH_CUDA_ALLOC_CONF)=' | sort
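# Added probe (sketch): report whether this torch build ships torch.distributed
# with NCCL/Gloo support, complementing the raw environment dump above.
python - <<'PY'
import torch
import torch.distributed as dist
print("torch.distributed available:", dist.is_available())
if dist.is_available():
    print("NCCL backend available:", dist.is_nccl_available())
    print("Gloo backend available:", dist.is_gloo_available())
PY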
echo
echo "[Caminhos e permissões de saída]"
OUT="/app/outputs"
echo "OUT dir: $OUT"
mkdir -p "$OUT"
ls -la "$OUT" || true
echo "================= END CAPABILITIES ================="