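# Runtime capability probe: reports the GPU, CUDA toolchain, and key ML
# libraries (PyTorch, Apex, FlashAttention, Triton, bitsandbytes,
# Transformers/Diffusers/xformers) available in this environment.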
set -euo pipefail
echo "================= RUNTIME CAPABILITIES ================="
date
if command -v nvidia-smi >/dev/null 2>&1; then
  nvidia-smi
else
  echo "nvidia-smi: not available"
fi
echo
echo "CUDA_HOME: ${CUDA_HOME:-/usr/local/cuda}"
if command -v nvcc >/dev/null 2>&1; then
  nvcc --version || true
else
  echo "nvcc: not available"
fi
echo
| echo "[PyTorch / CUDA backend]" | |
| python - <<'PY' | |
| import json, os, torch, inspect | |
| def to_bool(x): | |
| try: | |
| if callable(x): | |
| try: | |
| sig = inspect.signature(x) | |
| if len(sig.parameters)==0: | |
| return bool(x()) | |
| except Exception: | |
| pass | |
| return True | |
| return bool(x) | |
| except Exception: | |
| return None | |
| info = { | |
| "torch": getattr(torch, "__version__", None), | |
| "cuda_available": torch.cuda.is_available(), | |
| "cuda_device_count": torch.cuda.device_count(), | |
| "cuda_runtime_version": getattr(torch.version, "cuda", None), | |
| "cudnn_version": torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else None, | |
| "tf32": (torch.backends.cuda.matmul.allow_tf32 if torch.cuda.is_available() else None), | |
| "flash_sdp": (to_bool(getattr(torch.backends.cuda, "enable_flash_sdp", None)) if torch.cuda.is_available() else None), | |
| "mem_efficient_sdp": (to_bool(getattr(torch.backends.cuda, "enable_mem_efficient_sdp", None)) if torch.cuda.is_available() else None), | |
| "math_sdp": (to_bool(getattr(torch.backends.cuda, "enable_math_sdp", None)) if torch.cuda.is_available() else None), | |
| } | |
| print(json.dumps(info, indent=2)) | |
| for i in range(min(torch.cuda.device_count(), 8)): | |
| print(f"GPU {i}: {torch.cuda.get_device_name(i)}") | |
| PY | |
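echo
echo "[SDP backend state]"
# The flash_sdp/mem_efficient_sdp/math_sdp fields above only report that the
# enable_* toggles exist, not whether each backend is currently enabled. A
# minimal sketch for querying the actual state, assuming a PyTorch 2.x build
# that exposes the *_sdp_enabled() helpers:
python - <<'PY'
import torch
if torch.cuda.is_available():
    for name in ("flash_sdp_enabled", "mem_efficient_sdp_enabled", "math_sdp_enabled"):
        fn = getattr(torch.backends.cuda, name, None)  # absent on older PyTorch builds
        print(f"{name}: {fn() if callable(fn) else 'n/a'}")
else:
    print("skipped (no CUDA device)")
PY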
echo
echo "[Apex]"
python - <<'PY'
try:
    from apex.normalization import FusedLayerNorm, FusedRMSNorm
    import importlib; importlib.import_module("fused_layer_norm_cuda")
    print("apex.normalization: OK")
except Exception as e:
    print("apex.normalization: FAIL ->", e)
PY
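echo
echo "[Apex FusedRMSNorm forward]"
# Optional deeper probe: run FusedRMSNorm once so the compiled extension is
# actually exercised, not just imported. A minimal sketch, assuming a CUDA
# device is present; it is skipped otherwise and failures are non-fatal.
python - <<'PY'
import torch
try:
    from apex.normalization import FusedRMSNorm
    if torch.cuda.is_available():
        x = torch.randn(2, 8, device="cuda")
        out = FusedRMSNorm(8).cuda()(x)
        print("FusedRMSNorm forward: OK", tuple(out.shape))
    else:
        print("FusedRMSNorm forward: skipped (no CUDA device)")
except Exception as e:
    print("FusedRMSNorm forward: FAIL ->", e)
PY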
echo
echo "[FlashAttention]"
python - <<'PY'
import importlib
for m in ("flash_attn", "flash_attn_2_cuda"):
    try:
        importlib.import_module(m); print(f"{m}: OK")
    except Exception as e:
        print(f"{m}: FAIL -> {e}")
PY
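echo
echo "[FlashAttention smoke test]"
# Optional kernel-launch check: flash_attn can import yet still fail at runtime
# (e.g. unsupported GPU architecture or dtype). A minimal sketch, assuming an
# fp16-capable CUDA device; any failure is printed and the script continues.
python - <<'PY'
import torch
try:
    from flash_attn import flash_attn_func
    if torch.cuda.is_available():
        # (batch, seqlen, nheads, headdim) fp16 tensors, self-attention on q
        q = torch.randn(1, 16, 4, 64, device="cuda", dtype=torch.float16)
        out = flash_attn_func(q, q, q)
        print("flash_attn_func: OK", tuple(out.shape))
    else:
        print("flash_attn_func: skipped (no CUDA device)")
except Exception as e:
    print("flash_attn_func: FAIL ->", e)
PY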
echo
echo "[FlashAttention LN test]"
python - <<'PY'
import os, warnings, importlib
warnings.filterwarnings("ignore", category=FutureWarning)

def ok_import(names):
    for n in names:
        try:
            importlib.import_module(n)
            print(f" [+] import '{n}' OK")
            return True
        except Exception as e:
            print(f" [-] import '{n}' fail: {e}")
    return False

fa_ver = None
try:
    import flash_attn
    fa_ver = getattr(flash_attn, "__version__", None)
except Exception:
    pass

try:
    import torch
    tv = torch.__version__
    cu = getattr(torch.version, "cuda", None)
except Exception:
    tv, cu = "unknown", "unknown"

print(f" flash_attn version: {fa_ver}")
print(f" torch: {tv} | cuda: {cu} | TORCH_CUDA_ARCH_LIST={os.getenv('TORCH_CUDA_ARCH_LIST')}")

names_to_try = [
    "flash_attn_2_cuda",
    "flash_attn.ops.layer_norm",
    "flash_attn.layers.layer_norm",
]
ok = ok_import(names_to_try)
if not ok:
    print(" Hint: FlashAttention LN/RMSNorm kernels are missing (reduced performance).")
    print(" Use builder.sh to build flash_attn and reuse the wheel.")
PY
echo
echo "[Triton]"
python - <<'PY'
try:
    import triton
    print("triton:", triton.__version__)
    try:
        import triton.ops as _; print("triton.ops: OK")
    except Exception:
        print("triton.ops: not present (ok on Triton>=3.x)")
except Exception as e:
    print("triton: FAIL ->", e)
PY
echo
echo "[BitsAndBytes (Q8/Q4)]"
python - <<'PY'
try:
    import bitsandbytes as bnb
    print("bitsandbytes:", bnb.__version__)
    try:
        from bitsandbytes.triton import _custom_ops as _; print("bnb.triton._custom_ops: OK")
    except Exception as e:
        print("bnb.triton: partial ->", e)
except Exception as e:
    print("bitsandbytes: FAIL ->", e)
PY
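echo
echo "[BitsAndBytes runtime check]"
# Optional end-to-end probe: quantize a tiny Linear layer and run one forward
# pass so the int8 path is exercised, not just imported. A minimal sketch,
# assuming a CUDA device; any failure is printed and the script continues.
python - <<'PY'
import torch
try:
    import bitsandbytes as bnb
    if torch.cuda.is_available():
        layer = bnb.nn.Linear8bitLt(16, 16, has_fp16_weights=False).cuda()
        out = layer(torch.randn(2, 16, device="cuda", dtype=torch.float16))
        print("bnb Linear8bitLt forward: OK", tuple(out.shape))
    else:
        print("bnb Linear8bitLt forward: skipped (no CUDA device)")
except Exception as e:
    print("bnb Linear8bitLt forward: FAIL ->", e)
PY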
echo
echo "[Transformers / Diffusers / XFormers]"
python - <<'PY'
def _v(m):
    try:
        mod = __import__(m)
        print(f"{m}:", getattr(mod, "__version__", "unknown"))
    except Exception as e:
        print(f"{m}: FAIL -> {e}")

for m in ("transformers", "diffusers", "xformers"):
    _v(m)
PY
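echo
echo "[xformers build info]"
# Optional: xformers ships its own capability report listing which attention
# kernels were built. Assuming the package is installed, this prints it; the
# fallback keeps set -e from aborting the script when it is missing.
python -m xformers.info 2>/dev/null || echo "xformers.info: not available"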
echo
echo "[Distributed / NCCL Env]"
env | grep -E '^(CUDA_VISIBLE_DEVICES|NCCL_|TORCH_|ENABLE_.*SDP|HF_HUB_.*|CUDA_|NV_.*NCCL.*|PYTORCH_CUDA_ALLOC_CONF)=' | sort || true
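echo
echo "[torch.distributed / NCCL build]"
# Optional: report whether this torch build includes the NCCL backend, which
# the environment variables above only hint at. A minimal sketch using
# torch.distributed's availability helpers.
python - <<'PY'
import torch.distributed as dist
print("torch.distributed available:", dist.is_available())
print("NCCL backend built:", dist.is_nccl_available() if dist.is_available() else None)
PY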
echo
echo "[Output paths and permissions]"
OUT="/app/outputs"
echo "OUT dir: $OUT"
mkdir -p "$OUT"
ls -la "$OUT" || true
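# Optional write probe: confirm the output directory is actually writable, not
# just listable. Uses a throwaway marker file that is removed immediately.
if touch "$OUT/.write_test" 2>/dev/null; then
  rm -f "$OUT/.write_test"
  echo "OUT dir writable: yes"
else
  echo "OUT dir writable: no"
fi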
| echo "================= END CAPABILITIES =================" | |