carlex3321 committed
Commit cc1f91b · verified · 1 Parent(s): 3629c3e

Upload 2 files

Files changed (2):
  1. builder.sh +74 -47
  2. info.sh +154 -110
builder.sh CHANGED
@@ -1,9 +1,9 @@
  #!/usr/bin/env bash
  set -euo pipefail

- echo "🚀 Builder (Apex + Q8) — runtime, visible GPU, persistent cache"

- # ===== Persistence and caches =====
  if [ -d /data ]; then
    export HF_HOME="${HF_HOME:-/data/.cache/huggingface}"
    export TORCH_HOME="${TORCH_HOME:-/data/.cache/torch}"
@@ -15,59 +15,56 @@ export HF_HUB_CACHE="${HF_HUB_CACHE:-$HF_HOME/hub}"
  mkdir -p "$HF_HOME" "$HF_HUB_CACHE" "$TORCH_HOME"
  mkdir -p /app/.cache && ln -sf "$HF_HOME" /app/.cache/huggingface

- # ===== Wheel repository on the Hub =====
  export SELF_HF_REPO_ID="${SELF_HF_REPO_ID:-carlex3321/aduc-sdr}"

- # ===== Transfer acceleration =====
  export HF_HUB_ENABLE_HF_TRANSFER="${HF_HUB_ENABLE_HF_TRANSFER:-1}"
  export HF_HUB_DOWNLOAD_TIMEOUT="${HF_HUB_DOWNLOAD_TIMEOUT:-60}"

- # ===== Working directories =====
  mkdir -p /app/wheels /app/cuda_cache /app/wheels/src
  chmod -R 777 /app/wheels || true
  export CUDA_CACHE_PATH="/app/cuda_cache"
-
- # NGC license if present
  [ -f "/NGC-DL-CONTAINER-LICENSE" ] && cp -f /NGC-DL-CONTAINER-LICENSE /app/wheels/NGC-DL-CONTAINER-LICENSE || true

- # ===== Minimal build dependencies =====
  python -m pip install -U pip build setuptools wheel hatchling hatch-vcs scikit-build-core cmake ninja packaging "huggingface_hub[hf_transfer]" || true

- # ===== Tags =====
  PY_TAG="$(python -c 'import sys; print(f"cp{sys.version_info[0]}{sys.version_info[1]}")' 2>/dev/null || echo cp310)"
  TORCH_VER="$(python - <<'PY'
  try:
-     import torch, re
-     v = torch.__version__
-     print(re.sub(r'\+.*$', '', v))
  except Exception:
-     print("unknown")
  PY
  )"
  CU_TAG="$(python - <<'PY'
  try:
-     import torch
-     cu = getattr(torch.version, "cuda", None)
-     print("cu"+cu.replace(".","")) if cu else print("")
  except Exception:
-     print("")
  PY
  )"
  echo "[env] PY_TAG=${PY_TAG} TORCH_VER=${TORCH_VER} CU_TAG=${CU_TAG}"

- # ===== Checkers =====
  check_apex() {
    python - <<'PY'
  try:
-     from apex.normalization import FusedLayerNorm, FusedRMSNorm
-     import importlib; importlib.import_module("fused_layer_norm_cuda")
-     ok = True
  except Exception:
-     ok = False
  raise SystemExit(0 if ok else 1)
  PY
  }
-
  check_q8() {
    python - <<'PY'
  import importlib.util
@@ -75,8 +72,23 @@ spec = importlib.util.find_spec("ltx_q8_kernels") or importlib.util.find_spec("q
  raise SystemExit(0 if spec else 1)
  PY
  }

- # ===== Download wheels from the Hub =====
  install_from_hf () {
    local PKG="$1"
    python - "$PKG" "$PY_TAG" "$CU_TAG" <<'PY' || exit 0
@@ -86,9 +98,9 @@ pkg, py_tag, cu_tag = sys.argv[1], sys.argv[2], sys.argv[3]
  repo = os.environ.get("SELF_HF_REPO_ID","carlex3321/aduc-sdr")
  api = HfApi(token=os.getenv("HF_TOKEN") or HfFolder.get_token())
  try:
-     files = api.list_repo_files(repo_id=repo, repo_type="model")
  except Exception:
-     raise SystemExit(0)
  cands = [f for f in files if f.endswith(".whl") and f.rsplit("/",1)[-1].startswith(pkg+"-") and py_tag in f]
  pref = [f for f in cands if cu_tag and cu_tag in f] or cands
  if not pref: raise SystemExit(0)
@@ -99,7 +111,7 @@ print(path)
  PY
  }

- # ===== Builders =====
  build_apex () {
    local SRC="/app/wheels/src/apex"
    if [ -d "$SRC/.git" ]; then
@@ -114,12 +126,11 @@ build_apex () {
    python -m pip wheel --no-build-isolation --no-deps "$SRC" -w /app/wheels || true
    local W="$(ls -t /app/wheels/apex-*.whl 2>/dev/null | head -n1 || true)"
    if [ -n "${W}" ]; then
-     python -m pip install -U --no-deps "${W}" || true
    else
      python -m pip install --no-build-isolation "$SRC" || true
    fi
  }
-
  Q8_REPO="${Q8_REPO:-https://github.com/Lightricks/LTX-Video-Q8-Kernels.git}"
  Q8_COMMIT="${Q8_COMMIT:-f3066edea210082799ca5a2bbf9ef0321c5dd8fc}"
  build_q8 () {
@@ -131,19 +142,34 @@ build_q8 () {
    python -m pip wheel --no-build-isolation "$SRC" -w /app/wheels || true
    local W="$(ls -t /app/wheels/q8_kernels-*.whl 2>/dev/null | head -n1 || true)"
    if [ -n "${W}" ]; then
-     python -m pip install -U --no-deps "${W}" || true
    else
-     python -m pip install --no-build-isolation "$SRC" || true
    fi
  }

  ensure_pkg () {
-   local PKG="$1"       # apex | q8_kernels
-   local CHECK_FN="$2"  # check_apex | check_q8
-   local BUILD_FN="$3"  # build_apex | build_q8
    if ${CHECK_FN}; then
-     echo "[flow] ${PKG}: already installed"
-     return 0
    fi
    echo "[flow] ${PKG}: trying wheel from the Hub (${SELF_HF_REPO_ID})"
    HF_OUT="$(install_from_hf "$PKG" || true)"
@@ -151,8 +177,7 @@ ensure_pkg () {
      WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
      python -m pip install -U --no-build-isolation "${WHEEL_PATH}" || true
      if ${CHECK_FN}; then
-       echo "[flow] ${PKG}: success via Hub (${WHEEL_PATH})"
-       return 0
      fi
    fi
    echo "[flow] ${PKG}: compiling (fallback)"
@@ -160,24 +185,26 @@ ensure_pkg () {
    ${CHECK_FN} || echo "[flow] ${PKG}: failed after build; continuing"
  }

- ensure_pkg "apex" check_apex build_apex || true
- ensure_pkg "q8_kernels" check_q8 build_q8 || true

- # Optional upload of generated wheels
  python - <<'PY'
  import os
  from huggingface_hub import HfApi, HfFolder
  repo=os.environ.get("SELF_HF_REPO_ID","carlex3321/aduc-sdr")
  token=os.getenv("HF_TOKEN") or HfFolder.get_token()
  if not token:
-     raise SystemExit(0)
  api=HfApi(token=token)
  api.upload_folder(
-     folder_path="/app/wheels",
-     repo_id=repo,
-     repo_type="model",
-     allow_patterns=["*.whl","NGC-DL-CONTAINER-LICENSE"],
-     ignore_patterns=["**/src/**","**/*.log","**/logs/**",".git/**"],
  )
  print("Wheel upload complete.")
  PY

  #!/usr/bin/env bash
  set -euo pipefail

+ echo "🚀 Builder (Apex + Q8 + FlashAttention) — runtime, visible GPU, persistent cache"

+ # Persistence and caches
  if [ -d /data ]; then
    export HF_HOME="${HF_HOME:-/data/.cache/huggingface}"
    export TORCH_HOME="${TORCH_HOME:-/data/.cache/torch}"

  mkdir -p "$HF_HOME" "$HF_HUB_CACHE" "$TORCH_HOME"
  mkdir -p /app/.cache && ln -sf "$HF_HOME" /app/.cache/huggingface

+ # Wheels repo
  export SELF_HF_REPO_ID="${SELF_HF_REPO_ID:-carlex3321/aduc-sdr}"

+ # Transfer accel
  export HF_HUB_ENABLE_HF_TRANSFER="${HF_HUB_ENABLE_HF_TRANSFER:-1}"
  export HF_HUB_DOWNLOAD_TIMEOUT="${HF_HUB_DOWNLOAD_TIMEOUT:-60}"

+ # Work dirs
  mkdir -p /app/wheels /app/cuda_cache /app/wheels/src
  chmod -R 777 /app/wheels || true
  export CUDA_CACHE_PATH="/app/cuda_cache"
  [ -f "/NGC-DL-CONTAINER-LICENSE" ] && cp -f /NGC-DL-CONTAINER-LICENSE /app/wheels/NGC-DL-CONTAINER-LICENSE || true

+ # Build deps
  python -m pip install -U pip build setuptools wheel hatchling hatch-vcs scikit-build-core cmake ninja packaging "huggingface_hub[hf_transfer]" || true

+ # Environment tags
  PY_TAG="$(python -c 'import sys; print(f"cp{sys.version_info[0]}{sys.version_info[1]}")' 2>/dev/null || echo cp310)"
  TORCH_VER="$(python - <<'PY'
  try:
+     import torch, re
+     v = torch.__version__
+     print(re.sub(r'\+.*$', '', v))
  except Exception:
+     print("unknown")
  PY
  )"
  CU_TAG="$(python - <<'PY'
  try:
+     import torch
+     cu = getattr(torch.version, "cuda", None)
+     print("cu"+cu.replace(".","")) if cu else print("")
  except Exception:
+     print("")
  PY
  )"
  echo "[env] PY_TAG=${PY_TAG} TORCH_VER=${TORCH_VER} CU_TAG=${CU_TAG}"

+ # Checkers
  check_apex() {
    python - <<'PY'
  try:
+     from apex.normalization import FusedLayerNorm, FusedRMSNorm
+     import importlib; importlib.import_module("fused_layer_norm_cuda")
+     ok = True
  except Exception:
+     ok = False
  raise SystemExit(0 if ok else 1)
  PY
  }
  check_q8() {
    python - <<'PY'
  import importlib.util

  raise SystemExit(0 if spec else 1)
  PY
  }
+ check_flash() {
+   python - <<'PY'
+ ok = False
+ try:
+     import importlib
+     for name in ("flash_attn_2_cuda","flash_attn.ops.layer_norm","flash_attn.layers.layer_norm","flash_attn"):
+         try:
+             importlib.import_module(name); ok=True; break
+         except Exception:
+             pass
+ except Exception:
+     ok = False
+ raise SystemExit(0 if ok else 1)
+ PY
+ }

+ # Download wheel from the Hub
  install_from_hf () {
    local PKG="$1"
    python - "$PKG" "$PY_TAG" "$CU_TAG" <<'PY' || exit 0

  repo = os.environ.get("SELF_HF_REPO_ID","carlex3321/aduc-sdr")
  api = HfApi(token=os.getenv("HF_TOKEN") or HfFolder.get_token())
  try:
+     files = api.list_repo_files(repo_id=repo, repo_type="model")
  except Exception:
+     raise SystemExit(0)
  cands = [f for f in files if f.endswith(".whl") and f.rsplit("/",1)[-1].startswith(pkg+"-") and py_tag in f]
  pref = [f for f in cands if cu_tag and cu_tag in f] or cands
  if not pref: raise SystemExit(0)

  PY
  }

+ # Builders
  build_apex () {
    local SRC="/app/wheels/src/apex"
    if [ -d "$SRC/.git" ]; then

    python -m pip wheel --no-build-isolation --no-deps "$SRC" -w /app/wheels || true
    local W="$(ls -t /app/wheels/apex-*.whl 2>/dev/null | head -n1 || true)"
    if [ -n "${W}" ]; then
+     python -m pip install -U --no-deps "${W}" || true
    else
      python -m pip install --no-build-isolation "$SRC" || true
    fi
  }
  Q8_REPO="${Q8_REPO:-https://github.com/Lightricks/LTX-Video-Q8-Kernels.git}"
  Q8_COMMIT="${Q8_COMMIT:-f3066edea210082799ca5a2bbf9ef0321c5dd8fc}"
  build_q8 () {

    python -m pip wheel --no-build-isolation "$SRC" -w /app/wheels || true
    local W="$(ls -t /app/wheels/q8_kernels-*.whl 2>/dev/null | head -n1 || true)"
    if [ -n "${W}" ]; then
+     python -m pip install -U --no-deps "${W}" || true
    else
+     python -m pip install --no-build-isolation "$SRC" || true
+   fi
+ }
+ FLASH_ATTENTION_TAG="${FLASH_ATTENTION_TAG:-v2.8.3}"
+ build_flash () {
+   set -e
+   local SRC="/app/wheels/src/flash-attn"
+   rm -rf "$SRC"
+   git clone --depth 1 --branch "$FLASH_ATTENTION_TAG" https://github.com/Dao-AILab/flash-attention.git "$SRC"
+   export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-8.9}"
+   export MAX_JOBS="${MAX_JOBS:-$(nproc)}"
+   export CUDA_HOME="${CUDA_HOME:-/usr/local/cuda}"
+   python -m pip wheel --no-build-isolation --no-deps "$SRC" -w /app/wheels || true
+   local W="$(ls -t /app/wheels/flash_attn-*.whl 2>/dev/null | head -n1 || true)"
+   if [ -n "${W}" ]; then
+     python -m pip install -U --no-deps "${W}" || true
+   else
+     python -m pip install --no-build-isolation "$SRC" || true
    fi
  }

+ # Orchestrator
  ensure_pkg () {
+   local PKG="$1"; local CHECK_FN="$2"; local BUILD_FN="$3"
    if ${CHECK_FN}; then
+     echo "[flow] ${PKG}: already installed"; return 0
    fi
    echo "[flow] ${PKG}: trying wheel from the Hub (${SELF_HF_REPO_ID})"
    HF_OUT="$(install_from_hf "$PKG" || true)"

      WHEEL_PATH="$(printf "%s\n" "${HF_OUT}" | tail -n1)"
      python -m pip install -U --no-build-isolation "${WHEEL_PATH}" || true
      if ${CHECK_FN}; then
+       echo "[flow] ${PKG}: success via Hub (${WHEEL_PATH})"; return 0
      fi
    fi
    echo "[flow] ${PKG}: compiling (fallback)"

    ${CHECK_FN} || echo "[flow] ${PKG}: failed after build; continuing"
  }

+ # Execution
+ ensure_pkg "apex" check_apex build_apex || true
+ ensure_pkg "q8_kernels" check_q8 build_q8 || true
+ ensure_pkg "flash_attn" check_flash build_flash || true

+ # Upload wheels
  python - <<'PY'
  import os
  from huggingface_hub import HfApi, HfFolder
  repo=os.environ.get("SELF_HF_REPO_ID","carlex3321/aduc-sdr")
  token=os.getenv("HF_TOKEN") or HfFolder.get_token()
  if not token:
+     raise SystemExit(0)
  api=HfApi(token=token)
  api.upload_folder(
+     folder_path="/app/wheels",
+     repo_id=repo,
+     repo_type="model",
+     allow_patterns=["*.whl","NGC-DL-CONTAINER-LICENSE"],
+     ignore_patterns=["**/src/**","**/*.log","**/logs/**",".git/**"],
  )
  print("Wheel upload complete.")
  PY
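
Usage note: the updated builder.sh is driven entirely by environment variables that all carry defaults, so a bare `bash builder.sh` already works. The sketch below is an explicit invocation under stated assumptions: the token value is a placeholder, and 8.9 simply mirrors the script's own TORCH_CUDA_ARCH_LIST default (Ada-class GPUs such as L4/L40).

    export HF_TOKEN="hf_xxx"                      # placeholder; only needed for the optional wheel upload
    export SELF_HF_REPO_ID="carlex3321/aduc-sdr"  # Hub repo used as the wheel cache
    export FLASH_ATTENTION_TAG="v2.8.3"           # tag cloned by build_flash
    export TORCH_CUDA_ARCH_LIST="8.9"             # CUDA arch for source builds (script default)
    export MAX_JOBS="$(nproc)"                    # parallel compile jobs
    bash builder.sh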
info.sh CHANGED
@@ -1,123 +1,167 @@
- #!/usr/bin/env python3
- import os, sys, json, subprocess
- from pathlib import Path
- from typing import List, Optional
- from time import time, sleep
- from huggingface_hub import hf_hub_download

- class VincieService:
-     def __init__(
-         self,
-         repo_dir: str = "/app/VINCIE",
-         ckpt_dir: str = "/app/ckpt/VINCIE-3B",
-         python_bin: str = "python3",
-         repo_id: str = "ByteDance-Seed/VINCIE-3B",
-     ):
-         self.repo_dir = Path(repo_dir)
-         self.ckpt_dir = Path(ckpt_dir)
-         self.python = python_bin
-         self.repo_id = repo_id
-         self.generate_yaml = self.repo_dir / "configs" / "generate.yaml"
-         self.output_root = Path("/app/outputs")
-         self.output_root.mkdir(parents=True, exist_ok=True)
-         (self.repo_dir / "ckpt").mkdir(parents=True, exist_ok=True)

-     def ensure_repo(self, git_url: str = "https://github.com/ByteDance-Seed/VINCIE") -> None:
-         if not self.repo_dir.exists():
-             subprocess.run(["git", "clone", "--depth", "1", git_url, str(self.repo_dir)], check=True)

-     def ensure_model(self, hf_token: Optional[str] = None) -> None:
-         self.ckpt_dir.mkdir(parents=True, exist_ok=True)
-         token = hf_token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
-
-         def _need(p: Path) -> bool:
              try:
-                 return not (p.exists() and p.stat().st_size > 1_000_000)
-             except FileNotFoundError:
-                 return True

-         for fname in ["dit.pth", "vae.pth"]:
-             dst = self.ckpt_dir / fname
-             if _need(dst):
-                 print(f"[vince] downloading {fname} from {self.repo_id} ...")
-                 hf_hub_download(
-                     repo_id=self.repo_id,
-                     filename=fname,
-                     local_dir=str(self.ckpt_dir),
-                     token=token,
-                     force_download=False,
-                     local_files_only=False,
-                 )

-         link = self.repo_dir / "ckpt" / "VINCIE-3B"
          try:
-             if link.is_symlink() or link.exists():
-                 try:
-                     link.unlink()
-                 except IsADirectoryError:
-                     pass
-             if not link.exists():
-                 link.symlink_to(self.ckpt_dir, target_is_directory=True)
          except Exception as e:
-             print("[vince] symlink warning:", e)

-     def ready(self) -> bool:
-         have_repo = self.repo_dir.exists() and self.generate_yaml.exists()
-         dit_ok = (self.ckpt_dir / "dit.pth").exists()
-         vae_ok = (self.ckpt_dir / "vae.pth").exists()
-         return bool(have_repo and dit_ok and vae_ok)

-     def _wait_until_outputs(self, out_dir: Path, timeout_s: int = 300) -> None:
-         exts = (".png", ".jpg", ".jpeg", ".gif", ".mp4")
-         deadline = time() + timeout_s
-         while time() < deadline:
-             if any(p.is_file() and p.suffix.lower() in exts for p in out_dir.rglob("*")):
-                 print(f"[vince] outputs detected in {out_dir}")
-                 return
-             sleep(1)
-         print(f"[vince] warning: no outputs detected in {out_dir} within {timeout_s}s")

-     def _run_vincie(self, overrides: List[str], work_output: Path, wait_outputs: bool = True) -> None:
-         work_output.mkdir(parents=True, exist_ok=True)
-         cmd = [
-             self.python,
-             "main.py",
-             str(self.generate_yaml),
-             *overrides,
-             f"generation.output.dir={str(work_output)}",
-         ]
-         print("[vince] CWD=", self.repo_dir)
-         print("[vince] CMD=", " ".join(cmd))
-         subprocess.run(cmd, cwd=self.repo_dir, check=True, env=os.environ.copy())
-         if wait_outputs:
-             self._wait_until_outputs(work_output, timeout_s=int(os.getenv("VINCIE_WAIT_OUTPUTS_SEC", "300")))

-     def multi_turn_edit(self, input_image: str, turns: List[str], **kwargs) -> Path:
-         out_dir = self.output_root / f"multi_turn_{Path(input_image).stem}"
-         overrides = [
-             f'generation.positive_prompt.image_path="{str(input_image)}"',
-             f"generation.positive_prompt.prompts={json.dumps(turns)}",
-             f"generation.seed={int(kwargs.get('seed', 1))}",
-             f"diffusion.timesteps.sampling.steps={int(kwargs.get('steps', 50))}",
-             f"diffusion.cfg.scale={float(kwargs.get('cfg_scale', 7.5))}",
-             f'generation.negative_prompt="{kwargs.get("negative_prompt","")}"',
-             f"generation.resolution={int(kwargs.get('resolution', 512))}",
-             f"generation.batch_size={int(kwargs.get('batch_size', 1))}",
-         ]
-         self._run_vincie(overrides, out_dir, wait_outputs=True)
-         return out_dir

-     def multi_concept_compose(self, files: List[str], descs: List[str], final_prompt: str, **kwargs) -> Path:
-         out_dir = self.output_root / f"multi_concept_{len(files)}"
-         overrides = [
-             f"generation.concepts.files={json.dumps(files)}",
-             f"generation.concepts.descs={json.dumps(descs)}",
-             f'generation.final_prompt="{final_prompt}"',
-             f"generation.seed={int(kwargs.get('seed', 1))}",
-             f"diffusion.timesteps.sampling.steps={int(kwargs.get('steps', 50))}",
-             f"diffusion.cfg.scale={float(kwargs.get('cfg_scale', 7.5))}",
-             f"generation.resolution={int(kwargs.get('resolution', 512))}",
-             f"generation.batch_size={int(kwargs.get('batch_size', 1))}",
-         ]
-         self._run_vincie(overrides, out_dir, wait_outputs=True)
-         return out_dir

+ #!/usr/bin/env bash
+ set -euo pipefail

+ echo "================= RUNTIME CAPABILITIES ================="
+ date
+ if command -v nvidia-smi >/dev/null 2>&1; then
+   nvidia-smi
+ else
+   echo "nvidia-smi: not available"
+ fi

+ echo
+ echo "CUDA_HOME: ${CUDA_HOME:-/usr/local/cuda}"
+ if command -v nvcc >/dev/null 2>&1; then
+   nvcc --version || true
+ else
+   echo "nvcc: not available"
+ fi

+ echo
+ echo "[PyTorch / CUDA backend]"
+ python - <<'PY'
+ import json, os, torch, inspect
+ def to_bool(x):
+     try:
+         if callable(x):
              try:
+                 sig = inspect.signature(x)
+                 if len(sig.parameters)==0:
+                     return bool(x())
+             except Exception:
+                 pass
+             return True
+         return bool(x)
+     except Exception:
+         return None
+
+ info = {
+     "torch": getattr(torch, "__version__", None),
+     "cuda_available": torch.cuda.is_available(),
+     "cuda_device_count": torch.cuda.device_count(),
+     "cuda_runtime_version": getattr(torch.version, "cuda", None),
+     "cudnn_version": torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else None,
+     "tf32": (torch.backends.cuda.matmul.allow_tf32 if torch.cuda.is_available() else None),
+     "flash_sdp": (to_bool(getattr(torch.backends.cuda, "enable_flash_sdp", None)) if torch.cuda.is_available() else None),
+     "mem_efficient_sdp": (to_bool(getattr(torch.backends.cuda, "enable_mem_efficient_sdp", None)) if torch.cuda.is_available() else None),
+     "math_sdp": (to_bool(getattr(torch.backends.cuda, "enable_math_sdp", None)) if torch.cuda.is_available() else None),
+ }
+ print(json.dumps(info, indent=2))
+ for i in range(min(torch.cuda.device_count(), 8)):
+     print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
+ PY

+ echo
+ echo "[Apex]"
+ python - <<'PY'
+ try:
+     from apex.normalization import FusedLayerNorm, FusedRMSNorm
+     import importlib; importlib.import_module("fused_layer_norm_cuda")
+     print("apex.normalization: OK")
+ except Exception as e:
+     print("apex.normalization: FAIL ->", e)
+ PY

+ echo
+ echo "[FlashAttention]"
+ python - <<'PY'
+ import importlib
+ for m in ("flash_attn","flash_attn_2_cuda"):
+     try:
+         importlib.import_module(m); print(f"{m}: OK")
+     except Exception as e:
+         print(f"{m}: FAIL -> {e}")
+ PY
+
+ echo
+ echo "[FlashAttention LN test]"
+ python - <<'PY'
+ import os, warnings, importlib
+ warnings.filterwarnings("ignore", category=FutureWarning)
+ def ok_import(names):
+     for n in names:
          try:
+             importlib.import_module(n)
+             print(f" [+] import '{n}' OK")
+             return True
          except Exception as e:
+             print(f" [-] import '{n}' fail: {e}")
+     return False
+ fa_ver = None
+ try:
+     import flash_attn
+     fa_ver = getattr(flash_attn, "__version__", None)
+ except Exception:
+     pass
+ try:
+     import torch
+     tv = torch.__version__
+     cu = getattr(torch.version, "cuda", None)
+ except Exception:
+     tv, cu = "unknown", "unknown"
+ print(f" flash_attn version: {fa_ver}")
+ print(f" torch: {tv} | cuda: {cu} | TORCH_CUDA_ARCH_LIST={os.getenv('TORCH_CUDA_ARCH_LIST')}")
+ names_to_try = [
+     "flash_attn_2_cuda",
+     "flash_attn.ops.layer_norm",
+     "flash_attn.layers.layer_norm",
+ ]
+ ok = ok_import(names_to_try)
+ if not ok:
+     print(" Hint: FlashAttention LN/RMSNorm kernels are missing (reduced performance).")
+     print(" Use builder.sh to compile flash_attn and reuse the wheel.")
+ PY
+
+ echo
+ echo "[Triton]"
+ python - <<'PY'
+ try:
+     import triton
+     print("triton:", triton.__version__)
+     try:
+         import triton.ops as _; print("triton.ops: OK")
+     except Exception:
+         print("triton.ops: not present (ok on Triton>=3.x)")
+ except Exception as e:
+     print("triton: FAIL ->", e)
+ PY

+ echo
+ echo "[BitsAndBytes (Q8/Q4)]"
+ python - <<'PY'
+ try:
+     import bitsandbytes as bnb
+     print("bitsandbytes:", bnb.__version__)
+     try:
+         from bitsandbytes.triton import _custom_ops as _; print("bnb.triton.int8_matmul_mixed_dequantize: OK")
+     except Exception as e:
+         print("bnb.triton: partial ->", e)
+ except Exception as e:
+     print("bitsandbytes: FAIL ->", e)
+ PY

+ echo
+ echo "[Transformers / Diffusers / XFormers]"
+ python - <<'PY'
+ def _v(m):
+     try:
+         mod = __import__(m)
+         print(f"{m}:", getattr(mod, "__version__", "unknown"))
+     except Exception as e:
+         print(f"{m}: FAIL -> {e}")
+ for m in ("transformers","diffusers","xformers"):
+     _v(m)
+ PY

+ echo
+ echo "[Distributed / NCCL Env]"
+ env | grep -E '^(CUDA_VISIBLE_DEVICES|NCCL_|TORCH_|ENABLE_.*SDP|HF_HUB_.*|CUDA_|NV_.*NCCL.*|PYTORCH_CUDA_ALLOC_CONF)=' | sort

+ echo
+ echo "[Output paths and permissions]"
+ OUT="/app/outputs"
+ echo "OUT dir: $OUT"
+ mkdir -p "$OUT"
+ ls -la "$OUT" || true

+ echo "================= END CAPABILITIES ================="