File size: 5,267 Bytes
545445d
 
bdec336
 
 
 
 
 
 
 
 
1c31d0f
bdec336
 
 
 
 
 
1c31d0f
545445d
bdec336
545445d
 
bdec336
 
545445d
 
 
 
 
bdec336
 
545445d
 
1c31d0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
545445d
316b67d
b1804d0
 
 
 
 
 
 
 
 
 
1c31d0f
14c5fa4
 
b1804d0
 
 
 
 
 
1c31d0f
 
 
b1804d0
 
1c31d0f
 
 
 
 
 
545445d
316b67d
545445d
 
a817dc4
 
316b67d
545445d
316b67d
 
545445d
cf55b31
fb56537
f21bddb
 
e445ead
 
6dfd96b
f21bddb
a817dc4
0834a4b
1c31d0f
 
bcf1a11
0834a4b
545445d
1c31d0f
a817dc4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#!/usr/bin/env bash
# Start script: prints a banner and exports the CUDA / NCCL / attention-related
# environment variables before the later steps of this file run.
set -euo pipefail

# Startup banner (three lines on stdout).
printf '%s\n' \
  "=======================================================" \
  " ADUC-SDR — Start (VINCIE/SeedVR, 8× L40S)" \
  "======================================================="

# ---------------------- Base environment ----------------------
# Overridable settings: a non-empty value already present in the environment
# is kept; otherwise the default shown here is assigned. All are exported.
: "${CUDA_VISIBLE_DEVICES:=0,1,2,3,4,5,6,7}"
: "${TORCH_DTYPE:=bfloat16}"
export CUDA_VISIBLE_DEVICES TORCH_DTYPE

# SDPA / FA toggles (overridable)
: "${ENABLE_FLASH_SDP:=1}"
: "${ENABLE_MEMORY_EFFICIENT_SDP:=1}"
: "${ENABLE_MATH_SDP:=0}"
: "${FLASH_ATTENTION_DISABLE:=0}"
: "${XFORMERS_FORCE_DISABLE:=1}"
export ENABLE_FLASH_SDP ENABLE_MEMORY_EFFICIENT_SDP ENABLE_MATH_SDP
export FLASH_ATTENTION_DISABLE XFORMERS_FORCE_DISABLE

# CUDA / NCCL baseline — overridable part
: "${CUDA_DEVICE_MAX_CONNECTIONS:=32}"
: "${OMP_NUM_THREADS:=8}"
: "${MKL_NUM_THREADS:=8}"
export CUDA_DEVICE_MAX_CONNECTIONS OMP_NUM_THREADS MKL_NUM_THREADS

# CUDA / NCCL baseline — fixed part: these overwrite any inherited value.
export CUDA_MODULE_LOADING="LAZY" \
       CUDA_DEVICE_ORDER="PCI_BUS_ID" \
       PYTORCH_CUDA_ALLOC_CONF="max_split_size_mb:512,garbage_collection_threshold:0.8"
export NCCL_DEBUG="INFO" \
       NCCL_ASYNC_ERROR_HANDLING=1 \
       NCCL_P2P_DISABLE=0 \
       NCCL_IB_DISABLE=1 \
       NCCL_SOCKET_IFNAME="lo" \
       NCCL_BLOCKING_WAIT=1 \
       TORCH_NCCL_BLOCKING_WAIT=1 \
       NCCL_TIMEOUT=600

# ---------------------- HF/torch cache persistence ----------------------
# Cache roots live under /data when that directory exists, otherwise under /app.
if [ -d /data ]; then
  export HF_HOME="/data/.cache/huggingface"
  export TORCH_HOME="/data/.cache/torch"
else
  export HF_HOME="/app/.cache/huggingface"
  export TORCH_HOME="/app/.cache/torch"
fi
# An HF_HUB_CACHE already set in the environment wins over the derived default.
export HF_HUB_CACHE="${HF_HUB_CACHE:-$HF_HOME/hub}"
# HF_HOME is created too so the link below never dangles when HF_HUB_CACHE
# was overridden to a location outside HF_HOME.
mkdir -p "$HF_HOME" "$HF_HUB_CACHE" "$TORCH_HOME"

# Make /app/.cache/huggingface resolve to HF_HOME.
APP_HF_LINK="/app/.cache/huggingface"
mkdir -p /app/.cache
if [[ "$HF_HOME" == "$APP_HF_LINK" ]]; then
  # HF_HOME already *is* this path; linking it to itself would only drop a
  # looping "huggingface" link inside the cache directory.
  :
elif [[ -d "$APP_HF_LINK" && ! -L "$APP_HF_LINK" ]]; then
  # A real directory cannot be replaced by ln; without this guard the link
  # would be created *inside* it instead.
  echo "[warn] $APP_HF_LINK já existe como diretório real; link para $HF_HOME não criado." >&2
else
  # -n: treat an existing link at the destination as a file to replace rather
  # than following it, so re-runs do not nest a link inside HF_HOME.
  ln -sfn "$HF_HOME" "$APP_HF_LINK"
fi
unset TRANSFORMERS_CACHE

export HF_HUB_ENABLE_HF_TRANSFER=1
export HF_HUB_DOWNLOAD_TIMEOUT=60

# Model coordinates used by the download steps further down.
MODEL_REPO="ByteDance-Seed/VINCIE-3B"
CACHE_MODEL_DIR="$HF_HUB_CACHE/models--ByteDance-Seed--VINCIE-3B"
CKPT_DIR="/app/ckpt/VINCIE-3B"
mkdir -p "$CKPT_DIR"

# ---------------------- Download: structured cache or direct ----------------------
# DIRECT_TO_CKPT=1  -> download the snapshot as plain files into $CKPT_DIR.
# anything else     -> download into the Hugging Face hub cache (default).
# The flag is compared as a string so a non-numeric value simply selects the
# cache branch instead of making `[ ... -eq ]` print an error.
# Repo id and target directory are handed to Python through the environment so
# the heredocs cannot drift from MODEL_REPO / CKPT_DIR / HF_HUB_CACHE set earlier.
# NOTE(review): resume_download and local_dir_use_symlinks are kept as in the
# original; they appear to be deprecated in recent huggingface_hub releases —
# confirm against the installed version.
if [[ "${DIRECT_TO_CKPT:-0}" == "1" ]]; then
  echo "[direct] Baixando ${MODEL_REPO} diretamente para $CKPT_DIR"
  MODEL_REPO="$MODEL_REPO" CKPT_DIR="$CKPT_DIR" python - <<'PY'
import os
from pathlib import Path
from huggingface_hub import snapshot_download
repo_id = os.environ["MODEL_REPO"]
ckpt_dir = Path(os.environ["CKPT_DIR"])
ckpt_dir.mkdir(parents=True, exist_ok=True)
# Either variable name may carry the access token; None means anonymous access.
token = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_TOKEN")
snapshot_download(
  repo_id=repo_id,
  local_dir=str(ckpt_dir),
  local_dir_use_symlinks=False,
  resume_download=True,
  token=token,
)
print("[direct] Snapshot materializado em", ckpt_dir)
PY
else
  echo "Verificando snapshot do ${MODEL_REPO} no cache..."
  MODEL_REPO="$MODEL_REPO" python - <<'PY'
import os
from huggingface_hub import snapshot_download
hf_home = os.environ.get("HF_HOME","/app/.cache/huggingface")
# Honour the exported HF_HUB_CACHE (which may have been overridden by the
# caller); fall back to <HF_HOME>/hub only when it is missing or empty.
cache_dir = os.environ.get("HF_HUB_CACHE") or os.path.join(hf_home, "hub")
os.makedirs(cache_dir, exist_ok=True)
repo_id = os.environ["MODEL_REPO"]
token = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_TOKEN")
snapshot_download(
  repo_id=repo_id,
  cache_dir=cache_dir,
  resume_download=True,
  local_dir_use_symlinks=False,
  token=token,
)
# Optional diagnostics (uncomment together; they need `from pathlib import Path`):
#mcache = Path(cache_dir) / ("models--" + repo_id.replace("/", "--"))
#print("[cache] Estrutura em:", mcache)
#print("[cache] refs:", list((mcache/"refs").glob("*")))
#print("[cache] snapshots:", [p.name for p in (mcache/"snapshots").glob("*") if p.is_dir()])
PY
fi


# Best-effort copy of the model into /app/ckpt/VINCIE-3B. A download failure is
# reported on stdout and does not abort the script (the exception is caught).
python3 - <<'PY'
from huggingface_hub import snapshot_download
import os
save_dir = '/app/ckpt/VINCIE-3B'
os.makedirs(save_dir, exist_ok=True)
# cache_dir used to be referenced without ever being defined, so every run
# raised NameError (swallowed by the except below) and nothing was downloaded.
# Use the hub cache exported by this script, falling back to <HF_HOME>/hub.
cache_dir = os.environ.get('HF_HUB_CACHE') or os.path.join(
    os.environ.get('HF_HOME', '/app/.cache/huggingface'), 'hub'
)
# Same token lookup as the other download steps; None means anonymous access.
token = os.getenv('HF_TOKEN') or os.getenv('HUGGING_FACE_TOKEN')
try:
    print('📥 Baixando VINCIE-3B...')
    snapshot_download(
        repo_id='ByteDance-Seed/VINCIE-3B',
        local_dir=save_dir,
        cache_dir=cache_dir,
        token=token,
        #resume_download=True,
        #local_dir_use_symlinks=False
    )
    print('✅ Modelo ok')
except Exception as e:
    # Deliberately non-fatal: startup continues even if the download fails.
    print(f'⚠️ Download falhou: {e}')
PY

# Expose the checkpoint directory inside the VINCIE tree through a symlink.
# -n replaces an existing link at the destination instead of following it.
vincie_ckpt_root="/app/VINCIE/ckpt"
mkdir -p "$vincie_ckpt_root"
ln -sfn /app/ckpt/VINCIE-3B "$vincie_ckpt_root/VINCIE-3B"



#echo "[diag] Cache model dir: $CACHE_MODEL_DIR"
#ls -la "$CACHE_MODEL_DIR" || true
#echo "[diag] refs:"; ls -la "$CACHE_MODEL_DIR/refs" || true
#echo "[diag] snapshots:"; ls -la "$CACHE_MODEL_DIR/snapshots" || true
#echo "[diag] CKPT_DIR: $CKPT_DIR"; ls -la "$CKPT_DIR" || true

# ---------------------- Apex/Q8 builder ----------------------
# Runs /app/builder.sh only when nvidia-smi succeeds and DISABLE_BUILDER is 0
# (the default). The run is capped at BUILDER_TIMEOUT_SEC seconds (default
# 7200); a timeout or failure is reported and startup continues.
if ! nvidia-smi >/dev/null 2>&1; then
  echo "GPU não visível, pulando builder Apex/Q8."
elif [ "${DISABLE_BUILDER:-0}" -eq 0 ]; then
  builder_deadline="${BUILDER_TIMEOUT_SEC:-7200}"
  echo "Executando builder Apex/Q8..."
  # Best-effort: the script may already be executable or the chmod may be denied.
  chmod +x /app/builder.sh || true
  if ! timeout "$builder_deadline" bash -lc /app/builder.sh; then
    echo "Builder excedeu tempo/erro, prosseguindo."
  fi
else
  echo "Builder desabilitado por DISABLE_BUILDER=1"
fi

# Pin triton: remove whatever is installed (a failed uninstall is ignored),
# then install the exact version. A failed install aborts the script (set -e).
pip uninstall -y triton || true
pip install -v --no-build-isolation triton==3.1.0

# Pin bitsandbytes the same way.
pip uninstall -y bitsandbytes || true
pip install bitsandbytes==0.43.1

# ---------------------- Diagnostics ----------------------
# Best-effort: a missing or failing info script must not block startup.
/app/info.sh || true
#ls -la /app || true
#ls -R /app | head -n 2000 || true

# ---------------------- Starting the service ----------------------
echo "🚀 Subindo serviços..."
# Tip: VINCIE_DIRECT_TO_CKPT=1 can be exported for an internal fallback.
# NOTE(review): that variable is not read anywhere in this script — presumably
# it is consumed by the application; confirm.
# exec replaces this shell with the Python process, so signals sent to the
# script's PID (e.g. SIGTERM on shutdown) reach the service directly and the
# script's exit status is the service's own.
exec python /app/app_vince.py