Update compile_flash.sh
compile_flash.sh  CHANGED  +52 -32
@@ -1,50 +1,70 @@
 #!/bin/bash
 set -e
-
-echo "🔧 [Aduc-SDR] Persistent FlashAttention compiler (GPU mode)"
-echo "----------------------------------------------------------------"
+echo "🔧 FlashAttention compiler, GPU mode (configured for large infra)"
 
 WHEEL_DIR="/data/wheel"
+TMP_DIR="/data/tmp"
+mkdir -p "$WHEEL_DIR" "$TMP_DIR"
+export TMPDIR="$TMP_DIR"
 
 FLASH_WHL=$(find "$WHEEL_DIR" -name "flash_attn-*.whl" | head -n 1)
 if [[ -f "$FLASH_WHL" ]]; then
+  echo "📦 Wheel already exists: $FLASH_WHL"
+  pip install --no-cache-dir "$FLASH_WHL"
   exit 0
 fi
 
-echo "⚙️ No wheel found; starting CUDA build..."
-
-# --- Parallelism and memory control ---
-export MAX_JOBS=4                # use half of the CPUs
-export OMP_NUM_THREADS=4
-export CUDA_VISIBLE_DEVICES="0"  # use only 1 GPU
-export TORCH_CUDA_ARCH_LIST="8.9"
-export PYTORCH_CUDA_ALLOC_CONF="max_split_size_mb:256"
+echo "⚙️ No wheel found; starting controlled CUDA build..."
+
+# ----- Safe settings for a large environment (8x L40S) -----
+#export MAX_JOBS=${MAX_JOBS:-128}                          # build jobs (ninja/cmake)
+#export OMP_NUM_THREADS=${OMP_NUM_THREADS:-8}
+#export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0}    # use only 1 GPU for the build
+#export TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST:-"8.9"}
+#export PYTORCH_CUDA_ALLOC_CONF=${PYTORCH_CUDA_ALLOC_CONF:-"max_split_size_mb:512"}
 export USE_FLASH_ATTENTION_WITH_CUDA=1
 export FORCE_CUDA=1
 
+echo " > MAX_JOBS=$MAX_JOBS, OMP_NUM_THREADS=$OMP_NUM_THREADS, CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
+echo " > TMPDIR=$TMPDIR, WHEEL_DIR=$WHEEL_DIR"
+
+# Try the CUDA build
+set -o pipefail
+if pip wheel flash-attn==2.8.0 --no-build-isolation -w "$WHEEL_DIR" 2>&1 | tee /tmp/flash_build.log; then
+  echo "✅ CUDA build finished."
+else
+  echo "⚠️ CUDA build failed; trying CPU-only fallback (slower). Check /tmp/flash_build.log"
+  export USE_FLASH_ATTENTION_WITH_CUDA=0
+  export FORCE_CUDA=0
+  pip wheel flash-attn==2.8.0 --no-build-isolation -w "$WHEEL_DIR"
+fi
 
-# --- Install the generated wheel ---
 FLASH_WHL=$(find "$WHEEL_DIR" -name "flash_attn-*.whl" | head -n 1)
 if [[ -f "$FLASH_WHL" ]]; then
+  echo "✅ Wheel built: $FLASH_WHL"
+  pip install --no-cache-dir "$FLASH_WHL"
+  echo "📁 Wheel saved in $WHEEL_DIR"
 else
+  echo "❌ Error: could not build the wheel. See logs in /tmp/flash_build.log"
   exit 1
 fi
+
+# ----- Optional upload to Hugging Face -----
+if [ "${HF_UPLOAD_WHEELS:-0}" = "1" ]; then
+  echo "⬆️ Upload enabled. Checking HF_TOKEN..."
+  python3 - <<'PY'
+import os
+from huggingface_hub import HfApi, HfFolder
+repo = os.environ.get("SELF_HF_REPO_ID", "caarleexx/Flash")
+token = os.getenv("HF_TOKEN") or HfFolder.get_token()
+if not token:
+    raise SystemExit("HF_TOKEN missing; upload disabled")
+api = HfApi(token=token)
+api.upload_folder(folder_path="/data/wheel", repo_id=repo, repo_type="model",
+                  allow_patterns=["*.whl", "NGC-DL-CONTAINER-LICENSE"],
+                  ignore_patterns=["**/src/**", "**/*.log", "**/logs/**", ".git/**"])
+print("Upload finished.")
+PY
+else
+  echo "ℹ️ HF upload disabled (HF_UPLOAD_WHEELS != 1)"
+fi
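
In the new version the tuning exports are commented out, so MAX_JOBS, OMP_NUM_THREADS, CUDA_VISIBLE_DEVICES, TORCH_CUDA_ARCH_LIST, and PYTORCH_CUDA_ALLOC_CONF only take effect (and only show real values in the status echo) when the caller sets them before running the script; likewise, the upload step runs only when HF_UPLOAD_WHEELS=1 and a token is available via HF_TOKEN or the cached login. A minimal invocation sketch, with placeholder values that are not taken from the commit:

export MAX_JOBS=32                       # placeholder; flash-attn's setup.py reads MAX_JOBS to cap parallel build jobs
export CUDA_VISIBLE_DEVICES=0            # build on a single GPU
export TORCH_CUDA_ARCH_LIST="8.9"        # compute capability of the L40S (Ada)
export HF_UPLOAD_WHEELS=1                # enable the optional upload step
export HF_TOKEN=hf_xxx                   # placeholder; a write token for the target repo
export SELF_HF_REPO_ID=caarleexx/Flash   # same default the script falls back to
bash compile_flash.sh

Reading the knobs from the environment instead of hard-coding them keeps one script usable across machines; uncommenting the ${VAR:-default} lines would restore in-script defaults while still letting the caller override them.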