Aduc / compile_flash.sh
caarleexx's picture
Update compile_flash.sh
d000a91 verified
raw
history blame
2.67 kB
#!/bin/bash
# Reuses a previously built FlashAttention wheel when one is present in the
# wheel directory; otherwise execution continues to the build steps below.
set -e

printf '%s\n' "🔧 Compilador FlashAttention — modo GPU (configurado para infra grande)"

# Directory where wheels are kept, and directory used for temporary files.
WHEEL_DIR="/data/wheel"
TMP_DIR="/data/tmp"
mkdir -p "${WHEEL_DIR}" "${TMP_DIR}"

# Exported so that child processes (pip and whatever it spawns) inherit it.
TMPDIR="${TMP_DIR}"
export TMPDIR

# First flash_attn wheel found under the wheel directory; empty when none.
FLASH_WHL="$(find "${WHEEL_DIR}" -name "flash_attn-*.whl" -print -quit)"

# Fast path: install the existing wheel and stop, skipping the build entirely.
if [[ -f "${FLASH_WHL}" ]]; then
  printf '%s\n' "📦 Wheel já existe: ${FLASH_WHL}"
  pip install --no-cache-dir "${FLASH_WHL}"
  exit 0
fi
echo "⚙️ Nenhum wheel encontrado — iniciando compilação controlada com CUDA..."

# ----- Optional tuning knobs for a large build host (8x L40S) -----
# Left commented out; uncomment them, or export the variables before running
# this script, to constrain the build.
#export MAX_JOBS=${MAX_JOBS:-128} # compile jobs (ninja/cmake)
#export OMP_NUM_THREADS=${OMP_NUM_THREADS:-8}
#export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0} # use a single GPU for the build
#export TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST:-"8.9"}
#export PYTORCH_CUDA_ALLOC_CONF=${PYTORCH_CUDA_ALLOC_CONF:-"max_split_size_mb:512"}

# NOTE(review): confirm that flash-attn's build actually reads these two
# variables; they are kept exactly as the script has always set them.
export USE_FLASH_ATTENTION_WITH_CUDA=1
export FORCE_CUDA=1

# The knobs above are optional, so print a placeholder instead of an empty
# value when the caller did not export them.
echo " > MAX_JOBS=${MAX_JOBS:-(não definido)}, OMP_NUM_THREADS=${OMP_NUM_THREADS:-(não definido)}, CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-(não definido)}"
echo " > TMPDIR=$TMPDIR, WHEEL_DIR=$WHEEL_DIR"

# Version to build; override by exporting FLASH_ATTN_VERSION.
flash_attn_version="${FLASH_ATTN_VERSION:-2.8.0.post2}"
# Build output of every attempt is collected here.
build_log="/tmp/flash_build.log"

# Builds the flash-attn wheel into $WHEEL_DIR, merging stderr into stdout so
# the caller can capture everything through a single pipe.
# --no-deps keeps pip from also saving wheels for every dependency (torch and
# friends) into $WHEEL_DIR; only the flash-attn wheel is wanted there.
build_flash_wheel() {
  pip wheel "flash-attn==${flash_attn_version}" \
    --no-build-isolation --no-deps -w "$WHEEL_DIR" 2>&1
}

# With pipefail the pipeline reports pip's failure rather than tee's success.
set -o pipefail

# First attempt: CUDA build.
if build_flash_wheel | tee "$build_log"; then
  echo "✅ Build com CUDA concluído."
else
  echo "⚠️ Build CUDA falhou; tentando fallback CPU-only (mais lento). Verifique $build_log"
  export USE_FLASH_ATTENTION_WITH_CUDA=0
  export FORCE_CUDA=0
  # Append the second attempt to the same log so both attempts can be
  # inspected, and fail loudly (keeping pip's exit status) if it also fails.
  build_flash_wheel | tee -a "$build_log" || {
    status=$?
    echo "❌ Erro: fallback CPU-only também falhou. Ver logs em $build_log" >&2
    exit "$status"
  }
fi
# Locate the freshly built wheel and install it.
# find stops by itself at the first match (-print -quit). Piping into
# `head -n 1` is avoided because pipefail is enabled by this point: if head
# exits first, find can die from SIGPIPE and `set -e` would abort the script
# without any message.
FLASH_WHL=$(find "$WHEEL_DIR" -name "flash_attn-*.whl" -print -quit)

if [[ -f "$FLASH_WHL" ]]; then
  echo "✅ Wheel gerado: $FLASH_WHL"
  pip install --no-cache-dir "$FLASH_WHL"
  echo "📁 Wheel salvo em $WHEEL_DIR"
else
  # No wheel was produced: report on stderr and exit non-zero.
  echo "❌ Erro: não foi possível gerar o wheel. Ver logs em /tmp/flash_build.log" >&2
  exit 1
fi
# ----- Optional upload to Hugging Face -----
# Runs only when HF_UPLOAD_WHEELS=1. The target repository comes from
# SELF_HF_REPO_ID (default "caarleexx/Flash"); the token comes from HF_TOKEN
# or from the token huggingface_hub has stored locally.
if [[ "${HF_UPLOAD_WHEELS:-0}" == "1" ]]; then
  echo "⬆️ Upload habilitado. Verificando HF_TOKEN..."
  # The heredoc delimiter is quoted, so the shell expands nothing inside the
  # Python code; the wheel directory is handed over through the environment
  # so the upload follows WHEEL_DIR instead of a second hard-coded path.
  WHEEL_DIR="${WHEEL_DIR:-/data/wheel}" python3 - <<'PY'
import os

from huggingface_hub import HfApi

# Prefer get_token(); fall back to HfFolder on huggingface_hub releases that
# do not provide it.
try:
    from huggingface_hub import get_token
except ImportError:
    from huggingface_hub import HfFolder

    get_token = HfFolder.get_token

repo = os.environ.get("SELF_HF_REPO_ID", "caarleexx/Flash")
wheel_dir = os.environ.get("WHEEL_DIR", "/data/wheel")
token = os.getenv("HF_TOKEN") or get_token()
if not token:
    # SystemExit with a message exits with status 1, so the script fails here.
    raise SystemExit("HF_TOKEN ausente; upload desabilitado")

api = HfApi(token=token)
# Only wheels (and the container license file, if present) are uploaded.
api.upload_folder(
    folder_path=wheel_dir,
    repo_id=repo,
    repo_type="model",
    allow_patterns=["*.whl", "NGC-DL-CONTAINER-LICENSE"],
    ignore_patterns=["**/src/**", "**/*.log", "**/logs/**", ".git/**"],
)
print("Upload concluído.")
PY
else
  echo "ℹ️ Upload HF desabilitado (HF_UPLOAD_WHEELS!=1)"
fi