#!/bin/bash
#
# FlashAttention wheel helper: installs a flash-attn wheel from the wheel
# directory when one is present, otherwise builds it with pip first.

# Abort on the first command that fails outside of a conditional.
set -e

echo "🔧 Compilador FlashAttention — modo GPU (configurado para infra grande)"

# Directory where wheels are stored and looked up.
WHEEL_DIR="/data/wheel"
# Scratch directory handed to child processes through TMPDIR.
TMP_DIR="/data/tmp"
mkdir -p "$WHEEL_DIR" "$TMP_DIR"

# Exported so that pip and the tools it spawns inherit the scratch location.
export TMPDIR="$TMP_DIR"

# Reuse a wheel that is already present in the wheel directory.
# find's output order is unspecified, so the candidates are sorted to make the
# choice deterministic when more than one file matches.
mapfile -t cached_wheels < <(
  find "$WHEEL_DIR" -name "flash_attn-*.whl" | LC_ALL=C sort
)
FLASH_WHL="${cached_wheels[0]:-}"

if [[ -f "$FLASH_WHL" ]]; then
  echo "📦 Wheel já existe: $FLASH_WHL"
  pip install --no-cache-dir "$FLASH_WHL"
  # Nothing left to build: finish successfully.
  exit 0
fi

echo "⚙️ Nenhum wheel encontrado — iniciando compilação controlada com CUDA..."

# Flags exported for the build that follows.
# NOTE(review): confirm that the flash-attn build actually reads these two
# variables; this script only exports them.
export USE_FLASH_ATTENTION_WITH_CUDA=1
export FORCE_CUDA=1

# Report the build settings. MAX_JOBS, OMP_NUM_THREADS and CUDA_VISIBLE_DEVICES
# are not assigned anywhere visible here, so they are expected to come from the
# caller's environment; the ':-' form prints an empty value when one is unset
# and stays safe should 'set -u' ever be enabled.
echo " > MAX_JOBS=${MAX_JOBS:-}, OMP_NUM_THREADS=${OMP_NUM_THREADS:-}, CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
echo " > TMPDIR=$TMPDIR, WHEEL_DIR=$WHEEL_DIR"

# With pipefail the pipelines below report pip's failure instead of tee's
# success, so the 'if' really tests the build result.
set -o pipefail

# First attempt: build with the CUDA flags, mirroring all output to the log.
if pip wheel flash-attn==2.8.0.post2 --no-build-isolation -w "$WHEEL_DIR" 2>&1 \
  | tee /tmp/flash_build.log; then
  echo "✅ Build com CUDA concluído."
else
  echo "⚠️ Build CUDA falhou; tentando fallback CPU-only (mais lento). Verifique /tmp/flash_build.log"
  export USE_FLASH_ATTENTION_WITH_CUDA=0
  export FORCE_CUDA=0
  # Second attempt with the flags switched off. Its output is appended to the
  # same log so the file the messages point to also covers the retry.
  pip wheel flash-attn==2.8.0.post2 --no-build-isolation -w "$WHEEL_DIR" 2>&1 \
    | tee -a /tmp/flash_build.log
fi

# Locate the wheel produced by the build. Candidates are sorted because find's
# output order is unspecified; reading them into an array avoids an
# early-exiting 'head' inside a pipeline.
mapfile -t built_wheels < <(
  find "$WHEEL_DIR" -name "flash_attn-*.whl" | LC_ALL=C sort
)
FLASH_WHL="${built_wheels[0]:-}"

if [[ -f "$FLASH_WHL" ]]; then
  echo "✅ Wheel gerado: $FLASH_WHL"
  pip install --no-cache-dir "$FLASH_WHL"
  echo "📁 Wheel salvo em $WHEEL_DIR"
else
  # No matching wheel file was found: report it and fail the script.
  echo "❌ Erro: não foi possível gerar o wheel. Ver logs em /tmp/flash_build.log"
  exit 1
fi

# Optional upload of the wheel directory to the Hugging Face Hub, enabled only
# when HF_UPLOAD_WHEELS is exactly "1".
if [[ "${HF_UPLOAD_WHEELS:-0}" == "1" ]]; then
  echo "⬆️ Upload habilitado. Verificando HF_TOKEN..."
  # The heredoc delimiter is quoted, so the shell expands nothing inside the
  # Python code; the wheel directory is passed through the environment instead.
  WHEEL_DIR="${WHEEL_DIR:-/data/wheel}" python3 - <<'PY'
import os

from huggingface_hub import HfApi

# Prefer get_token when the installed huggingface_hub provides it and fall
# back to HfFolder otherwise.
# NOTE(review): HfFolder is believed to be deprecated in newer releases - confirm.
try:
    from huggingface_hub import get_token
except ImportError:
    from huggingface_hub import HfFolder
    get_token = HfFolder.get_token

# Target repository; overridable through SELF_HF_REPO_ID.
repo = os.environ.get("SELF_HF_REPO_ID", "caarleexx/Flash")
# Folder to upload; follows the script's WHEEL_DIR.
wheel_dir = os.environ.get("WHEEL_DIR", "/data/wheel")

# The HF_TOKEN environment variable wins; otherwise use the stored login token.
token = os.getenv("HF_TOKEN") or get_token()
if not token:
    raise SystemExit("HF_TOKEN ausente; upload desabilitado")

api = HfApi(token=token)
api.upload_folder(
    folder_path=wheel_dir,
    repo_id=repo,
    repo_type="model",
    allow_patterns=["*.whl", "NGC-DL-CONTAINER-LICENSE"],
    ignore_patterns=["**/src/**", "**/*.log", "**/logs/**", ".git/**"],
)
print("Upload concluído.")
PY
else
  echo "ℹ️ Upload HF desabilitado (HF_UPLOAD_WHEELS!=1)"
fi