caarleexx committed on
Commit
07cb7c3
·
verified ·
1 Parent(s): 3d62e09

Update compile_flash.sh

Browse files
Files changed (1) hide show
  1. compile_flash.sh +52 -32
compile_flash.sh CHANGED
@@ -1,50 +1,70 @@
1
  #!/bin/bash
2
  set -e
3
-
4
- echo "🔧 [Aduc-SDR] Compilador Persistente de FlashAttention (GPU Mode)"
5
- echo "----------------------------------------------------------------"
6
 
7
  WHEEL_DIR="/data/wheel"
8
- mkdir -p "$WHEEL_DIR"
 
 
9
 
10
  FLASH_WHL=$(find "$WHEEL_DIR" -name "flash_attn-*.whl" | head -n 1)
11
-
12
  if [[ -f "$FLASH_WHL" ]]; then
13
- echo "📦 Wheel já existente: $FLASH_WHL"
14
- echo "✅ Reutilizando compilação anterior."
15
- pip install --no-cache-dir "$FLASH_WHL"
16
- exit 0
17
  fi
18
 
19
- echo "⚙️ Nenhum wheel encontrado — iniciando compilação com CUDA (modo seguro)..."
20
-
21
- # --- Controle de paralelismo e memória ---
22
- export MAX_JOBS=4 # usa metade das CPUs
23
- export OMP_NUM_THREADS=4
24
- export CUDA_VISIBLE_DEVICES="0" # usa apenas 1 GPU
25
- export TORCH_CUDA_ARCH_LIST="8.9"
26
- export PYTORCH_CUDA_ALLOC_CONF="max_split_size_mb:256"
27
 
28
- # --- Flags de build CUDA ---
 
 
 
 
 
29
  export USE_FLASH_ATTENTION_WITH_CUDA=1
30
  export FORCE_CUDA=1
31
 
32
- # --- Compilação controlada ---
33
- echo "🚧 Compilando flash-attn==2.8.3 com suporte CUDA..."
34
- pip wheel flash-attn==2.8.3 --no-build-isolation -w "$WHEEL_DIR" || {
35
- echo "⚠️ Falha na compilação CUDA. Tentando fallback CPU-only..."
36
- export USE_FLASH_ATTENTION_WITH_CUDA=0
37
- export FORCE_CUDA=0
38
- pip wheel flash-attn==2.8.3 --no-build-isolation -w "$WHEEL_DIR"
39
- }
 
 
 
 
 
40
 
41
- # --- Instala o wheel gerado ---
42
  FLASH_WHL=$(find "$WHEEL_DIR" -name "flash_attn-*.whl" | head -n 1)
43
  if [[ -f "$FLASH_WHL" ]]; then
44
- echo "✅ Wheel gerado com sucesso: $FLASH_WHL"
45
- pip install --no-cache-dir "$FLASH_WHL"
46
- echo "📁 Salvo em: $WHEEL_DIR"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  else
48
- echo " Falha ao gerar wheel FlashAttention."
49
- exit 1
50
  fi
 
1
  #!/bin/bash
2
  set -e
3
+ echo "🔧 Compilador FlashAttention — modo GPU (configurado para infra grande)"
 
 
4
 
5
  WHEEL_DIR="/data/wheel"
6
+ TMP_DIR="/data/tmp"
7
+ mkdir -p "$WHEEL_DIR" "$TMP_DIR"
8
+ export TMPDIR="$TMP_DIR"
9
 
10
  FLASH_WHL=$(find "$WHEEL_DIR" -name "flash_attn-*.whl" | head -n 1)
 
11
  if [[ -f "$FLASH_WHL" ]]; then
12
+ echo "📦 Wheel já existe: $FLASH_WHL"
13
+ pip install --no-cache-dir "$FLASH_WHL"
14
+ exit 0
 
15
  fi
16
 
17
+ echo "⚙️ Nenhum wheel encontrado — iniciando compilação controlada com CUDA..."
 
 
 
 
 
 
 
18
 
19
+ # ----- Ajustes seguros para ambiente grande (8x L40S) -----
20
+ #export MAX_JOBS=${MAX_JOBS:-128} # jobs de compilação (ninja/cmake)
21
+ #export OMP_NUM_THREADS=${OMP_NUM_THREADS:-8}
22
+ #export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0} # usar apenas 1 GPU para build
23
+ #export TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST:-"8.9"}
24
+ #export PYTORCH_CUDA_ALLOC_CONF=${PYTORCH_CUDA_ALLOC_CONF:-"max_split_size_mb:512"}
25
  export USE_FLASH_ATTENTION_WITH_CUDA=1
26
  export FORCE_CUDA=1
27
 
28
+ echo " > MAX_JOBS=$MAX_JOBS, OMP_NUM_THREADS=$OMP_NUM_THREADS, CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
29
+ echo " > TMPDIR=$TMPDIR, WHEEL_DIR=$WHEEL_DIR"
30
+
31
+ # Tenta build com CUDA
32
+ set -o pipefail
33
+ if pip wheel flash-attn==2.8.0 --no-build-isolation -w "$WHEEL_DIR" 2>&1 | tee /tmp/flash_build.log; then
34
+ echo "✅ Build com CUDA concluído."
35
+ else
36
+ echo "⚠️ Build CUDA falhou; tentando fallback CPU-only (mais lento). Verifique /tmp/flash_build.log"
37
+ export USE_FLASH_ATTENTION_WITH_CUDA=0
38
+ export FORCE_CUDA=0
39
+ pip wheel flash-attn==2.8.0 --no-build-isolation -w "$WHEEL_DIR"
40
+ fi
41
 
 
42
  FLASH_WHL=$(find "$WHEEL_DIR" -name "flash_attn-*.whl" | head -n 1)
43
  if [[ -f "$FLASH_WHL" ]]; then
44
+ echo "✅ Wheel gerado: $FLASH_WHL"
45
+ pip install --no-cache-dir "$FLASH_WHL"
46
+ echo "📁 Wheel salvo em $WHEEL_DIR"
47
+ else
48
+ echo "❌ Erro: não foi possível gerar o wheel. Ver logs em /tmp/flash_build.log"
49
+ exit 1
50
+ fi
51
+
52
+ # ----- Upload opcional para Hugging Face -----
53
+ if [ "${HF_UPLOAD_WHEELS:-0}" = "1" ]; then
54
+ echo "⬆️ Upload habilitado. Verificando HF_TOKEN..."
55
+ python3 - <<'PY'
56
+ import os
57
+ from huggingface_hub import HfApi, HfFolder
58
+ repo = os.environ.get("SELF_HF_REPO_ID","caarleexx/Flash")
59
+ token = os.getenv("HF_TOKEN") or HfFolder.get_token()
60
+ if not token:
61
+ raise SystemExit("HF_TOKEN ausente; upload desabilitado")
62
+ api = HfApi(token=token)
63
+ api.upload_folder(folder_path="/data/wheel", repo_id=repo, repo_type="model",
64
+ allow_patterns=["*.whl","NGC-DL-CONTAINER-LICENSE"],
65
+ ignore_patterns=["**/src/**","**/*.log","**/logs/**",".git/**"])
66
+ print("Upload concluído.")
67
+ PY
68
  else
69
+ echo "ℹ️ Upload HF desabilitado (HF_UPLOAD_WHEELS!=1)"
 
70
  fi