Update compile_flash.sh
Browse files- compile_flash.sh +2 -2
compile_flash.sh
CHANGED
|
@@ -30,13 +30,13 @@ echo " > TMPDIR=$TMPDIR, WHEEL_DIR=$WHEEL_DIR"
|
|
| 30 |
|
| 31 |
# Try the CUDA build first (pipefail lets the `if` see pip's exit status through `tee`)
|
| 32 |
set -o pipefail
|
| 33 |
-
if pip wheel flash-attn==2.8.0 --no-build-isolation -w "$WHEEL_DIR" 2>&1 | tee /tmp/flash_build.log; then
|
| 34 |
echo "✅ Build com CUDA concluído."
|
| 35 |
else
|
| 36 |
echo "⚠️ Build CUDA falhou; tentando fallback CPU-only (mais lento). Verifique /tmp/flash_build.log"
|
| 37 |
export USE_FLASH_ATTENTION_WITH_CUDA=0
|
| 38 |
export FORCE_CUDA=0
|
| 39 |
-
pip wheel flash-attn==2.8.0 --no-build-isolation -w "$WHEEL_DIR"
|
| 40 |
fi
|
| 41 |
|
| 42 |
# Select the first flash-attn wheel under WHEEL_DIR. `-print -quit` makes find stop
# after one match, so no `| head` pipe is needed and pipefail cannot turn an
# early-closed pipe (SIGPIPE, status 141) into a spurious failure.
FLASH_WHL=$(find "$WHEEL_DIR" -name "flash_attn-*.whl" -print -quit)
|
|
|
|
| 30 |
|
| 31 |
# Try the CUDA build first (pipefail lets the `if` see pip's exit status through `tee`)
|
| 32 |
set -o pipefail
|
| 33 |
+
if pip wheel flash-attn==2.8.0.post2 --no-build-isolation -w "$WHEEL_DIR" 2>&1 | tee /tmp/flash_build.log; then
|
| 34 |
echo "✅ Build com CUDA concluído."
|
| 35 |
else
|
| 36 |
echo "⚠️ Build CUDA falhou; tentando fallback CPU-only (mais lento). Verifique /tmp/flash_build.log"
|
| 37 |
export USE_FLASH_ATTENTION_WITH_CUDA=0
|
| 38 |
export FORCE_CUDA=0
|
| 39 |
+
pip wheel flash-attn==2.8.0.post2 --no-build-isolation -w "$WHEEL_DIR"
|
| 40 |
fi
|
| 41 |
|
| 42 |
# Select the first flash-attn wheel under WHEEL_DIR. `-print -quit` makes find stop
# after one match, so no `| head` pipe is needed and pipefail cannot turn an
# early-closed pipe (SIGPIPE, status 141) into a spurious failure.
FLASH_WHL=$(find "$WHEEL_DIR" -name "flash_attn-*.whl" -print -quit)
|