File size: 5,220 Bytes
fba9280
316b67d
 
fba9280
b6a6cb6
fba9280
 
316b67d
 
d11dad1
fba9280
d11dad1
fba9280
 
1c31d0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0834a4b
1c31d0f
 
 
b72eb83
1c31d0f
b72eb83
 
1c31d0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f0ae5a
d11dad1
 
1c31d0f
 
 
 
 
 
23b2602
b72eb83
1c31d0f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b72eb83
 
 
 
 
 
 
 
 
 
1c31d0f
 
 
 
 
 
b72eb83
3b0ba19
143c2fb
1c31d0f
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# =============================================================================
# ADUC-SDR Video Suite — High-Perf Diffusers for 8× L40S (SM 8.9)
# CUDA 12.6.3 | PyTorch >= 2.6.0 (cu126 wheels) | Ubuntu 22.04
# =============================================================================
# CUDA 12.6.3 development image on Ubuntu 22.04. Keep this tag in sync with the
# cuda_version label and with the cu126 PyTorch wheel index used further down.
FROM nvidia/cuda:12.6.3-devel-ubuntu22.04

# Image metadata. pytorch_version mirrors the requirement handed to pip
# (torch >= 2.6.0 from the cu126 index); "2.6.3" was the CUDA patch level,
# not a PyTorch release.
LABEL maintainer="Carlos Rodrigues dos Santos & Development Partner" \
      description="High-performance Diffusers stack with FA2/SDPA, 8×L40S" \
      version="4.4.0" \
      cuda_version="12.6.3" \
      python_version="3.10" \
      pytorch_version=">=2.6.0+cu126" \
      gpu_optimized_for="8x_NVIDIA_L40S"

# ---------------- Core env & caches ----------------
# Build-time only: keeps apt/debconf non-interactive for the RUN steps of this
# stage without leaking the setting into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Locale/timezone, unbuffered Python output, no .pyc files, and pip without a
# download cache or version-check banner.
ENV TZ=UTC LANG=C.UTF-8 LC_ALL=C.UTF-8 \
    PYTHONUNBUFFERED=1 PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1

# GPU / compute: expose all GPUs, list device indices 0-7 in PCI bus order, and
# set the CUDA architecture list to 8.9 (the SM target named in the file header).
ENV NVIDIA_VISIBLE_DEVICES=all \
    CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
    TORCH_CUDA_ARCH_LIST="8.9" \
    CUDA_DEVICE_ORDER=PCI_BUS_ID \
    CUDA_DEVICE_MAX_CONNECTIONS=32 \
    CUDA_MODULE_LOADING=LAZY

# CPU threading for OpenMP/MKL; MAX_JOBS is presumably the parallelism cap for
# native extension builds (confirm against the build tooling in use).
ENV OMP_NUM_THREADS=8
ENV MKL_NUM_THREADS=8
ENV MAX_JOBS=48

# CUDA allocator tuning and kernel (JIT) cache settings
ENV PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,garbage_collection_threshold:0.8
ENV CUDA_LAUNCH_BLOCKING=0 \
    CUDA_CACHE_MAXSIZE=2147483648 \
    CUDA_CACHE_DISABLE=0

# NCCL single-node defaults: InfiniBand disabled, socket interface set to
# loopback. On clusters with InfiniBand, override NCCL_IB_DISABLE and
# NCCL_SOCKET_IFNAME at run time.
ENV NCCL_DEBUG=INFO \
    NCCL_ASYNC_ERROR_HANDLING=1 \
    NCCL_P2P_DISABLE=0 \
    NCCL_IB_DISABLE=1 \
    NCCL_MIN_NCHANNELS=8 \
    NCCL_NTHREADS=256 \
    NCCL_SOCKET_IFNAME=lo

# Application root: everything below is built and run from /app
ENV APP_HOME=/app
WORKDIR $APP_HOME

# Torch / Hugging Face cache locations used while building. The five cache
# paths are reassigned to /data near the end of this file for run time.
ENV TORCH_HOME=/app/.cache/torch \
    HF_HOME=/app/.cache/huggingface \
    HF_DATASETS_CACHE=/app/.cache/datasets \
    TRANSFORMERS_CACHE=/app/.cache/transformers \
    DIFFUSERS_CACHE=/app/.cache/diffusers

# Hub downloads through hf_transfer; tokenizers parallelism switched off
ENV HF_HUB_ENABLE_HF_TRANSFER=1 \
    TOKENIZERS_PARALLELISM=false

# FlashAttention / SDPA defaults.
# NOTE(review): nothing visible in this file reads these variables; presumably
# start.sh or the application code consumes them — confirm before renaming.

# FlashAttention switches
ENV FLASH_ATTENTION_DISABLE=0
ENV FLASH_ATTENTION_FORCE_FP16=1
ENV ATTN_FORCE_F16=1
ENV FA_LOG_LEVEL=WARNING

# Scaled-dot-product-attention backend selection (flash + memory-efficient on,
# math off) and xFormers disabled
ENV ENABLE_FLASH_SDP=1
ENV ENABLE_MEMORY_EFFICIENT_SDP=1
ENV ENABLE_MATH_SDP=0
ENV XFORMERS_FORCE_DISABLE=1

# Numeric defaults
ENV TORCH_DTYPE=bfloat16
ENV NVIDIA_TF32_OVERRIDE=0

# Model storage: /app/models is a symlink to /home/user/.cache/models, and
# MODELS_DIR advertises the /app-side path.
ENV MODELS_DIR=/app/models
RUN mkdir -p /home/user/.cache/models \
    && ln -sf /home/user/.cache/models /app/models

# ---------------- System packages & Python ----------------
# Build tools, VCS/download utilities, ffmpeg and Python 3.10; the apt lists are
# removed in the same layer so they do not persist in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        ffmpeg \
        git \
        git-lfs \
        ninja-build \
        python3-pip \
        python3.10 \
        python3.10-dev \
        python3.10-distutils \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Make `python` and `python3` both resolve to 3.10, then refresh pip itself
RUN ln -sf /usr/bin/python3.10 /usr/bin/python3 \
    && ln -sf /usr/bin/python3.10 /usr/bin/python \
    && python3 -m pip install --upgrade pip


# ---------------- PyTorch (cu126 wheels, minimum versions) ----------------
# The requirement specifiers MUST be quoted: unquoted, /bin/sh treats `>` as an
# output redirection, so `torch>=2.6.0+cu126` becomes `torch` with stdout sent
# to a file named "=2.6.0+cu126" — the constraint is dropped, junk files are
# left in the working directory and pip's output disappears from the build log.
# The "+cu126" local label is omitted because local version labels are not
# valid with `>=`; the CUDA variant is selected by --index-url instead.
# NOTE(review): these are lower bounds, not pins; switch to a matched `==`
# triple if reproducible builds are required.
RUN pip install --index-url https://download.pytorch.org/whl/cu126 \
    "torch>=2.6.0" "torchvision>=0.20.0" "torchaudio>=2.5.0"

# ---------------- Build toolchain and Triton ----------------
# Python-side build helpers plus hf_transfer and a pinned NumPy. bitsandbytes is
# not part of this step (it is installed after requirements.txt).
RUN pip install \
    packaging \
    ninja \
    cmake \
    pybind11 \
    scikit-build \
    cython \
    hf_transfer \
    numpy==1.24.4

# Triton 3.x (no triton.ops): drop whatever Triton is already present — ignoring
# the error when none is installed — then install the pinned release.
# NOTE(review): torch wheels declare their own Triton requirement; confirm that
# 3.1.0 is compatible with the torch version that actually gets installed.
RUN (pip uninstall -y triton || true) \
    && pip install -v --no-build-isolation triton==3.1.0

# FlashAttention 2.8.x
#RUN pip install flash-attn==2.8.3 --no-build-isolation || \
#    pip install flash-attn==2.8.2 --no-build-isolation || \
#    pip install flash-attn==2.8.1 --no-build-isolation || \
#    pip install flash-attn==2.8.0.post2 --no-build-isolation

# Stable (non-dev) releases of the Diffusers / Transformers stack, all pinned
RUN pip install --no-cache-dir \
    diffusers==0.31.0 \
    transformers==4.44.2 \
    accelerate==0.34.2 \
    omegaconf==2.3.0

# Optional: your optimizations fork
# RUN pip install -U git+https://github.com/carlex22/diffusers-aduc-sdr

# ---------------- Auxiliary repositories ----------------
# Each repository is cloned into the working directory (/app) and one of its
# sub-directories is copied to a fixed path under /app. Clones are shallow
# (--depth 1): this file only copies checked-out files, so full history would
# just enlarge the layer.
# NOTE(review): no tag or commit is pinned, so every build tracks the default
# branch of each repository.

# VINCIE: configuration files -> /app/configs
RUN git clone --depth 1 https://github.com/bytedance-seed/VINCIE.git && \
    cp -r VINCIE/configs/. /app/configs/

# SeedVR: 3B configuration files -> /app/configs_3b
RUN git clone --depth 1 https://github.com/bytedance-seed/SeedVR.git && \
    cp -r SeedVR/configs_3b /app/configs_3b

# MMAudio: Python package -> /app/mmaudio
RUN git clone --depth 1 https://github.com/hkchengrex/MMAudio.git && \
    cp -r MMAudio/mmaudio /app/mmaudio

# LTX-Video: Python package -> /app/ltx_video
RUN git clone --depth 1 https://github.com/Lightricks/LTX-Video.git && \
    cp -r LTX-Video/ltx_video /app/ltx_video

# ---------------- Application dependencies ----------------
# requirements.txt is copied on its own so this layer is rebuilt only when the
# dependency list changes; bitsandbytes is then upgraded to the newest release.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt \
    && pip install --upgrade bitsandbytes

# Launcher scripts and configuration files from the build context
COPY info.sh start.sh ./
COPY configs/ ./configs/

# ---------------- Application code and permissions ----------------
COPY . .

# Create the unprivileged runtime user (UID 1000) and hand it the application
# tree. /data is created and owned here as well, because the cache variables set
# after the VOLUME declaration point there and the process runs as appuser.
# No `|| true` at the end: it applied to the whole && chain and would have
# hidden a failed useradd/chown, and both scripts are copied explicitly above,
# so a chmod failure indicates a real problem.
RUN useradd -m -u 1000 -s /bin/bash appuser && \
    mkdir -p /data && \
    chown -R appuser:appuser /app /data && \
    chmod 0755 /app/start.sh /app/info.sh

# Everything from here on, including the entrypoint, runs without root
USER appuser

# Declares the persistent volume for HF Spaces
VOLUME ["/data"]

# Cache locations moved onto /data; these override the /app/.cache values set
# earlier in the file.
ENV HF_HOME=/data/.cache/huggingface \
    TORCH_HOME=/data/.cache/torch \
    HF_DATASETS_CACHE=/data/.cache/datasets \
    TRANSFORMERS_CACHE=/data/.cache/transformers \
    DIFFUSERS_CACHE=/data/.cache/diffusers



# ---------------- Entry ----------------
# Absolute path so the launcher is found even when the working directory is
# overridden at run time (e.g. `docker run -w ...`). CMD supplies the default
# argument passed to start.sh and can be replaced on the command line.
ENTRYPOINT ["/app/start.sh"]
CMD ["gradio"]