# paged-attention / build.toml (Kernels)
# Author: danieldk (HF Staff) — "Enable ROCm build", commit 6677800
# Extension-wide metadata for kernel-builder.
[general]
name = "paged_attention"
# Not a universal build — compiled per backend/architecture below.
# NOTE(review): semantics of `universal` per kernel-builder docs; confirm.
universal = false
# Torch binding layer: sources that register the ops with PyTorch.
[torch]
src = [
    "torch-ext/torch_binding.cpp",
    "torch-ext/torch_binding.h",
]
# CUDA utility kernels (NVIDIA backend).
[kernel.cuda_utils]
backend = "cuda"
src = [
    "cuda-utils/cuda_utils.h",
    "cuda-utils/cuda_utils_kernels.cu",
]
# NOTE(review): empty here, while the ROCm twin depends on "torch" —
# confirm the asymmetry is intentional.
depends = []
# ROCm build of the same utility sources (HIP-compatible .cu files).
[kernel.cuda_utils_rocm]
backend = "rocm"
# AMD GPU targets (LLVM gfx identifiers) this kernel is compiled for.
rocm-archs = [
    "gfx906",
    "gfx908",
    "gfx90a",
    "gfx940",
    "gfx941",
    "gfx942",
    "gfx1030",
    "gfx1100",
    "gfx1101",
]
src = [
    "cuda-utils/cuda_utils.h",
    "cuda-utils/cuda_utils_kernels.cu",
]
depends = ["torch"]
# Paged-attention kernels for the CUDA backend: attention v1/v2,
# KV-cache kernels, and fp8 quantization helpers.
[kernel.paged_attention]
backend = "cuda"
src = [
    "cuda-utils/cuda_utils.h",
    "paged-attention/attention/attention_dtypes.h",
    "paged-attention/attention/attention_generic.cuh",
    "paged-attention/attention/attention_kernels.cuh",
    "paged-attention/attention/attention_utils.cuh",
    "paged-attention/attention/dtype_bfloat16.cuh",
    "paged-attention/attention/dtype_float16.cuh",
    "paged-attention/attention/dtype_float32.cuh",
    "paged-attention/attention/dtype_fp8.cuh",
    "paged-attention/attention/paged_attention_v1.cu",
    "paged-attention/attention/paged_attention_v2.cu",
    "paged-attention/cache_kernels.cu",
    "paged-attention/cuda_compat.h",
    "paged-attention/dispatch_utils.h",
    "paged-attention/quantization/fp8/amd/quant_utils.cuh",
    "paged-attention/quantization/fp8/nvidia/quant_utils.cuh",
]
include = ["cuda-utils", "paged-attention"]
depends = ["torch"]
# ROCm build of the paged-attention kernels — same source set as the
# CUDA variant, compiled for the AMD targets listed below.
[kernel.paged_attention_rocm]
backend = "rocm"
rocm-archs = [
    "gfx906",
    "gfx908",
    "gfx90a",
    "gfx940",
    "gfx941",
    "gfx942",
    "gfx1030",
    "gfx1100",
    "gfx1101",
]
src = [
    "cuda-utils/cuda_utils.h",
    "paged-attention/attention/attention_dtypes.h",
    "paged-attention/attention/attention_generic.cuh",
    "paged-attention/attention/attention_kernels.cuh",
    "paged-attention/attention/attention_utils.cuh",
    "paged-attention/attention/dtype_bfloat16.cuh",
    "paged-attention/attention/dtype_float16.cuh",
    "paged-attention/attention/dtype_float32.cuh",
    "paged-attention/attention/dtype_fp8.cuh",
    "paged-attention/attention/paged_attention_v1.cu",
    "paged-attention/attention/paged_attention_v2.cu",
    "paged-attention/cache_kernels.cu",
    "paged-attention/cuda_compat.h",
    "paged-attention/dispatch_utils.h",
    "paged-attention/quantization/fp8/amd/quant_utils.cuh",
    "paged-attention/quantization/fp8/nvidia/quant_utils.cuh",
]
include = ["cuda-utils", "paged-attention"]
depends = ["torch"]
# Apple Metal backend: shader sources (.metal) plus Objective-C++
# host-side glue (.mm).
[kernel.paged_attention_metal]
backend = "metal"
src = [
    "paged-attention-metal/attention/paged_attention.metal",
    "paged-attention-metal/cache/copy_blocks.metal",
    "paged-attention-metal/cache/reshape_and_cache.metal",
    "paged-attention-metal/convert_fp8.metal",
    "paged-attention-metal/float8.metal",
    "paged-attention-metal/utils.metal",
    "paged-attention-metal/paged_attention.mm",
    "paged-attention-metal/cache.mm",
    "paged-attention-metal/convert_fp8.mm",
    "paged-attention-metal/device.mm",
]
include = ["."]
depends = ["torch"]