# Build configuration for the paged_attention kernel.

[general]
name = "paged_attention"
[torch]
src = [
    "torch-ext/torch_binding.cpp",
    "torch-ext/torch_binding.h",
]
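
# Standalone CUDA utility kernels; these have no dependency on the Torch sources.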
[kernel.cuda_utils]
src = [
    "cuda-utils/cuda_utils_kernels.cu",
]
depends = []
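
# Paged-attention CUDA sources: the v1/v2 attention kernels, KV-cache kernels,
# per-dtype headers (bf16/fp16/fp32/fp8), and FP8 quantization helpers for both
# AMD and NVIDIA backends.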
[kernel.paged_attention]
src = [
    "paged-attention/attention/attention_dtypes.h",
    "paged-attention/attention/attention_generic.cuh",
    "paged-attention/attention/attention_kernels.cuh",
    "paged-attention/attention/attention_utils.cuh",
    "paged-attention/attention/dtype_bfloat16.cuh",
    "paged-attention/attention/dtype_float16.cuh",
    "paged-attention/attention/dtype_float32.cuh",
    "paged-attention/attention/dtype_fp8.cuh",
    "paged-attention/attention/paged_attention_v1.cu",
    "paged-attention/attention/paged_attention_v2.cu",
    "paged-attention/cache_kernels.cu",
    "paged-attention/cuda_compat.h",
    "paged-attention/dispatch_utils.h",
    "paged-attention/quantization/fp8/amd/hip_float8.h",
    "paged-attention/quantization/fp8/amd/hip_float8_impl.h",
    "paged-attention/quantization/fp8/amd/quant_utils.cuh",
    "paged-attention/quantization/fp8/nvidia/quant_utils.cuh",
]
include = ["."]
depends = ["torch"]