EricB (HF Staff) committed
Commit 8276f6a · 1 parent: 85dbf0e

Update flake.nix
build.toml CHANGED
@@ -1,5 +1,5 @@
 [general]
-name = "paged_attention"
+name = "kernels_paged_attention_metal"
 universal = false
 
 [torch]
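
The rename in [general].name changes the name under which the built extension and its Python package are published, so downstream code imports kernels_paged_attention_metal instead of paged_attention. A minimal sketch of loading such a build through the Hugging Face kernels loader, assuming the kernel is pushed to a Hub repository (the repo id below is hypothetical):

# Hypothetical sketch: fetch the renamed kernel from the Hub via the `kernels`
# loader. Replace the repo id with the repository that actually hosts this build.
from kernels import get_kernel

paged_attention = get_kernel("kernels-community/paged-attention-metal")  # hypothetical repo id
op = paged_attention.paged_attention_v1  # ops are re-exported under the new package name
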
build/torch27-metal-arm64-darwin/kernels_paged_attention_metal/__init__.py DELETED
@@ -1,21 +0,0 @@
-from ._custom_ops import (
-    convert_fp8,
-    copy_blocks,
-    paged_attention_v1,
-    paged_attention_v2,
-    reshape_and_cache,
-    reshape_and_cache_flash,
-    swap_blocks,
-)
-from ._ops import ops
-
-__all__ = [
-    "convert_fp8",
-    "copy_blocks",
-    "ops",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "reshape_and_cache_flash",
-    "swap_blocks",
-]
build/torch27-metal-arm64-darwin/kernels_paged_attention_metal/_custom_ops.py DELETED
@@ -1,173 +0,0 @@
-from typing import List, Optional
-
-import torch
-
-from ._ops import ops
-
-
-# page attention ops
-def paged_attention_v1(
-    out: torch.Tensor,
-    query: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    num_kv_heads: int,
-    scale: float,
-    block_tables: torch.Tensor,
-    seq_lens: torch.Tensor,
-    block_size: int,
-    max_seq_len: int,
-    alibi_slopes: Optional[torch.Tensor],
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-    tp_rank: int = 0,
-    blocksparse_local_blocks: int = 0,
-    blocksparse_vert_stride: int = 0,
-    blocksparse_block_size: int = 64,
-    blocksparse_head_sliding_step: int = 0,
-) -> None:
-    ops.paged_attention_v1(
-        out,
-        query,
-        key_cache,
-        value_cache,
-        num_kv_heads,
-        scale,
-        block_tables,
-        seq_lens,
-        block_size,
-        max_seq_len,
-        alibi_slopes,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-        tp_rank,
-        blocksparse_local_blocks,
-        blocksparse_vert_stride,
-        blocksparse_block_size,
-        blocksparse_head_sliding_step,
-    )
-
-
-def paged_attention_v2(
-    out: torch.Tensor,
-    exp_sum: torch.Tensor,
-    max_logits: torch.Tensor,
-    tmp_out: torch.Tensor,
-    query: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    num_kv_heads: int,
-    scale: float,
-    block_tables: torch.Tensor,
-    seq_lens: torch.Tensor,
-    block_size: int,
-    max_seq_len: int,
-    alibi_slopes: Optional[torch.Tensor],
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-    tp_rank: int = 0,
-    blocksparse_local_blocks: int = 0,
-    blocksparse_vert_stride: int = 0,
-    blocksparse_block_size: int = 64,
-    blocksparse_head_sliding_step: int = 0,
-) -> None:
-    ops.paged_attention_v2(
-        out,
-        exp_sum,
-        max_logits,
-        tmp_out,
-        query,
-        key_cache,
-        value_cache,
-        num_kv_heads,
-        scale,
-        block_tables,
-        seq_lens,
-        block_size,
-        max_seq_len,
-        alibi_slopes,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-        tp_rank,
-        blocksparse_local_blocks,
-        blocksparse_vert_stride,
-        blocksparse_block_size,
-        blocksparse_head_sliding_step,
-    )
-
-
-def reshape_and_cache(
-    key: torch.Tensor,
-    value: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    slot_mapping: torch.Tensor,
-    kv_cache_dtype: str,
-    k_scale: float,
-    v_scale: float,
-) -> None:
-    ops.reshape_and_cache(
-        key,
-        value,
-        key_cache,
-        value_cache,
-        slot_mapping,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-    )
-
-
-def reshape_and_cache_flash(
-    key: torch.Tensor,
-    value: torch.Tensor,
-    key_cache: torch.Tensor,
-    value_cache: torch.Tensor,
-    slot_mapping: torch.Tensor,
-    kv_cache_dtype: str,
-    k_scale: torch.Tensor,
-    v_scale: torch.Tensor,
-) -> None:
-    ops.reshape_and_cache_flash(
-        key,
-        value,
-        key_cache,
-        value_cache,
-        slot_mapping,
-        kv_cache_dtype,
-        k_scale,
-        v_scale,
-    )
-
-
-def copy_blocks(
-    key_caches: List[torch.Tensor],
-    value_caches: List[torch.Tensor],
-    block_mapping: torch.Tensor,
-) -> None:
-    ops.copy_blocks(key_caches, value_caches, block_mapping)
-
-
-def swap_blocks(
-    src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor
-) -> None:
-    ops.swap_blocks(src, dst, block_mapping)
-
-
-def convert_fp8(
-    output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8"
-) -> None:
-    ops.convert_fp8(output, input, scale, kv_dtype)
-
-
-__all__ = [
-    "convert_fp8",
-    "paged_attention_v1",
-    "paged_attention_v2",
-    "reshape_and_cache",
-    "copy_blocks",
-]
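
These wrappers follow the vLLM-style paged-attention interface: v1 computes attention over the paged KV cache in one pass, while v2 splits long sequences into partitions and reduces exp_sums/max_logits across them. A common calling pattern is to dispatch between the two based on how many partitions the longest sequence needs. The sketch below assumes the renamed package is installed and importable; the 512-token partition size and the dispatch heuristic are assumptions borrowed from vLLM-style code, not taken from this repository.

from typing import Optional

import torch

# Assumes the package built from this repo is installed under its new name.
from kernels_paged_attention_metal import paged_attention_v1, paged_attention_v2

_PARTITION_SIZE = 512  # assumed v2 partition size, for illustration only


def paged_attention(
    out: torch.Tensor,
    query: torch.Tensor,            # [num_seqs, num_heads, head_size]
    key_cache: torch.Tensor,
    value_cache: torch.Tensor,
    num_kv_heads: int,
    scale: float,
    block_tables: torch.Tensor,
    seq_lens: torch.Tensor,
    block_size: int,
    max_seq_len: int,
    alibi_slopes: Optional[torch.Tensor],
    kv_cache_dtype: str,
    k_scale: float,
    v_scale: float,
) -> None:
    """Use v1 for short contexts, v2 (partitioned reduction) for long ones."""
    num_seqs, num_heads, head_size = query.shape
    max_num_partitions = (max_seq_len + _PARTITION_SIZE - 1) // _PARTITION_SIZE
    if max_num_partitions == 1:
        paged_attention_v1(
            out, query, key_cache, value_cache, num_kv_heads, scale,
            block_tables, seq_lens, block_size, max_seq_len, alibi_slopes,
            kv_cache_dtype, k_scale, v_scale,
        )
    else:
        # v2 needs scratch buffers for the per-partition softmax statistics.
        tmp_out = torch.empty(
            (num_seqs, num_heads, max_num_partitions, head_size),
            dtype=out.dtype, device=out.device,
        )
        exp_sums = torch.empty(
            (num_seqs, num_heads, max_num_partitions),
            dtype=torch.float32, device=out.device,
        )
        max_logits = torch.empty_like(exp_sums)
        paged_attention_v2(
            out, exp_sums, max_logits, tmp_out, query, key_cache, value_cache,
            num_kv_heads, scale, block_tables, seq_lens, block_size,
            max_seq_len, alibi_slopes, kv_cache_dtype, k_scale, v_scale,
        )
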
build/torch27-metal-arm64-darwin/kernels_paged_attention_metal/_ops.py DELETED
@@ -1,9 +0,0 @@
-import torch
-from . import _paged_attention_2ee8d65
-ops = torch.ops._paged_attention_2ee8d65
-
-def add_op_namespace_prefix(op_name: str):
-    """
-    Prefix op by namespace.
-    """
-    return f"_paged_attention_2ee8d65::{op_name}"
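
_ops.py binds the compiled extension under a namespace derived from the build hash, and add_op_namespace_prefix turns a bare op name into the fully qualified "namespace::op" string that torch.library APIs expect. A small sketch of one possible use, registering a fake (meta) implementation for shape propagation; it assumes the extension is loadable and is purely illustrative, since the repository itself does not ship such a registration.

import torch

# Assumes the package is installed; the fake registration below is an
# illustrative use of add_op_namespace_prefix, not code from this repo.
from kernels_paged_attention_metal._ops import add_op_namespace_prefix


@torch.library.register_fake(add_op_namespace_prefix("paged_attention_v1"))
def _(out, query, key_cache, value_cache, num_kv_heads, scale,
      block_tables, seq_lens, block_size, max_seq_len, alibi_slopes,
      kv_cache_dtype, k_scale, v_scale, tp_rank=0,
      blocksparse_local_blocks=0, blocksparse_vert_stride=0,
      blocksparse_block_size=64, blocksparse_head_sliding_step=0):
    # The op writes into `out` in place, so the fake returns nothing.
    return None
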
build/torch27-metal-arm64-darwin/kernels_paged_attention_metal/_paged_attention_2ee8d65.abi3.so DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b732f1413d0dcaf27b13b139d95f7aa17aefc238baac9a7b26e2ba461ef69de8
-size 214800
build/torch27-metal-arm64-darwin/kernels_paged_attention_metal/_paged_attention_2ee8d65.metallib DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c46eaf21c96da70c5227b2566308a8ef73ae09abf303278f40070dd4326ba0be
-size 4999876
build/torch27-metal-arm64-darwin/kernels_paged_attention_metal/platforms.py DELETED
@@ -1,92 +0,0 @@
-import os
-import random
-from abc import ABC, abstractmethod
-from functools import lru_cache, wraps
-from typing import Callable, ParamSpec, TypeVar
-
-import numpy as np
-import torch
-
-IS_ROCM = torch.version.hip is not None
-IS_MPS = torch.backends.mps.is_available()
-
-
-class Platform(ABC):
-    @classmethod
-    def seed_everything(cls, seed: int) -> None:
-        """
-        Set the seed of each random module.
-        `torch.manual_seed` will set seed on all devices.
-
-        Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
-        """
-        random.seed(seed)
-        np.random.seed(seed)
-        torch.manual_seed(seed)
-
-    @abstractmethod
-    def get_device_name(self, device_id: int = 0) -> str: ...
-
-    @abstractmethod
-    def is_cuda(self) -> bool: ...
-
-    @abstractmethod
-    def is_rocm(self) -> bool: ...
-
-    @abstractmethod
-    def is_mps(self) -> bool: ...
-
-
-class CudaPlatform(Platform):
-    @classmethod
-    @lru_cache(maxsize=8)
-    def get_device_name(cls, device_id: int = 0) -> str:
-        return torch.cuda.get_device_name(0)
-
-    def is_cuda(self) -> bool:
-        return True
-
-    def is_rocm(self) -> bool:
-        return False
-
-    def is_mps(self) -> bool:
-        return False
-
-
-class RocmPlatform(Platform):
-    @classmethod
-    @lru_cache(maxsize=8)
-    def get_device_name(cls, device_id: int = 0) -> str:
-        return torch.cuda.get_device_name(device_id)
-
-    def is_cuda(self) -> bool:
-        return False
-
-    def is_rocm(self) -> bool:
-        return True
-
-    def is_mps(self) -> bool:
-        return False
-
-
-class MpsPlatform(Platform):
-    @classmethod
-    @lru_cache(maxsize=8)
-    def get_device_name(cls, device_id: int = 0) -> str:
-        return torch.cuda.get_device_name(device_id)
-
-    def is_cuda(self) -> bool:
-        return False
-
-    def is_rocm(self) -> bool:
-        return False
-
-    def is_mps(self) -> bool:
-        return True
-
-current_platform = (
-    RocmPlatform() if IS_ROCM else
-    MpsPlatform() if IS_MPS else
-    CudaPlatform() if torch.cuda.is_available() else
-    None
-)
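
platforms.py selects a current_platform singleton at import time, preferring ROCm, then MPS, then CUDA, and falling back to None when no accelerator is present. A short usage sketch, assuming the package is installed; the branching below is an illustration of the Platform API above, not code from this repository.

import torch

from kernels_paged_attention_metal.platforms import current_platform

if current_platform is None:
    raise RuntimeError("no supported accelerator (CUDA, ROCm, or MPS) found")

current_platform.seed_everything(0)  # seeds Python, NumPy and torch RNGs

if current_platform.is_mps():
    device = torch.device("mps")
else:
    device = torch.device("cuda")
    print(f"running on {current_platform.get_device_name()}")
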
flake.lock DELETED
@@ -1,168 +0,0 @@
-{
-  "nodes": {
-    "flake-compat": {
-      "locked": {
-        "lastModified": 1747046372,
-        "narHash": "sha256-CIVLLkVgvHYbgI2UpXvIIBJ12HWgX+fjA8Xf8PUmqCY=",
-        "owner": "edolstra",
-        "repo": "flake-compat",
-        "rev": "9100a0f413b0c601e0533d1d94ffd501ce2e7885",
-        "type": "github"
-      },
-      "original": {
-        "owner": "edolstra",
-        "repo": "flake-compat",
-        "type": "github"
-      }
-    },
-    "flake-compat_2": {
-      "locked": {
-        "lastModified": 1733328505,
-        "narHash": "sha256-NeCCThCEP3eCl2l/+27kNNK7QrwZB1IJCrXfrbv5oqU=",
-        "owner": "edolstra",
-        "repo": "flake-compat",
-        "rev": "ff81ac966bb2cae68946d5ed5fc4994f96d0ffec",
-        "type": "github"
-      },
-      "original": {
-        "owner": "edolstra",
-        "repo": "flake-compat",
-        "type": "github"
-      }
-    },
-    "flake-utils": {
-      "inputs": {
-        "systems": "systems"
-      },
-      "locked": {
-        "lastModified": 1731533236,
-        "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
-        "owner": "numtide",
-        "repo": "flake-utils",
-        "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
-        "type": "github"
-      },
-      "original": {
-        "owner": "numtide",
-        "repo": "flake-utils",
-        "type": "github"
-      }
-    },
-    "flake-utils_2": {
-      "inputs": {
-        "systems": "systems_2"
-      },
-      "locked": {
-        "lastModified": 1731533236,
-        "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
-        "owner": "numtide",
-        "repo": "flake-utils",
-        "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
-        "type": "github"
-      },
-      "original": {
-        "owner": "numtide",
-        "repo": "flake-utils",
-        "type": "github"
-      }
-    },
-    "hf-nix": {
-      "inputs": {
-        "flake-compat": "flake-compat_2",
-        "flake-utils": "flake-utils_2",
-        "nixpkgs": "nixpkgs"
-      },
-      "locked": {
-        "lastModified": 1750234878,
-        "narHash": "sha256-q9DRC9zdpzUf88qqg1qbhP1qgJbE2cMtn8oUmosuyT8=",
-        "owner": "huggingface",
-        "repo": "hf-nix",
-        "rev": "c7132f90763d756da3e77da62e01be0a4546dc57",
-        "type": "github"
-      },
-      "original": {
-        "owner": "huggingface",
-        "repo": "hf-nix",
-        "type": "github"
-      }
-    },
-    "kernel-builder": {
-      "inputs": {
-        "flake-compat": "flake-compat",
-        "flake-utils": "flake-utils",
-        "hf-nix": "hf-nix",
-        "nixpkgs": [
-          "kernel-builder",
-          "hf-nix",
-          "nixpkgs"
-        ]
-      },
-      "locked": {
-        "lastModified": 1750430211,
-        "narHash": "sha256-QEaSxFNjcqzBBB1WVYFBJ0/Uuol2k1kDSpuyoz/Slzc=",
-        "owner": "huggingface",
-        "repo": "kernel-builder",
-        "rev": "3616c38e5c1fc6cc382510eff12b9d54d6797e84",
-        "type": "github"
-      },
-      "original": {
-        "owner": "huggingface",
-        "repo": "kernel-builder",
-        "type": "github"
-      }
-    },
-    "nixpkgs": {
-      "locked": {
-        "lastModified": 1747820358,
-        "narHash": "sha256-fTqsZsUX6M3yeEvgyQvXcbGmT2CaRVyVwsi8eK29Oj4=",
-        "owner": "danieldk",
-        "repo": "nixpkgs",
-        "rev": "d3c1681180717528068082103bf323147de6ab0b",
-        "type": "github"
-      },
-      "original": {
-        "owner": "danieldk",
-        "ref": "cudatoolkit-12.9-kernel-builder",
-        "repo": "nixpkgs",
-        "type": "github"
-      }
-    },
-    "root": {
-      "inputs": {
-        "kernel-builder": "kernel-builder"
-      }
-    },
-    "systems": {
-      "locked": {
-        "lastModified": 1681028828,
-        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
-        "owner": "nix-systems",
-        "repo": "default",
-        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
-        "type": "github"
-      },
-      "original": {
-        "owner": "nix-systems",
-        "repo": "default",
-        "type": "github"
-      }
-    },
-    "systems_2": {
-      "locked": {
-        "lastModified": 1681028828,
-        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
-        "owner": "nix-systems",
-        "repo": "default",
-        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
-        "type": "github"
-      },
-      "original": {
-        "owner": "nix-systems",
-        "repo": "default",
-        "type": "github"
-      }
-    }
-  },
-  "root": "root",
-  "version": 7
-}