Improve tests for MPS

Changed files:

- tests/kernels/conftest.py       +0  -1
- tests/kernels/test_attention.py +16 -3
- tests/kernels/test_cache.py     +11 -5
- tests/kernels/utils.py          +11 -16
tests/kernels/conftest.py

@@ -36,7 +36,6 @@ def create_kv_caches_with_random(
     seed: int = 0,
     device: Optional[str] = "cuda",
 ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
-
     if cache_dtype == "fp8" and head_size % 16:
         raise ValueError(
             f"Does not support key cache of type fp8 with head_size {head_size}"
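Aside from the dropped blank line, this hunk shows the fp8 head-size guard in context: fp8 KV caches are rejected unless the head size is 16-aligned. A minimal sketch of that check in isolation (the standalone function name is illustrative, not part of the diff):

```python
def check_fp8_head_size(cache_dtype: str, head_size: int) -> None:
    # The fp8 KV-cache layout used by this helper requires head_size to
    # be a multiple of 16; anything else is rejected up front.
    if cache_dtype == "fp8" and head_size % 16:
        raise ValueError(
            f"Does not support key cache of type fp8 with head_size {head_size}"
        )


check_fp8_head_size("fp8", 64)  # fine: 64 % 16 == 0
# check_fp8_head_size("fp8", 72)  # raises ValueError: 72 % 16 == 8
```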
tests/kernels/test_attention.py

@@ -43,6 +43,7 @@ if current_platform.is_mps():
 else:
     DEVICES = [f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)]
 
+
 def ref_masked_attention(
     query: torch.Tensor,
     key: torch.Tensor,
@@ -232,7 +233,11 @@ def test_paged_attention(
                 64,
                 0,
             ),
-            cond=(head_size == HEAD_SIZES[0] and block_size == BLOCK_SIZES[0]),
+            cond=(
+                head_size == HEAD_SIZES[0]
+                and block_size == BLOCK_SIZES[0]
+                and not device.startswith("mps")
+            ),
         )
 
     elif version in ("v2", "rocm"):
@@ -295,7 +300,11 @@ def test_paged_attention(
                 64,
                 0,
             ),
-            cond=(head_size == HEAD_SIZES[0] and block_size == BLOCK_SIZES[0]),
+            cond=(
+                head_size == HEAD_SIZES[0]
+                and block_size == BLOCK_SIZES[0]
+                and not device.startswith("mps")
+            ),
         )
 
     else:
@@ -340,7 +349,11 @@ def test_paged_attention(
                 k_scale,
                 v_scale,
             ),
-            cond=(head_size == HEAD_SIZES[0] and block_size == BLOCK_SIZES[0]),
+            cond=(
+                head_size == HEAD_SIZES[0]
+                and block_size == BLOCK_SIZES[0]
+                and not device.startswith("mps")
+            ),
         )
 
     else:
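Each of the three hunks threads the same predicate into `opcheck`'s `cond=` keyword: run the op check only for one representative head/block size, and never on MPS. A standalone sketch of that predicate (the size lists are illustrative placeholders, not the test module's actual values):

```python
HEAD_SIZES = [64, 80, 128]  # illustrative; the real lists live in the test module
BLOCK_SIZES = [16, 32]


def should_run_opcheck(head_size: int, block_size: int, device: str) -> bool:
    # opcheck is slow, so gate it to a single (head_size, block_size)
    # combination, and skip MPS entirely, where torch.library.opcheck
    # currently fails with placeholder-storage errors.
    return (
        head_size == HEAD_SIZES[0]
        and block_size == BLOCK_SIZES[0]
        and not device.startswith("mps")
    )


assert should_run_opcheck(64, 16, "cuda:0")
assert not should_run_opcheck(64, 16, "mps")
assert not should_run_opcheck(80, 16, "cuda:0")
```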
tests/kernels/test_cache.py

@@ -60,7 +60,9 @@ def test_copy_blocks(
     if kv_cache_dtype == "fp8" and head_size % 16:
         pytest.skip()
     current_platform.seed_everything(seed)
-    torch.set_default_device(device)
+    # Don't set MPS as default device to avoid placeholder storage error
+    if not device.startswith("mps"):
+        torch.set_default_device(device)
     # Generate random block mappings where each source block is mapped to two
     # destination blocks.
     assert 2 * num_mappings <= num_blocks
@@ -144,13 +146,15 @@ def test_reshape_and_cache(
     if kv_cache_dtype == "fp8" and head_size % 16:
         pytest.skip()
     current_platform.seed_everything(seed)
-    torch.set_default_device(device)
+    # Don't set MPS as default device to avoid placeholder storage error
+    if not device.startswith("mps"):
+        torch.set_default_device(device)
     # Create a random slot mapping.
     num_slots = block_size * num_blocks
     slot_mapping_lst = random.sample(range(num_slots), num_tokens)
-    slot_mapping = torch.tensor(slot_mapping_lst, dtype=torch.long)
+    slot_mapping = torch.tensor(slot_mapping_lst, dtype=torch.long, device=device)
 
-    qkv = torch.randn(num_tokens, 3, num_heads, head_size, dtype=dtype)
+    qkv = torch.randn(num_tokens, 3, num_heads, head_size, dtype=dtype, device=device)
     _, key, value = qkv.unbind(dim=1)
 
     # Create the KV caches.
@@ -262,7 +266,9 @@ def test_reshape_and_cache_flash(
     if current_platform.is_mps() and kv_cache_dtype == "fp8":
         pytest.skip("reshape_and_cache_flash doesn't support FP8 on MPS")
     current_platform.seed_everything(seed)
-    torch.set_default_device(device)
+    # Don't set MPS as default device to avoid placeholder storage error
+    if not device.startswith("mps"):
+        torch.set_default_device(device)
 
     # Create a random slot mapping.
     num_slots = block_size * num_blocks
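All three hunks apply the same pattern: skip `torch.set_default_device` on MPS, where it triggers the placeholder storage error during opcheck, and pass `device=` explicitly when building tensors so they still land on the intended device. A minimal sketch of the pattern (the helper name and shapes are illustrative):

```python
import torch


def build_inputs(device: str, num_tokens: int = 8, num_heads: int = 2, head_size: int = 64):
    # Making MPS the default device breaks torch.library.opcheck with a
    # placeholder-storage error, so only set the default elsewhere...
    if not device.startswith("mps"):
        torch.set_default_device(device)
    # ...and pin every tensor explicitly so MPS runs still allocate there.
    qkv = torch.randn(num_tokens, 3, num_heads, head_size, device=device)
    return qkv.unbind(dim=1)


query, key, value = build_inputs("cpu")  # works for "mps" too when available
```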
tests/kernels/utils.py

@@ -40,10 +40,18 @@ def fp8_allclose(
     """
     torch._refs._check_close_args(name="torch.allclose", a=a, b=b, rtol=rtol, atol=atol)
 
+    # MPS doesn't support float64, so use float32 for comparison
+    if a.device.type == "mps" or b.device.type == "mps":
+        a_cmp = a.float()
+        b_cmp = b.float()
+    else:
+        a_cmp = a.double()
+        b_cmp = b.double()
+
     return bool(
         torch.all(
             torch.isclose(
-                a.double(), b.double(), rtol=rtol, atol=atol, equal_nan=equal_nan
+                a_cmp, b_cmp, rtol=rtol, atol=atol, equal_nan=equal_nan
             )
         ).item()
     )
@@ -68,25 +76,12 @@ def opcheck(
     *,
     test_utils: Union[str, Sequence[str]] = ALL_OPCHECK_TEST_UTILS,
     raise_exception: bool = True,
-    cond: bool = True
+    cond: bool = True,
 ) -> Dict[str, str]:
     with unittest.mock.patch("torch.allclose", new=fp8_allclose):
         if not cond:
             return {}
-
-        # Check if any arguments are on MPS device and skip opcheck if so
-        # as MPS has issues with placeholder storage allocation in opcheck
-        def is_mps_tensor(x):
-            return hasattr(x, 'device') and x.device.type == 'mps'
-
-        def check_args_for_mps(args):
-            if isinstance(args, (list, tuple)):
-                return any(check_args_for_mps(arg) for arg in args)
-            return is_mps_tensor(args)
-
-        if check_args_for_mps(args):
-            return {}
-
+
         return torch.library.opcheck(
             op, args, kwargs, test_utils=test_utils, raise_exception=raise_exception
         )
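The `fp8_allclose` change works because float32 is available on every backend exercised here while float64 is unavailable on MPS, so the comparison dtype is chosen per input; the ad-hoc MPS argument scan in `opcheck` then becomes unnecessary, since callers fold the MPS skip into `cond=` instead. A standalone sketch of just the dtype-selection logic (the helper is illustrative, not part of the diff):

```python
import torch


def comparison_dtype(a: torch.Tensor, b: torch.Tensor) -> torch.dtype:
    # MPS tensors can't be upcast to float64 (the backend doesn't
    # implement it), so compare in float32 there; use float64 everywhere
    # else for the tightest tolerance behavior.
    if a.device.type == "mps" or b.device.type == "mps":
        return torch.float32
    return torch.float64


a = torch.ones(2)
b = torch.ones(2)
assert comparison_dtype(a, b) is torch.float64  # CPU inputs compare in float64
```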