Reformat splash_attention_kernel_test.py (line wrapping only; no functional changes).

sbodenstein · copybara-github · commit 954007cf68ee · 2025-11-24T08:45:31.000-08:00
PiperOrigin-RevId: 834807012
diff --git a/tokamax/_src/ops/experimental/tpu/splash_attention/splash_attention_kernel_test.py b/tokamax/_src/ops/experimental/tpu/splash_attention/splash_attention_kernel_test.py
@@ -216,7 +216,9 @@ def attention_strategy(draw: Draw) -> tuple[int, int, int, int, np.dtype]:
     # tests.
     dtype = np.dtype("float32")
   else:
-    dtype = draw(hps.sampled_from([np.dtype("float32"), np.dtype(jnp.bfloat16)]))
+    dtype = draw(
+        hps.sampled_from([np.dtype("float32"), np.dtype(jnp.bfloat16)])
+    )
   return q_seq_len, kv_seq_len, head_dim_qk, head_dim_v, dtype
 
 
@@ -392,9 +394,17 @@ def test_splash_attention(self, is_mqa, is_segmented, is_dynamic_mask, data):
       use_sinks=(False, True),
   )
   @hp.given(hps.data())
-  def test_splash_attention_fwd(self, is_mqa, is_segmented, is_dynamic_mask,
-                                use_base2_exp, use_max_logit_estimate,
-                                fuse_reciprocal, use_sinks, data):
+  def test_splash_attention_fwd(
+      self,
+      is_mqa,
+      is_segmented,
+      is_dynamic_mask,
+      use_base2_exp,
+      use_max_logit_estimate,
+      fuse_reciprocal,
+      use_sinks,
+      data,
+  ):
     # TODO: Re-enable once dynamic masks are fixed.
     if is_dynamic_mask:
       self.skipTest("Dynamic masks not supported.")
@@ -468,9 +478,7 @@ def test_splash_attention_fwd(self, is_mqa, is_segmented, is_dynamic_mask,
     elif use_max_logit_estimate == "value_2d":
       max_logit_value = max_val * jnp.ones((num_q_heads,), dtype=jnp.bfloat16)
 
-    make_mask_fn = partial(
-        make_mask_fn, config=config, save_residuals=True
-    )
+    make_mask_fn = partial(make_mask_fn, config=config, save_residuals=True)
     attn = make_mask_fn(mask)
     attn_ref = partial(
         splash.attention_reference,
@@ -495,18 +503,21 @@ def test_splash_attention_fwd(self, is_mqa, is_segmented, is_dynamic_mask,
     res_tol = dict(atol=1e-3, rtol=3e-3)
     if use_sinks:
       o_tol = dict(atol=1e-2, rtol=1e-2)
-    elif (use_base2_exp or use_max_logit_estimate is not None
-          or not fuse_reciprocal):
+    elif (
+        use_base2_exp
+        or use_max_logit_estimate is not None
+        or not fuse_reciprocal
+    ):
       o_tol = dict(atol=8e-3, rtol=3e-3)
     else:
       o_tol = dict(atol=4e-3, rtol=3e-3)
 
     self._assert_allclose(o, o_ref, **o_tol)
-    self._assert_allclose(stats["logsumexp"],
-                          stats_ref["logsumexp"], **res_tol)
+    self._assert_allclose(stats["logsumexp"], stats_ref["logsumexp"], **res_tol)
     if use_max_logit_estimate is None:
-      self._assert_allclose(stats["max_logits"],
-                            stats_ref["max_logits"], **res_tol)
+      self._assert_allclose(
+          stats["max_logits"], stats_ref["max_logits"], **res_tol
+      )
 
   @parameterized.product(
       is_mqa=(False, True),
@@ -614,8 +625,14 @@ def test_splash_attention_bwd(
     )
     attn = make_mask_fn(mask)
 
-    o, attn_vjp = jax.vjp(partial(attn, max_logit_value=max_logit_value),
-                          q, k, v, segment_ids, sinks)
+    o, attn_vjp = jax.vjp(
+        partial(attn, max_logit_value=max_logit_value),
+        q,
+        k,
+        v,
+        segment_ids,
+        sinks,
+    )
     q32, k32, v32 = jax.tree.map(lambda x: x.astype(jnp.float32), (q, k, v))
     o_ref, stats_ref = splash.attention_reference(
         q32,
@@ -630,8 +647,11 @@ def test_splash_attention_bwd(
     )
     if use_sinks:
       o_tol = dict(atol=1e-2, rtol=1e-2)
-    elif (use_base2_exp or use_max_logit_estimate is not None
-          or not fuse_reciprocal):
+    elif (
+        use_base2_exp
+        or use_max_logit_estimate is not None
+        or not fuse_reciprocal
+    ):
       o_tol = dict(atol=8e-3, rtol=1e-2)
     else:
       o_tol = dict(atol=4e-3, rtol=3e-3)