
Commit 55a7540

Add an option to not use dlpack. (#9304)
Parent: 20899c7

File tree: 15 files changed (+119 −94 lines)

benchmarks/util.py
Lines changed: 2 additions & 2 deletions

@@ -72,10 +72,10 @@ def is_xla_device_available(devkind, use_xla2: bool = False):
 
 def move_to_device(item, device, torch_xla2: bool = False):
   if torch_xla2:
-    import torch_xla2
+    import torchax
     import jax
     move_to_device_func = lambda t: jax.device_put(
-        torch_xla2.tensor.t2j(t), device)
+        torchax.default_env().t2j_copy(t), device)
   else:
 
     def move_to_device_func(tensor: torch.Tensor) -> torch.Tensor:
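For context, the replacement API can be exercised on its own. A minimal sketch of the new conversion path (assumes torchax and jax are installed; not part of this commit):

import torch
import torchax
import jax

env = torchax.default_env()
t = torch.randn(4, 4)
arr = env.t2j_copy(t)  # torch.Tensor -> jax.Array, always by copy
moved = jax.device_put(arr, jax.devices()[0])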

torchax/examples/train_llama/utils.py
Lines changed: 2 additions & 1 deletion

@@ -200,7 +200,8 @@ def _shard_fsdp_style(self, state_dict, sharding=None):
       sharding = self.x_sharding
 
     def move_one_tensor(x):
-      jval = torchax.tensor.t2j(x)
+      env = torchax.default_env()
+      jval = env.t2j_copy(x)
       return sharded_device_put(jval, sharding)
 
     if isinstance(state_dict, torch.Tensor):
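A self-contained sketch of the same pattern outside the trainer (the mesh and axis names here are illustrative, and sharded_device_put from the example file is replaced with plain jax.device_put, which also accepts a Sharding):

import jax
import numpy as np
from jax.sharding import Mesh, NamedSharding, PartitionSpec
import torch
import torchax

env = torchax.default_env()
mesh = Mesh(np.array(jax.devices()), axis_names=('fsdp',))
sharding = NamedSharding(mesh, PartitionSpec('fsdp'))

def move_one_tensor(x: torch.Tensor):
  jval = env.t2j_copy(x)  # copy torch weights into a jax.Array
  return jax.device_put(jval, sharding)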

torchax/test/gemma/test_gemma.py
Lines changed: 2 additions & 1 deletion

@@ -71,7 +71,8 @@ def test_gemma(self):
     )
 
     weights, jax_func = torchax.extract_jax(model)
-    inputs_jax = pytree.tree_map_only(torch.Tensor, torchax.tensor.t2j, inputs)
+    env = torchax.default_env()
+    inputs_jax = env.t2j_copy(inputs)
 
     import jax
     print(jax.jit(jax_func)(weights, inputs_jax))
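Note that env.t2j_copy maps over a whole pytree of tensors, which is what lets it replace the explicit pytree.tree_map_only call here. A small sketch with a hypothetical input dict (not the gemma inputs):

import torch
import torchax

env = torchax.default_env()
inputs = {'ids': torch.ones(1, 8, dtype=torch.long), 'mask': torch.ones(1, 8)}
inputs_jax = env.t2j_copy(inputs)  # same dict structure, jax.Array leaves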

torchax/test/llama/test_llama.py
Lines changed: 3 additions & 4 deletions

@@ -88,8 +88,8 @@ def make_cache(args, batch_size):
       m_prefill = torch.export.export(m, sample_input_prefill)
 
     weights, mj_prefill = torchax.export.exported_program_to_jax(m_prefill)
-    sample_inputs = pytree.tree_map_only(torch.Tensor, tensor.t2j,
-                                         sample_input_prefill)
+    env = torchax.default_env()
+    sample_inputs = env.t2j_copy(sample_input_prefill)
     print('Prefill', mj_prefill(weights, sample_inputs))
 
     sample_input_decode = (

@@ -103,8 +103,7 @@ def make_cache(args, batch_size):
     with torch.no_grad():
       m_decode = torch.export.export(m, sample_input_decode)
     weights, mj_decode = torchax.export.exported_program_to_jax(m_decode)
-    sample_inputs = pytree.tree_map_only(torch.Tensor, tensor.t2j,
-                                         sample_input_decode)
+    sample_inputs = env.t2j_copy(sample_input_decode)
     print('Decode', mj_decode(weights, sample_inputs))

torchax/test/moe/moe_test.py
Lines changed: 1 addition & 1 deletion

@@ -48,7 +48,7 @@ def test_moe_layer(self):
     x_xla = env.to_xla(x)
     with jax.default_matmul_precision('float32'):
       res_xla = model_xla(x_xla)
-    res2 = torchax.tensor.j2t(res_xla._elem)
+    res2 = res_xla.to('cpu')
     print('max diff', torch.max((res - res2).abs()))
 
     self.assertTrue(torch.allclose(res2, res, atol=1e-2))
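The .to('cpu') call above uses the ordinary torch device-move API on a torchax Tensor instead of reaching into the private _elem field. A minimal sketch of the round trip (assumes a default jax backend is available):

import torch
import torchax

with torchax.default_env():
  x = torch.randn(2, 2, device='jax')  # torchax.tensor.Tensor
  res = x.to('cpu')                    # plain torch.Tensor; replaces j2t(x._elem)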

torchax/test/test_context.py
Lines changed: 14 additions & 21 deletions

@@ -47,8 +47,7 @@ def test_same_manual_seed(self):
       y = torch.randn((3, 3))
       self.assertIsInstance(y, tensor.Tensor)
 
-    self.assertTrue(
-        torch.equal(torchax.tensor.j2t(x._elem), torchax.tensor.j2t(y._elem)))
+    self.assertTrue(torch.allclose(x, y))
 
   def test_different_manual_seed(self):
     with xla_env:

@@ -60,36 +59,30 @@ def test_different_manual_seed(self):
       y = torch.randn((3, 3))
       self.assertIsInstance(y, tensor.Tensor)
 
-    self.assertFalse(
-        torch.equal(torchax.tensor.j2t(x._elem), torchax.tensor.j2t(y._elem)))
+    self.assertFalse(torch.allclose(x, y))
 
   def test_jit_with_rng(self):
 
-    @xla_env
-    def random_op():
-      x = torch.randn(3, 3)
-      y = torch.randn(3, 3)
-      return x @ y
+    with xla_env:
+
+      def random_op():
+        x = torch.randn(3, 3)
+        y = torch.randn(3, 3)
+        return x @ y
 
-    random_jit = torchax.interop.jax_jit(random_op)
-    self.assertIsInstance(random_jit(), tensor.Tensor)
+      random_jit = torchax.interop.jax_jit(random_op)
+      self.assertIsInstance(random_jit(), tensor.Tensor)
 
-    # Result always expected to be the same for a jitted function because seeds
-    # are baked in
-    torch.testing.assert_close(
-        torchax.tensor.j2t(random_jit()._elem),
-        torchax.tensor.j2t(random_jit()._elem),
-        atol=0,
-        rtol=0)
+      # Result always expected to be the same for a jitted function because seeds
+      # are baked in
+      torch.testing.assert_close(random_jit(), random_jit(), atol=0, rtol=0)
 
   def test_generator_seed(self):
     with xla_env:
       x = torch.randn(2, 3, generator=torch.Generator().manual_seed(0))
       y = torch.randn(2, 3, generator=torch.Generator().manual_seed(0))
 
-    # Values will be different, but still check device, layout, dtype, etc
-    torch.testing.assert_close(
-        torchax.tensor.j2t(x._elem), torchax.tensor.j2t(y._elem))
+    torch.testing.assert_close(x, y)
 
   def test_buffer(self):
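The behavior this test pins down can be reproduced standalone; a sketch, assuming xla_env is the env returned by torchax.default_env():

import torch
import torchax
import torchax.interop

env = torchax.default_env()
with env:

  def random_op():
    return torch.randn(3, 3) @ torch.randn(3, 3)

  random_jit = torchax.interop.jax_jit(random_op)
  # The RNG seed is baked in at trace time, so two calls agree exactly.
  torch.testing.assert_close(random_jit(), random_jit(), atol=0, rtol=0)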

torchax/test/test_conv.py
Lines changed: 6 additions & 4 deletions

@@ -55,24 +55,26 @@ def forward(self, x):
 class ConvTest(base_test_util.TestCase):
 
   def test_conv1(self):
+    env = torchax.default_env()
     m = CustomConv1()
     arg = torch.randn((20, 1, 50))
     res = m(arg)
 
     jax_weights, jax_func = torchax.extract_jax(m)
-    arg = torchax.tensor.t2j(arg)
+    arg = env.t2j_copy(arg)
     res2 = jax_func(jax_weights, (arg,))
-    res2_torch = torchax.tensor.j2t(res2)
+    res2_torch = env.j2t_copy(res2)
     self.assertTrue(torch.allclose(res, res2_torch))
 
   def test_conv2(self):
+    env = torchax.default_env()
     m = CustomConv2()
     arg = torch.randn((20, 4, 50, 100))
     res = m(arg)
     jax_weights, jax_func = torchax.extract_jax(m)
-    arg = torchax.tensor.t2j(arg)
+    arg = env.t2j_copy(arg)
     res2 = jax_func(jax_weights, (arg,))
-    res2_torch = torchax.tensor.j2t(res2)
+    res2_torch = env.j2t_copy(res2)
     self.assertTrue(torch.allclose(res, res2_torch, atol=1e-4, rtol=1e-4))
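The round trip these conv tests rely on, reduced to its core (hypothetical Linear module; tolerances as in test_conv2):

import torch
import torchax

env = torchax.default_env()
m = torch.nn.Linear(4, 4)
x = torch.randn(2, 4)
res = m(x)

jax_weights, jax_func = torchax.extract_jax(m)
res2 = env.j2t_copy(jax_func(jax_weights, (env.t2j_copy(x),)))
assert torch.allclose(res, res2, atol=1e-4, rtol=1e-4)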

torchax/test/test_exports.py
Lines changed: 7 additions & 5 deletions

@@ -43,12 +43,14 @@ def test_interpolate(self):
     model = Interpolate()
     ans = model(*arg)
 
+    env = torchax.default_env()
+
     with torch.no_grad():
       exported = torch.export.export(model, arg)
       weights, func = torchax.export.exported_program_to_jax(exported)
-      argj = tensor.t2j(arg[0])
+      argj = env.t2j_copy(arg[0])
       ans2 = jax.jit(func)(weights, (argj,))[0]
-      ans2 = tensor.j2t(ans2)
+      ans2 = env.j2t_copy(ans2)
       self.assertTrue(torch.allclose(ans, ans2, atol=1e-3))
 
     # Convert to StableHLO

@@ -67,11 +69,11 @@ def test_constant(self):
 
     with torch.no_grad():
       exported = torch.export.export(model, arg)
-
+      env = torchax.default_env()
       weights, func = torchax.export.exported_program_to_jax(exported)
-      argj = tensor.t2j(arg[0])
+      argj = env.t2j_copy(arg[0])
       ans2 = jax.jit(func)(weights, (argj,))[0]
-      ans2 = tensor.j2t(ans2)
+      ans2 = env.j2t_copy(ans2)
       self.assertTrue(torch.allclose(ans, ans2, atol=1e-5))
 
     # Convert to StableHLO

torchax/test/test_functions.py
Lines changed: 2 additions & 2 deletions

@@ -40,7 +40,7 @@ def test_tensor_constructor(self, arg, kwargs=None):
     actual = torch.tensor(arg, device='jax', **kwargs)
     self.assertIsInstance(actual, torchax.tensor.Tensor)
 
-    torch.testing.assert_close(torchax.tensor.j2t(actual._elem), expected)
+    torch.testing.assert_close(actual.to('cpu'), expected)
 
   def test_dont_capture_conversion(self):
     t = torch.tensor([1, 2, 3])

@@ -86,7 +86,7 @@ def test_rms_norm(self):
     model.to('jax')
     x = x.to('jax')
     res2 = model(x)
-    self.assertTrue(torch.allclose(res, torchax.tensor.j2t(res2.jax())))
+    self.assertTrue(torch.allclose(res, res2.to('cpu')))
 
   def test_randn_requires_grad(self):
     x = torch.randn((3, 3), requires_grad=True, device='jax')
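A sketch of the constructor-plus-readback pattern used above (assumes the default env is active so device='jax' is routed to torchax):

import torch
import torchax

expected = torch.tensor([1.0, 2.0])
with torchax.default_env():
  actual = torch.tensor([1.0, 2.0], device='jax')  # torchax.tensor.Tensor
  torch.testing.assert_close(actual.to('cpu'), expected)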

torchax/test/test_image.py
Lines changed: 4 additions & 11 deletions

@@ -10,14 +10,6 @@
 import torchax.interop
 
 
-def to_xla_tensor(tensorstree):
-  return torchax.interop.torch_view(torchax.tensor.t2j(tensorstree))
-
-
-def to_torch_tensor(tensorstree):
-  return torchax.tensor.j2t(torchax.interop.jax_view(tensorstree))
-
-
 @partial(jax.jit, static_argnums=(1, 2, 3, 4))
 def upsample_jit(tensor, output_size: Tuple[int, int], align_corners: bool,
                  antialias: bool, method: str):

@@ -53,8 +45,9 @@ def test_resampling_combinations_bicubic(self, antialias, align_corners):
         align_corners=align_corners,
         antialias=antialias)
 
-    with torchax.default_env():
-      input_tensor_xla = to_xla_tensor(input_tensor)
+    env = torchax.default_env()
+    with env:
+      input_tensor_xla = env.to_xla(input_tensor)
       input_tensor_xla = torchax.interop.jax_view(input_tensor_xla)
       upsampled_tensor_xla = upsample_jit(
          input_tensor_xla,

@@ -63,7 +56,7 @@ def test_resampling_combinations_bicubic(self, antialias, align_corners):
           antialias=antialias,
           method=method)
 
-      upsampled_tensor_xla = to_torch_tensor(upsampled_tensor_xla)
+      upsampled_tensor_xla = env.j2t_copy(upsampled_tensor_xla)
       abs_err = torch.abs(upsampled_tensor - upsampled_tensor_xla)
 
     assert torch.allclose(
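The two local helpers can be deleted because the environment now covers both directions itself; a sketch of the replacement round trip (names illustrative):

import torch
import torchax
import torchax.interop

env = torchax.default_env()
x = torch.randn(1, 3, 8, 8)
with env:
  x_xla = env.to_xla(x)                    # torchax.tensor.Tensor
  x_jax = torchax.interop.jax_view(x_xla)  # underlying jax.Array view
  # ... call a jax.jit'ed function on x_jax here ...
  back = env.j2t_copy(x_jax)               # copy back to a torch.Tensor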

torchax/torchax/__init__.py
Lines changed: 3 additions & 2 deletions

@@ -43,9 +43,10 @@ def extract_jax(mod: torch.nn.Module, env=None):
   """Returns a pytree of jax.ndarray and a jax callable."""
   if env is None:
     env = default_env()
-  states = mod.state_dict()
+  states = dict(mod.named_buffers())
+  states.update(mod.named_parameters())
 
-  states = pytree.tree_map_only(torch.Tensor, tensor.t2j, states)
+  states = env.t2j_copy(states)
 
   #@jax.jit
   def jax_func(states, inputs):
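One visible effect of switching from mod.state_dict() to named_buffers() plus named_parameters() is that non-persistent buffers, which state_dict() omits, are now included in the extracted pytree. Typical usage is unchanged; a short sketch with a toy module:

import torch
import torchax
import jax

model = torch.nn.Sequential(torch.nn.Linear(4, 8), torch.nn.ReLU())
weights, jax_func = torchax.extract_jax(model)  # weights: pytree of jax arrays

env = torchax.default_env()
inputs = (env.t2j_copy(torch.randn(2, 4)),)
print(jax.jit(jax_func)(weights, inputs))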

torchax/torchax/config.py
Lines changed: 3 additions & 0 deletions

@@ -13,6 +13,9 @@ class Configuration:
   # If true, we will convert Views into torchax.Tensors eagerly
   force_materialize_views: bool = False
 
+  # Use DLPack for converting jax.Arrays <-> torch.Tensors
+  use_dlpack_for_data_conversion: bool = False
+
   # Flash attention
   use_tpu_flash_attention: bool = False
   shmap_flash_attention: bool = False
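This hunk only declares the flag; the call sites that consume it are not shown in this excerpt. A hypothetical consumption site (env.config and the helper name below are assumptions, not code from this commit) could thread the flag into the converters from mappings.py:

import torch
from torch.utils import _pytree as pytree
from torchax.ops import mappings

def t2j_copy_sketch(env, values):
  # Hypothetical wiring: setting use_dlpack_for_data_conversion=False
  # would force the numpy copy path inside mappings.t2j below.
  use_dlpack = env.config.use_dlpack_for_data_conversion
  return pytree.tree_map_only(
      torch.Tensor, lambda t: mappings.t2j(t, use_dlpack=use_dlpack), values)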

torchax/torchax/export.py
Lines changed: 5 additions & 4 deletions

@@ -4,8 +4,9 @@
 from typing import Any, Dict, Tuple
 import torch
 from torch.utils import _pytree as pytree
+import torchax
 from torchax import tensor
-from torchax.ops import ops_registry
+from torchax.ops import ops_registry, mappings
 from torchax import decompositions
 import jax
 import jax.export

@@ -108,8 +109,8 @@ def func(states, inputs):
 
   if export_raw:
     return names, states, func
-
-  states = pytree.tree_map_only(torch.Tensor, tensor.t2j, states)
+  env = torchax.default_env()
+  states = env.t2j_copy(states)
   return states, func
 

@@ -135,7 +136,7 @@ def _get_dim(d):
 
   tensor_meta = arg_meta['tensor_meta']
   shape = [_get_dim(d) for d in tensor_meta.shape]
-  return jax.ShapeDtypeStruct(shape, tensor.t2j_dtype(tensor_meta.dtype))
+  return jax.ShapeDtypeStruct(shape, mappings.t2j_dtype(tensor_meta.dtype))
 
 def _get_inputs(exported):
   """Return placeholders with input metadata"""

torchax/torchax/ops/mappings.py
Lines changed: 28 additions & 9 deletions

@@ -7,7 +7,7 @@
 import torch.utils._mode_utils as mode_utils
 
 
-def t2j(t):
+def t2j(t, use_dlpack=True):
   is_bool = False
   if t.dtype == torch.bool:
     is_bool = True

@@ -18,9 +18,14 @@ def t2j(t):
   if not t.is_contiguous():
     t = t.contiguous()
 
-  try:
-    res = jaxdl.from_dlpack(t)
-  except Exception:
+  res = None
+  if use_dlpack:
+    try:
+      res = jaxdl.from_dlpack(t)
+    except Exception:
+      pass
+
+  if res is None:
     # https://github.com/google/jax/issues/7657
     # https://github.com/google/jax/issues/17784
     if t.dtype == torch.bfloat16:

@@ -37,15 +42,29 @@ def t2j(t):
   return res
 
 
-def j2t(x):
+def j2t(x, use_dlpack=True):
   with mode_utils.no_dispatch(), torch._C.DisableTorchFunction():
-    try:
-      dl = jaxdl.to_dlpack(x)
-      res = torchdl.from_dlpack(dl)
-    except Exception:
+    res = None
+    if use_dlpack:
+      try:
+        dl = jaxdl.to_dlpack(x)
+        res = torchdl.from_dlpack(dl)
+      except Exception:
+        res = None
+
+    orig_dtype = None
+    if res is None:
+      orig_dtype = None
+      if x.dtype == jnp.bfloat16.dtype:
+        orig_dtype = x.dtype
+        x = x.astype(jnp.float32.dtype)
       res = torch.from_numpy(numpy.asarray(x))
+
     if x.dtype == jnp.bool_:
       res = res.to(torch.bool)
+
+    if orig_dtype is not None:
+      res = res.to(j2t_dtype(orig_dtype))
   return res
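With the new keyword, callers can opt out of DLPack explicitly; a short sketch of both paths (the numpy fallback also handles the bfloat16 round trip through float32, as shown above):

import torch
from torchax.ops import mappings

t = torch.randn(3, 3)
a = mappings.t2j(t)                    # tries DLPack first (zero-copy when it works)
b = mappings.t2j(t, use_dlpack=False)  # forces the numpy-based copy path

x = mappings.j2t(b, use_dlpack=False)  # jax.Array -> torch.Tensor, by copy
assert torch.allclose(t, x)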
