Commit 1f61d6b

Add out_sharding argument to call methods for standard layers
1 parent 5109e2c commit 1f61d6b

3 files changed: +20 −9 lines changed

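In practice, the new argument lets a caller request an output sharding at call time when running under an explicit-axes mesh. A minimal usage sketch, mirroring the test added below (axis names and shapes are illustrative, and at least four devices are assumed):

import jax
import jax.numpy as jnp
from flax import nnx
from jax.sharding import PartitionSpec as P, AxisType, reshard

# Illustrative 2x2 mesh with explicit axis types.
mesh = jax.make_mesh((2, 2), ("X", "Y"),
                     axis_types=(AxisType.Explicit, AxisType.Explicit))
with jax.set_mesh(mesh):
  x = reshard(jnp.arange(4.0).reshape(2, 2), P("X", None))  # rows sharded over X
  layer = nnx.Linear(2, 4, rngs=nnx.Rngs(0))
  y = layer(x, out_sharding=P("X", "Y"))  # also shard the output columns over Y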

flax/nnx/nn/linear.py

Lines changed: 7 additions & 6 deletions
@@ -246,7 +246,7 @@ def bias_init_wrap(rng, shape, dtype):
     else:
       self.bias = nnx.data(None)
 
-  def __call__(self, inputs: Array) -> Array:
+  def __call__(self, inputs: Array, out_sharding = None) -> Array:
     """Applies a linear transformation to the inputs along multiple dimensions.
 
     Args:
@@ -288,7 +288,7 @@ def __call__(self, inputs: Array) -> Array:
     # user custom dot_general/dot_general_cls which may not have
     # preferred_element_type argument to avoid breaking
     # existing code
-    dot_general_kwargs = {}
+    dot_general_kwargs = {'out_sharding': out_sharding}
     if self.preferred_element_type is not None:
       dot_general_kwargs["preferred_element_type"] = self.preferred_element_type
     out = dot_general(
@@ -393,7 +393,7 @@ def __init__(
     self.promote_dtype = promote_dtype
     self.preferred_element_type = preferred_element_type
 
-  def __call__(self, inputs: Array) -> Array:
+  def __call__(self, inputs: Array, out_sharding = None) -> Array:
     """Applies a linear transformation to the inputs along the last dimension.
 
     Args:
@@ -413,6 +413,7 @@ def __call__(self, inputs: Array) -> Array:
     # preferred_element_type argument to avoid breaking
     # existing code
     dot_general_kwargs = {}
+    dot_general_kwargs['out_sharding'] = out_sharding
     if self.preferred_element_type is not None:
       dot_general_kwargs["preferred_element_type"] = self.preferred_element_type
     y = self.dot_general(
@@ -521,7 +522,7 @@ def __init__(
     self.preferred_element_type = preferred_element_type
 
   def __call__(
-    self, inputs: Array, einsum_str: tp.Optional[str] = None
+    self, inputs: Array, einsum_str: tp.Optional[str] = None, out_sharding = None
   ) -> Array:
     """Applies a linear transformation to the inputs along the last dimension.
 
@@ -557,7 +558,7 @@ def __call__(
     # user custom self.einsum_op method which may not have
     # preferred_element_type argument to avoid breaking
     # existing code
-    einsum_op_kwargs = {}
+    einsum_op_kwargs = {'out_sharding': out_sharding}
     if self.preferred_element_type is not None:
       einsum_op_kwargs["preferred_element_type"] = self.preferred_element_type
 
@@ -1141,7 +1142,7 @@ def maybe_broadcast(
       rhs_dilation=kernel_dilation,
       transpose_kernel=self.transpose_kernel,
       precision=self.precision,
-      preferred_element_type=self.preferred_element_type,
+      preferred_element_type=self.preferred_element_type
     )
 
     if self.padding == 'CIRCULAR':
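The kwargs dict in these hunks is built dynamically so that user-supplied dot_general implementations without the newer keyword arguments keep working. A rough standalone sketch of that forwarding pattern (not the Flax code verbatim, and assuming a JAX version whose jax.lax.dot_general accepts out_sharding under explicit sharding, as this commit relies on):

import jax

def _apply_kernel(inputs, kernel, out_sharding=None, dot_general=jax.lax.dot_general):
  # Only forward the optional keyword when the caller actually requested it.
  kwargs = {}
  if out_sharding is not None:
    kwargs['out_sharding'] = out_sharding
  # Contract the last axis of `inputs` with the first axis of `kernel`.
  return dot_general(
    inputs, kernel,
    (((inputs.ndim - 1,), (0,)), ((), ())),
    **kwargs,
  )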

flax/nnx/nn/lora.py

Lines changed: 2 additions & 2 deletions
@@ -209,7 +209,7 @@ def __init__(
       b_metadata=b_metadata,
     )
 
-  def __call__(self, x: jax.Array):
-    y = super().__call__(x)
+  def __call__(self, x: jax.Array, out_sharding = None):
+    y = super().__call__(x, out_sharding=out_sharding)
     y += self.lora(x)
     return y
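With this change, LoRALinear forwards out_sharding to the underlying Linear call, while the low-rank correction self.lora(x) is added on top unchanged. A hypothetical usage sketch (the constructor arguments in_features, out_features, and lora_rank are assumed, not part of this diff; only the out_sharding call argument comes from this commit):

from flax import nnx
from jax.sharding import PartitionSpec as P

# Assumed constructor signature for illustration only.
lora_layer = nnx.LoRALinear(in_features=2, out_features=4, lora_rank=1, rngs=nnx.Rngs(0))
y = lora_layer(x, out_sharding=P("X", "Y"))  # x: an array living under an explicit mesh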

tests/nnx/spmd_test.py

Lines changed: 11 additions & 1 deletion
@@ -20,7 +20,7 @@
 from flax import nnx
 import jax
 import jax.numpy as jnp
-from jax.sharding import PartitionSpec as P, NamedSharding
+from jax.sharding import PartitionSpec as P, NamedSharding, AxisType, reshard
 import optax
 
 
@@ -211,6 +211,16 @@ def test_eager_sharding_context(self, use_eager_sharding):
     else:
       assert not has_sharding_spec(w)
 
+  def test_out_sharding(self):
+    mesh = jax.make_mesh((2, 2), ("X", "Y"),
+                         axis_types=(AxisType.Explicit, AxisType.Explicit))
+    with jax.set_mesh(mesh):
+      replicated_array = jnp.arange(4).reshape(2, 2)
+      sharded_array = reshard(replicated_array, P("X", None))
+      l = nnx.Linear(2, 4, rngs=nnx.Rngs(0))
+      assert 'float32[2@X,4]' in str(jax.typeof(l(sharded_array)))
+      assert 'float32[2@X,4@Y]' in str(jax.typeof(l(sharded_array, out_sharding=P("X", "Y"))))
+
   @parameterized.product(use_hijax=[True, False])
   def test_logical_rules(self, use_hijax):
     self.enter_context(nnx.use_hijax(use_hijax))
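The assertions rely on jax.typeof rendering explicitly sharded axes with an @<axis> suffix: without out_sharding the output keeps only the row sharding inherited from the input (float32[2@X,4]), while out_sharding=P("X", "Y") additionally splits the output columns over Y (float32[2@X,4@Y]). A small illustration under the same mesh and variables as the test above:

y_default = l(sharded_array)                             # float32[2@X,4]
y_explicit = l(sharded_array, out_sharding=P("X", "Y"))  # float32[2@X,4@Y]
print(jax.typeof(y_default), jax.typeof(y_explicit))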
