Commit 38c058b

Merge pull request #4863 from google:nnx-obj-pytree

PiperOrigin-RevId: 789850911

Author: Flax Authors
2 parents: 3285ba0 + a540028

31 files changed: +508 −536 lines

.github/workflows/flax_test.yml

Lines changed: 0 additions & 23 deletions
@@ -79,29 +79,6 @@ jobs:
       - name: Test importing Flax
         run: |
           uv run python -c "import flax"
-  test-mutable-array:
-    name: Run MutableArray tests
-    needs: [pre-commit, commit-count, test-import]
-    runs-on: ubuntu-24.04-16core
-    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-      - name: Set up Python 3.11
-        id: setup_python
-        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
-        with:
-          python-version: 3.11
-      - name: Setup uv
-        uses: astral-sh/setup-uv@887a942a15af3a7626099df99e897a18d9e5ab3a # v5.1.0
-        with:
-          version: "0.3.0"
-      - name: Install dependencies
-        run: |
-          uv sync --extra all --extra testing --extra docs
-          uv pip install -U git+https://github.com/jax-ml/jax.git
-      - name: Run MutableArray tests
-        run: |
-          source .venv/bin/activate
-          FLAX_MUTABLE_ARRAY=true pytest tests/nnx/mutable_array_test.py
 
   tests:
     name: Run Tests

docs_nnx/guides/mutable_array.ipynb renamed to docs_nnx/guides/array_ref.ipynb

Lines changed: 52 additions & 52 deletions
Large diffs are not rendered by default.

docs_nnx/guides/mutable_array.md renamed to docs_nnx/guides/array_ref.md

Lines changed: 30 additions & 30 deletions
@@ -8,55 +8,55 @@ jupytext:
     jupytext_version: 1.13.8
 ---
 
-# Mutable Arrays (experimental)
+# Array Refs (experimental)
 
 ```{code-cell} ipython3
+import jax.experimental
 from flax import nnx
 import jax
 import jax.numpy as jnp
-import jax.experimental
 import optax
 ```
 
 ## Basics
 
 +++
 
-### Mutable Arrays 101
+### Array Refs 101
 
 ```{code-cell} ipython3
-m_array = jax.experimental.mutable_array(jnp.array([1, 2, 3]))
+a_ref = nnx.array_ref(jnp.array([1, 2, 3]))
 
 @jax.jit
-def increment(m_array: jax.experimental.MutableArray):  # no return!
-  array: jax.Array = m_array[...]  # access
-  m_array[...] = array + 1  # update
+def increment(a_ref: nnx.ArrayRef):  # no return!
+  array: jax.Array = a_ref[...]  # access
+  a_ref[...] = array + 1  # update
 
-print("[1] =", m_array); increment(m_array); print("[2] =", m_array)
+print("[1] =", a_ref); increment(a_ref); print("[2] =", a_ref)
 ```
 
 ```{code-cell} ipython3
 @jax.jit
 def inc(x):
   x[...] += 1
 
-print(increment.lower(m_array).as_text())
+print(increment.lower(a_ref).as_text())
 ```
 
-### Mutable Variables
+### Variable Refs
 
 ```{code-cell} ipython3
-variable = nnx.Variable(jnp.array([1, 2, 3]), mutable=True)
-print(f"{variable.mutable = }\n")
+variable = nnx.Variable(jnp.array([1, 2, 3]), use_ref=True)
+print(f"{variable.has_ref = }\n")
 
 print("[1] =", variable); increment(variable); print("[2] =", variable)
 ```
 
 ```{code-cell} ipython3
-with nnx.use_mutable_arrays(True):
+with nnx.use_refs(True):
   variable = nnx.Variable(jnp.array([1, 2, 3]))
 
-print(f"{variable.mutable = }")
+print(f"{variable.has_ref = }")
 ```
 
 ### Changing Status
@@ -70,12 +70,12 @@ class Linear(nnx.Module):
   def __call__(self, x):
     return x @ self.kernel + self.bias[None]
 
-model = Linear(1, 3, rngs=nnx.Rngs(0))    # without mutable arrays
-mutable_model = nnx.mutable(model)        # convert to mutable arrays
-frozen_model = nnx.freeze(mutable_model)  # freeze mutable arrays again
+model = Linear(1, 3, rngs=nnx.Rngs(0))    # without array refs
+refs_model = nnx.to_refs(model)           # convert to array refs
+arrays_model = nnx.to_arrays(refs_model)  # convert to regular arrays
 
-print("nnx.mutable(model) =", mutable_model)
-print("nnx.freeze(mutable_model) =", frozen_model)
+print("nnx.to_refs(model) =", refs_model)
+print("nnx.to_arrays(refs_model) =", arrays_model)
 ```
 
 ## Examples
@@ -96,7 +96,7 @@ class Block(nnx.Module):
 ### Training Loop
 
 ```{code-cell} ipython3
-with nnx.use_mutable_arrays(True):
+with nnx.use_refs(True):
   model = Block(2, 64, 3, rngs=nnx.Rngs(0))
   optimizer = nnx.Optimizer(model, optax.adam(1e-3), wrt=nnx.Param)
 
@@ -107,7 +107,7 @@ def train_step(model, optimizer, x, y):
     model = nnx.merge(graphdef, params, nondiff)
     return ((model(x) - y) ** 2).mean()
 
-  loss, grads = jax.value_and_grad(loss_fn)(nnx.freeze(params))  # freeze MutableArrays for jax.grad
+  loss, grads = jax.value_and_grad(loss_fn)(nnx.to_arrays(params))  # freeze ArrayRefs for jax.grad
   optimizer.update(model, grads)
 
   return loss
@@ -122,7 +122,7 @@ train_step(model, optimizer, x=jnp.ones((10, 2)), y=jnp.ones((10, 3)))
 def create_stack(rngs):
   return Block(2, 64, 2, rngs=rngs)
 
-with nnx.use_mutable_arrays(True):
+with nnx.use_refs(True):
   block_stack = create_stack(nnx.Rngs(0).fork(split=8))
 
 def scan_fn(x, block):
@@ -147,17 +147,17 @@ def create_model(rngs):
   return Block(2, 64, 3, rngs=rngs)
 
 try:
-  with nnx.use_mutable_arrays(True):
+  with nnx.use_refs(True):
     model = create_model(nnx.Rngs(0))
 except Exception as e:
   print(f"Error:", e)
 ```
 
 ```{code-cell} ipython3
-with nnx.use_mutable_arrays(False):  # <-- disable mutable arrays
+with nnx.use_refs(False):  # <-- disable array refs
  model = create_model(nnx.Rngs(0))
 
-model = nnx.mutable(model)  # convert to mutable after creation
+model = nnx.to_refs(model)  # convert to array refs after creation
 
 print("model.linear =", model.linear)
 ```
@@ -167,7 +167,7 @@ print("model.linear =", model.linear)
 def create_model(rngs):
   return Block(2, 64, 3, rngs=rngs)
 
-with nnx.use_mutable_arrays(True):
+with nnx.use_refs(True):
   model = create_model(nnx.Rngs(0))
 
 print("model.linear =", model.linear)
@@ -182,7 +182,7 @@ def get_error(f, *args):
   except Exception as e:
     return f"{type(e).__name__}: {e}"
 
-x = jax.experimental.mutable_array(jnp.array(0))
+x = nnx.array_ref(jnp.array(0))
 
 @jax.jit
 def f(a, b):
@@ -192,12 +192,12 @@ print(get_error(f, x, x))
 ```
 
 ```{code-cell} ipython3
-class SharedVariables(nnx.Object):
+class SharedVariables(nnx.Pytree):
   def __init__(self):
     self.a = nnx.Variable(jnp.array(0))
     self.b = self.a
 
-class SharedModules(nnx.Object):
+class SharedModules(nnx.Pytree):
  def __init__(self):
    self.a = Linear(1, 1, rngs=nnx.Rngs(0))
    self.b = self.a
@@ -206,7 +206,7 @@ class SharedModules(nnx.Object):
 def g(pytree):
   ...
 
-with nnx.use_mutable_arrays(True):
+with nnx.use_refs(True):
   shared_variables = SharedVariables()
   shared_modules = SharedModules()
 
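As a quick reference for the renames in this guide diff, here is a minimal sketch that exercises the new names exactly as they appear above (`nnx.array_ref`, `nnx.ArrayRef`, `nnx.use_refs`, `nnx.to_refs`, `nnx.to_arrays`). It is an illustration based only on the usage shown in this diff, not on a released Flax API reference.

```python
import jax
import jax.numpy as jnp
from flax import nnx

# Previously: jax.experimental.mutable_array(...)
a_ref = nnx.array_ref(jnp.array([1, 2, 3]))

@jax.jit
def increment(ref: nnx.ArrayRef):  # updated in place, nothing is returned
  ref[...] = ref[...] + 1

increment(a_ref)

# Modules created under nnx.use_refs(True) hold refs instead of plain arrays;
# nnx.to_arrays / nnx.to_refs convert an existing object either way
# (previously nnx.freeze / nnx.mutable).
with nnx.use_refs(True):
  model = nnx.Linear(2, 3, rngs=nnx.Rngs(0))

plain_model = nnx.to_arrays(model)   # regular arrays, e.g. before jax.grad
refs_model = nnx.to_refs(plain_model)
```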

docs_nnx/key_concepts.ipynb

Lines changed: 1 addition & 1 deletion
@@ -146,7 +146,7 @@
     " [ 0.6772455 0.2807398 ]\n",
     " [ 0.16276604 0.16813846]\n",
     " [ 0.310975 -0.43336964]]\n",
-    "treedef = PyTreeDef(CustomNode(Linear[(('_object__state', 'bias', 'kernel'), (('_object__nodes', frozenset({'kernel', '_object__state', 'bias'})), ('bias_init', <function zeros at 0x117826700>), ('dot_general', <function dot_general at 0x1172aa480>), ('dtype', None), ('in_features', 4), ('kernel_init', <function variance_scaling.<locals>.init at 0x120f45260>), ('out_features', 2), ('param_dtype', <class 'jax.numpy.float32'>), ('precision', None), ('promote_dtype', <function promote_dtype at 0x120f45440>), ('use_bias', True)))], [CustomNode(ObjectState[(False, False)], []), CustomNode(Param[()], [*]), CustomNode(Param[()], [*])]))\n"
+    "treedef = PyTreeDef(CustomNode(Linear[(('_pytree__state', 'bias', 'kernel'), (('_object__nodes', frozenset({'kernel', '_pytree__state', 'bias'})), ('bias_init', <function zeros at 0x117826700>), ('dot_general', <function dot_general at 0x1172aa480>), ('dtype', None), ('in_features', 4), ('kernel_init', <function variance_scaling.<locals>.init at 0x120f45260>), ('out_features', 2), ('param_dtype', <class 'jax.numpy.float32'>), ('precision', None), ('promote_dtype', <function promote_dtype at 0x120f45440>), ('use_bias', True)))], [CustomNode(ObjectState[(False, False)], []), CustomNode(Param[()], [*]), CustomNode(Param[()], [*])]))\n"
     ]
    }
   ],

docs_nnx/key_concepts.md

Lines changed: 1 addition & 1 deletion
@@ -103,7 +103,7 @@ linear = jax.tree.unflatten(treedef, [value for _, value in arrays])
  [ 0.6772455 0.2807398 ]
  [ 0.16276604 0.16813846]
  [ 0.310975 -0.43336964]]
-treedef = PyTreeDef(CustomNode(Linear[(('_object__state', 'bias', 'kernel'), (('_object__nodes', frozenset({'kernel', '_object__state', 'bias'})), ('bias_init', <function zeros at 0x117826700>), ('dot_general', <function dot_general at 0x1172aa480>), ('dtype', None), ('in_features', 4), ('kernel_init', <function variance_scaling.<locals>.init at 0x120f45260>), ('out_features', 2), ('param_dtype', <class 'jax.numpy.float32'>), ('precision', None), ('promote_dtype', <function promote_dtype at 0x120f45440>), ('use_bias', True)))], [CustomNode(ObjectState[(False, False)], []), CustomNode(Param[()], [*]), CustomNode(Param[()], [*])]))
+treedef = PyTreeDef(CustomNode(Linear[(('_pytree__state', 'bias', 'kernel'), (('_pytree__nodes', frozenset({'kernel', '_pytree__state', 'bias'})), ('bias_init', <function zeros at 0x117826700>), ('dot_general', <function dot_general at 0x1172aa480>), ('dtype', None), ('in_features', 4), ('kernel_init', <function variance_scaling.<locals>.init at 0x120f45260>), ('out_features', 2), ('param_dtype', <class 'jax.numpy.float32'>), ('precision', None), ('promote_dtype', <function promote_dtype at 0x120f45440>), ('use_bias', True)))], [CustomNode(PytreeState[(False, False)], []), CustomNode(Param[()], [*]), CustomNode(Param[()], [*])]))
 
 
 
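The only visible change here is the `_object__*` → `_pytree__*` naming inside the treedef repr; the flatten/unflatten round trip shown in the guide works the same way. A small sketch (the printed treedef string will differ by Flax/JAX version and machine):

```python
import jax
from flax import nnx

linear = nnx.Linear(4, 2, rngs=nnx.Rngs(0))

# NNX modules are pytrees, so the standard tree utilities apply directly.
leaves, treedef = jax.tree.flatten(linear)
print(treedef)  # repr now refers to '_pytree__state' rather than '_object__state'

# Rebuilding from the same leaves yields an equivalent module.
linear2 = jax.tree.unflatten(treedef, leaves)
```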

docs_nnx/migrating/nnx_010_to_nnx_011.rst

Lines changed: 1 addition & 1 deletion
@@ -226,5 +226,5 @@ use the ``is_leaf`` argument to specify that NNX modules and other NNX objects s
   type_names = jax.tree.map(
     lambda x: type(x).__name__,
     modules,
-    is_leaf=lambda x: isinstance(x, nnx.Object)  # <-- specify that NNX objects are leaves
+    is_leaf=lambda x: isinstance(x, nnx.Pytree)  # <-- specify that NNX objects are leaves
   )
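For context, a runnable sketch of the migrated snippet with a small stand-in `modules` dict (the dict contents are illustrative, not from the guide); the 0.11 change is only the `nnx.Object` → `nnx.Pytree` check in `is_leaf`:

```python
import jax
from flax import nnx

# Illustrative container of NNX modules.
modules = {
  'linear': nnx.Linear(2, 3, rngs=nnx.Rngs(0)),
  'dropout': nnx.Dropout(rate=0.1),
}

# Without is_leaf, jax.tree.map would recurse into each module's parameters;
# treating nnx.Pytree instances as leaves maps over the modules themselves.
type_names = jax.tree.map(
  lambda x: type(x).__name__,
  modules,
  is_leaf=lambda x: isinstance(x, nnx.Pytree),  # <-- NNX objects are leaves
)
print(type_names)  # e.g. {'dropout': 'Dropout', 'linear': 'Linear'}
```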

examples/nnx_toy_examples/10_fsdp_and_optimizer.py

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ class SGDState(nnx.Variable):
   pass
 
 
-class SGD(nnx.Object):
+class SGD(nnx.Pytree):
   def __init__(self, params: nnx.State, lr, decay=0.9):
     def init_optimizer_state(variable: nnx.Variable):
       return SGDState(

examples/nnx_toy_examples/mutable_array_basic.py

Lines changed: 9 additions & 15 deletions
@@ -13,14 +13,11 @@
 # limitations under the License.
 
 # %%
-import os
-
-os.environ['FLAX_MUTABLE_ARRAY'] = 'true'
-
 import jax
 import jax.numpy as jnp
 import matplotlib.pyplot as plt
 import numpy as np
+import optax
 
 from flax import nnx
 
@@ -57,24 +54,21 @@ def __call__(self, x):
     self.count[...] += 1
     return self.linear2(jax.nn.relu(self.linear1(x)) * 0.5)
 
-
-model = MLP(din=1, dhidden=32, dout=1, rngs=nnx.Rngs(0))
+with nnx.use_refs(True):
+  model = MLP(din=1, dhidden=32, dout=1, rngs=nnx.Rngs(0))
+  optimizer = nnx.Optimizer(model, optax.sgd(learning_rate=0.1), wrt=nnx.Param)
 
 
 @jax.jit
-def train_step(model, x, y):
-  graphdef, params, counts = nnx.pure(nnx.split(model, nnx.Param, Count))
+def train_step(model, optimizer, x, y):
+  graphdef, params, counts = nnx.split(model, nnx.Param, Count)
 
   def loss_fn(params):
     model = nnx.merge(graphdef, params, counts)
     return jnp.mean((y - model(x)) ** 2)
 
-  grads = jax.grad(loss_fn)(nnx.freeze(params))
-
-  def sgd(w, g):
-    w[...] -= 0.1 * g[...]
-
-  jax.tree.map(sgd, params, grads)
+  grads = jax.grad(loss_fn)(nnx.to_arrays(params))
+  optimizer.update(model, grads)
 
 
 @jax.jit
@@ -84,7 +78,7 @@ def test_step(model: MLP, x, y):
 
 total_steps = 10_000
 for step, (x, y) in enumerate(dataset(32)):
-  train_step(model, x, y)
+  train_step(model, optimizer, x, y)
 
   if step % 1000 == 0:
     logs = test_step(model, X, Y)
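Condensing the updated example, the new pattern is: build the model and `nnx.Optimizer` under `nnx.use_refs(True)`, then convert the ref-backed params with `nnx.to_arrays` only where `jax.grad` needs plain arrays. A minimal sketch using a bare `nnx.Linear` instead of the example's `MLP` (names follow this diff and may not match other Flax versions):

```python
import jax
import jax.numpy as jnp
import optax
from flax import nnx

with nnx.use_refs(True):
  model = nnx.Linear(1, 1, rngs=nnx.Rngs(0))
  optimizer = nnx.Optimizer(model, optax.sgd(learning_rate=0.1), wrt=nnx.Param)

@jax.jit
def train_step(model, optimizer, x, y):
  graphdef, params, rest = nnx.split(model, nnx.Param, ...)

  def loss_fn(params):
    model = nnx.merge(graphdef, params, rest)
    return jnp.mean((y - model(x)) ** 2)

  # jax.grad does not take ref-backed params yet, so convert them first.
  grads = jax.grad(loss_fn)(nnx.to_arrays(params))
  optimizer.update(model, grads)  # in-place update, nothing returned

train_step(model, optimizer, x=jnp.ones((8, 1)), y=jnp.zeros((8, 1)))
```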

examples/nnx_toy_examples/mutable_array_demo.py

Lines changed: 11 additions & 22 deletions
@@ -40,12 +40,6 @@ def dataset(batch_size):
 # so we use a new Pytree type as the base. The main difference with current NNX is that
 # attributes that contain arrays or other pytrees now need to be explicitly marked as
 # using `nnx.data` to be included in the pytree.
-#
-# Variable changes in a couple of ways:
-# * its now implements the pytree protocol
-# * it can only hold arrays
-# * it has a mutable attribute, when True it will hold a MutableArray
-# * [...] is used to access & mutate underlying array
 class Linear(nnx.Module):
   def __init__(self, din: int, dout: int, *, rngs: nnx.Rngs):
     self.din, self.dout = din, dout
@@ -103,7 +97,7 @@ def __call__(
     mean = jnp.mean(x, axis=0)
     var = jnp.var(x, axis=0)
     # ema updates
-    # stop gradient is used until a MutableArray supports updates from grad tracers
+    # stop gradient is used until an ArrayRef supports updates from grad tracers
     sg = jax.lax.stop_gradient
     self.mean[...] = sg(self.mu * self.mean[...] + (1 - self.mu) * mean)
     self.var[...] = sg(self.mu * self.var[...] + (1 - self.mu) * var)
@@ -131,7 +125,7 @@ def __init__(
     use_scan: bool = True,
     rngs: nnx.Rngs,
   ):
-    self.count: nnx.MutableArray = nnx.mutable_array(jnp.array(0))
+    self.count: nnx.ArrayRef = nnx.array_ref(jnp.array(0))
     self.block_in = Block(din, dhidden, rngs=rngs)
     self.linear_out = Linear(dhidden, dout, rngs=rngs)
 
@@ -142,9 +136,9 @@ def __init__(
 
       @jax.vmap
       def create_block(rngs, /):
-        return nnx.freeze(Block(dhidden, dhidden, rngs=rngs))
+        return nnx.to_arrays(Block(dhidden, dhidden, rngs=rngs))
 
-      self.blocks = nnx.mutable(create_block(rngs.fork(split=num_blocks)))
+      self.blocks = nnx.to_refs(create_block(rngs.fork(split=num_blocks)))
     else:
       self.blocks = [Block(dhidden, dhidden, rngs=rngs) for i in range(num_blocks)]
 
@@ -175,11 +169,11 @@ class OptState(nnx.Variable): ...
 
 
 # Optimizer are an interesting case as they are inherently stateful and
-# pose a good use case for MutableArray. Here we implement SGD with
+# pose a good use case for ArrayRef. Here we implement SGD with
 # momentum. The optimizer receives the params as constructor arguments but doesn't
 # hold a reference to them, it only uses the params to initialize its state
 # by creating new OptState Variables that reuse the param's metadata.
-class SGD(nnx.Object):
+class SGD(nnx.Pytree):
   def __init__(self, params, lr: float, decay: float = 0.9):
     self.lr = lr
     self.decay = decay
@@ -205,29 +199,24 @@ def update(self, params, grads):
     momentum = nnx.pure(self.momentum)
 
     def update_fn(
-      param: nnx.MutableArray, momentum: nnx.MutableArray, grad: jax.Array
+      param: nnx.ArrayRef, momentum: nnx.ArrayRef, grad: jax.Array
     ):
       momentum[...] = self.decay * momentum[...] + (1 - self.decay) * grad[...]
       param[...] -= self.lr * momentum[...]
 
     jax.tree.map(update_fn, params, momentum, grads)
 
 # ## Training
-# To setup the training loop we first instantiate the model and optimizer.
-# Variables are immutable (only contain Arrays) by default as it can make
-# initialization easier, however this means we have to use 'mutable' to
-# create the MutableArrays that will be updated during training.
 
-# activate mutable arrays
-with nnx.use_mutable_arrays(True):
+with nnx.use_refs(True):
   rngs = nnx.Rngs(params=0, dropout=1)
   model = Model(
     num_blocks=3, din=1, dhidden=256, dout=1, use_scan=False, rngs=rngs
   )
   optimizer = SGD(params=nnx.state(model, nnx.Param), lr=3e-3, decay=0.99)
 
 # Create a copy of the model structure and set its attributes to eval model.
-# This works because they share the underlying MutableArrays so both models
+# This works because they share the underlying ArrayRefs so both models
 # will always be in sync.
 eval_model = nnx.merge(*nnx.split(model))
 eval_model.set_attributes(use_stats=True, deterministic=True)
@@ -249,8 +238,8 @@ def loss_fn(params):
   return loss
 
 # For the time being we have to use 'freeze' make the Variables immutable
-# as 'jax.grad' doesn't support MutableArrays yet.
-grads = jax.grad(loss_fn)(nnx.freeze(params))
+# as 'jax.grad' doesn't support ArrayRefs yet.
+grads = jax.grad(loss_fn)(nnx.to_arrays(params))
 # 'update' mutates the optimizer's state and the params in place
 # so we don't need to return anything 🚀
 optimizer.update(params, grads)
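The core idea behind both the deleted hand-rolled `sgd` in the basic example and the `SGD.update` above is the same: grads are plain arrays, params and momentum are refs, and `jax.tree.map` applies the update leaf by leaf, mutating in place. A standalone sketch with toy stand-in trees (assuming, as the demo does, that refs are treated as pytree leaves):

```python
import jax
import jax.numpy as jnp
from flax import nnx

lr, decay = 3e-3, 0.99

# Toy stand-ins for the demo's param / momentum / grad trees.
params = {'w': nnx.array_ref(jnp.ones((3,)))}
momentum = {'w': nnx.array_ref(jnp.zeros((3,)))}
grads = {'w': jnp.full((3,), 0.5)}

def update_fn(param, mom, grad):
  # Both refs are mutated in place; nothing is returned.
  mom[...] = decay * mom[...] + (1 - decay) * grad
  param[...] -= lr * mom[...]

jax.tree.map(update_fn, params, momentum, grads)
print(params['w'])  # updated in place
```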

0 commit comments