
Update augmented_forward+backward implementation to attach residuals to all outputs #1834
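In rough terms: `_vjp_impl` now wraps every primal output in a `VJPDual` carrying the saved residuals, instead of attaching them only to the first output. The sketch below illustrates the before/after behavior; `VJPDual`, `sequencify`, and the attach helpers here are simplified stand-ins for illustration, not thunder's actual implementations.

```python
# Illustrative sketch only: VJPDual, sequencify, and the attach helpers below
# are simplified stand-ins, not thunder's actual implementations.
from collections import namedtuple

VJPDual = namedtuple("VJPDual", ["primal", "residuals"])


def sequencify(x):
    # Wrap a single output in a tuple so single- and multi-output symbols
    # are handled uniformly.
    return x if isinstance(x, (tuple, list)) else (x,)


def old_attach(out_primal, out_residuals):
    # Before this PR: residuals ride only on the first output; the backward
    # pass has to look them up there.
    if isinstance(out_primal, (tuple, list)):
        return (VJPDual(out_primal[0], out_residuals),
                *(VJPDual(o, tuple()) for o in out_primal[1:]))
    return (VJPDual(out_primal, out_residuals),)


def new_attach(out_primal, out_residuals):
    # After this PR: every output carries the same residuals, so they are
    # recoverable from whichever output is actually consumed downstream.
    return tuple(VJPDual(o, out_residuals) for o in sequencify(out_primal))


outs = ("chunk0", "chunk1")          # e.g. the two outputs of a split
residuals = ("saved_for_backward",)
print(old_attach(outs, residuals))   # residuals only on chunk0
print(new_attach(outs, residuals))   # residuals on both chunks
```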

Open · wants to merge 5 commits into main
10 changes: 4 additions & 6 deletions thunder/core/transforms.py
@@ -56,6 +56,7 @@
     OrderedSet,
     ProxyDict,
 )
+from thunder.core.codeutils import is_literal
 import thunder.clang as clang
 from thunder.clang import (
     empty,
@@ -2329,6 +2330,8 @@ def iter_bound_symbols(bound_symbols):
     for symbol in bound_symbols:
         if symbol.sym.id in trace_interpreter_skip_list:
             continue
+        elif all(is_literal(sym_out) for sym_out in symbol.flat_outs):
+            continue
         elif symbol.output is None:
             continue
         else:
@@ -2626,12 +2629,7 @@ def vjp_impl_const(symbol, *args, **kwargs):
     def _vjp_impl(*args, **kwargs):
         primals, kwargs = tree_map(lambda x: x.primal if isinstance(x, VJPDual) else x, (args, kwargs))
         out_primal, out_residuals = vjp_impl(*primals, **kwargs)
-        # We are saving the residuals and pullback only in the first output
-        # backward_pass then retrieves the residuals and pullback from the first output
-        if isinstance(out_primal, Sequence):
-            return (VJPDual(out_primal[0], out_residuals), *(VJPDual(o, tuple()) for o in out_primal[1:]))
-
-        return (VJPDual(out_primal, out_residuals),)
+        return tree_map(lambda x: VJPDual(x, out_residuals), sequencify(out_primal))
 
     return _vjp_impl
 
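Regarding the removed comment above ("backward_pass then retrieves the residuals and pullback from the first output"): if a program drops the first output and consumes only a later one, as in the new test below, the residuals under the old scheme were attached to a value the backward pass never saw. A rough sketch of that failure mode, again with simplified stand-ins rather than thunder's actual `backward_pass`:

```python
# Simplified stand-ins, not thunder's backward_pass / VJPDual.
from collections import namedtuple

VJPDual = namedtuple("VJPDual", ["primal", "residuals"])

# A two-output symbol where only the second output is consumed downstream,
# mirroring `_, x_2 = torch.split(x, 2)` in the new test.
old_duals = (VJPDual("chunk0", ("saved",)), VJPDual("chunk1", tuple()))
new_duals = (VJPDual("chunk0", ("saved",)), VJPDual("chunk1", ("saved",)))

consumed_old = old_duals[1]    # chunk0 is dropped by the program
consumed_new = new_duals[1]

print(consumed_old.residuals)  # () -> the saved values are unreachable
print(consumed_new.residuals)  # ('saved',) -> still available for the pullback
```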
20 changes: 20 additions & 0 deletions thunder/tests/test_grad.py
@@ -1939,6 +1939,26 @@ def func(x):
     torch.testing.assert_close(actual_gr, expected_gr)
 
 
+def test_unused_first_output():
+    def forward(x):
+        _, x_2 = torch.split(x, 2)
+        return x_2
+
+    jforward = thunder.jit(forward)
+
+    x = make_tensor([4, 2], dtype=torch.bfloat16, device="cpu", requires_grad=True)
+
+    actual = jforward(x)
+    expected = forward(x)
+    torch.testing.assert_close(actual, expected)
+
+    grad_o = torch.randn_like(actual)
+
+    actual_grad = torch.autograd.grad(actual, x, grad_o)
+    expected_grad = torch.autograd.grad(expected, x, grad_o)
+    torch.testing.assert_close(actual_grad, expected_grad)
+
+
 @pytest.mark.parametrize("device", ("cuda", "cpu"))
 def test_backward_recomputation_decomposed_ops(device):
     if device == "cuda" and not torch.cuda.is_available():