Skip to content

Commit 2d18b7a

Browse files
add atleast_{1d, 2d, 3d} ops & fix snippet_phantom_grad_vs_torch_consistency test (#1881)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 7eb2e07 commit 2d18b7a

File tree

5 files changed

+100
-5
lines changed

5 files changed

+100
-5
lines changed

thunder/executors/torchex.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1175,6 +1175,9 @@ def _triu_transform(a: TensorLike, /, diagonal: int = 0, *, fill_value: None | N
11751175
# Bind torch-executor operation symbols; the atleast_{1,2,3}d entries are the new
# additions in this commit (diff line-number artifacts from the scrape are kept verbatim).
argmax = _register_torch_operation("argmax")
11761176
argmin = _register_torch_operation("argmin")
11771177
topk = _register_torch_operation("topk")
1178+
atleast_1d = _register_torch_operation("atleast_1d")
1179+
atleast_2d = _register_torch_operation("atleast_2d")
1180+
atleast_3d = _register_torch_operation("atleast_3d")
11781181

11791182

11801183
#
@@ -1259,6 +1262,9 @@ def _topk_transform(
12591262
# Route thunder.torch (ltorch) symbols to the torch operations registered above.
# All four new atleast_* entries use checker=_always_executable and need no
# execution transform (direct passthrough to the torch ops).
_register_implementation(ltorch.argmax, argmax, checker=_always_executable)
12601263
_register_implementation(ltorch.argmin, argmin, checker=_always_executable)
12611264
_register_implementation(ltorch.topk, topk, checker=_always_executable, execution_transform=_topk_transform)
1265+
_register_implementation(ltorch.atleast_1d, atleast_1d, checker=_always_executable)
1266+
_register_implementation(ltorch.atleast_2d, atleast_2d, checker=_always_executable)
1267+
_register_implementation(ltorch.atleast_3d, atleast_3d, checker=_always_executable)
12621268

12631269

12641270
#

thunder/tests/opinfos.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5356,6 +5356,7 @@ def unsqueeze_sample_generator(op, device, dtype, requires_grad, **kwargs):
53565356

53575357
unsqueeze_opinfo = OpInfo(
53585358
clang.unsqueeze,
5359+
supports_grad=True,
53595360
sample_input_generator=unsqueeze_sample_generator,
53605361
jax_reference=jax.lax.expand_dims if JAX_AVAILABLE else None,
53615362
test_directives=(
@@ -6018,6 +6019,53 @@ def topk_error_generator(op, device, **kwargs):
60186019
reduction_ops.append(topk_opinfo)
60196020

60206021

6022+
def atleast_1d2d3d_sample_generator(op, device, dtype, requires_grad, **kwargs):
    """Yield SampleInputs shared by the atleast_1d/2d/3d OpInfos.

    Covers single tensors of rank 0 through 4, then several multi-tensor
    calls mixing ranks, so both the single-argument (tensor) and
    multi-argument (tuple) return paths are exercised.
    """
    make = partial(make_tensor, dtype=dtype, device=device, requires_grad=requires_grad)

    # Single-tensor samples: one per rank from scalar up to 4-D.
    single_shapes = (
        (),
        (4,),
        (5, 5),
        (6, 7, 8),
        (3, 3, 3, 3),
    )
    for shape in single_shapes:
        yield SampleInput(make(shape))

    # Multi-tensor samples with mixed ranks.
    mixed_rank_groups = (
        ((), (2,)),
        ((2,), (5, 5)),
        ((), (2,), (4, 4)),
    )
    for shapes in mixed_rank_groups:
        yield SampleInput(*(make(shape) for shape in shapes))

    # Four tensors at once, shapes given positionally (make_tensor accepts *shape).
    yield SampleInput(make(2, 3), make(4, 5), make(6, 6, 6), make(5, 5, 5, 5))
6040+
6041+
6042+
def _make_atleast_opinfo(thunder_op, torch_ref):
    # Build one OpInfo of the atleast_{1,2,3}d family; they differ only in
    # the thunder symbol and the torch reference, and share a sample generator.
    return OpInfo(
        thunder_op,
        supports_grad=True,
        sample_input_generator=atleast_1d2d3d_sample_generator,
        torch_reference=torch_ref,
    )


atleast_1d_opinfo = _make_atleast_opinfo(ltorch.atleast_1d, torch.atleast_1d)
reduction_ops.append(atleast_1d_opinfo)

atleast_2d_opinfo = _make_atleast_opinfo(ltorch.atleast_2d, torch.atleast_2d)
reduction_ops.append(atleast_2d_opinfo)

# NOTE(review): these are shape ops, but the original change files them under
# reduction_ops; kept as-is to preserve which test groups pick them up.
atleast_3d_opinfo = _make_atleast_opinfo(ltorch.atleast_3d, torch.atleast_3d)
reduction_ops.append(atleast_3d_opinfo)
6067+
6068+
60216069
opinfos.extend(reduction_ops)
60226070

60236071

thunder/tests/test_grad.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1320,7 +1320,12 @@ def is_output_differentiable(x):
13201320
# torch.return_types.topk(
13211321
# values=tensor([1., 1.]),
13221322
# indices=tensor([0, 1]))
1323-
return x.grad_fn is not None
1323+
return x.grad_fn is not None or is_returning_self(x)
1324+
1325+
def is_returning_self(x):
1326+
if x.is_leaf and x.requires_grad:
1327+
return True
1328+
return False
13241329

13251330
def filter_differentiable_outputs(outputs):
13261331
if isinstance(outputs, torch.Tensor):
@@ -1380,7 +1385,10 @@ def upcast_tensors(x: Any) -> Any:
13801385
thunder_flat_grads = grad_op(*sample.args, **sample.kwargs)
13811386

13821387
assert_closer(
1383-
reference=reference_grad_result, candidate=thunder_flat_grads, competitor=torch_grad_result, comparator=comp
1388+
reference=reference_grad_result,
1389+
candidate=thunder_flat_grads,
1390+
competitor=torch_grad_result,
1391+
comparator=comp,
13841392
)
13851393

13861394

thunder/torch/__init__.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3005,6 +3005,42 @@ def topk(
30053005
return clang.topk(a, k, dim, largest, sorted, out=out)
30063006

30073007

3008+
@torchsymbol(torch.atleast_1d, is_method=True)
3009+
def atleast_1d(*args: Union[TensorLike, Sequence[TensorLike]]) -> Union[TensorLike, tuple[TensorLike, ...]]:
3010+
res = tuple(a if a.ndim >= 1 else unsqueeze(a, 0) for a in args)
3011+
return res if len(res) > 1 else res[0]
3012+
3013+
3014+
@torchsymbol(torch.atleast_2d, is_method=True)
3015+
def atleast_2d(*args: Union[TensorLike, Sequence[TensorLike]]) -> Union[TensorLike, tuple[TensorLike, ...]]:
3016+
3017+
def _unsqueeze_atleast(a):
3018+
if a.ndim == 0:
3019+
return a.unsqueeze(0).unsqueeze(1)
3020+
elif a.ndim == 1:
3021+
return a.unsqueeze(0)
3022+
return a
3023+
3024+
res = tuple(_unsqueeze_atleast(a) if isinstance(a, TensorProxy) else a for a in args)
3025+
return res if len(res) > 1 else res[0]
3026+
3027+
3028+
@torchsymbol(torch.atleast_3d, is_method=True)
3029+
def atleast_3d(*args: Union[TensorLike, Sequence[TensorLike]]) -> Union[TensorLike, tuple[TensorLike, ...]]:
3030+
3031+
def _unsqueeze_atleast(a):
3032+
if a.ndim == 0:
3033+
return a.reshape(1, 1, 1)
3034+
elif a.ndim == 1:
3035+
return a.reshape(1, -1, 1)
3036+
elif a.ndim == 2:
3037+
return a.unsqueeze(-1)
3038+
return a
3039+
3040+
res = tuple(_unsqueeze_atleast(a) if isinstance(a, TensorProxy) else a for a in args)
3041+
return res if len(res) > 1 else res[0]
3042+
3043+
30083044
@torchsymbol(torch.sort, is_method=True)
30093045
def sort(
30103046
a: TensorLike, /, dim: None | int = None, descending: bool = False, stable: bool = False, *, out=None

thunder/torch/default_torch_ops.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,6 @@
2626
torch.arctanh,
2727
torch.argsort,
2828
torch.argwhere,
29-
torch.atleast_1d,
30-
torch.atleast_2d,
31-
torch.atleast_3d,
3229
torch.batch_norm_backward_elemt,
3330
torch.batch_norm_backward_reduce,
3431
torch.batch_norm_elemt,

0 commit comments

Comments
 (0)