@@ -2537,18 +2537,19 @@ def div_sample_generator(op, device, dtype, requires_grad, **kwargs):
2537
2537
torch_reference = torch .div ,
2538
2538
test_directives = (
2539
2539
# NOTE: PyTorch doesn't support boolean division
2540
- # TODO: fix dtype mismatch when using nvfuser executors
2541
2540
DecorateInfo (
2542
2541
pytest .mark .xfail ,
2543
2542
"test_core_vs_torch_consistency" ,
2544
2543
dtypes = (datatypes .bool8 ,),
2545
2544
devicetypes = (devices .DeviceType .CPU , devices .DeviceType .CUDA ),
2546
2545
),
2546
+ # NOTE: bfloat16 and float16 are still expected to fail (xfail) with the nvfuser executor
2547
+ # See: https://github.com/Lightning-AI/lightning-thunder/issues/1724
2547
2548
DecorateInfo (
2548
2549
pytest .mark .xfail ,
2549
2550
"test_core_vs_torch_consistency" ,
2550
2551
executors = ("nvfuser" ,),
2551
- dtypes = (datatypes .bool8 , datatypes .bfloat16 , datatypes .float16 , datatypes . float32 ),
2552
+ dtypes = (datatypes .bool8 , datatypes .bfloat16 , datatypes .float16 ),
2552
2553
),
2553
2554
DecorateInfo (pytest .mark .xfail , "test_vjp_correctness" ),
2554
2555
),
@@ -2718,6 +2719,17 @@ def where_sample_generator(op, device, dtype, requires_grad, **kwargs):
2718
2719
pred , a , b = make (pred_shape , dtype = torch .bool , requires_grad = False ), make (a_shape ), make (b_shape )
2719
2720
yield SampleInput (pred , a , b )
2720
2721
2722
+ # NOTE: when requires_grad is set, the non-pred inputs (a and b) must be tensors, so scalar inputs are only generated otherwise.
2723
+ if not requires_grad :
2724
+ # generate scalar inputs
2725
+ dtypes = [float , int , bool , complex ]
2726
+
2727
+ for dtype in dtypes :
2728
+ pred = make ([2 , 3 ], dtype = torch .bool , requires_grad = False )
2729
+ a = dtype (1.0 )
2730
+ b = dtype (0.0 )
2731
+ yield SampleInput (pred , a , b )
2732
+
2721
2733
2722
2734
def where_error_generator (op , device , dtype = torch .float32 , ** kwargs ):
2723
2735
make = partial (make_tensor , device = device , dtype = dtype )
0 commit comments