
Commit f0bf91c

[Not for landing] piggy back on titan for scale init test

ghstack-source-id: 8852e63604fc363c10327b8cd997a5a40c3a7533
Pull Request resolved: #841
Parent: fb0a942

1 file changed: +13 −4

train.py (+13 −4)
@@ -430,7 +430,16 @@ def loss_fn(pred, labels):
 
 
 if __name__ == "__main__":
-    config = JobConfig()
-    config.parse_args()
-    main(config)
-    torch.distributed.destroy_process_group()
+    # The first iteration is just for warm-up.
+    for root_size in [128, 8, 16, 32, 64, 128, 256]:
+        os.environ["TORCH_NCCL_RANKS_PER_ROOT"] = str(root_size)
+        start = time.perf_counter()
+        torch.distributed.init_process_group(backend="nccl")
+        torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
+        torch.distributed.barrier()
+        end = time.perf_counter()
+        torch.distributed.destroy_process_group()
+        print(f"Time to init process group: {end - start:.6f} seconds for {root_size} ranks per root")
+    # config = JobConfig()
+    # config.parse_args()
+    # main(config)
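
The hunk above relies on train.py already importing os and time. For reference, a minimal standalone sketch of the same timing loop, runnable on its own under a torchrun-style launcher (torchrun sets LOCAL_RANK and the rendezvous env vars per worker). The file name bench_init.py and the reading of TORCH_NCCL_RANKS_PER_ROOT as a knob for how many ranks share one NCCL bootstrap root are assumptions here, not taken from the commit.

# bench_init.py -- hypothetical standalone version of the loop in this commit.
import os
import time

import torch
import torch.distributed as dist

if __name__ == "__main__":
    # 128 appears first so the initial iteration serves as warm-up only.
    for root_size in [128, 8, 16, 32, 64, 128, 256]:
        # Assumption: this env var is read at init time to set the NCCL
        # bootstrap fan-out (ranks per root).
        os.environ["TORCH_NCCL_RANKS_PER_ROOT"] = str(root_size)
        start = time.perf_counter()
        dist.init_process_group(backend="nccl")  # rendezvous via env vars
        # LOCAL_RANK is provided to each worker by torchrun.
        torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
        dist.barrier()  # wait until every rank finishes init before stopping the clock
        end = time.perf_counter()
        dist.destroy_process_group()
        print(f"init took {end - start:.6f} s with {root_size} ranks per root")

Launched with, e.g., torchrun --nproc_per_node=8 bench_init.py, each rank prints one timing line per tested root size.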
