veScale is an internal PyTorch Distributed library enabling hyperscale distributed training of large language models (LLMs) and reinforcement learning (RL) workloads. This repo open-sources a small piece of veScale for the benefit of the community.
-
veScale 2025-2026: https://arxiv.org/abs/2602.22437
-
veScale 2023-2025: https://arxiv.org/abs/2509.07003
% arXiv preprint (2026). Title acronyms are braced so styles that apply
% sentence casing cannot downcase "veScale-FSDP" / "FSDP"; author names use
% the unambiguous "Last, First" form.
@misc{wang2026vescalefsdpflexiblehighperformancefsdp,
  title         = {{veScale-FSDP}: Flexible and High-Performance {FSDP} at Scale},
  author        = {Wang, Zezhou and Li, Youjie and Lin, Zhiqi and Yang, Jiacheng and Xie, Cong and Feng, Guanyu and Zhong, Zheng and Huang, Ziyue and Zhu, Hongyu and Zhang, Zhi and Peng, Yanghua and Liu, Xin},
  year          = {2026},
  eprint        = {2602.22437},
  archivePrefix = {arXiv},
  primaryClass  = {cs.DC},
  url           = {https://arxiv.org/abs/2602.22437},
}
% arXiv preprint (2025). Title proper nouns/acronyms ("veScale", "SPMD") are
% braced against style recasing; author names use the unambiguous
% "Last, First" form. The license sentence that was fused onto the closing
% brace is moved to its own line (text outside an entry is ignored by BibTeX).
@misc{li2025vescale,
  title         = {{veScale}: Consistent and Efficient Tensor Programming with Eager-Mode {SPMD}},
  author        = {Li, Youjie and Wan, Cheng and Lin, Zhiqi and Zhu, Hongyu and Yang, Jiacheng and Song, Ziang and Di, Xinyi and Wu, Jiawei and Shu, Huiyao and Bao, Wenlei and Peng, Yanghua and Lin, Haibin and Chang, Li-Wen},
  year          = {2025},
  eprint        = {2509.07003},
  archivePrefix = {arXiv},
  primaryClass  = {cs.PL},
  url           = {https://arxiv.org/abs/2509.07003},
}

The veScale Project is under the Apache License v2.0.