diff --git a/AUTHORS b/AUTHORS index 6b33898018b..05537efd8df 100644 --- a/AUTHORS +++ b/AUTHORS @@ -10,6 +10,7 @@ Alex McKinley Alexey Novikov Alexia Ingerson alexia.ingerson +Ali Mohammed Amir Shehata Amir Shehata Amith Abraham @@ -65,6 +66,7 @@ Chris Taylor Chuck Fossen Chuck Fossen Chuck Fossen +Cody D'Ambrosio Cody Mann Coni Gehler ct-clmsn @@ -177,8 +179,10 @@ Ken Raffenetti Kevan Rehm Kevan rehm Kevin Ji <1146876+kevinji@users.noreply.github.com> +Kevin Ji kseager Kyle Gerheiser +Kyle Kim Latchesar Ionkov Leena Radeke Lindsay Reiser @@ -210,8 +214,11 @@ mmubarak Mohan Gandhi muttormark Mélian Fruchart +Nathan Na Neil Spruit Nicholas Sielicki +Nick Mazzilli +Nicolas Morey Nicolas Morey-Chaisemartin Nikhil Nanal nikhil nanal @@ -220,6 +227,7 @@ nikhilnanal nikhilnanal Nikita Gusev Nikola Dancejic +Nitin Bhat Noam Beer Oblomov, Sergey Oblomov, Sergey @@ -292,6 +300,7 @@ Steven Dashevsky Steven Vormwald Sung-Eun Choi Sung-Eun Choi +Sunita Nadampalli Sylvain Didelot Sylvain Didelot szegel diff --git a/Makefile.am b/Makefile.am index bab64020630..33cf721d218 100644 --- a/Makefile.am +++ b/Makefile.am @@ -235,7 +235,7 @@ src_libfabric_la_LIBADD = src_libfabric_la_DEPENDENCIES = libfabric.map if !EMBEDDED -src_libfabric_la_LDFLAGS += -version-info 30:0:29 +src_libfabric_la_LDFLAGS += -version-info 31:0:30 endif src_libfabric_la_LDFLAGS += -export-dynamic \ $(libfabric_version_script) diff --git a/NEWS.md b/NEWS.md index e15855f27a8..d515f44a5e8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,250 @@ bug fixes (and other actions) for each version of Libfabric since version 1.0. New major releases include all fixes from minor releases with earlier release dates. +v2.4.0, Mon December 15, 2025 +============================= + +## Core ## + +- hmem/cuda: Adding more robust libgdrapi libpaths +- Update bindings/rust/README.md to reflect the recommended build process. +- Update build.rs to support both cargo build & cargo publish work directories. 
+- Update Cargo.toml in preparation for crates.io publishing. +- configure: Fix sanitizer detection logic +- Introduce lightweight Rust bindings for Libfabric, using bindgen. +- include/ofi_indexer: introduce new ofi_array_at_max function +- man/fi_cxi: fixup info for FI_CXI_RDZV_GET_MIN +- man/fi_getinfo: Update the capabilities with mode bits requirements +- man/fi_cq: Document `FI_GETWAITOBJ` for `fi_control` +- man/fi_fabric: Update `fi_tostr()` datatypes + +## CXI ## + +- Add domain rx match mode override +- Set rendezvous eager size default to 2K +- Change cuda dmabuf default to enabled +- Do not abort if MR match count does not reconcile +- Allow CP for triggered CQ to remap to Best Effort +- Fix sl-driver path for testing +- Set max domain TX CQs to 14 +- Use cxil_alloc_trig_cp to distinguish trig and tx cmdqs +- Add FI_EBUSY debug messages +- Fix validation of service id +- Fix criterion test_sw tap files +- Cxip_cmdq_cp_modify fix +- Revert check cmdq_ack_counter before selecting cmdq +- Fix RNR protocol send byte/error counting +- Release TX credit when pending RNR retry +- Update rocr test fine grained flags +- Check cmdq_ack_counter before selecting cmdq +- Fix DEVICE in fi_info_test +- Introduce non-debug tracing +- Reset timer on rx of ARM packet +- Fix performance issue with close_mc() +- Increase vni range in auth_key tests +- Bump up FI_VERSION to 2.3 +- Support auth_key ranges +- Fix use of hw_cps and memory leak + +## EFA ## + +- Fix cq data size in efa-rdm pkt post +- fix test_efa_rdm_mr_reg_cuda_memory unit test +- adjust the memory barrier positions +- Optimize RTW packet sending by replacing efa_rdm_ope_post_send +- Adjust logging level for txe releases +- Add tracepoints for handshake +- Add flags to MR logs +- Grow efa_tx_pkt_pool and ope_pool during rdm ep creation +- Do not use rdma write when unsolicited recv support is inconsistent +- Determine whether using device rdma based on p2p +- Introduce pke generation counter for protocol path 
+- Enable data path direct for efa-rdm +- Update the function signature for efa_data_path_direct_cq_initialize +- Move efa_cq_open_ibv_cq to efa_cq.c +- Do not track rx pkt pool for non-debug build +- Temporarily disable FI_OPT_EFA_SENDRECV_IN_ORDER_ALIGNED_128_BYTES support for efa protocol +- do not ignore local read completion +- Add missing lttng tps in efa_post_send +- Fix the remote cq data flags for zcpy recv +- Optimize the WQE post in data path direct +- fix typos in error messages +- Only show help message for OPE warn logs +- configure: replace no-break space with regular space character +- Remove unused function declarations +- Acquire CQ's `ep_list_lock` during counter progress +- Add asserts to detect erroneous CQE dereferences +- Ignore rma completion to a removed peer +- Remove the incorrect check for device max_msg_size +- Fix function signature mismatch +- Set FI_RX_CQ_DATA for efa direct with NULL hints +- Do not fail fi_getinfo for the wrong fabric +- Log warnings only for internal OPE failures or if CQ error entry not written +- Add unit tests for LRU AH eviction +- Evict AH with no explicit AV entries when AH limit reached +- Add locking assertions and update unit tests +- Remove efa_conn_release unsafe +- Require FI_RX_CQ_DATA on devices without unsolicited write recv +- Add LTTng tracepoints for direct data path operations +- Don't warn users about non-EFA devices +- Support FI_RX_CQ_DATA for efa-direct +- Fix deadlocks in AV insert/remove/close and CQ read paths +- Don't try to release a lock that is not taken +- set RUNPATH if custom rdma-core provided +- Remove rx_msg_flags from efa_rdm_msg_recv/efa_rdm_msg_recvv +- Update tracepoints in the receive path +- Slide recv-win on RTM/RTA error +- Insert read and write packets to tx debug list + +## LNX ## + +- remove force setting DEVICE_ONLY flag +- set core hints proto to UNSPEC +- remove iov count failures +- add wait object implementation + +## PSM3 ## + +- Fix integer overflow in 
psm3_ips_scbctrl_init() + +## RXD ## + +- Allow dg_addr to be 0 + +## SHM ## + +- properly disable xpmem for FI_HMEM + +## UCX ## + +- Use safe dlist_foreach during unmap +- Fix multi-threading support +- Fix completion flags for RMA operations +- Return base_addr in fi_mr_raw_attr() +- Fix completion entries generation for msg operations. +- Fix ops structs initialization +- Change ucx from FI_AV_MAP to FI_AV_TABLE + +## Util ## + +- Handle signal interrupts in uffd memory monitor +- Use kdreg2 instead of memhooks as default when available + +## Fabtests ## + +- cq_data: Fix the error propagation +- multi_ep: fix fi_info usage +- Add OOB finalize sync option +- Add av_lookup_good test +- pytest: Increase message size step for 0-1MB range +- bandwidth_rma: Add server-client sync during RMA writedata warmup +- bandwidth_rma: Wait for all pending completions after warmup +- efa: Wait for send completion from each endpoint +- efa: Use oob_port in multi_ep_stress +- efa: Fixes to multi_ep_mt test +- Disable FI_RX_CQ_DATA correctly +- getinfo: Remove test_caps_regression +- efa: Add multi_ep_stress test +- rdm_bw_mt: Fix performance timer in uni_bandwidth function +- efa: Implement fallback logic for remote fi_info path +- efa: support testing different rx cq data mode +- Support toggling rx cq data mode +- Add lnx ubertest file +- Add lnx exclude file +- efa: Add AV removal to multi_ep_mt test +- Fix build issues for MacOS +- efa: cuda dmabuf validation logic +- cuda dmabuf validation logic +- Change default AV to AV_TABLE + + +v2.3.1, Mon October 20, 2025 +============================ + +## Core ## + +- include/windows: Add definition for realpath() + +## EFA ## + +- Tune recvwindow and peer reorder buffer pool sizes +- Allocate peer reorder buffers from a pre-allocated pool +- Allocate peer map entry pool during the rdm ep create +- Create shm info on demand for efa_rdm_ep_open +- Check whether to enable shm inside efa_shm_info_create +- Ring doorbell when reaching 
the max batch wqe cnt +- Fix the queued ope progress in ep close +- Logging improvements and unit test fix +- Grab srx lock when releasing rx entries in AV close +- Remove duplicate macro for rdma sge limit +- Fix memory leak from efa_srx_unexp_pkt unit test +- Fix unit test build error for rdma-core without HAVE_CAPS_UNSOLICITED_WRITE_RECV +- Check rdma iov limit in data path direct +- Add packet entry flag tracking for double linked list management +- Remove the extra dlist remove for peers in ep close +- Add traces for completions bypassing util-cq +- Update shared domain caps and modes +- Move domain check inside the fabric lock +- Take domain lock to protect concurrent access to domain fields +- Return matching fabric/domain in fi_getinfo +- Remove duplicated efa_cq_construct_cq_entry +- Fix the memleak for efa_rdm_peer +- Introduce error status for ope +- Cleanup the queue flags after dlist_remove +- Allocate cq err_buf on demand +- Bypass util cq in the data path of efa-direct +- Fix the unsolicited write recv support in unit-test +- Add unit-tests for lock types +- Shift the src_addr correctly in sreadfrom +- Acquire the same lock for qp lifecycle +- Ignore stale cqes from destroyed qps +- Use IBV_QUERY_QP_DATA_IN_ORDER_DEVICE_ONLY flag when available + +## OPX ## + +- 0 byte write_data Error +- 16B 3-nodes seg fault with openmpi +- Atomics and bounce buffer fixes +- Only open IPC cache if HMEM initialized and IPC enabled +- Fix ROCR IPC build errors +- Use getpid() instead of gettid() for POSIX compliancy. 
+- Fix compilation error + +## RXD ## + +- Fix return of EAGAIN when not enough tx entries + +## UCX ## + +- Use noop callback in ucx_inject_write +- Add a noop callback function for inject send path + +## UDP ## + +- Cleanup CQ parameters +- Fix CQ overflow case + +## Util ## + +- Remove unmatched rx entry from unspec unexpected queue +- Add status enum to util rx entry +- Extract helper function to lookup existing fabric/domain +- Use hints name for fabric/domain lookup + +## Verbs ## + +- Return PCI bus information with fi_getinfo() +- Fix compiler warning on the bound of 'strncpy' call + +## Fabtests ## + +- efa: Run FI_MORE test with more iterations +- efa: Add implicit AV test +- Split OOB address exchange function +- efa: Fix the av operation +- efa: Improve rnr_read_cq_error test + + v2.3.0, Mon September 15, 2025 ============================== diff --git a/configure.ac b/configure.ac index daf56cfa4a8..f5ec31eee3f 100644 --- a/configure.ac +++ b/configure.ac @@ -10,7 +10,7 @@ dnl dnl Process this file with autoconf to produce a configure script. AC_PREREQ([2.60]) -AC_INIT([libfabric], [2.4.0a1], [ofiwg@lists.openfabrics.org]) +AC_INIT([libfabric], [2.4.0rc1], [ofiwg@lists.openfabrics.org]) AC_CONFIG_SRCDIR([src/fabric.c]) AC_CONFIG_AUX_DIR(config) AC_CONFIG_MACRO_DIR(config) diff --git a/fabtests/configure.ac b/fabtests/configure.ac index d05bcb5790d..7014836f52f 100644 --- a/fabtests/configure.ac +++ b/fabtests/configure.ac @@ -5,7 +5,7 @@ dnl dnl Process this file with autoconf to produce a configure script. 
AC_PREREQ(2.57) -AC_INIT([fabtests], [2.4.0a1], [ofiwg@lists.openfabrics.org]) +AC_INIT([fabtests], [2.4.0rc1], [ofiwg@lists.openfabrics.org]) AC_CONFIG_AUX_DIR(config) AC_CONFIG_MACRO_DIR(config) AC_CONFIG_HEADERS(config.h) diff --git a/include/rdma/fabric.h b/include/rdma/fabric.h index b1324ad8d82..f2c47178084 100644 --- a/include/rdma/fabric.h +++ b/include/rdma/fabric.h @@ -73,7 +73,7 @@ extern "C" { #endif #define FI_MAJOR_VERSION 2 -#define FI_MINOR_VERSION 3 +#define FI_MINOR_VERSION 4 #define FI_REVISION_VERSION 0 /* Removing these breaks the build for some apps. diff --git a/include/windows/config.h b/include/windows/config.h index 4888196900a..7729f4d2c4f 100644 --- a/include/windows/config.h +++ b/include/windows/config.h @@ -262,7 +262,7 @@ #define PACKAGE_TARNAME PACKAGE /* Define to the version of this package. */ -#define PACKAGE_VERSION "2.4.0a1" +#define PACKAGE_VERSION "2.4.0rc1" /* Define to the full name and version of this package. */ #define PACKAGE_STRING PACKAGE_NAME " " PACKAGE_VERSION