@@ -67,7 +67,7 @@ std::string get_ofi_lib_path() {
6767 return ofi_lib_path;
6868}
6969
70- std::string get_relative_ofi_lib_path () {
70+ static std::string get_relative_ccl_root_path () {
7171 Dl_info info;
7272
7373 if (dladdr ((void *)ccl::get_library_version, &info)) {
@@ -84,16 +84,15 @@ std::string get_relative_ofi_lib_path() {
8484 char * ccl_root_cstr = dirname (libccl_dir);
8585
8686 auto ccl_root = std::string (ccl_root_cstr);
87- setenv ( " FI_PROVIDER_PATH " , (ccl_root + " /lib/libfabric/prov " ). c_str (), 0 );
88- return ccl_root + " /lib/libfabric/libfabric.so " ;
87+
88+ return ccl_root;
8989 }
9090 }
9191
92- LOG_DEBUG (" Could not fetch relative path to libfabric. Fallback to `libfabric.so`" );
93- return " libfabric.so" ;
92+ return {};
9493}
9594
96- bool ofi_api_init () {
95+ static bool load_libfabric () {
9796 ofi_lib_info.ops = &ofi_lib_ops;
9897 ofi_lib_info.fn_names = ofi_fn_names;
9998 ofi_lib_info.path = get_ofi_lib_path ();
@@ -106,7 +105,20 @@ bool ofi_api_init() {
106105 print_error (error, ofi_lib_info);
107106 LOG_INFO (" Retrying to load libfabric.so using relative path" );
108107
109- ofi_lib_info.path = get_relative_ofi_lib_path ();
108+ auto realtive_root = get_relative_ccl_root_path ();
109+ if (realtive_root.empty ()) {
110+ return false ;
111+ }
112+
113+ // Path up to IMPI 2021.14
114+ ofi_lib_info.path = realtive_root + " /lib/libfabric/libfabric.so" ;
115+ error = load_library (ofi_lib_info);
116+ if (error == CCL_LOAD_LB_SUCCESS) {
117+ return true ;
118+ }
119+
120+ // Path in IMPI 2021.15
121+ ofi_lib_info.path = realtive_root + " /lib/libfabric.so" ;
110122 error = load_library (ofi_lib_info);
111123 if (error == CCL_LOAD_LB_SUCCESS) {
112124 return true ;
@@ -116,6 +128,46 @@ bool ofi_api_init() {
116128 return false ;
117129}
118130
131+ static void setup_providers () {
132+ const char * fi_provider_path = getenv (" FI_PROVIDER_PATH" );
133+ if (fi_provider_path != nullptr ) {
134+ LOG_DEBUG (" FI_PROVIDER_PATH is already set to: " , fi_provider_path);
135+ return ;
136+ }
137+
138+ char libfabric_path[PATH_MAX];
139+ dlinfo (ofi_lib_info.handle , RTLD_DI_ORIGIN, &libfabric_path);
140+
141+ // Add realpath to resolve any symlinks and get the absolute path
142+ char real_libfabric_path[PATH_MAX];
143+ if (!realpath (libfabric_path, real_libfabric_path)) {
144+ LOG_ERROR (" Failed to resolve libfabric realpath: " , strerror (errno));
145+ return ;
146+ }
147+
148+ std::string primary_path = std::string (real_libfabric_path);
149+ std::string secondary_path = primary_path + " /prov" ;
150+
151+ // Construct the full provider path with colon separator
152+ std::string full_provider_path = primary_path + " :" + secondary_path;
153+
154+ if (setenv (" FI_PROVIDER_PATH" , full_provider_path.c_str (), 1 ) != 0 ) {
155+ LOG_ERROR (" Failed to set FI_PROVIDER_PATH with error: " , strerror (errno));
156+ return ;
157+ }
158+
159+ LOG_DEBUG (" FI_PROVIDER_PATH set to: " , full_provider_path);
160+ }
161+
162+ bool ofi_api_init () {
163+ if (load_libfabric () == false ) {
164+ return false ;
165+ }
166+
167+ setup_providers ();
168+ return true ;
169+ }
170+
119171void ofi_api_fini () {
120172 LOG_DEBUG (" close OFI lib: handle: " , ofi_lib_info.handle );
121173 close_library (ofi_lib_info);
0 commit comments