Skip to content

Commit 4f1449d

Browse files
authored
Merge pull request #182 from maciekpac/release/ccl_2021.16.2
Intel(R) oneAPI Collective Communications Library (oneCCL) 2021.16.2
2 parents ca1d314 + a1175bf commit 4f1449d

File tree

10 files changed

+119
-72
lines changed

10 files changed

+119
-72
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ endif()
343343

344344
set(CCL_MAJOR_VERSION "2021")
345345
set(CCL_MINOR_VERSION "16")
346-
set(CCL_UPDATE_VERSION "1")
346+
set(CCL_UPDATE_VERSION "2")
347347
set(CCL_PRODUCT_STATUS "Gold")
348348
string(TIMESTAMP CCL_PRODUCT_BUILD_DATE "%Y-%m-%dT %H:%M:%SZ")
349349
get_vcs_properties("git")

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ oneCCL is governed by the [UXL Foundation](http://www.uxlfoundation.org) and is
3131
- [Contribute](#contribute)
3232
- [License](#license)
3333
- [Security Policy](#security-policy)
34-
34+
3535
## Prerequisites
3636

3737
See [System Requirements](https://www.intel.com/content/www/us/en/developer/articles/system-requirements/oneapi-collective-communication-library-system-requirements.html) to learn about hardware and software requirements before getting started with oneCCL.

doc/rst/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,5 +139,5 @@ def get_git_last_updated_date():
139139
'extra_footer': '<p align="right"><a href="https://www.intel.com/content/www/us/en/privacy/intel-cookie-notice.html">Cookies</a></p><br><div>Last updated: ' + last_updated + '</div>'
140140
}
141141

142-
html_theme_options["logo"] = {"text": "oneCCL Documentation 2021.16.1"}
142+
html_theme_options["logo"] = {"text": "oneCCL Documentation 2021.16.2"}
143143

doc/rst/source/env-variables.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2148,7 +2148,7 @@ Profiling
21482148
#########
21492149

21502150
CCL_ITT_LEVEL
2151-
*************
2151+
-------------
21522152

21532153
**Syntax**
21542154

@@ -2174,10 +2174,10 @@ CCL_ITT_LEVEL
21742174

21752175
Set this environment variable to specify Intel\ |reg|\ Instrumentation and Tracing Technology (ITT) profiling level.
21762176
Once the environment variable is enabled (`value>0`), it is possible to collect and display profiling
2177-
data for |product_short| using tools such as Intel\ |reg|\ VTune\ |tm|\ Profiler and `Unified Tracing and Profiling Tool <https://github.com/intel/pti-gpu/tree/master/tools/unitrace>`_.
2177+
data for |product_short| using tools such as Intel\ |reg|\ VTune\ |tm|\ Profiler and `Unified Tracing and Profiling Tool <https://github.com/intel/pti-gpu/tree/master/tools/unitrace>`_.
21782178

21792179
CCL_PROFILING_ENABLE
2180-
********************
2180+
---------------------
21812181

21822182
**Syntax**
21832183

include/oneapi/ccl/config.h

Lines changed: 0 additions & 43 deletions
This file was deleted.

src/atl/mpi/atl_mpi_ctx.cpp

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -688,17 +688,11 @@ atl_status_t atl_mpi_ctx::check_impi_env(const atl_attr_t& attr) {
688688

689689
if (!getenv("ONEAPI_ROOT") && !getenv("I_MPI_ROOT")) {
690690
atl_mpi_lib_type_t type = ATL_MPI_LIB_IMPI;
691-
LOG_ERROR("CCL/MPI uses ",
692-
mpi_lib_infos[type].version_prefix_1,
693-
" but neither I_MPI_ROOT nor ONEAPI_ROOT is set. ",
694-
"Please source ",
695-
mpi_lib_infos[type].kind_value,
696-
" version of ",
697-
mpi_lib_infos[type].version_prefix_1,
698-
" (",
699-
mpi_lib_infos[type].min_version_value,
700-
" or higher version).");
701-
return ATL_STATUS_FAILURE;
691+
LOG_INFO("oneCCL MPI network transport layer is using ",
692+
mpi_lib_infos[type].version_prefix_1,
693+
" but $I_MPI_ROOT is not set.",
694+
" Transport variables will be initialized automatically.",
695+
" To override them run `source $I_MPI_ROOT/env/vars.sh`");
702696
}
703697

704698
return ATL_STATUS_SUCCESS;

src/common/api_wrapper/api_wrapper.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,9 @@ void print_error(int error, lib_info_t& info) {
156156
" - path contains invalid characters");
157157
}
158158
else if (error == CCL_LOAD_LB_DLOPEN_ERROR) {
159-
LOG_WARN("could not open the library: ", info.path.c_str(), " - ", dlerror());
159+
// Log as `info`, because in some cases we have to test multiple paths
160+
// and we do not want to print excess information to the user's screen
161+
LOG_INFO("could not open the library: ", info.path.c_str(), " - ", dlerror());
160162
}
161163
}
162164

src/common/api_wrapper/ofi_api_wrapper.cpp

Lines changed: 94 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,12 @@
1313
See the License for the specific language governing permissions and
1414
limitations under the License.
1515
*/
16+
#include <dlfcn.h>
17+
#include <libgen.h>
1618
#include <sys/stat.h>
1719

1820
#include "common/api_wrapper/api_wrapper.hpp"
21+
#include "common/log/log.hpp"
1922
#include "common/api_wrapper/ofi_api_wrapper.hpp"
2023

2124
namespace ccl {
@@ -64,20 +67,105 @@ std::string get_ofi_lib_path() {
6467
return ofi_lib_path;
6568
}
6669

67-
bool ofi_api_init() {
68-
bool ret = true;
70+
static std::string get_relative_ccl_root_path() {
71+
Dl_info info;
72+
73+
if (dladdr((void*)ccl::get_library_version, &info)) {
74+
char libccl_path[PATH_MAX];
75+
76+
if (realpath(info.dli_fname, libccl_path) != nullptr) {
77+
// We have to use `realpath`, so the dirname will work correctly,
78+
// because `dirname` is purely textual and gives the wrong directory if the path contains symlinks or `..` components
79+
libccl_path[PATH_MAX - 1] = '\0';
80+
81+
// Remove `libccl.so` from the path to get directory like `$CCL_ROOT/lib`
82+
char* libccl_dir = dirname(libccl_path);
83+
// Remove the `lib` from the path to get just the `CCL_ROOT`
84+
char* ccl_root_cstr = dirname(libccl_dir);
85+
86+
auto ccl_root = std::string(ccl_root_cstr);
6987

88+
return ccl_root;
89+
}
90+
}
91+
92+
return {};
93+
}
94+
95+
static bool load_libfabric() {
7096
ofi_lib_info.ops = &ofi_lib_ops;
7197
ofi_lib_info.fn_names = ofi_fn_names;
7298
ofi_lib_info.path = get_ofi_lib_path();
7399

74100
int error = load_library(ofi_lib_info);
75-
if (error != CCL_LOAD_LB_SUCCESS) {
76-
print_error(error, ofi_lib_info);
77-
ret = false;
101+
if (error == CCL_LOAD_LB_SUCCESS) {
102+
return true;
103+
}
104+
105+
print_error(error, ofi_lib_info);
106+
LOG_INFO("Retrying to load libfabric.so using relative path");
107+
108+
auto realtive_root = get_relative_ccl_root_path();
109+
if (realtive_root.empty()) {
110+
return false;
111+
}
112+
113+
// Path up to IMPI 2021.14
114+
ofi_lib_info.path = realtive_root + "/lib/libfabric/libfabric.so";
115+
error = load_library(ofi_lib_info);
116+
if (error == CCL_LOAD_LB_SUCCESS) {
117+
return true;
118+
}
119+
120+
// Path in IMPI 2021.15
121+
ofi_lib_info.path = realtive_root + "/lib/libfabric.so";
122+
error = load_library(ofi_lib_info);
123+
if (error == CCL_LOAD_LB_SUCCESS) {
124+
return true;
125+
}
126+
127+
print_error(error, ofi_lib_info);
128+
return false;
129+
}
130+
131+
static void setup_providers() {
132+
const char* fi_provider_path = getenv("FI_PROVIDER_PATH");
133+
if (fi_provider_path != nullptr) {
134+
LOG_DEBUG("FI_PROVIDER_PATH is already set to: ", fi_provider_path);
135+
return;
136+
}
137+
138+
char libfabric_path[PATH_MAX];
139+
dlinfo(ofi_lib_info.handle, RTLD_DI_ORIGIN, &libfabric_path);
140+
141+
// Add realpath to resolve any symlinks and get the absolute path
142+
char real_libfabric_path[PATH_MAX];
143+
if (!realpath(libfabric_path, real_libfabric_path)) {
144+
LOG_ERROR("Failed to resolve libfabric realpath: ", strerror(errno));
145+
return;
146+
}
147+
148+
std::string primary_path = std::string(real_libfabric_path);
149+
std::string secondary_path = primary_path + "/prov";
150+
151+
// Construct the full provider path with colon separator
152+
std::string full_provider_path = primary_path + ":" + secondary_path;
153+
154+
if (setenv("FI_PROVIDER_PATH", full_provider_path.c_str(), 1) != 0) {
155+
LOG_ERROR("Failed to set FI_PROVIDER_PATH with error: ", strerror(errno));
156+
return;
157+
}
158+
159+
LOG_DEBUG("FI_PROVIDER_PATH set to: ", full_provider_path);
160+
}
161+
162+
bool ofi_api_init() {
163+
if (load_libfabric() == false) {
164+
return false;
78165
}
79166

80-
return ret;
167+
setup_providers();
168+
return true;
81169
}
82170

83171
void ofi_api_fini() {

src/common/env/env.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -663,13 +663,19 @@ void env_data::parse() {
663663
if (ccl_root.empty()) {
664664
// CCL_ROOT and ONEAPI_ROOT are missing
665665
Dl_info info;
666+
666667
if (dladdr((void*)ccl::get_library_version, &info)) {
667668
char libccl_path[PATH_MAX];
668-
std::strncpy(libccl_path, info.dli_fname, PATH_MAX - 1);
669-
libccl_path[PATH_MAX - 1] = '\0';
670669

671-
char* libccl_dir = dirname(libccl_path);
672-
ccl_root = dirname(libccl_dir);
670+
if (realpath(info.dli_fname, libccl_path) != nullptr) {
671+
// We have to use `realpath`, so the dirname will work correctly
672+
libccl_path[PATH_MAX - 1] = '\0';
673+
674+
char* libccl_dir = dirname(libccl_path);
675+
char* ccl_root_cstr = dirname(libccl_dir);
676+
677+
ccl_root = std::string(ccl_root_cstr);
678+
}
673679
}
674680
}
675681

third-party-programs.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
Intel(R) oneAPI Collective Communications Library (oneCCL)
2-
2021.16.1 Third Party Programs File
2+
2021.16.2 Third Party Programs File
33

44
This file is the "third-party-programs.txt" file specified in the associated
55
Intel end user license agreement for the Intel software you are licensing.

0 commit comments

Comments
 (0)