diff --git a/compiler/CMakeLists.txt b/compiler/CMakeLists.txt index 7c026fec551..7017409a5e4 100644 --- a/compiler/CMakeLists.txt +++ b/compiler/CMakeLists.txt @@ -159,14 +159,10 @@ if(THEROCK_ENABLE_COMPILER) # version script, avoiding symbol interposition issues. ############################################################################## + # comgr provides the rewrite API only; the HSA tool (libhsa-hotswap.so) is the hsa-hotswap subproject. set(_comgr_hotswap_cmake_args) if(THEROCK_ENABLE_HOTSWAP) list(APPEND _comgr_hotswap_cmake_args -DCOMGR_ENABLE_HOTSWAP_TRANSPILE=ON) - if(NOT WIN32) - list(APPEND _comgr_hotswap_cmake_args - -DHOTSWAP_BUILD_TOOL=ON - "-DHOTSWAP_TOOL_HSA_INCLUDE_ROOT=${THEROCK_ROCM_SYSTEMS_SOURCE_DIR}/projects/rocr-runtime/runtime/hsa-runtime") - endif() endif() therock_cmake_subproject_declare(amd-comgr diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 32c799a68cc..1aeebb1c49a 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -143,6 +143,38 @@ if(THEROCK_ENABLE_CORE_RUNTIME) LIB_NAMES libhsa-runtime64.so ) + ############################################################################## + # hsa-hotswap + # HSA_TOOLS_LIB tool (libhsa-hotswap.so) from rocm-systems projects/hotswap. + ############################################################################## + + if(THEROCK_ENABLE_HOTSWAP AND NOT WIN32) + therock_cmake_subproject_declare(hsa-hotswap + USE_DIST_AMDGPU_TARGETS + EXTERNAL_SOURCE_DIR "${THEROCK_ROCM_SYSTEMS_SOURCE_DIR}/projects/hotswap" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/hsa-hotswap" + BACKGROUND_BUILD + COMPILER_TOOLCHAIN + "${_system_toolchain}" + BUILD_DEPS + rocm-cmake + RUNTIME_DEPS + amd-comgr + ROCR-Runtime + INTERFACE_LINK_DIRS + "lib" + INTERFACE_INSTALL_RPATH_DIRS + "lib" + ) + therock_cmake_subproject_glob_c_sources(hsa-hotswap SUBDIRS .) 
+ therock_cmake_subproject_activate(hsa-hotswap) + + therock_test_validate_shared_lib( + PATH hsa-hotswap/dist/lib + LIB_NAMES libhsa-hotswap.so + ) + endif() + ############################################################################## # rocminfo ############################################################################## @@ -174,6 +206,10 @@ if(THEROCK_ENABLE_CORE_RUNTIME) run ) + if(THEROCK_ENABLE_HOTSWAP AND NOT WIN32) + list(APPEND _core_runtime_subproject_deps hsa-hotswap) + endif() + if(THEROCK_FLAG_INCLUDE_HRX) ############################################################################ # HRX @@ -632,3 +668,63 @@ if(THEROCK_BUILD_TESTING AND THEROCK_ENABLE_CORE_RUNTIME_TESTS) rocrtst ) endif(THEROCK_BUILD_TESTING AND THEROCK_ENABLE_CORE_RUNTIME_TESTS) + +if(THEROCK_BUILD_TESTING AND THEROCK_ENABLE_CORE_KFDTESTS) + + # kfdtest statically links libhsakmt and LLVM (AMDGPUAsmParser, Core, Support). + # It needs LLVM headers at build time and libhsakmt.a for linking. + set(_kfdtest_build_deps + amd-llvm + ROCR-Runtime + therock-yaml-cpp + ) + + # Get the ROCR-Runtime binary directory to locate libhsakmt + get_target_property(_rocr_binary_dir ROCR-Runtime THEROCK_BINARY_DIR) + + therock_cmake_subproject_declare(kfdtest + USE_TEST_AMDGPU_TARGETS + EXTERNAL_SOURCE_DIR "${THEROCK_ROCM_SYSTEMS_SOURCE_DIR}/projects/rocr-runtime/libhsakmt/tests/kfdtest" + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/kfdtest" + BACKGROUND_BUILD + CMAKE_ARGS + "-DCMAKE_PREFIX_PATH=" + "-DLLVM_DIR=" + "-DROCM_DIR=" + "-DLIBHSAKMT_PATH=${_rocr_binary_dir}/libhsakmt" + "-DCMAKE_EXE_LINKER_FLAGS=-ldl" + "-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON" + COMPILER_TOOLCHAIN + "${_system_toolchain}" + BUILD_DEPS + ${_kfdtest_build_deps} + RUNTIME_DEPS + amd-llvm + ${THEROCK_BUNDLED_LIBDRM} + ${THEROCK_BUNDLED_NUMACTL} + ${THEROCK_BUNDLED_ZLIB} + ${THEROCK_BUNDLED_ZSTD} + INTERFACE_LINK_DIRS + "lib" + "lib/rocm_sysdeps/lib" + INTERFACE_INSTALL_RPATH_DIRS + "lib" + "lib/rocm_sysdeps/lib" + ) + 
therock_cmake_subproject_glob_c_sources(kfdtest SUBDIRS .) + therock_cmake_subproject_activate(kfdtest) + + therock_provide_artifact(kfdtest + TARGET_NEUTRAL + DESCRIPTOR artifact-core-kfdtest.toml + COMPONENTS + dbg + dev + doc + lib + run + test + SUBPROJECT_DEPS + kfdtest + ) +endif(THEROCK_BUILD_TESTING AND THEROCK_ENABLE_CORE_KFDTESTS) diff --git a/core/artifact-core-runtime.toml b/core/artifact-core-runtime.toml index c536923f91e..0a7d82cf25a 100644 --- a/core/artifact-core-runtime.toml +++ b/core/artifact-core-runtime.toml @@ -10,6 +10,18 @@ [components.dbg."core/rocminfo/stage"] [components.doc."core/rocminfo/stage"] +# hsa-hotswap (only built when THEROCK_ENABLE_HOTSWAP) +[components.lib."core/hsa-hotswap/stage"] +optional = true +[components.run."core/hsa-hotswap/stage"] +optional = true +[components.dbg."core/hsa-hotswap/stage"] +optional = true +[components.dev."core/hsa-hotswap/stage"] +optional = true +[components.doc."core/hsa-hotswap/stage"] +optional = true + # hrx [components.lib."core/hrx/stage"] optional = true diff --git a/rocm-systems b/rocm-systems index 72822631d42..75469b9ff38 160000 --- a/rocm-systems +++ b/rocm-systems @@ -1 +1 @@ -Subproject commit 72822631d427f33ab85fdb7f54689b2092df6413 +Subproject commit 75469b9ff3806866805649b75884aae8cbbfee22 diff --git a/tests/test_rocm_sanity.py b/tests/test_rocm_sanity.py index bb3f9d74feb..bfcaa9d87f5 100644 --- a/tests/test_rocm_sanity.py +++ b/tests/test_rocm_sanity.py @@ -163,3 +163,75 @@ def test_rocm_agent_enumerator(self): return_code = process.returncode check.equal(return_code, 0) check.is_true(output) + + # The hotswap HSA tool (libhsa-hotswap.so) is Linux-only. + @pytest.mark.skipif(is_windows(), reason="hotswap HSA tool is Linux-only") + # TODO(#3312): rocminfo currently fails under ASAN builds. 
+    @pytest.mark.skipif(
+        is_asan(), reason="rocminfo test fails with ASAN build, see TheRock#3312"
+    )
+    def test_hotswap_tool_loads(self):
+        """When hotswap is enabled, the HSA tool must load cleanly under ROCr.
+
+        THEROCK_ENABLE_HOTSWAP builds comgr with the hotswap transpiler, so
+        libamd_comgr.so exports ``amd_comgr_hotswap_rewrite``; that symbol is a
+        reliable signal that hotswap was enabled in this build. When it is, the
+        HSA_TOOLS_LIB tool ``libhsa-hotswap.so`` must be packaged and must load
+        cleanly. Running rocminfo triggers hsa_init, which is when ROCr dlopens
+        HSA_TOOLS_LIB tools. The forwarding allowlist is gfx1250->gfx1250 only,
+        so the tool stays inert on other targets and rocminfo must still succeed.
+
+        Skipped when hotswap is not enabled in the build or nm cannot inspect comgr.
+        """
+        lib_dir = THEROCK_BIN_DIR.parent / "lib"
+        # libamd_comgr.so may only be present versioned (e.g. libamd_comgr.so.3.3.0)
+        # in the lib component; the unversioned symlink can live in the dev package.
+        comgr_libs = sorted(lib_dir.glob("libamd_comgr.so*"))
+        if not comgr_libs:
+            pytest.skip(f"libamd_comgr.so* not found in {lib_dir}")
+        comgr = comgr_libs[0]
+
+        # comgr exports amd_comgr_hotswap_rewrite only when hotswap is enabled.
+        nm_cmd = ["nm", "-D", "--defined-only", str(comgr)]
+        try:
+            nm = subprocess.run(nm_cmd, capture_output=True, text=True)
+        except FileNotFoundError:  # nm itself is not installed: skip, do not error
+            pytest.skip("could not inspect libamd_comgr.so symbols (nm unavailable)")
+        if nm.returncode != 0:
+            pytest.skip(f"nm failed on {comgr.name}: {nm.stderr.strip()}")
+        if "amd_comgr_hotswap_rewrite" not in nm.stdout:
+            pytest.skip(
+                "hotswap not enabled in this build "
+                "(libamd_comgr.so does not export amd_comgr_hotswap_rewrite)"
+            )
+
+        # Hotswap is enabled -> the HSA tool must be packaged.
+        tool = lib_dir / "libhsa-hotswap.so"
+        assert tool.exists(), (
+            "hotswap is enabled (libamd_comgr.so exports amd_comgr_hotswap_rewrite) "
+            f"but the HSA tool is missing: {tool}"
+        )
+
+        # rocminfo triggers hsa_init -> ROCr LoadTools dlopens the tool.
+        env = os.environ.copy()
+        env["HSA_TOOLS_LIB"] = str(tool)
+        env["HSA_TOOLS_REPORT_LOAD_FAILURE"] = "1"
+        process = subprocess.run(
+            [f"{THEROCK_BIN_DIR}/rocminfo"],
+            capture_output=True,
+            text=True,
+            env=env,
+        )
+        combined = process.stdout + process.stderr
+        logger.info(combined)
+        # ROCr prints "Tool lib \"...\" failed to load." if the dlopen fails.
+        check.is_not_in(
+            "failed to load",
+            combined,
+            "ROCr failed to load the hotswap tool via HSA_TOOLS_LIB",
+        )
+        check.equal(
+            process.returncode,
+            0,
+            "rocminfo failed with the hotswap tool loaded via HSA_TOOLS_LIB",
+        )