From 5921768dd1a164e790ca17c29b8999373e03b812 Mon Sep 17 00:00:00 2001
From: Jacob Lambert
Date: Mon, 29 Jun 2026 10:54:29 -0700
Subject: [PATCH 1/2] feat(hotswap): build, package, and sanity-check libhsa-hotswap.so

Integrate the relocated hotswap HSA tool. The HSA_TOOLS_LIB tool moved from
comgr (libamd_comgr_hotswap_tool.so, removed by ROCm/llvm-project#3007) to
rocm-systems projects/hotswap (libhsa-hotswap.so); comgr keeps only the
amd_comgr_hotswap_rewrite API.

- compiler/CMakeLists.txt: drop the removed HOTSWAP_BUILD_TOOL args; keep
  COMGR_ENABLE_HOTSWAP_TRANSPILE.
- core/CMakeLists.txt + core/artifact-core-runtime.toml: declare the
  hsa-hotswap subproject (rocm-systems projects/hotswap; deps amd-comgr +
  ROCR-Runtime) and package libhsa-hotswap.so into the core-runtime artifact.
- tests/test_rocm_sanity.py: add test_hotswap_tool_loads. When hotswap is
  enabled (libamd_comgr.so exports amd_comgr_hotswap_rewrite),
  libhsa-hotswap.so must be packaged and load cleanly under ROCr (rocminfo
  triggers hsa_init -> ROCr dlopens HSA_TOOLS_LIB tools). The allowlist is
  gfx1250->gfx1250 only, so the tool stays inert on other targets and
  rocminfo still succeeds. Skips when hotswap is disabled or when the comgr
  symbols cannot be inspected (nm missing or failing).
---
NOTE(review): the core/CMakeLists.txt diff below also adds a kfdtest
subproject and artifact (THEROCK_ENABLE_CORE_KFDTESTS) that the commit
message does not mention -- confirm it is intended and not a rebase artifact.

 compiler/CMakeLists.txt         |  6 +--
 core/CMakeLists.txt             | 96 +++++++++++++++++++++++++++++++++
 core/artifact-core-runtime.toml | 12 +++++
 tests/test_rocm_sanity.py       | 77 +++++++++++++++++++++++++++++++++
 4 files changed, 186 insertions(+), 5 deletions(-)

diff --git a/compiler/CMakeLists.txt b/compiler/CMakeLists.txt
index 7c026fec551..7017409a5e4 100644
--- a/compiler/CMakeLists.txt
+++ b/compiler/CMakeLists.txt
@@ -159,14 +159,10 @@ if(THEROCK_ENABLE_COMPILER)
   # version script, avoiding symbol interposition issues.
   ##############################################################################
 
+  # comgr provides the rewrite API only; the HSA tool (libhsa-hotswap.so) is the hsa-hotswap subproject.
   set(_comgr_hotswap_cmake_args)
   if(THEROCK_ENABLE_HOTSWAP)
     list(APPEND _comgr_hotswap_cmake_args -DCOMGR_ENABLE_HOTSWAP_TRANSPILE=ON)
-    if(NOT WIN32)
-      list(APPEND _comgr_hotswap_cmake_args
-        -DHOTSWAP_BUILD_TOOL=ON
-        "-DHOTSWAP_TOOL_HSA_INCLUDE_ROOT=${THEROCK_ROCM_SYSTEMS_SOURCE_DIR}/projects/rocr-runtime/runtime/hsa-runtime")
-    endif()
   endif()
 
   therock_cmake_subproject_declare(amd-comgr
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index 32c799a68cc..1aeebb1c49a 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -143,6 +143,38 @@ if(THEROCK_ENABLE_CORE_RUNTIME)
     LIB_NAMES libhsa-runtime64.so
   )
 
+  ##############################################################################
+  # hsa-hotswap
+  # HSA_TOOLS_LIB tool (libhsa-hotswap.so) from rocm-systems projects/hotswap.
+  ##############################################################################
+
+  if(THEROCK_ENABLE_HOTSWAP AND NOT WIN32)
+    therock_cmake_subproject_declare(hsa-hotswap
+      USE_DIST_AMDGPU_TARGETS
+      EXTERNAL_SOURCE_DIR "${THEROCK_ROCM_SYSTEMS_SOURCE_DIR}/projects/hotswap"
+      BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/hsa-hotswap"
+      BACKGROUND_BUILD
+      COMPILER_TOOLCHAIN
+        "${_system_toolchain}"
+      BUILD_DEPS
+        rocm-cmake
+      RUNTIME_DEPS
+        amd-comgr
+        ROCR-Runtime
+      INTERFACE_LINK_DIRS
+        "lib"
+      INTERFACE_INSTALL_RPATH_DIRS
+        "lib"
+    )
+    therock_cmake_subproject_glob_c_sources(hsa-hotswap SUBDIRS .)
+    therock_cmake_subproject_activate(hsa-hotswap)
+
+    therock_test_validate_shared_lib(
+      PATH hsa-hotswap/dist/lib
+      LIB_NAMES libhsa-hotswap.so
+    )
+  endif()
+
   ##############################################################################
   # rocminfo
   ##############################################################################
@@ -174,6 +206,10 @@ if(THEROCK_ENABLE_CORE_RUNTIME)
     run
   )
 
+  if(THEROCK_ENABLE_HOTSWAP AND NOT WIN32)
+    list(APPEND _core_runtime_subproject_deps hsa-hotswap)
+  endif()
+
   if(THEROCK_FLAG_INCLUDE_HRX)
     ############################################################################
     # HRX
@@ -632,3 +668,63 @@ if(THEROCK_BUILD_TESTING AND THEROCK_ENABLE_CORE_RUNTIME_TESTS)
       rocrtst
   )
 endif(THEROCK_BUILD_TESTING AND THEROCK_ENABLE_CORE_RUNTIME_TESTS)
+
+if(THEROCK_BUILD_TESTING AND THEROCK_ENABLE_CORE_KFDTESTS)
+
+  # kfdtest statically links libhsakmt and LLVM (AMDGPUAsmParser, Core, Support).
+  # It needs LLVM headers at build time and libhsakmt.a for linking.
+  set(_kfdtest_build_deps
+    amd-llvm
+    ROCR-Runtime
+    therock-yaml-cpp
+  )
+
+  # Get the ROCR-Runtime binary directory to locate libhsakmt
+  get_target_property(_rocr_binary_dir ROCR-Runtime THEROCK_BINARY_DIR)
+
+  therock_cmake_subproject_declare(kfdtest
+    USE_TEST_AMDGPU_TARGETS
+    EXTERNAL_SOURCE_DIR "${THEROCK_ROCM_SYSTEMS_SOURCE_DIR}/projects/rocr-runtime/libhsakmt/tests/kfdtest"
+    BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/kfdtest"
+    BACKGROUND_BUILD
+    CMAKE_ARGS
+      "-DCMAKE_PREFIX_PATH="
+      "-DLLVM_DIR="
+      "-DROCM_DIR="
+      "-DLIBHSAKMT_PATH=${_rocr_binary_dir}/libhsakmt"
+      "-DCMAKE_EXE_LINKER_FLAGS=-ldl"
+      "-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON"
+    COMPILER_TOOLCHAIN
+      "${_system_toolchain}"
+    BUILD_DEPS
+      ${_kfdtest_build_deps}
+    RUNTIME_DEPS
+      amd-llvm
+      ${THEROCK_BUNDLED_LIBDRM}
+      ${THEROCK_BUNDLED_NUMACTL}
+      ${THEROCK_BUNDLED_ZLIB}
+      ${THEROCK_BUNDLED_ZSTD}
+    INTERFACE_LINK_DIRS
+      "lib"
+      "lib/rocm_sysdeps/lib"
+    INTERFACE_INSTALL_RPATH_DIRS
+      "lib"
+      "lib/rocm_sysdeps/lib"
+  )
+  therock_cmake_subproject_glob_c_sources(kfdtest SUBDIRS .)
+  therock_cmake_subproject_activate(kfdtest)
+
+  therock_provide_artifact(kfdtest
+    TARGET_NEUTRAL
+    DESCRIPTOR artifact-core-kfdtest.toml
+    COMPONENTS
+      dbg
+      dev
+      doc
+      lib
+      run
+      test
+    SUBPROJECT_DEPS
+      kfdtest
+  )
+endif(THEROCK_BUILD_TESTING AND THEROCK_ENABLE_CORE_KFDTESTS)
diff --git a/core/artifact-core-runtime.toml b/core/artifact-core-runtime.toml
index c536923f91e..0a7d82cf25a 100644
--- a/core/artifact-core-runtime.toml
+++ b/core/artifact-core-runtime.toml
@@ -10,6 +10,18 @@
 [components.dbg."core/rocminfo/stage"]
 [components.doc."core/rocminfo/stage"]
 
+# hsa-hotswap (only built when THEROCK_ENABLE_HOTSWAP)
+[components.lib."core/hsa-hotswap/stage"]
+optional = true
+[components.run."core/hsa-hotswap/stage"]
+optional = true
+[components.dbg."core/hsa-hotswap/stage"]
+optional = true
+[components.dev."core/hsa-hotswap/stage"]
+optional = true
+[components.doc."core/hsa-hotswap/stage"]
+optional = true
+
 # hrx
 [components.lib."core/hrx/stage"]
 optional = true
diff --git a/tests/test_rocm_sanity.py b/tests/test_rocm_sanity.py
index bb3f9d74feb..bfcaa9d87f5 100644
--- a/tests/test_rocm_sanity.py
+++ b/tests/test_rocm_sanity.py
@@ -163,3 +163,80 @@ def test_rocm_agent_enumerator(self):
         return_code = process.returncode
         check.equal(return_code, 0)
         check.is_true(output)
+
+    # The hotswap HSA tool (libhsa-hotswap.so) is Linux-only.
+    @pytest.mark.skipif(is_windows(), reason="hotswap HSA tool is Linux-only")
+    # TODO(#3312): rocminfo currently fails under ASAN builds.
+    @pytest.mark.skipif(
+        is_asan(), reason="rocminfo test fails with ASAN build, see TheRock#3312"
+    )
+    def test_hotswap_tool_loads(self):
+        """When hotswap is enabled, the HSA tool must load cleanly under ROCr.
+
+        THEROCK_ENABLE_HOTSWAP builds comgr with the hotswap transpiler, so
+        libamd_comgr.so exports ``amd_comgr_hotswap_rewrite``; that symbol is a
+        reliable signal that hotswap was enabled in this build. When it is, the
+        HSA_TOOLS_LIB tool ``libhsa-hotswap.so`` must be packaged and must load
+        cleanly. Running rocminfo triggers hsa_init, which is when ROCr dlopen's
+        HSA_TOOLS_LIB tools. The forwarding allowlist is gfx1250->gfx1250 only,
+        so the tool stays inert on other targets and rocminfo must still succeed.
+
+        Skipped when hotswap is not enabled in the build or nm cannot be run.
+        """
+        lib_dir = THEROCK_BIN_DIR.parent / "lib"
+        # libamd_comgr.so may only be present versioned (e.g. libamd_comgr.so.3.3.0)
+        # in the lib component; the unversioned symlink can live in the dev package.
+        comgr_libs = sorted(lib_dir.glob("libamd_comgr.so*"))
+        if not comgr_libs:
+            pytest.skip(f"libamd_comgr.so* not found in {lib_dir}")
+        comgr = comgr_libs[0]
+
+        # comgr exports amd_comgr_hotswap_rewrite only when hotswap is enabled.
+        # subprocess.run raises OSError (it does not return a non-zero code) when
+        # the nm binary itself cannot be executed, so map that to the skip too.
+        try:
+            nm = subprocess.run(
+                ["nm", "-D", "--defined-only", str(comgr)],
+                capture_output=True,
+                text=True,
+            )
+        except OSError:
+            nm = None
+        if nm is None or nm.returncode != 0:
+            pytest.skip("could not inspect libamd_comgr.so symbols (nm unavailable)")
+        if "amd_comgr_hotswap_rewrite" not in nm.stdout:
+            pytest.skip(
+                "hotswap not enabled in this build "
+                "(libamd_comgr.so does not export amd_comgr_hotswap_rewrite)"
+            )
+
+        # Hotswap is enabled -> the HSA tool must be packaged.
+        tool = lib_dir / "libhsa-hotswap.so"
+        assert tool.exists(), (
+            "hotswap is enabled (libamd_comgr.so exports amd_comgr_hotswap_rewrite) "
+            f"but the HSA tool is missing: {tool}"
+        )
+
+        # rocminfo triggers hsa_init -> ROCr LoadTools dlopen's the tool.
+        env = os.environ.copy()
+        env["HSA_TOOLS_LIB"] = str(tool)
+        env["HSA_TOOLS_REPORT_LOAD_FAILURE"] = "1"
+        process = subprocess.run(
+            [f"{THEROCK_BIN_DIR}/rocminfo"],
+            capture_output=True,
+            text=True,
+            env=env,
+        )
+        combined = process.stdout + process.stderr
+        logger.info(combined)
+        # ROCr prints "Tool lib \"...\" failed to load." if the dlopen fails.
+        check.is_not_in(
+            "failed to load",
+            combined,
+            "ROCr failed to load the hotswap tool via HSA_TOOLS_LIB",
+        )
+        check.equal(
+            process.returncode,
+            0,
+            "rocminfo failed with the hotswap tool loaded via HSA_TOOLS_LIB",
+        )

From d53a4280883ad96d68554bf3c4a5a88d9ecb6881 Mon Sep 17 00:00:00 2001
From: Jacob Lambert
Date: Mon, 29 Jun 2026 10:54:40 -0700
Subject: [PATCH 2/2] hotswap: temporary rocm-systems pin (drop when SMP advances)

Temp-pin rocm-systems to a hotswap-only integration tip carrying
ROCm/rocm-systems#7629 + #7715 (both merged to develop, not yet in
TheRock's rocm-systems pin). Drop this commit once the rocm-systems SMP
bump brings them into the pin.
---
 rocm-systems | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rocm-systems b/rocm-systems
index 72822631d42..75469b9ff38 160000
--- a/rocm-systems
+++ b/rocm-systems
@@ -1 +1 @@
-Subproject commit 72822631d427f33ab85fdb7f54689b2092df6413
+Subproject commit 75469b9ff3806866805649b75884aae8cbbfee22