From af01f7a63731d1409c348bacd6d06fff36e4d0b5 Mon Sep 17 00:00:00 2001 From: harsh-amd <149193359+harsh-amd@users.noreply.github.com> Date: Mon, 22 Jun 2026 06:02:28 -0700 Subject: [PATCH 01/16] hotswap: gate entry trampoline rewrites explicitly --- projects/hotswap/hotswap.cpp | 7 +-- projects/hotswap/hotswap_gfx_query.cpp | 67 ++++++++++++++++++-------- projects/hotswap/hotswap_gfx_query.hpp | 23 +++++++-- projects/hotswap/hotswap_tool.cpp | 62 ++++++++++++++++-------- 4 files changed, 112 insertions(+), 47 deletions(-) diff --git a/projects/hotswap/hotswap.cpp b/projects/hotswap/hotswap.cpp index 740d4f0f2248..d6f2256de50f 100644 --- a/projects/hotswap/hotswap.cpp +++ b/projects/hotswap/hotswap.cpp @@ -77,7 +77,8 @@ int RetargetCodeObject(const void *elf_data, size_t elf_size, return static_cast(status); } - status = amd_comgr_set_data(input, elf_size, static_cast(elf_data)); + status = + amd_comgr_set_data(input, elf_size, static_cast(elf_data)); if (status != AMD_COMGR_STATUS_SUCCESS) { fprintf(stderr, "hotswap: failed to set COMGR input data\n"); amd_comgr_release_data(input); @@ -115,8 +116,8 @@ int RetargetCodeObject(const void *elf_data, size_t elf_size, return -1; } - status = amd_comgr_get_data(output, &output_size, - static_cast(output_buf)); + status = + amd_comgr_get_data(output, &output_size, static_cast(output_buf)); amd_comgr_release_data(output); if (status != AMD_COMGR_STATUS_SUCCESS) { diff --git a/projects/hotswap/hotswap_gfx_query.cpp b/projects/hotswap/hotswap_gfx_query.cpp index 6361b4ebe968..283a8c68fdd7 100644 --- a/projects/hotswap/hotswap_gfx_query.cpp +++ b/projects/hotswap/hotswap_gfx_query.cpp @@ -18,27 +18,30 @@ namespace rocr::hotswap { std::string get_agent_isa_name(hsa_agent_t agent) { std::string name; - hsa_agent_iterate_isas(agent, [](hsa_isa_t isa, void *data) -> hsa_status_t { - uint32_t len = 0; - if (hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME_LENGTH, &len) != - HSA_STATUS_SUCCESS) - return HSA_STATUS_ERROR; - - auto &out = *static_cast(data); - out.resize(len); - - if (hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME, out.data()) != - HSA_STATUS_SUCCESS) { - out.clear(); - return HSA_STATUS_ERROR; - } - - // HSA returns null-terminated length; trim it. - if (!out.empty() && out.back() == '\0') - out.pop_back(); - - return HSA_STATUS_INFO_BREAK; // only need the first ISA - }, &name); + hsa_agent_iterate_isas( + agent, + [](hsa_isa_t isa, void *data) -> hsa_status_t { + uint32_t len = 0; + if (hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME_LENGTH, &len) != + HSA_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + + auto &out = *static_cast(data); + out.resize(len); + + if (hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME, out.data()) != + HSA_STATUS_SUCCESS) { + out.clear(); + return HSA_STATUS_ERROR; + } + + // HSA returns null-terminated length; trim it. + if (!out.empty() && out.back() == '\0') + out.pop_back(); + + return HSA_STATUS_INFO_BREAK; // only need the first ISA + }, + &name); return name; } @@ -99,4 +102,26 @@ bool gate_allows_hotswap(const AgentGfxRevision &gfx) { gfx.asic_revision == 0; // A0 } +bool gate_allows_entry_trampolines(const AgentGfxRevision &gfx) { + return gfx.gfx_target == "gfx1250"; +} + +bool gate_allows_hotswap_rewrite(const AgentGfxRevision &gfx, + bool entry_trampolines_requested) { + if (gate_allows_hotswap(gfx)) { + return true; + } + return entry_trampolines_requested && gate_allows_entry_trampolines(gfx); +} + +std::string add_gfx1250_stepping_feature(const std::string &isa_name, + bool is_b0) { + if (extract_gfx_target(isa_name) != "gfx1250" || + isa_name.find(":gfx1250-b0-specific+") != std::string::npos || + isa_name.find(":gfx1250-b0-specific-") != std::string::npos) { + return isa_name; + } + return isa_name + (is_b0 ? ":gfx1250-b0-specific+" : ":gfx1250-b0-specific-"); +} + } // namespace rocr::hotswap diff --git a/projects/hotswap/hotswap_gfx_query.hpp b/projects/hotswap/hotswap_gfx_query.hpp index 3fe08813ffa1..9cfefc9ecc9f 100644 --- a/projects/hotswap/hotswap_gfx_query.hpp +++ b/projects/hotswap/hotswap_gfx_query.hpp @@ -5,7 +5,7 @@ //===----------------------------------------------------------------------===// // // Portable helpers for discovering an agent's gfx target and ASIC revision via -// the HSA runtime (HSA_AMD_AGENT_INFO_ASIC_REVISION). +// the HSA runtime (HSA_AMD_AGENT_INFO_ASIC_REVISION). // //===----------------------------------------------------------------------===// @@ -45,13 +45,28 @@ std::string extract_gfx_target(const std::string &isa_name); // gate_allows_hotswap() (below) to decide whether to act. AgentGfxRevision query_agent_gfx_revision(hsa_agent_t agent); -// Clears the per-agent-handle cache used by query_agent_gfx_revision(). +// Clears the per-agent-handle cache used by query_agent_gfx_revision(). void reset_gfx_revision_cache(); -// HotSwap's activation policy: rewriting is performed only for gfx1250 silicon -// at ASIC revision A0 (and only when the revision was successfully queried). +// HotSwap's default activation policy: B0-to-A0 rewriting is performed only +// for gfx1250 silicon at ASIC revision A0 (and only when the revision was +// successfully queried). bool gate_allows_hotswap(const AgentGfxRevision &gfx); +// Entry trampolines are a separate, opt-in rewrite that applies to gfx1250 +// regardless of stepping when AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES is enabled. +bool gate_allows_entry_trampolines(const AgentGfxRevision &gfx); + +// Combined activation policy for libhsa-hotswap.so. +bool gate_allows_hotswap_rewrite(const AgentGfxRevision &gfx, + bool entry_trampolines_requested); + +// Adds COMGR's hotswap-local gfx1250 stepping feature to an ISA name while +// preserving any existing feature suffixes. Non-gfx1250 ISA names and ISA +// names that already carry the stepping feature are returned unchanged. +std::string add_gfx1250_stepping_feature(const std::string &isa_name, + bool is_b0); + } // namespace rocr::hotswap #endif // ROCR_HOTSWAP_GFX_QUERY_HPP diff --git a/projects/hotswap/hotswap_tool.cpp b/projects/hotswap/hotswap_tool.cpp index ab709d5b57b9..26a92d2f142e 100644 --- a/projects/hotswap/hotswap_tool.cpp +++ b/projects/hotswap/hotswap_tool.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -32,7 +33,7 @@ // The agent's gfx target and ASIC revision are read through the HSA runtime // (HSA_AMD_AGENT_INFO_ASIC_REVISION) in hotswap_gfx_query.{hpp,cpp}, which is -// portable across Linux and Windows. +// portable across Linux and Windows. #define HSA_HOTSWAP_EXPORT __attribute__((visibility("default"))) @@ -40,8 +41,11 @@ namespace { namespace hotswap_io = rocr::hotswap::platform_io; +using rocr::hotswap::add_gfx1250_stepping_feature; using rocr::hotswap::AgentGfxRevision; +using rocr::hotswap::extract_gfx_target; using rocr::hotswap::gate_allows_hotswap; +using rocr::hotswap::gate_allows_hotswap_rewrite; using rocr::hotswap::get_agent_isa_name; using rocr::hotswap::query_agent_gfx_revision; @@ -95,8 +99,9 @@ CoreApiTable *g_core_table = nullptr; decltype(hsa_code_object_reader_create_from_memory) *g_orig_reader_create_from_memory = nullptr; -decltype(hsa_code_object_reader_create_from_file) - *g_orig_reader_create_from_file = nullptr; +decltype( + hsa_code_object_reader_create_from_file) *g_orig_reader_create_from_file = + nullptr; decltype(hsa_code_object_reader_destroy) *g_orig_reader_destroy = nullptr; decltype(hsa_executable_load_agent_code_object) *g_orig_load_agent_code_object = nullptr; @@ -107,6 +112,11 @@ void stash_bytes(uint64_t handle, const uint8_t *data, size_t size) { g_reader_map[handle] = ReaderEntry{std::move(vec), false, false}; } +bool entry_trampolines_requested() { + const char *value = std::getenv("AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES"); + return value && value[0] != '\0' && std::strcmp(value, "0") != 0; +} + bool try_get_reader_entry(uint64_t handle, ByteVec *bytes, bool *from_file) { std::scoped_lock lock(g_reader_map_mutex); const auto it = g_reader_map.find(handle); @@ -215,7 +225,8 @@ hotswap_reader_destroy(hsa_code_object_reader_t code_object_reader) { return g_orig_reader_destroy(code_object_reader); } -hsa_status_t load_original_reader(hsa_executable_t executable, hsa_agent_t agent, +hsa_status_t load_original_reader(hsa_executable_t executable, + hsa_agent_t agent, hsa_code_object_reader_t code_object_reader, const char *options, hsa_loaded_code_object_t *loaded_code_object, @@ -228,14 +239,14 @@ hsa_status_t load_original_reader(hsa_executable_t executable, hsa_agent_t agent return status; } -hsa_status_t load_rewritten_reader(hsa_executable_t executable, hsa_agent_t agent, - const char *options, +hsa_status_t load_rewritten_reader(hsa_executable_t executable, + hsa_agent_t agent, const char *options, hsa_loaded_code_object_t *loaded_code_object, void *out_elf, size_t out_elf_size) { OwnedElf owned_elf(out_elf, &std::free); hsa_code_object_reader_t new_reader = {}; - hsa_status_t status = - g_orig_reader_create_from_memory(owned_elf.get(), out_elf_size, &new_reader); + hsa_status_t status = g_orig_reader_create_from_memory( + owned_elf.get(), out_elf_size, &new_reader); if (status != HSA_STATUS_SUCCESS) { return status; } @@ -253,21 +264,34 @@ hsa_status_t load_rewritten_reader(hsa_executable_t executable, hsa_agent_t agen return status; } -hsa_status_t try_retarget_and_load(hsa_executable_t executable, hsa_agent_t agent, +hsa_status_t try_retarget_and_load(hsa_executable_t executable, + hsa_agent_t agent, hsa_code_object_reader_t code_object_reader, const char *options, hsa_loaded_code_object_t *loaded_code_object, - const ByteVec &local_bytes) { + const ByteVec &local_bytes, + const AgentGfxRevision &gfx) { // Source ISA from the code object, target ISA from the running GPU. - const std::string source_isa = rocr::hotswap::GetCodeObjectIsaName( + std::string source_isa = rocr::hotswap::GetCodeObjectIsaName( local_bytes->data(), local_bytes->size()); - const std::string target_isa = get_agent_isa_name(agent); + std::string target_isa = get_agent_isa_name(agent); + if (source_isa.empty() || target_isa.empty()) { HOTSWAP_LOG("hotswap: rewrite SKIP empty isa (src='%s' tgt='%s' size=%zu)\n", source_isa.c_str(), target_isa.c_str(), local_bytes->size()); return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; } + if (extract_gfx_target(source_isa) == "gfx1250" && + extract_gfx_target(target_isa) == "gfx1250") { + source_isa = add_gfx1250_stepping_feature(source_isa, true); + target_isa = + add_gfx1250_stepping_feature(target_isa, !gate_allows_hotswap(gfx)); + } + + // Route through RetargetCodeObject for unified logging, validation, + // and COMGR interaction. Do NOT skip when source == target: B0-to-A0 + // patching uses the same ISA name on both sides. void *out_elf = nullptr; size_t out_elf_size = 0; const int rc = rocr::hotswap::RetargetCodeObject( @@ -311,20 +335,20 @@ hsa_status_t HSA_API hotswap_load_agent_code_object( reader_from_file); } - // Gate HotSwap to gfx1250 A0 silicon. On any other GPU or stepping, load - // the original code object unchanged instead of routing through COMGR. const AgentGfxRevision gfx = query_agent_gfx_revision(agent); - if (!gate_allows_hotswap(gfx)) { + const bool entry_trampolines = entry_trampolines_requested(); + if (!gate_allows_hotswap_rewrite(gfx, entry_trampolines)) { HOTSWAP_LOG("hotswap: gate BLOCKED (gfx=%s rev=%u valid=%d)\n", - gfx.gfx_target.c_str(), gfx.asic_revision, gfx.revision_valid); + gfx.gfx_target.c_str(), gfx.asic_revision, + gfx.revision_valid); return load_original_reader(executable, agent, code_object_reader, options, loaded_code_object, reader_from_file); } - const hsa_status_t status = try_retarget_and_load( - executable, agent, code_object_reader, options, loaded_code_object, - local_bytes); + const hsa_status_t status = + try_retarget_and_load(executable, agent, code_object_reader, options, + loaded_code_object, local_bytes, gfx); if (status == HSA_STATUS_SUCCESS) { return status; } From d0a2361732614838a5f6c0dc1ca6146a14a0a6bc Mon Sep 17 00:00:00 2001 From: harsh-amd <149193359+harsh-amd@users.noreply.github.com> Date: Mon, 22 Jun 2026 06:02:37 -0700 Subject: [PATCH 02/16] hotswap: test entry trampoline rewrite gating --- projects/hotswap/tests/hotswap_tool_test.cpp | 83 +++++++++++++++++--- 1 file changed, 73 insertions(+), 10 deletions(-) diff --git a/projects/hotswap/tests/hotswap_tool_test.cpp b/projects/hotswap/tests/hotswap_tool_test.cpp index e5dc33e91e91..a7a816cea8d5 100644 --- a/projects/hotswap/tests/hotswap_tool_test.cpp +++ b/projects/hotswap/tests/hotswap_tool_test.cpp @@ -18,7 +18,7 @@ // * ISA name <- hsa_agent_iterate_isas / hsa_isa_get_info_alt // * ASIC revision <- hsa_agent_get_info(HSA_AMD_AGENT_INFO_ASIC_REVISION) // -// This path is portable: it depends only on HSA so the same query/gate logic is +// This path is portable: it depends only on HSA so the same query/gate logic is // exercised for both Linux and Windows builds. // //===----------------------------------------------------------------------===// @@ -34,22 +34,24 @@ // --------------------------------------------------------------------------- namespace { struct FakeEnv { - std::string isa_name; // ISA reported for the agent - bool asic_rev_ok = true; // hsa_agent_get_info(ASIC_REVISION) success - uint32_t asic_revision = 0; // reported ASIC revision (0 == A0) + std::string isa_name; // ISA reported for the agent + bool asic_rev_ok = true; // hsa_agent_get_info(ASIC_REVISION) success + uint32_t asic_revision = 0; // reported ASIC revision (0 == A0) // Counters used to assert short-circuiting and caching behavior. - int isa_query_calls = 0; // get_agent_isa_name -> iterate_isas - int asic_rev_calls = 0; // ASIC revision queries + int isa_query_calls = 0; // get_agent_isa_name -> iterate_isas + int asic_rev_calls = 0; // ASIC revision queries }; FakeEnv g_env; -} // namespace +} // namespace // The unit under test (brings in hsa.h and the query helper declarations). #include "hotswap_gfx_query.hpp" +using rocr::hotswap::add_gfx1250_stepping_feature; using rocr::hotswap::AgentGfxRevision; using rocr::hotswap::gate_allows_hotswap; +using rocr::hotswap::gate_allows_hotswap_rewrite; using rocr::hotswap::query_agent_gfx_revision; using rocr::hotswap::reset_gfx_revision_cache; @@ -97,7 +99,7 @@ hsa_status_t hsa_agent_get_info(hsa_agent_t /*agent*/, return HSA_STATUS_ERROR; } -} // extern "C" +} // extern "C" // --------------------------------------------------------------------------- // Minimal test harness. @@ -194,12 +196,69 @@ void test_Gfx1250NonA0Blocks() { printf("TEST Gfx1250NonA0Blocks...\n"); reset_env(); g_env.isa_name = kGfx1250Isa; - g_env.asic_revision = 1; // A1 + g_env.asic_revision = 1; // A1 const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); run("ASIC revision is A1 (1)", g.revision_valid && g.asic_revision == 1); run("gate blocks gfx1250 A1", gate_allows_hotswap(g) == false); } +// The entry-trampoline flag opens an explicit gfx1250 path independent of the +// B0/A0 retargeting gate. +void test_EntryTrampolineFlagAllowsGfx1250NonA0() { + printf("TEST EntryTrampolineFlagAllowsGfx1250NonA0...\n"); + reset_env(); + g_env.isa_name = kGfx1250Isa; + g_env.asic_revision = 1; // A1/B0-side path, not A0. + const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); + run("default gate blocks non-A0", + gate_allows_hotswap_rewrite(g, false) == false); + run("trampoline gate allows gfx1250 non-A0", + gate_allows_hotswap_rewrite(g, true) == true); +} + +// If ASIC revision cannot be queried, the explicit trampoline flag can still +// route gfx1250 through COMGR. The target is treated as non-A0 for B0/A0 patch +// selection so the trampoline pass does not imply A0-specific fixes. +void test_EntryTrampolineFlagAllowsGfx1250UnknownRevision() { + printf("TEST EntryTrampolineFlagAllowsGfx1250UnknownRevision...\n"); + reset_env(); + g_env.isa_name = kGfx1250Isa; + g_env.asic_rev_ok = false; + const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); + run("default gate blocks unknown revision", + gate_allows_hotswap_rewrite(g, false) == false); + run("trampoline gate allows gfx1250 unknown revision", + gate_allows_hotswap_rewrite(g, true) == true); +} + +// The trampoline flag is not a global rewrite enable; non-gfx1250 targets still +// load unchanged. +void test_EntryTrampolineFlagBlocksOtherTargets() { + printf("TEST EntryTrampolineFlagBlocksOtherTargets...\n"); + reset_env(); + g_env.isa_name = kGfx942Isa; + g_env.asic_revision = 0; + const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); + run("trampoline gate blocks gfx942", + gate_allows_hotswap_rewrite(g, true) == false); +} + +void test_AddGfx1250SteppingFeature() { + printf("TEST AddGfx1250SteppingFeature...\n"); + run("B0 feature appended to bare gfx1250", + add_gfx1250_stepping_feature(kGfx1250Isa, true) == + std::string(kGfx1250Isa) + ":gfx1250-b0-specific+"); + run("A0 feature appended after existing ISA features", + add_gfx1250_stepping_feature(kGfx1250IsaWithFeatures, false) == + std::string(kGfx1250IsaWithFeatures) + ":gfx1250-b0-specific-"); + run("existing B0 feature is preserved", + add_gfx1250_stepping_feature( + "amdgcn-amd-amdhsa--gfx1250:gfx1250-b0-specific+", false) == + "amdgcn-amd-amdhsa--gfx1250:gfx1250-b0-specific+"); + run("non-gfx1250 ISA is unchanged", + add_gfx1250_stepping_feature(kGfx942Isa, true) == kGfx942Isa); +} + // ASIC revision query failure -> revision_valid false and gate blocks, even for // gfx1250. The query is still attempted exactly once. void test_AsicRevisionQueryFailure() { @@ -250,7 +309,7 @@ void test_DistinctHandlesIndependent() { a2.gfx_target == "gfx942" && gate_allows_hotswap(a2) == false); } -} // namespace +} // namespace int main() { test_Gfx1250A0Passes(); @@ -258,6 +317,10 @@ int main() { test_NonGfx1250Blocks(); test_NearMissTargetBlocks(); test_Gfx1250NonA0Blocks(); + test_EntryTrampolineFlagAllowsGfx1250NonA0(); + test_EntryTrampolineFlagAllowsGfx1250UnknownRevision(); + test_EntryTrampolineFlagBlocksOtherTargets(); + test_AddGfx1250SteppingFeature(); test_AsicRevisionQueryFailure(); test_ResultIsCachedPerHandle(); test_DistinctHandlesIndependent(); From 4a1d65ee0c09bec1696adb2b615de78fd0b626b8 Mon Sep 17 00:00:00 2001 From: harsh-amd <149193359+harsh-amd@users.noreply.github.com> Date: Sun, 21 Jun 2026 13:18:05 -0500 Subject: [PATCH 03/16] hotswap: test HSA tools loader rewrite gating --- projects/hotswap/CMakeLists.txt | 15 + .../hotswap/tests/hotswap_loader_test.cpp | 519 ++++++++++++++++++ 2 files changed, 534 insertions(+) create mode 100644 projects/hotswap/tests/hotswap_loader_test.cpp diff --git a/projects/hotswap/CMakeLists.txt b/projects/hotswap/CMakeLists.txt index d71ae9143bee..a4de359351e4 100644 --- a/projects/hotswap/CMakeLists.txt +++ b/projects/hotswap/CMakeLists.txt @@ -83,6 +83,21 @@ target_include_directories(hotswap_tool_test PRIVATE ${HSA_RUNTIME_INC}/.. ) +# Loader-path tests for libhsa-hotswap.so. The test includes hotswap_tool.cpp +# directly and supplies stub HSA/COMGR entry points, so it does not link the +# real HSA runtime or COMGR. +add_executable(hotswap_loader_test + tests/hotswap_loader_test.cpp + hotswap_gfx_query.cpp + ${HOTSWAP_PLATFORM_IO_SRC} +) +target_include_directories(hotswap_loader_test PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${HSA_RUNTIME_INC} + ${HSA_RUNTIME_INC}/.. +) + enable_testing() add_test(NAME hotswap_test COMMAND hotswap_test) add_test(NAME hotswap_tool_test COMMAND hotswap_tool_test) +add_test(NAME hotswap_loader_test COMMAND hotswap_loader_test) diff --git a/projects/hotswap/tests/hotswap_loader_test.cpp b/projects/hotswap/tests/hotswap_loader_test.cpp new file mode 100644 index 000000000000..8a5d575d4fc1 --- /dev/null +++ b/projects/hotswap/tests/hotswap_loader_test.cpp @@ -0,0 +1,519 @@ +//===- hotswap_loader_test.cpp - Tests for HSA tools loader path ----------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This test includes hotswap_tool.cpp directly so it can drive the wrapped HSA +// API-table entry points without a GPU or a real HSA runtime. The original HSA +// functions and COMGR retarget call are replaced with stubs below. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifndef _WIN32 +#include +#include +#endif + +namespace { + +struct FakeEnv { + std::string agent_isa = "amdgcn-amd-amdhsa--gfx1250"; + bool asic_rev_ok = true; + uint32_t asic_revision = 0; + + int retarget_calls = 0; + int load_calls = 0; + int memory_reader_create_calls = 0; + int file_reader_create_calls = 0; + int reader_destroy_calls = 0; + int isa_query_calls = 0; + int asic_rev_calls = 0; + + int retarget_status = 0; + hsa_status_t load_status = HSA_STATUS_SUCCESS; + hsa_status_t reader_create_status = HSA_STATUS_SUCCESS; + + uint64_t next_reader_handle = 100; + uint64_t last_loaded_reader = 0; + uint64_t last_created_reader = 0; + uint64_t retarget_reader_handle = 0; + + std::string retarget_source_isa; + std::string retarget_target_isa; + void *retarget_output_ptr = nullptr; +}; + +FakeEnv g_env; + +} // namespace + +#include "../hotswap_tool.cpp" + +namespace rocr::hotswap { + +int RetargetCodeObject(const void *elf_data, size_t elf_size, + const char *source_isa, const char *target_isa, + void **out_data, size_t *out_size) { + ++g_env.retarget_calls; + g_env.retarget_source_isa = source_isa ? source_isa : ""; + g_env.retarget_target_isa = target_isa ? target_isa : ""; + + if (!out_data || !out_size) { + return -1; + } + *out_data = const_cast(elf_data); + *out_size = elf_size; + + if (g_env.retarget_status != 0) { + return g_env.retarget_status; + } + + void *copy = std::malloc(elf_size); + if (!copy) { + return -1; + } + std::memcpy(copy, elf_data, elf_size); + g_env.retarget_output_ptr = copy; + *out_data = copy; + *out_size = elf_size; + return 0; +} + +} // namespace rocr::hotswap + +extern "C" { + +hsa_status_t hsa_agent_iterate_isas(hsa_agent_t /*agent*/, + hsa_status_t (*callback)(hsa_isa_t isa, + void *data), + void *data) { + ++g_env.isa_query_calls; + hsa_isa_t isa{}; + isa.handle = 1; + const hsa_status_t status = callback(isa, data); + return status == HSA_STATUS_INFO_BREAK ? HSA_STATUS_SUCCESS : status; +} + +hsa_status_t hsa_isa_get_info_alt(hsa_isa_t /*isa*/, hsa_isa_info_t attribute, + void *value) { + if (attribute == HSA_ISA_INFO_NAME_LENGTH) { + *static_cast(value) = + static_cast(g_env.agent_isa.size() + 1); + return HSA_STATUS_SUCCESS; + } + if (attribute == HSA_ISA_INFO_NAME) { + std::memcpy(value, g_env.agent_isa.c_str(), g_env.agent_isa.size() + 1); + return HSA_STATUS_SUCCESS; + } + return HSA_STATUS_ERROR; +} + +hsa_status_t hsa_agent_get_info(hsa_agent_t /*agent*/, + hsa_agent_info_t attribute, void *value) { + if (attribute == + static_cast(HSA_AMD_AGENT_INFO_ASIC_REVISION)) { + ++g_env.asic_rev_calls; + if (!g_env.asic_rev_ok) { + return HSA_STATUS_ERROR; + } + *static_cast(value) = g_env.asic_revision; + return HSA_STATUS_SUCCESS; + } + return HSA_STATUS_ERROR; +} + +} // extern "C" + +namespace { + +constexpr const char *kGfx1250Isa = "amdgcn-amd-amdhsa--gfx1250"; +constexpr const char *kGfx942Isa = "amdgcn-amd-amdhsa--gfx942"; +constexpr const char *kGfx1250B0Isa = + "amdgcn-amd-amdhsa--gfx1250:gfx1250-b0-specific+"; +constexpr const char *kGfx1250A0Isa = + "amdgcn-amd-amdhsa--gfx1250:gfx1250-b0-specific-"; + +int tests_passed = 0; +int tests_failed = 0; + +void check(bool cond, const char *name) { + if (cond) { + ++tests_passed; + std::printf(" PASS: %s\n", name); + } else { + ++tests_failed; + std::fprintf(stderr, " FAIL: %s\n", name); + } +} + +void set_entry_trampoline_env(const char *value) { +#ifdef _WIN32 + _putenv_s("AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES", value ? value : ""); +#else + if (value) { + setenv("AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES", value, 1); + } else { + unsetenv("AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES"); + } +#endif +} + +void reset_state() { + { + std::scoped_lock lock(g_reader_map_mutex); + g_reader_map.clear(); + } + { + std::scoped_lock lock(g_rewritten_elfs_mutex); + g_rewritten_elfs.clear(); + } + g_core_table = nullptr; + g_orig_reader_create_from_memory = nullptr; + g_orig_reader_create_from_file = nullptr; + g_orig_reader_destroy = nullptr; + g_orig_load_agent_code_object = nullptr; + rocr::hotswap::reset_gfx_revision_cache(); + set_entry_trampoline_env(nullptr); + g_env = FakeEnv{}; +} + +size_t align4(size_t value) { return (value + 3) & ~size_t{3}; } + +std::vector make_code_object(const std::string &isa) { + constexpr size_t EhdrOff = 0; + constexpr size_t PhdrOff = sizeof(Elf64_Ehdr); + constexpr size_t NoteOff = 0x100; + const std::string metadata = "---\namdhsa.target: '" + isa + "'\n"; + constexpr char Name[] = "AMD"; + const size_t NameSize = sizeof(Name); + const size_t DescSize = metadata.size(); + const size_t NoteSize = sizeof(Elf64_Nhdr) + align4(NameSize) + align4(DescSize); + std::vector elf(NoteOff + NoteSize, 0); + + auto *ehdr = reinterpret_cast(elf.data() + EhdrOff); + std::memcpy(ehdr->e_ident, ELFMAG, SELFMAG); + ehdr->e_ident[EI_CLASS] = ELFCLASS64; + ehdr->e_ident[EI_DATA] = ELFDATA2LSB; + ehdr->e_ident[EI_VERSION] = EV_CURRENT; + ehdr->e_type = ET_DYN; + ehdr->e_machine = EM_AMDGPU; + ehdr->e_version = EV_CURRENT; + ehdr->e_phoff = PhdrOff; + ehdr->e_ehsize = sizeof(Elf64_Ehdr); + ehdr->e_phentsize = sizeof(Elf64_Phdr); + ehdr->e_phnum = 1; + + auto *phdr = reinterpret_cast(elf.data() + PhdrOff); + phdr->p_type = PT_NOTE; + phdr->p_offset = NoteOff; + phdr->p_filesz = NoteSize; + + auto *nhdr = reinterpret_cast(elf.data() + NoteOff); + nhdr->n_namesz = NameSize; + nhdr->n_descsz = DescSize; + nhdr->n_type = 32; // NT_AMDGPU_METADATA + + size_t cursor = NoteOff + sizeof(Elf64_Nhdr); + std::memcpy(elf.data() + cursor, Name, NameSize); + cursor += align4(NameSize); + std::memcpy(elf.data() + cursor, metadata.data(), metadata.size()); + return elf; +} + +hsa_status_t HSA_API fake_reader_create_from_memory( + const void * /*code_object*/, size_t /*size*/, + hsa_code_object_reader_t *code_object_reader) { + ++g_env.memory_reader_create_calls; + if (g_env.reader_create_status != HSA_STATUS_SUCCESS) { + return g_env.reader_create_status; + } + code_object_reader->handle = g_env.next_reader_handle++; + g_env.last_created_reader = code_object_reader->handle; + if (g_env.retarget_output_ptr) { + g_env.retarget_reader_handle = code_object_reader->handle; + } + return HSA_STATUS_SUCCESS; +} + +hsa_status_t HSA_API fake_reader_create_from_file( + hsa_file_t /*file*/, hsa_code_object_reader_t *code_object_reader) { + ++g_env.file_reader_create_calls; + code_object_reader->handle = g_env.next_reader_handle++; + g_env.last_created_reader = code_object_reader->handle; + return HSA_STATUS_SUCCESS; +} + +hsa_status_t HSA_API +fake_reader_destroy(hsa_code_object_reader_t code_object_reader) { + ++g_env.reader_destroy_calls; + if (code_object_reader.handle == g_env.retarget_reader_handle) { + g_env.retarget_reader_handle = 0; + } + return HSA_STATUS_SUCCESS; +} + +hsa_status_t HSA_API fake_load_agent_code_object( + hsa_executable_t /*executable*/, hsa_agent_t /*agent*/, + hsa_code_object_reader_t code_object_reader, const char * /*options*/, + hsa_loaded_code_object_t *loaded_code_object) { + ++g_env.load_calls; + g_env.last_loaded_reader = code_object_reader.handle; + if (loaded_code_object) { + loaded_code_object->handle = 0xC0DE; + } + return g_env.load_status; +} + +CoreApiTable install_tool() { + CoreApiTable core{}; + core.hsa_code_object_reader_create_from_memory_fn = + fake_reader_create_from_memory; + core.hsa_code_object_reader_create_from_file_fn = fake_reader_create_from_file; + core.hsa_code_object_reader_destroy_fn = fake_reader_destroy; + core.hsa_executable_load_agent_code_object_fn = fake_load_agent_code_object; + + HsaApiTable table{}; + table.core_ = &core; + check(OnLoad(&table, 0, 0, nullptr), "OnLoad installs with complete table"); + return core; +} + +hsa_agent_t fake_agent() { + hsa_agent_t agent{}; + agent.handle = 42; + return agent; +} + +hsa_code_object_reader_t create_memory_reader(CoreApiTable &core, + const std::vector &elf) { + hsa_code_object_reader_t reader{}; + const hsa_status_t status = core.hsa_code_object_reader_create_from_memory_fn( + elf.data(), elf.size(), &reader); + check(status == HSA_STATUS_SUCCESS, "memory reader creation succeeds"); + return reader; +} + +hsa_status_t load_reader(CoreApiTable &core, hsa_code_object_reader_t reader) { + hsa_loaded_code_object_t loaded{}; + return core.hsa_executable_load_agent_code_object_fn( + hsa_executable_t{}, fake_agent(), reader, nullptr, &loaded); +} + +void test_OnLoadInstallsWrappers() { + std::printf("TEST OnLoadInstallsWrappers...\n"); + reset_state(); + CoreApiTable core = install_tool(); + check(core.hsa_code_object_reader_create_from_memory_fn != + fake_reader_create_from_memory, + "memory reader entry point is wrapped"); + check(core.hsa_code_object_reader_create_from_file_fn != + fake_reader_create_from_file, + "file reader entry point is wrapped"); + check(core.hsa_executable_load_agent_code_object_fn != + fake_load_agent_code_object, + "load entry point is wrapped"); +} + +void test_ReadElfIsaNoteFindsMetadata() { + std::printf("TEST ReadElfIsaNoteFindsMetadata...\n"); + reset_state(); + std::vector elf = make_code_object(kGfx1250Isa); + check(read_elf_isa_note(elf.data(), elf.size()) == kGfx1250Isa, + "valid AMDGPU metadata note yields ISA"); + const uint8_t not_elf[] = {0x7f, 'E', 'L', 'F'}; + check(read_elf_isa_note(not_elf, sizeof(not_elf)).empty(), + "malformed ELF yields empty ISA"); +} + +void test_FlagUnsetLoadsOriginalForNonA0Gfx1250() { + std::printf("TEST FlagUnsetLoadsOriginalForNonA0Gfx1250...\n"); + reset_state(); + g_env.agent_isa = kGfx1250Isa; + g_env.asic_revision = 1; + CoreApiTable core = install_tool(); + std::vector elf = make_code_object(kGfx1250Isa); + const hsa_code_object_reader_t reader = create_memory_reader(core, elf); + check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); + check(g_env.retarget_calls == 0, "flag unset skips non-A0 gfx1250 rewrite"); + check(g_env.last_loaded_reader == reader.handle, + "original reader is loaded when flag is unset"); +} + +void test_FlagZeroLoadsOriginalForNonA0Gfx1250() { + std::printf("TEST FlagZeroLoadsOriginalForNonA0Gfx1250...\n"); + reset_state(); + set_entry_trampoline_env("0"); + g_env.agent_isa = kGfx1250Isa; + g_env.asic_revision = 1; + CoreApiTable core = install_tool(); + std::vector elf = make_code_object(kGfx1250Isa); + const hsa_code_object_reader_t reader = create_memory_reader(core, elf); + check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); + check(g_env.retarget_calls == 0, "flag value 0 disables entry trampolines"); + check(g_env.last_loaded_reader == reader.handle, + "original reader is loaded when flag is 0"); +} + +void test_FlagOneRetargetsB0ToB0() { + std::printf("TEST FlagOneRetargetsB0ToB0...\n"); + reset_state(); + set_entry_trampoline_env("1"); + g_env.agent_isa = kGfx1250Isa; + g_env.asic_revision = 1; + CoreApiTable core = install_tool(); + std::vector elf = make_code_object(kGfx1250Isa); + const hsa_code_object_reader_t reader = create_memory_reader(core, elf); + check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); + check(g_env.retarget_calls == 1, "flag 1 routes B0 gfx1250 through COMGR"); + check(g_env.retarget_source_isa == kGfx1250B0Isa, + "source ISA is tagged as B0"); + check(g_env.retarget_target_isa == kGfx1250B0Isa, + "non-A0 target ISA is tagged as B0"); + check(g_env.last_loaded_reader != reader.handle, + "rewritten reader is loaded instead of original reader"); + check(g_rewritten_elfs.size() == 1, + "rewritten ELF is retained after successful load"); +} + +void test_A0RetargetsWithoutFlag() { + std::printf("TEST A0RetargetsWithoutFlag...\n"); + reset_state(); + g_env.agent_isa = kGfx1250Isa; + g_env.asic_revision = 0; + CoreApiTable core = install_tool(); + std::vector elf = make_code_object(kGfx1250Isa); + const hsa_code_object_reader_t reader = create_memory_reader(core, elf); + check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); + check(g_env.retarget_calls == 1, "A0 gfx1250 keeps default rewrite path"); + check(g_env.retarget_source_isa == kGfx1250B0Isa, + "source code object ISA is tagged as B0"); + check(g_env.retarget_target_isa == kGfx1250A0Isa, + "A0 agent ISA is tagged as A0"); +} + +void test_FlagOneRetargetsUnknownRevisionAsB0Target() { + std::printf("TEST FlagOneRetargetsUnknownRevisionAsB0Target...\n"); + reset_state(); + set_entry_trampoline_env("1"); + g_env.agent_isa = kGfx1250Isa; + g_env.asic_rev_ok = false; + CoreApiTable core = install_tool(); + std::vector elf = make_code_object(kGfx1250Isa); + const hsa_code_object_reader_t reader = create_memory_reader(core, elf); + check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); + check(g_env.retarget_calls == 1, + "flag 1 routes unknown-revision gfx1250 through COMGR"); + check(g_env.retarget_target_isa == kGfx1250B0Isa, + "unknown revision is treated as non-A0 for target suffix"); +} + +void test_FlagOneBlocksNonGfx1250() { + std::printf("TEST FlagOneBlocksNonGfx1250...\n"); + reset_state(); + set_entry_trampoline_env("1"); + g_env.agent_isa = kGfx942Isa; + g_env.asic_revision = 0; + CoreApiTable core = install_tool(); + std::vector elf = make_code_object(kGfx942Isa); + const hsa_code_object_reader_t reader = create_memory_reader(core, elf); + check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); + check(g_env.retarget_calls == 0, + "entry trampoline flag does not enable non-gfx1250 rewrite"); + check(g_env.last_loaded_reader == reader.handle, + "non-gfx1250 uses original reader"); +} + +void test_RetargetFailureFallsBackToOriginalReader() { + std::printf("TEST RetargetFailureFallsBackToOriginalReader...\n"); + reset_state(); + set_entry_trampoline_env("1"); + g_env.agent_isa = kGfx1250Isa; + g_env.asic_revision = 1; + g_env.retarget_status = -1; + CoreApiTable core = install_tool(); + std::vector elf = make_code_object(kGfx1250Isa); + const hsa_code_object_reader_t reader = create_memory_reader(core, elf); + check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "fallback load succeeds"); + check(g_env.retarget_calls == 1, "COMGR retarget was attempted"); + check(g_env.last_loaded_reader == reader.handle, + "retarget failure falls back to original reader"); +} + +#ifndef _WIN32 +void test_FileReaderPathKeepsFileBackedReaderAlive() { + std::printf("TEST FileReaderPathKeepsFileBackedReaderAlive...\n"); + reset_state(); + set_entry_trampoline_env("1"); + g_env.agent_isa = kGfx942Isa; + g_env.asic_revision = 0; + CoreApiTable core = install_tool(); + + std::vector elf = make_code_object(kGfx942Isa); + char path[] = "/tmp/hotswap-loader-test-XXXXXX"; + const int fd = mkstemp(path); + check(fd >= 0, "temporary code object file is created"); + if (fd < 0) { + return; + } + unlink(path); + const ssize_t written = write(fd, elf.data(), elf.size()); + check(written == static_cast(elf.size()), + "temporary code object file is written"); + lseek(fd, 0, SEEK_SET); + + hsa_code_object_reader_t reader{}; + const hsa_status_t create_status = + core.hsa_code_object_reader_create_from_file_fn(fd, &reader); + close(fd); + check(create_status == HSA_STATUS_SUCCESS, "file reader creation succeeds"); + check(g_env.memory_reader_create_calls == 1, + "file reader path converts file contents to a memory reader"); + + check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); + check(g_env.retarget_calls == 0, "non-gfx1250 file reader is not rewritten"); + check(g_env.last_loaded_reader == reader.handle, + "file-backed original reader is loaded"); + check(core.hsa_code_object_reader_destroy_fn(reader) == HSA_STATUS_SUCCESS, + "file-backed reader destroy succeeds"); + + bool retained = false; + { + std::scoped_lock lock(g_reader_map_mutex); + const auto it = g_reader_map.find(reader.handle); + retained = it != g_reader_map.end() && it->second.from_file; + } + check(retained, "file-backed reader bytes stay alive after successful load"); +} +#endif + +} // namespace + +int main() { + test_OnLoadInstallsWrappers(); + test_ReadElfIsaNoteFindsMetadata(); + test_FlagUnsetLoadsOriginalForNonA0Gfx1250(); + test_FlagZeroLoadsOriginalForNonA0Gfx1250(); + test_FlagOneRetargetsB0ToB0(); + test_A0RetargetsWithoutFlag(); + test_FlagOneRetargetsUnknownRevisionAsB0Target(); + test_FlagOneBlocksNonGfx1250(); + test_RetargetFailureFallsBackToOriginalReader(); +#ifndef _WIN32 + test_FileReaderPathKeepsFileBackedReaderAlive(); +#endif + reset_state(); + + std::printf("\n%d passed, %d failed\n", tests_passed, tests_failed); + return tests_failed ? EXIT_FAILURE : EXIT_SUCCESS; +} From ad38f93ef78dc07c4aac55a75f868288e591c204 Mon Sep 17 00:00:00 2001 From: harsh-amd <149193359+harsh-amd@users.noreply.github.com> Date: Fri, 26 Jun 2026 12:59:50 -0700 Subject: [PATCH 04/16] hotswap: expand entry trampoline gfx12.5 gating --- projects/hotswap/hotswap_gfx_query.cpp | 13 +- projects/hotswap/hotswap_gfx_query.hpp | 16 +- projects/hotswap/hotswap_tool.cpp | 22 ++- .../hotswap/tests/hotswap_loader_test.cpp | 151 ++++++++++++------ projects/hotswap/tests/hotswap_tool_test.cpp | 55 ++++++- 5 files changed, 189 insertions(+), 68 deletions(-) diff --git a/projects/hotswap/hotswap_gfx_query.cpp b/projects/hotswap/hotswap_gfx_query.cpp index 283a8c68fdd7..bd6200edd146 100644 --- a/projects/hotswap/hotswap_gfx_query.cpp +++ b/projects/hotswap/hotswap_gfx_query.cpp @@ -51,7 +51,9 @@ std::string extract_gfx_target(const std::string &isa_name) { if (pos == std::string::npos) return {}; auto end = std::find_if_not(isa_name.begin() + pos, isa_name.end(), - [](unsigned char c) { return std::isalnum(c); }); + [](unsigned char c) { + return std::isalnum(c) || c == '-' || c == '_'; + }); return isa_name.substr(pos, end - isa_name.begin() - pos); } @@ -60,6 +62,13 @@ std::mutex g_cache_mutex; std::unordered_map g_cache; } // namespace +bool is_gfx12_5_entry_trampoline_target(const std::string &gfx_target) { + constexpr char Gfx125Prefix[] = "gfx125"; + return gfx_target == "gfx12-5-generic" || + (gfx_target.size() > sizeof(Gfx125Prefix) - 1 && + gfx_target.rfind(Gfx125Prefix, 0) == 0); +} + AgentGfxRevision query_agent_gfx_revision(hsa_agent_t agent) { { std::scoped_lock lock(g_cache_mutex); @@ -103,7 +112,7 @@ bool gate_allows_hotswap(const AgentGfxRevision &gfx) { } bool gate_allows_entry_trampolines(const AgentGfxRevision &gfx) { - return gfx.gfx_target == "gfx1250"; + return is_gfx12_5_entry_trampoline_target(gfx.gfx_target); } bool gate_allows_hotswap_rewrite(const AgentGfxRevision &gfx, diff --git a/projects/hotswap/hotswap_gfx_query.hpp b/projects/hotswap/hotswap_gfx_query.hpp index 9cfefc9ecc9f..26944dd8ff37 100644 --- a/projects/hotswap/hotswap_gfx_query.hpp +++ b/projects/hotswap/hotswap_gfx_query.hpp @@ -33,10 +33,10 @@ struct AgentGfxRevision { // "amdgcn-amd-amdhsa--gfx1250:sramecc+:xnack-"), or an empty string on failure. std::string get_agent_isa_name(hsa_agent_t agent); -// Extracts the gfx target (e.g. "gfx1250") from a full HSA ISA name. Returns an -// empty string when no gfx target is present. The returned token stops at the -// first non-alphanumeric character so feature suffixes (":sramecc+", etc.) are -// dropped. +// Extracts the gfx target (e.g. "gfx1250" or "gfx12-5-generic") from a full +// HSA ISA name. Returns an empty string when no gfx target is present. The +// returned token preserves hyphenated processor names and stops before feature +// suffixes (":sramecc+", etc.). std::string extract_gfx_target(const std::string &isa_name); // Queries the agent's gfx target and ASIC revision via the HSA runtime. The @@ -53,10 +53,14 @@ void reset_gfx_revision_cache(); // successfully queried). bool gate_allows_hotswap(const AgentGfxRevision &gfx); -// Entry trampolines are a separate, opt-in rewrite that applies to gfx1250 -// regardless of stepping when AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES is enabled. +// Entry trampolines are a separate, opt-in rewrite that applies to the gfx12.5 +// family (gfx125* and gfx12-5-generic) when +// AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES is enabled. bool gate_allows_entry_trampolines(const AgentGfxRevision &gfx); +// True when a parsed gfx target is in the COMGR entry-trampoline family. +bool is_gfx12_5_entry_trampoline_target(const std::string &gfx_target); + // Combined activation policy for libhsa-hotswap.so. bool gate_allows_hotswap_rewrite(const AgentGfxRevision &gfx, bool entry_trampolines_requested); diff --git a/projects/hotswap/hotswap_tool.cpp b/projects/hotswap/hotswap_tool.cpp index 26a92d2f142e..90e0d875d912 100644 --- a/projects/hotswap/hotswap_tool.cpp +++ b/projects/hotswap/hotswap_tool.cpp @@ -47,6 +47,7 @@ using rocr::hotswap::extract_gfx_target; using rocr::hotswap::gate_allows_hotswap; using rocr::hotswap::gate_allows_hotswap_rewrite; using rocr::hotswap::get_agent_isa_name; +using rocr::hotswap::is_gfx12_5_entry_trampoline_target; using rocr::hotswap::query_agent_gfx_revision; using ByteVec = std::shared_ptr>; @@ -266,11 +267,11 @@ hsa_status_t load_rewritten_reader(hsa_executable_t executable, hsa_status_t try_retarget_and_load(hsa_executable_t executable, hsa_agent_t agent, - hsa_code_object_reader_t code_object_reader, const char *options, hsa_loaded_code_object_t *loaded_code_object, const ByteVec &local_bytes, - const AgentGfxRevision &gfx) { + const AgentGfxRevision &gfx, + bool entry_trampolines) { // Source ISA from the code object, target ISA from the running GPU. std::string source_isa = rocr::hotswap::GetCodeObjectIsaName( local_bytes->data(), local_bytes->size()); @@ -282,8 +283,17 @@ hsa_status_t try_retarget_and_load(hsa_executable_t executable, return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; } - if (extract_gfx_target(source_isa) == "gfx1250" && - extract_gfx_target(target_isa) == "gfx1250") { + std::string source_gfx = extract_gfx_target(source_isa); + std::string target_gfx = extract_gfx_target(target_isa); + if (entry_trampolines && source_gfx != target_gfx && + (source_gfx == "gfx12-5-generic" || target_gfx == "gfx12-5-generic") && + is_gfx12_5_entry_trampoline_target(source_gfx) && + is_gfx12_5_entry_trampoline_target(target_gfx)) { + target_isa = source_isa; + target_gfx = source_gfx; + } + + if (source_gfx == "gfx1250" && target_gfx == "gfx1250") { source_isa = add_gfx1250_stepping_feature(source_isa, true); target_isa = add_gfx1250_stepping_feature(target_isa, !gate_allows_hotswap(gfx)); @@ -347,8 +357,8 @@ hsa_status_t HSA_API hotswap_load_agent_code_object( } const hsa_status_t status = - try_retarget_and_load(executable, agent, code_object_reader, options, - loaded_code_object, local_bytes, gfx); + try_retarget_and_load(executable, agent, options, loaded_code_object, + local_bytes, gfx, entry_trampolines); if (status == HSA_STATUS_SUCCESS) { return status; } diff --git a/projects/hotswap/tests/hotswap_loader_test.cpp b/projects/hotswap/tests/hotswap_loader_test.cpp index 8a5d575d4fc1..089c08ae363d 100644 --- a/projects/hotswap/tests/hotswap_loader_test.cpp +++ b/projects/hotswap/tests/hotswap_loader_test.cpp @@ -6,7 +6,7 @@ // // This test includes hotswap_tool.cpp directly so it can drive the wrapped HSA // API-table entry points without a GPU or a real HSA runtime. The original HSA -// functions and COMGR retarget call are replaced with stubs below. +// functions and HotSwap COMGR calls are replaced with stubs below. // //===----------------------------------------------------------------------===// @@ -62,6 +62,32 @@ FakeEnv g_env; namespace rocr::hotswap { +std::string GetCodeObjectIsaName(const void *elf_data, size_t elf_size) { + if (!elf_data || elf_size == 0) { + return {}; + } + + constexpr char IsaPrefix[] = "amdgcn-amd-amdhsa--"; + const char *begin = static_cast(elf_data); + const char *end = begin + elf_size; + const char *match = std::search(begin, end, IsaPrefix, + IsaPrefix + sizeof(IsaPrefix) - 1); + if (match == end) { + return {}; + } + + const char *limit = match; + while (limit != end) { + const char c = *limit; + if (c == '\0' || c == '\'' || c == '"' || c == '\n' || c == '\r' || + c == ' ' || c == '\t') { + break; + } + ++limit; + } + return std::string(match, limit); +} + int RetargetCodeObject(const void *elf_data, size_t elf_size, const char *source_isa, const char *target_isa, void **out_data, size_t *out_size) { @@ -139,6 +165,9 @@ namespace { constexpr const char *kGfx1250Isa = "amdgcn-amd-amdhsa--gfx1250"; constexpr const char *kGfx942Isa = "amdgcn-amd-amdhsa--gfx942"; +constexpr const char *kGfx1251Isa = "amdgcn-amd-amdhsa--gfx1251"; +constexpr const char *kGfx12_5GenericIsa = + "amdgcn-amd-amdhsa--gfx12-5-generic"; constexpr const char *kGfx1250B0Isa = "amdgcn-amd-amdhsa--gfx1250:gfx1250-b0-specific+"; constexpr const char *kGfx1250A0Isa = @@ -188,47 +217,9 @@ void reset_state() { g_env = FakeEnv{}; } -size_t align4(size_t value) { return (value + 3) & ~size_t{3}; } - std::vector make_code_object(const std::string &isa) { - constexpr size_t EhdrOff = 0; - constexpr size_t PhdrOff = sizeof(Elf64_Ehdr); - constexpr size_t NoteOff = 0x100; const std::string metadata = "---\namdhsa.target: '" + isa + "'\n"; - constexpr char Name[] = "AMD"; - const size_t NameSize = sizeof(Name); - const size_t DescSize = metadata.size(); - const size_t NoteSize = sizeof(Elf64_Nhdr) + align4(NameSize) + align4(DescSize); - std::vector elf(NoteOff + NoteSize, 0); - - auto *ehdr = reinterpret_cast(elf.data() + EhdrOff); - std::memcpy(ehdr->e_ident, ELFMAG, SELFMAG); - ehdr->e_ident[EI_CLASS] = ELFCLASS64; - ehdr->e_ident[EI_DATA] = ELFDATA2LSB; - ehdr->e_ident[EI_VERSION] = EV_CURRENT; - ehdr->e_type = ET_DYN; - ehdr->e_machine = EM_AMDGPU; - ehdr->e_version = EV_CURRENT; - ehdr->e_phoff = PhdrOff; - ehdr->e_ehsize = sizeof(Elf64_Ehdr); - ehdr->e_phentsize = sizeof(Elf64_Phdr); - ehdr->e_phnum = 1; - - auto *phdr = reinterpret_cast(elf.data() + PhdrOff); - phdr->p_type = PT_NOTE; - phdr->p_offset = NoteOff; - phdr->p_filesz = NoteSize; - - auto *nhdr = reinterpret_cast(elf.data() + NoteOff); - nhdr->n_namesz = NameSize; - nhdr->n_descsz = DescSize; - nhdr->n_type = 32; // NT_AMDGPU_METADATA - - size_t cursor = NoteOff + sizeof(Elf64_Nhdr); - std::memcpy(elf.data() + cursor, Name, NameSize); - cursor += align4(NameSize); - std::memcpy(elf.data() + cursor, metadata.data(), metadata.size()); - return elf; + return std::vector(metadata.begin(), metadata.end()); } hsa_status_t HSA_API fake_reader_create_from_memory( @@ -325,14 +316,15 @@ void test_OnLoadInstallsWrappers() { "load entry point is wrapped"); } -void test_ReadElfIsaNoteFindsMetadata() { - std::printf("TEST ReadElfIsaNoteFindsMetadata...\n"); +void test_GetCodeObjectIsaNameFindsMetadata() { + std::printf("TEST GetCodeObjectIsaNameFindsMetadata...\n"); reset_state(); std::vector elf = make_code_object(kGfx1250Isa); - check(read_elf_isa_note(elf.data(), elf.size()) == kGfx1250Isa, - "valid AMDGPU metadata note yields ISA"); + check(rocr::hotswap::GetCodeObjectIsaName(elf.data(), elf.size()) == + kGfx1250Isa, + "metadata payload yields ISA"); const uint8_t not_elf[] = {0x7f, 'E', 'L', 'F'}; - check(read_elf_isa_note(not_elf, sizeof(not_elf)).empty(), + check(rocr::hotswap::GetCodeObjectIsaName(not_elf, sizeof(not_elf)).empty(), "malformed ELF yields empty ISA"); } @@ -386,6 +378,60 @@ void test_FlagOneRetargetsB0ToB0() { "rewritten ELF is retained after successful load"); } +void test_FlagOneRetargetsGfx125FamilyWithoutSteppingSuffix() { + std::printf("TEST FlagOneRetargetsGfx125FamilyWithoutSteppingSuffix...\n"); + reset_state(); + set_entry_trampoline_env("1"); + g_env.agent_isa = kGfx1251Isa; + g_env.asic_revision = 1; + CoreApiTable core = install_tool(); + std::vector elf = make_code_object(kGfx1251Isa); + const hsa_code_object_reader_t reader = create_memory_reader(core, elf); + check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); + check(g_env.retarget_calls == 1, "flag 1 routes gfx1251 through COMGR"); + check(g_env.retarget_source_isa == kGfx1251Isa, + "gfx1251 source ISA is not tagged with gfx1250 stepping feature"); + check(g_env.retarget_target_isa == kGfx1251Isa, + "gfx1251 target ISA is not tagged with gfx1250 stepping feature"); +} + +void test_FlagOneRetargetsGfx12_5GenericWithoutSteppingSuffix() { + std::printf("TEST FlagOneRetargetsGfx12_5GenericWithoutSteppingSuffix...\n"); + reset_state(); + set_entry_trampoline_env("1"); + g_env.agent_isa = kGfx12_5GenericIsa; + g_env.asic_revision = 1; + CoreApiTable core = install_tool(); + std::vector elf = make_code_object(kGfx12_5GenericIsa); + const hsa_code_object_reader_t reader = create_memory_reader(core, elf); + check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); + check(g_env.retarget_calls == 1, + "flag 1 routes gfx12-5-generic through COMGR"); + check(g_env.retarget_source_isa == kGfx12_5GenericIsa, + "generic source ISA is not tagged with gfx1250 stepping feature"); + check(g_env.retarget_target_isa == kGfx12_5GenericIsa, + "generic target ISA is not tagged with gfx1250 stepping feature"); +} + +void test_FlagOneRetargetsGenericSourceOnConcreteGfx125Agent() { + std::printf( + "TEST FlagOneRetargetsGenericSourceOnConcreteGfx125Agent...\n"); + reset_state(); + set_entry_trampoline_env("1"); + g_env.agent_isa = kGfx1251Isa; + g_env.asic_revision = 1; + CoreApiTable core = install_tool(); + std::vector elf = make_code_object(kGfx12_5GenericIsa); + const hsa_code_object_reader_t reader = create_memory_reader(core, elf); + check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); + check(g_env.retarget_calls == 1, + "flag 1 routes generic source on concrete gfx125 agent through COMGR"); + check(g_env.retarget_source_isa == kGfx12_5GenericIsa, + "generic source ISA is preserved"); + check(g_env.retarget_target_isa == kGfx12_5GenericIsa, + "generic target ISA is preserved to avoid processor retargeting"); +} + void test_A0RetargetsWithoutFlag() { std::printf("TEST A0RetargetsWithoutFlag...\n"); reset_state(); @@ -418,8 +464,8 @@ void test_FlagOneRetargetsUnknownRevisionAsB0Target() { "unknown revision is treated as non-A0 for target suffix"); } -void test_FlagOneBlocksNonGfx1250() { - std::printf("TEST FlagOneBlocksNonGfx1250...\n"); +void test_FlagOneBlocksNonGfx12_5() { + std::printf("TEST FlagOneBlocksNonGfx12_5...\n"); reset_state(); set_entry_trampoline_env("1"); g_env.agent_isa = kGfx942Isa; @@ -429,9 +475,9 @@ void test_FlagOneBlocksNonGfx1250() { const hsa_code_object_reader_t reader = create_memory_reader(core, elf); check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); check(g_env.retarget_calls == 0, - "entry trampoline flag does not enable non-gfx1250 rewrite"); + "entry trampoline flag does not enable non-gfx12.5 rewrite"); check(g_env.last_loaded_reader == reader.handle, - "non-gfx1250 uses original reader"); + "non-gfx12.5 uses original reader"); } void test_RetargetFailureFallsBackToOriginalReader() { @@ -501,13 +547,16 @@ void test_FileReaderPathKeepsFileBackedReaderAlive() { int main() { test_OnLoadInstallsWrappers(); - test_ReadElfIsaNoteFindsMetadata(); + test_GetCodeObjectIsaNameFindsMetadata(); test_FlagUnsetLoadsOriginalForNonA0Gfx1250(); test_FlagZeroLoadsOriginalForNonA0Gfx1250(); test_FlagOneRetargetsB0ToB0(); + test_FlagOneRetargetsGfx125FamilyWithoutSteppingSuffix(); + test_FlagOneRetargetsGfx12_5GenericWithoutSteppingSuffix(); + test_FlagOneRetargetsGenericSourceOnConcreteGfx125Agent(); test_A0RetargetsWithoutFlag(); test_FlagOneRetargetsUnknownRevisionAsB0Target(); - test_FlagOneBlocksNonGfx1250(); + test_FlagOneBlocksNonGfx12_5(); test_RetargetFailureFallsBackToOriginalReader(); #ifndef _WIN32 test_FileReaderPathKeepsFileBackedReaderAlive(); diff --git a/projects/hotswap/tests/hotswap_tool_test.cpp b/projects/hotswap/tests/hotswap_tool_test.cpp index a7a816cea8d5..42539d69e684 100644 --- a/projects/hotswap/tests/hotswap_tool_test.cpp +++ b/projects/hotswap/tests/hotswap_tool_test.cpp @@ -140,6 +140,9 @@ const char *kGfx1250IsaWithFeatures = "amdgcn-amd-amdhsa--gfx1250:sramecc+:xnack-"; const char *kGfx942Isa = "amdgcn-amd-amdhsa--gfx942"; const char *kGfx1251Isa = "amdgcn-amd-amdhsa--gfx1251"; +const char *kGfx12_5GenericIsa = "amdgcn-amd-amdhsa--gfx12-5-generic"; +const char *kGfx12_5GenericIsaWithFeatures = + "amdgcn-amd-amdhsa--gfx12-5-generic:sramecc+"; // The gate applied in hotswap_load_agent_code_object() is the shared // rocr::hotswap::gate_allows_hotswap(), exercised directly below so the tests @@ -191,6 +194,20 @@ void test_NearMissTargetBlocks() { run("gate blocks gfx1251 (exact match)", gate_allows_hotswap(g) == false); } +// Hyphenated generic processor names must be preserved while stripping feature +// suffixes; otherwise the gfx12.5 trampoline gate would see only "gfx12". +void test_Gfx12_5GenericFeatureSuffixParsed() { + printf("TEST Gfx12_5GenericFeatureSuffixParsed...\n"); + reset_env(); + g_env.isa_name = kGfx12_5GenericIsaWithFeatures; + g_env.asic_revision = 0; + const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); + run("feature suffix stripped -> gfx12-5-generic", + g.gfx_target == "gfx12-5-generic"); + run("default B0/A0 gate blocks gfx12-5-generic", + gate_allows_hotswap(g) == false); +} + // gfx1250 but a non-A0 stepping -> gate blocks. void test_Gfx1250NonA0Blocks() { printf("TEST Gfx1250NonA0Blocks...\n"); @@ -216,9 +233,38 @@ void test_EntryTrampolineFlagAllowsGfx1250NonA0() { gate_allows_hotswap_rewrite(g, true) == true); } +// The explicit trampoline gate covers the broader gfx125* family, while the +// default B0/A0 patch gate remains exact gfx1250 A0 only. +void test_EntryTrampolineFlagAllowsGfx125Family() { + printf("TEST EntryTrampolineFlagAllowsGfx125Family...\n"); + reset_env(); + g_env.isa_name = kGfx1251Isa; + g_env.asic_revision = 1; + const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); + run("default gate blocks gfx1251", + gate_allows_hotswap_rewrite(g, false) == false); + run("trampoline gate allows gfx1251", + gate_allows_hotswap_rewrite(g, true) == true); +} + +// COMGR's entry-trampoline pass also accepts the gfx12.5 generic processor +// name, so the HSA tool must route that target when explicitly requested. +void test_EntryTrampolineFlagAllowsGfx12_5Generic() { + printf("TEST EntryTrampolineFlagAllowsGfx12_5Generic...\n"); + reset_env(); + g_env.isa_name = kGfx12_5GenericIsa; + g_env.asic_revision = 1; + const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); + run("default gate blocks gfx12-5-generic", + gate_allows_hotswap_rewrite(g, false) == false); + run("trampoline gate allows gfx12-5-generic", + gate_allows_hotswap_rewrite(g, true) == true); +} + // If ASIC revision cannot be queried, the explicit trampoline flag can still -// route gfx1250 through COMGR. The target is treated as non-A0 for B0/A0 patch -// selection so the trampoline pass does not imply A0-specific fixes. +// route gfx12.5 targets through COMGR. The exact gfx1250 target is treated as +// non-A0 for B0/A0 patch selection so the trampoline pass does not imply +// A0-specific fixes. void test_EntryTrampolineFlagAllowsGfx1250UnknownRevision() { printf("TEST EntryTrampolineFlagAllowsGfx1250UnknownRevision...\n"); reset_env(); @@ -231,7 +277,7 @@ void test_EntryTrampolineFlagAllowsGfx1250UnknownRevision() { gate_allows_hotswap_rewrite(g, true) == true); } -// The trampoline flag is not a global rewrite enable; non-gfx1250 targets still +// The trampoline flag is not a global rewrite enable; non-gfx12.5 targets still // load unchanged. void test_EntryTrampolineFlagBlocksOtherTargets() { printf("TEST EntryTrampolineFlagBlocksOtherTargets...\n"); @@ -316,8 +362,11 @@ int main() { test_Gfx1250FeatureSuffixParsed(); test_NonGfx1250Blocks(); test_NearMissTargetBlocks(); + test_Gfx12_5GenericFeatureSuffixParsed(); test_Gfx1250NonA0Blocks(); test_EntryTrampolineFlagAllowsGfx1250NonA0(); + test_EntryTrampolineFlagAllowsGfx125Family(); + test_EntryTrampolineFlagAllowsGfx12_5Generic(); test_EntryTrampolineFlagAllowsGfx1250UnknownRevision(); test_EntryTrampolineFlagBlocksOtherTargets(); test_AddGfx1250SteppingFeature(); From b6522ad5bd32a3f2b9948a13b763ebe32a2994de Mon Sep 17 00:00:00 2001 From: harsh-amd <149193359+harsh-amd@users.noreply.github.com> Date: Fri, 26 Jun 2026 13:36:08 -0700 Subject: [PATCH 05/16] hotswap: remove formatting-only churn --- projects/hotswap/hotswap.cpp | 7 ++- projects/hotswap/hotswap_gfx_query.cpp | 49 +++++++++----------- projects/hotswap/hotswap_gfx_query.hpp | 4 +- projects/hotswap/hotswap_tool.cpp | 31 ++++++------- projects/hotswap/tests/hotswap_tool_test.cpp | 24 +++++----- 5 files changed, 52 insertions(+), 63 deletions(-) diff --git a/projects/hotswap/hotswap.cpp b/projects/hotswap/hotswap.cpp index d6f2256de50f..740d4f0f2248 100644 --- a/projects/hotswap/hotswap.cpp +++ b/projects/hotswap/hotswap.cpp @@ -77,8 +77,7 @@ int RetargetCodeObject(const void *elf_data, size_t elf_size, return static_cast(status); } - status = - amd_comgr_set_data(input, elf_size, static_cast(elf_data)); + status = amd_comgr_set_data(input, elf_size, static_cast(elf_data)); if (status != AMD_COMGR_STATUS_SUCCESS) { fprintf(stderr, "hotswap: failed to set COMGR input data\n"); amd_comgr_release_data(input); @@ -116,8 +115,8 @@ int RetargetCodeObject(const void *elf_data, size_t elf_size, return -1; } - status = - amd_comgr_get_data(output, &output_size, static_cast(output_buf)); + status = amd_comgr_get_data(output, &output_size, + static_cast(output_buf)); amd_comgr_release_data(output); if (status != AMD_COMGR_STATUS_SUCCESS) { diff --git a/projects/hotswap/hotswap_gfx_query.cpp b/projects/hotswap/hotswap_gfx_query.cpp index bd6200edd146..5e3acf38001e 100644 --- a/projects/hotswap/hotswap_gfx_query.cpp +++ b/projects/hotswap/hotswap_gfx_query.cpp @@ -18,30 +18,27 @@ namespace rocr::hotswap { std::string get_agent_isa_name(hsa_agent_t agent) { std::string name; - hsa_agent_iterate_isas( - agent, - [](hsa_isa_t isa, void *data) -> hsa_status_t { - uint32_t len = 0; - if (hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME_LENGTH, &len) != - HSA_STATUS_SUCCESS) - return HSA_STATUS_ERROR; - - auto &out = *static_cast(data); - out.resize(len); - - if (hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME, out.data()) != - HSA_STATUS_SUCCESS) { - out.clear(); - return HSA_STATUS_ERROR; - } - - // HSA returns null-terminated length; trim it. - if (!out.empty() && out.back() == '\0') - out.pop_back(); - - return HSA_STATUS_INFO_BREAK; // only need the first ISA - }, - &name); + hsa_agent_iterate_isas(agent, [](hsa_isa_t isa, void *data) -> hsa_status_t { + uint32_t len = 0; + if (hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME_LENGTH, &len) != + HSA_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + + auto &out = *static_cast(data); + out.resize(len); + + if (hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME, out.data()) != + HSA_STATUS_SUCCESS) { + out.clear(); + return HSA_STATUS_ERROR; + } + + // HSA returns null-terminated length; trim it. + if (!out.empty() && out.back() == '\0') + out.pop_back(); + + return HSA_STATUS_INFO_BREAK; // only need the first ISA + }, &name); return name; } @@ -51,9 +48,7 @@ std::string extract_gfx_target(const std::string &isa_name) { if (pos == std::string::npos) return {}; auto end = std::find_if_not(isa_name.begin() + pos, isa_name.end(), - [](unsigned char c) { - return std::isalnum(c) || c == '-' || c == '_'; - }); + [](unsigned char c) { return std::isalnum(c) || c == '-' || c == '_'; }); return isa_name.substr(pos, end - isa_name.begin() - pos); } diff --git a/projects/hotswap/hotswap_gfx_query.hpp b/projects/hotswap/hotswap_gfx_query.hpp index 26944dd8ff37..5cc303ee8522 100644 --- a/projects/hotswap/hotswap_gfx_query.hpp +++ b/projects/hotswap/hotswap_gfx_query.hpp @@ -5,7 +5,7 @@ //===----------------------------------------------------------------------===// // // Portable helpers for discovering an agent's gfx target and ASIC revision via -// the HSA runtime (HSA_AMD_AGENT_INFO_ASIC_REVISION). +// the HSA runtime (HSA_AMD_AGENT_INFO_ASIC_REVISION). // //===----------------------------------------------------------------------===// @@ -45,7 +45,7 @@ std::string extract_gfx_target(const std::string &isa_name); // gate_allows_hotswap() (below) to decide whether to act. AgentGfxRevision query_agent_gfx_revision(hsa_agent_t agent); -// Clears the per-agent-handle cache used by query_agent_gfx_revision(). +// Clears the per-agent-handle cache used by query_agent_gfx_revision(). void reset_gfx_revision_cache(); // HotSwap's default activation policy: B0-to-A0 rewriting is performed only diff --git a/projects/hotswap/hotswap_tool.cpp b/projects/hotswap/hotswap_tool.cpp index 90e0d875d912..791102e5fe9f 100644 --- a/projects/hotswap/hotswap_tool.cpp +++ b/projects/hotswap/hotswap_tool.cpp @@ -33,7 +33,7 @@ // The agent's gfx target and ASIC revision are read through the HSA runtime // (HSA_AMD_AGENT_INFO_ASIC_REVISION) in hotswap_gfx_query.{hpp,cpp}, which is -// portable across Linux and Windows. +// portable across Linux and Windows. #define HSA_HOTSWAP_EXPORT __attribute__((visibility("default"))) @@ -100,9 +100,8 @@ CoreApiTable *g_core_table = nullptr; decltype(hsa_code_object_reader_create_from_memory) *g_orig_reader_create_from_memory = nullptr; -decltype( - hsa_code_object_reader_create_from_file) *g_orig_reader_create_from_file = - nullptr; +decltype(hsa_code_object_reader_create_from_file) + *g_orig_reader_create_from_file = nullptr; decltype(hsa_code_object_reader_destroy) *g_orig_reader_destroy = nullptr; decltype(hsa_executable_load_agent_code_object) *g_orig_load_agent_code_object = nullptr; @@ -226,8 +225,7 @@ hotswap_reader_destroy(hsa_code_object_reader_t code_object_reader) { return g_orig_reader_destroy(code_object_reader); } -hsa_status_t load_original_reader(hsa_executable_t executable, - hsa_agent_t agent, +hsa_status_t load_original_reader(hsa_executable_t executable, hsa_agent_t agent, hsa_code_object_reader_t code_object_reader, const char *options, hsa_loaded_code_object_t *loaded_code_object, @@ -240,14 +238,14 @@ hsa_status_t load_original_reader(hsa_executable_t executable, return status; } -hsa_status_t load_rewritten_reader(hsa_executable_t executable, - hsa_agent_t agent, const char *options, +hsa_status_t load_rewritten_reader(hsa_executable_t executable, hsa_agent_t agent, + const char *options, hsa_loaded_code_object_t *loaded_code_object, void *out_elf, size_t out_elf_size) { OwnedElf owned_elf(out_elf, &std::free); hsa_code_object_reader_t new_reader = {}; - hsa_status_t status = g_orig_reader_create_from_memory( - owned_elf.get(), out_elf_size, &new_reader); + hsa_status_t status = + g_orig_reader_create_from_memory(owned_elf.get(), out_elf_size, &new_reader); if (status != HSA_STATUS_SUCCESS) { return status; } @@ -265,8 +263,7 @@ hsa_status_t load_rewritten_reader(hsa_executable_t executable, return status; } -hsa_status_t try_retarget_and_load(hsa_executable_t executable, - hsa_agent_t agent, +hsa_status_t try_retarget_and_load(hsa_executable_t executable, hsa_agent_t agent, const char *options, hsa_loaded_code_object_t *loaded_code_object, const ByteVec &local_bytes, @@ -276,7 +273,6 @@ hsa_status_t try_retarget_and_load(hsa_executable_t executable, std::string source_isa = rocr::hotswap::GetCodeObjectIsaName( local_bytes->data(), local_bytes->size()); std::string target_isa = get_agent_isa_name(agent); - if (source_isa.empty() || target_isa.empty()) { HOTSWAP_LOG("hotswap: rewrite SKIP empty isa (src='%s' tgt='%s' size=%zu)\n", source_isa.c_str(), target_isa.c_str(), local_bytes->size()); @@ -349,16 +345,15 @@ hsa_status_t HSA_API hotswap_load_agent_code_object( const bool entry_trampolines = entry_trampolines_requested(); if (!gate_allows_hotswap_rewrite(gfx, entry_trampolines)) { HOTSWAP_LOG("hotswap: gate BLOCKED (gfx=%s rev=%u valid=%d)\n", - gfx.gfx_target.c_str(), gfx.asic_revision, - gfx.revision_valid); + gfx.gfx_target.c_str(), gfx.asic_revision, gfx.revision_valid); return load_original_reader(executable, agent, code_object_reader, options, loaded_code_object, reader_from_file); } - const hsa_status_t status = - try_retarget_and_load(executable, agent, options, loaded_code_object, - local_bytes, gfx, entry_trampolines); + const hsa_status_t status = try_retarget_and_load( + executable, agent, options, loaded_code_object, local_bytes, gfx, + entry_trampolines); if (status == HSA_STATUS_SUCCESS) { return status; } diff --git a/projects/hotswap/tests/hotswap_tool_test.cpp b/projects/hotswap/tests/hotswap_tool_test.cpp index 42539d69e684..907fe68518d4 100644 --- a/projects/hotswap/tests/hotswap_tool_test.cpp +++ b/projects/hotswap/tests/hotswap_tool_test.cpp @@ -18,7 +18,7 @@ // * ISA name <- hsa_agent_iterate_isas / hsa_isa_get_info_alt // * ASIC revision <- hsa_agent_get_info(HSA_AMD_AGENT_INFO_ASIC_REVISION) // -// This path is portable: it depends only on HSA so the same query/gate logic is +// This path is portable: it depends only on HSA so the same query/gate logic is // exercised for both Linux and Windows builds. // //===----------------------------------------------------------------------===// @@ -34,22 +34,22 @@ // --------------------------------------------------------------------------- namespace { struct FakeEnv { - std::string isa_name; // ISA reported for the agent - bool asic_rev_ok = true; // hsa_agent_get_info(ASIC_REVISION) success - uint32_t asic_revision = 0; // reported ASIC revision (0 == A0) + std::string isa_name; // ISA reported for the agent + bool asic_rev_ok = true; // hsa_agent_get_info(ASIC_REVISION) success + uint32_t asic_revision = 0; // reported ASIC revision (0 == A0) // Counters used to assert short-circuiting and caching behavior. - int isa_query_calls = 0; // get_agent_isa_name -> iterate_isas - int asic_rev_calls = 0; // ASIC revision queries + int isa_query_calls = 0; // get_agent_isa_name -> iterate_isas + int asic_rev_calls = 0; // ASIC revision queries }; FakeEnv g_env; -} // namespace +} // namespace // The unit under test (brings in hsa.h and the query helper declarations). #include "hotswap_gfx_query.hpp" -using rocr::hotswap::add_gfx1250_stepping_feature; using rocr::hotswap::AgentGfxRevision; +using rocr::hotswap::add_gfx1250_stepping_feature; using rocr::hotswap::gate_allows_hotswap; using rocr::hotswap::gate_allows_hotswap_rewrite; using rocr::hotswap::query_agent_gfx_revision; @@ -99,7 +99,7 @@ hsa_status_t hsa_agent_get_info(hsa_agent_t /*agent*/, return HSA_STATUS_ERROR; } -} // extern "C" +} // extern "C" // --------------------------------------------------------------------------- // Minimal test harness. @@ -213,7 +213,7 @@ void test_Gfx1250NonA0Blocks() { printf("TEST Gfx1250NonA0Blocks...\n"); reset_env(); g_env.isa_name = kGfx1250Isa; - g_env.asic_revision = 1; // A1 + g_env.asic_revision = 1; // A1 const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); run("ASIC revision is A1 (1)", g.revision_valid && g.asic_revision == 1); run("gate blocks gfx1250 A1", gate_allows_hotswap(g) == false); @@ -225,7 +225,7 @@ void test_EntryTrampolineFlagAllowsGfx1250NonA0() { printf("TEST EntryTrampolineFlagAllowsGfx1250NonA0...\n"); reset_env(); g_env.isa_name = kGfx1250Isa; - g_env.asic_revision = 1; // A1/B0-side path, not A0. + g_env.asic_revision = 1; // A1/B0-side path, not A0. const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); run("default gate blocks non-A0", gate_allows_hotswap_rewrite(g, false) == false); @@ -355,7 +355,7 @@ void test_DistinctHandlesIndependent() { a2.gfx_target == "gfx942" && gate_allows_hotswap(a2) == false); } -} // namespace +} // namespace int main() { test_Gfx1250A0Passes(); From 88fe57553a2bd13e7af457aa014a73d28a3a97b3 Mon Sep 17 00:00:00 2001 From: harsh-amd <149193359+harsh-amd@users.noreply.github.com> Date: Sat, 27 Jun 2026 10:33:54 -0700 Subject: [PATCH 06/16] hotswap: tighten gfx12.5 trampoline gate --- projects/hotswap/hotswap_gfx_query.cpp | 25 ++-- projects/hotswap/hotswap_gfx_query.hpp | 8 +- .../hotswap/tests/hotswap_loader_test.cpp | 116 +----------------- projects/hotswap/tests/hotswap_tool_test.cpp | 17 +++ 4 files changed, 37 insertions(+), 129 deletions(-) diff --git a/projects/hotswap/hotswap_gfx_query.cpp b/projects/hotswap/hotswap_gfx_query.cpp index 5e3acf38001e..5432e35bcf1a 100644 --- a/projects/hotswap/hotswap_gfx_query.cpp +++ b/projects/hotswap/hotswap_gfx_query.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -48,7 +49,9 @@ std::string extract_gfx_target(const std::string &isa_name) { if (pos == std::string::npos) return {}; auto end = std::find_if_not(isa_name.begin() + pos, isa_name.end(), - [](unsigned char c) { return std::isalnum(c) || c == '-' || c == '_'; }); + [](unsigned char c) { + return std::isalnum(c) || c == '-'; + }); return isa_name.substr(pos, end - isa_name.begin() - pos); } @@ -59,9 +62,16 @@ std::unordered_map g_cache; bool is_gfx12_5_entry_trampoline_target(const std::string &gfx_target) { constexpr char Gfx125Prefix[] = "gfx125"; - return gfx_target == "gfx12-5-generic" || - (gfx_target.size() > sizeof(Gfx125Prefix) - 1 && - gfx_target.rfind(Gfx125Prefix, 0) == 0); + constexpr size_t Gfx125PrefixLen = sizeof(Gfx125Prefix) - 1; + if (gfx_target == "gfx12-5-generic") { + return true; + } + if (gfx_target.size() <= Gfx125PrefixLen || + gfx_target.compare(0, Gfx125PrefixLen, Gfx125Prefix) != 0) { + return false; + } + return std::all_of(gfx_target.begin() + Gfx125PrefixLen, gfx_target.end(), + [](unsigned char c) { return std::isdigit(c); }); } AgentGfxRevision query_agent_gfx_revision(hsa_agent_t agent) { @@ -106,16 +116,13 @@ bool gate_allows_hotswap(const AgentGfxRevision &gfx) { gfx.asic_revision == 0; // A0 } -bool gate_allows_entry_trampolines(const AgentGfxRevision &gfx) { - return is_gfx12_5_entry_trampoline_target(gfx.gfx_target); -} - bool gate_allows_hotswap_rewrite(const AgentGfxRevision &gfx, bool entry_trampolines_requested) { if (gate_allows_hotswap(gfx)) { return true; } - return entry_trampolines_requested && gate_allows_entry_trampolines(gfx); + return entry_trampolines_requested && + is_gfx12_5_entry_trampoline_target(gfx.gfx_target); } std::string add_gfx1250_stepping_feature(const std::string &isa_name, diff --git a/projects/hotswap/hotswap_gfx_query.hpp b/projects/hotswap/hotswap_gfx_query.hpp index 5cc303ee8522..959d203ed350 100644 --- a/projects/hotswap/hotswap_gfx_query.hpp +++ b/projects/hotswap/hotswap_gfx_query.hpp @@ -53,12 +53,8 @@ void reset_gfx_revision_cache(); // successfully queried). bool gate_allows_hotswap(const AgentGfxRevision &gfx); -// Entry trampolines are a separate, opt-in rewrite that applies to the gfx12.5 -// family (gfx125* and gfx12-5-generic) when -// AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES is enabled. -bool gate_allows_entry_trampolines(const AgentGfxRevision &gfx); - -// True when a parsed gfx target is in the COMGR entry-trampoline family. +// True for COMGR entry-trampoline targets: gfx12-5-generic or concrete gfx125 +// processor names with a numeric suffix (for example, gfx1250 or gfx1251). bool is_gfx12_5_entry_trampoline_target(const std::string &gfx_target); // Combined activation policy for libhsa-hotswap.so. diff --git a/projects/hotswap/tests/hotswap_loader_test.cpp b/projects/hotswap/tests/hotswap_loader_test.cpp index 089c08ae363d..3626a685fb03 100644 --- a/projects/hotswap/tests/hotswap_loader_test.cpp +++ b/projects/hotswap/tests/hotswap_loader_test.cpp @@ -20,11 +20,6 @@ #include -#ifndef _WIN32 -#include -#include -#endif - namespace { struct FakeEnv { @@ -33,25 +28,14 @@ struct FakeEnv { uint32_t asic_revision = 0; int retarget_calls = 0; - int load_calls = 0; - int memory_reader_create_calls = 0; - int file_reader_create_calls = 0; - int reader_destroy_calls = 0; - int isa_query_calls = 0; - int asic_rev_calls = 0; int retarget_status = 0; - hsa_status_t load_status = HSA_STATUS_SUCCESS; - hsa_status_t reader_create_status = HSA_STATUS_SUCCESS; uint64_t next_reader_handle = 100; uint64_t last_loaded_reader = 0; - uint64_t last_created_reader = 0; - uint64_t retarget_reader_handle = 0; std::string retarget_source_isa; std::string retarget_target_isa; - void *retarget_output_ptr = nullptr; }; FakeEnv g_env; @@ -110,7 +94,6 @@ int RetargetCodeObject(const void *elf_data, size_t elf_size, return -1; } std::memcpy(copy, elf_data, elf_size); - g_env.retarget_output_ptr = copy; *out_data = copy; *out_size = elf_size; return 0; @@ -124,7 +107,6 @@ hsa_status_t hsa_agent_iterate_isas(hsa_agent_t /*agent*/, hsa_status_t (*callback)(hsa_isa_t isa, void *data), void *data) { - ++g_env.isa_query_calls; hsa_isa_t isa{}; isa.handle = 1; const hsa_status_t status = callback(isa, data); @@ -149,7 +131,6 @@ hsa_status_t hsa_agent_get_info(hsa_agent_t /*agent*/, hsa_agent_info_t attribute, void *value) { if (attribute == static_cast(HSA_AMD_AGENT_INFO_ASIC_REVISION)) { - ++g_env.asic_rev_calls; if (!g_env.asic_rev_ok) { return HSA_STATUS_ERROR; } @@ -225,32 +206,19 @@ std::vector make_code_object(const std::string &isa) { hsa_status_t HSA_API fake_reader_create_from_memory( const void * /*code_object*/, size_t /*size*/, hsa_code_object_reader_t *code_object_reader) { - ++g_env.memory_reader_create_calls; - if (g_env.reader_create_status != HSA_STATUS_SUCCESS) { - return g_env.reader_create_status; - } code_object_reader->handle = g_env.next_reader_handle++; - g_env.last_created_reader = code_object_reader->handle; - if (g_env.retarget_output_ptr) { - g_env.retarget_reader_handle = code_object_reader->handle; - } return HSA_STATUS_SUCCESS; } hsa_status_t HSA_API fake_reader_create_from_file( hsa_file_t /*file*/, hsa_code_object_reader_t *code_object_reader) { - ++g_env.file_reader_create_calls; code_object_reader->handle = g_env.next_reader_handle++; - g_env.last_created_reader = code_object_reader->handle; return HSA_STATUS_SUCCESS; } hsa_status_t HSA_API fake_reader_destroy(hsa_code_object_reader_t code_object_reader) { - ++g_env.reader_destroy_calls; - if (code_object_reader.handle == g_env.retarget_reader_handle) { - g_env.retarget_reader_handle = 0; - } + (void)code_object_reader; return HSA_STATUS_SUCCESS; } @@ -258,12 +226,11 @@ hsa_status_t HSA_API fake_load_agent_code_object( hsa_executable_t /*executable*/, hsa_agent_t /*agent*/, hsa_code_object_reader_t code_object_reader, const char * /*options*/, hsa_loaded_code_object_t *loaded_code_object) { - ++g_env.load_calls; g_env.last_loaded_reader = code_object_reader.handle; if (loaded_code_object) { loaded_code_object->handle = 0xC0DE; } - return g_env.load_status; + return HSA_STATUS_SUCCESS; } CoreApiTable install_tool() { @@ -301,33 +268,6 @@ hsa_status_t load_reader(CoreApiTable &core, hsa_code_object_reader_t reader) { hsa_executable_t{}, fake_agent(), reader, nullptr, &loaded); } -void test_OnLoadInstallsWrappers() { - std::printf("TEST OnLoadInstallsWrappers...\n"); - reset_state(); - CoreApiTable core = install_tool(); - check(core.hsa_code_object_reader_create_from_memory_fn != - fake_reader_create_from_memory, - "memory reader entry point is wrapped"); - check(core.hsa_code_object_reader_create_from_file_fn != - fake_reader_create_from_file, - "file reader entry point is wrapped"); - check(core.hsa_executable_load_agent_code_object_fn != - fake_load_agent_code_object, - "load entry point is wrapped"); -} - -void test_GetCodeObjectIsaNameFindsMetadata() { - std::printf("TEST GetCodeObjectIsaNameFindsMetadata...\n"); - reset_state(); - std::vector elf = make_code_object(kGfx1250Isa); - check(rocr::hotswap::GetCodeObjectIsaName(elf.data(), elf.size()) == - kGfx1250Isa, - "metadata payload yields ISA"); - const uint8_t not_elf[] = {0x7f, 'E', 'L', 'F'}; - check(rocr::hotswap::GetCodeObjectIsaName(not_elf, sizeof(not_elf)).empty(), - "malformed ELF yields empty ISA"); -} - void test_FlagUnsetLoadsOriginalForNonA0Gfx1250() { std::printf("TEST FlagUnsetLoadsOriginalForNonA0Gfx1250...\n"); reset_state(); @@ -496,58 +436,9 @@ void test_RetargetFailureFallsBackToOriginalReader() { "retarget failure falls back to original reader"); } -#ifndef _WIN32 -void test_FileReaderPathKeepsFileBackedReaderAlive() { - std::printf("TEST FileReaderPathKeepsFileBackedReaderAlive...\n"); - reset_state(); - set_entry_trampoline_env("1"); - g_env.agent_isa = kGfx942Isa; - g_env.asic_revision = 0; - CoreApiTable core = install_tool(); - - std::vector elf = make_code_object(kGfx942Isa); - char path[] = "/tmp/hotswap-loader-test-XXXXXX"; - const int fd = mkstemp(path); - check(fd >= 0, "temporary code object file is created"); - if (fd < 0) { - return; - } - unlink(path); - const ssize_t written = write(fd, elf.data(), elf.size()); - check(written == static_cast(elf.size()), - "temporary code object file is written"); - lseek(fd, 0, SEEK_SET); - - hsa_code_object_reader_t reader{}; - const hsa_status_t create_status = - core.hsa_code_object_reader_create_from_file_fn(fd, &reader); - close(fd); - check(create_status == HSA_STATUS_SUCCESS, "file reader creation succeeds"); - check(g_env.memory_reader_create_calls == 1, - "file reader path converts file contents to a memory reader"); - - check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); - check(g_env.retarget_calls == 0, "non-gfx1250 file reader is not rewritten"); - check(g_env.last_loaded_reader == reader.handle, - "file-backed original reader is loaded"); - check(core.hsa_code_object_reader_destroy_fn(reader) == HSA_STATUS_SUCCESS, - "file-backed reader destroy succeeds"); - - bool retained = false; - { - std::scoped_lock lock(g_reader_map_mutex); - const auto it = g_reader_map.find(reader.handle); - retained = it != g_reader_map.end() && it->second.from_file; - } - check(retained, "file-backed reader bytes stay alive after successful load"); -} -#endif - } // namespace int main() { - test_OnLoadInstallsWrappers(); - test_GetCodeObjectIsaNameFindsMetadata(); test_FlagUnsetLoadsOriginalForNonA0Gfx1250(); test_FlagZeroLoadsOriginalForNonA0Gfx1250(); test_FlagOneRetargetsB0ToB0(); @@ -558,9 +449,6 @@ int main() { test_FlagOneRetargetsUnknownRevisionAsB0Target(); test_FlagOneBlocksNonGfx12_5(); test_RetargetFailureFallsBackToOriginalReader(); -#ifndef _WIN32 - test_FileReaderPathKeepsFileBackedReaderAlive(); -#endif reset_state(); std::printf("\n%d passed, %d failed\n", tests_passed, tests_failed); diff --git a/projects/hotswap/tests/hotswap_tool_test.cpp b/projects/hotswap/tests/hotswap_tool_test.cpp index 907fe68518d4..3977b45cbed6 100644 --- a/projects/hotswap/tests/hotswap_tool_test.cpp +++ b/projects/hotswap/tests/hotswap_tool_test.cpp @@ -52,6 +52,7 @@ using rocr::hotswap::AgentGfxRevision; using rocr::hotswap::add_gfx1250_stepping_feature; using rocr::hotswap::gate_allows_hotswap; using rocr::hotswap::gate_allows_hotswap_rewrite; +using rocr::hotswap::is_gfx12_5_entry_trampoline_target; using rocr::hotswap::query_agent_gfx_revision; using rocr::hotswap::reset_gfx_revision_cache; @@ -140,6 +141,7 @@ const char *kGfx1250IsaWithFeatures = "amdgcn-amd-amdhsa--gfx1250:sramecc+:xnack-"; const char *kGfx942Isa = "amdgcn-amd-amdhsa--gfx942"; const char *kGfx1251Isa = "amdgcn-amd-amdhsa--gfx1251"; +const char *kGfx125MalformedIsa = "amdgcn-amd-amdhsa--gfx125foo"; const char *kGfx12_5GenericIsa = "amdgcn-amd-amdhsa--gfx12-5-generic"; const char *kGfx12_5GenericIsaWithFeatures = "amdgcn-amd-amdhsa--gfx12-5-generic:sramecc+"; @@ -247,6 +249,20 @@ void test_EntryTrampolineFlagAllowsGfx125Family() { gate_allows_hotswap_rewrite(g, true) == true); } +void test_EntryTrampolineFlagRejectsMalformedGfx125Prefix() { + printf("TEST EntryTrampolineFlagRejectsMalformedGfx125Prefix...\n"); + reset_env(); + g_env.isa_name = kGfx125MalformedIsa; + g_env.asic_revision = 1; + const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); + run("malformed gfx125 prefix is parsed", + g.gfx_target == "gfx125foo"); + run("trampoline family predicate rejects malformed suffix", + is_gfx12_5_entry_trampoline_target(g.gfx_target) == false); + run("trampoline gate rejects malformed gfx125 prefix", + gate_allows_hotswap_rewrite(g, true) == false); +} + // COMGR's entry-trampoline pass also accepts the gfx12.5 generic processor // name, so the HSA tool must route that target when explicitly requested. void test_EntryTrampolineFlagAllowsGfx12_5Generic() { @@ -366,6 +382,7 @@ int main() { test_Gfx1250NonA0Blocks(); test_EntryTrampolineFlagAllowsGfx1250NonA0(); test_EntryTrampolineFlagAllowsGfx125Family(); + test_EntryTrampolineFlagRejectsMalformedGfx125Prefix(); test_EntryTrampolineFlagAllowsGfx12_5Generic(); test_EntryTrampolineFlagAllowsGfx1250UnknownRevision(); test_EntryTrampolineFlagBlocksOtherTargets(); From 919f4d0a974d67199f04fe698e4e917b960a25f8 Mon Sep 17 00:00:00 2001 From: harsh-amd <149193359+harsh-amd@users.noreply.github.com> Date: Sat, 27 Jun 2026 10:40:58 -0700 Subject: [PATCH 07/16] hotswap: streamline loader gating tests --- .../hotswap/tests/hotswap_loader_test.cpp | 264 ++++++++---------- 1 file changed, 116 insertions(+), 148 deletions(-) diff --git a/projects/hotswap/tests/hotswap_loader_test.cpp b/projects/hotswap/tests/hotswap_loader_test.cpp index 3626a685fb03..4e5373bba4d0 100644 --- a/projects/hotswap/tests/hotswap_loader_test.cpp +++ b/projects/hotswap/tests/hotswap_loader_test.cpp @@ -24,7 +24,6 @@ namespace { struct FakeEnv { std::string agent_isa = "amdgcn-amd-amdhsa--gfx1250"; - bool asic_rev_ok = true; uint32_t asic_revision = 0; int retarget_calls = 0; @@ -131,9 +130,6 @@ hsa_status_t hsa_agent_get_info(hsa_agent_t /*agent*/, hsa_agent_info_t attribute, void *value) { if (attribute == static_cast(HSA_AMD_AGENT_INFO_ASIC_REVISION)) { - if (!g_env.asic_rev_ok) { - return HSA_STATUS_ERROR; - } *static_cast(value) = g_env.asic_revision; return HSA_STATUS_SUCCESS; } @@ -268,185 +264,157 @@ hsa_status_t load_reader(CoreApiTable &core, hsa_code_object_reader_t reader) { hsa_executable_t{}, fake_agent(), reader, nullptr, &loaded); } -void test_FlagUnsetLoadsOriginalForNonA0Gfx1250() { - std::printf("TEST FlagUnsetLoadsOriginalForNonA0Gfx1250...\n"); - reset_state(); - g_env.agent_isa = kGfx1250Isa; - g_env.asic_revision = 1; - CoreApiTable core = install_tool(); - std::vector elf = make_code_object(kGfx1250Isa); - const hsa_code_object_reader_t reader = create_memory_reader(core, elf); - check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); - check(g_env.retarget_calls == 0, "flag unset skips non-A0 gfx1250 rewrite"); - check(g_env.last_loaded_reader == reader.handle, - "original reader is loaded when flag is unset"); +void begin_test(const char *name, const char *description) { + std::printf("TEST %s...\n %s\n", name, description); } -void test_FlagZeroLoadsOriginalForNonA0Gfx1250() { - std::printf("TEST FlagZeroLoadsOriginalForNonA0Gfx1250...\n"); +struct LoadResult { + hsa_status_t status = HSA_STATUS_SUCCESS; + uint64_t original_reader = 0; + uint64_t loaded_reader = 0; + int retarget_calls = 0; + std::string source_isa; + std::string target_isa; + size_t retained_elfs = 0; +}; + +LoadResult load_once(const char *code_object_isa, const char *agent_isa, + const char *entry_trampoline_flag, + uint32_t asic_revision = 1, + int retarget_status = 0) { reset_state(); - set_entry_trampoline_env("0"); - g_env.agent_isa = kGfx1250Isa; - g_env.asic_revision = 1; + set_entry_trampoline_env(entry_trampoline_flag); + g_env.agent_isa = agent_isa; + g_env.asic_revision = asic_revision; + g_env.retarget_status = retarget_status; + CoreApiTable core = install_tool(); - std::vector elf = make_code_object(kGfx1250Isa); + std::vector elf = make_code_object(code_object_isa); const hsa_code_object_reader_t reader = create_memory_reader(core, elf); - check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); - check(g_env.retarget_calls == 0, "flag value 0 disables entry trampolines"); - check(g_env.last_loaded_reader == reader.handle, - "original reader is loaded when flag is 0"); + + LoadResult result; + result.original_reader = reader.handle; + result.status = load_reader(core, reader); + result.loaded_reader = g_env.last_loaded_reader; + result.retarget_calls = g_env.retarget_calls; + result.source_isa = g_env.retarget_source_isa; + result.target_isa = g_env.retarget_target_isa; + result.retained_elfs = g_rewritten_elfs.size(); + return result; } -void test_FlagOneRetargetsB0ToB0() { - std::printf("TEST FlagOneRetargetsB0ToB0...\n"); - reset_state(); - set_entry_trampoline_env("1"); - g_env.agent_isa = kGfx1250Isa; - g_env.asic_revision = 1; - CoreApiTable core = install_tool(); - std::vector elf = make_code_object(kGfx1250Isa); - const hsa_code_object_reader_t reader = create_memory_reader(core, elf); - check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); - check(g_env.retarget_calls == 1, "flag 1 routes B0 gfx1250 through COMGR"); - check(g_env.retarget_source_isa == kGfx1250B0Isa, +void check_original_load(const LoadResult &result, const char *name) { + check(result.status == HSA_STATUS_SUCCESS, "load succeeds"); + check(result.retarget_calls == 0, name); + check(result.loaded_reader == result.original_reader, + "original reader is loaded"); +} + +void test_DisabledFlagLoadsOriginal() { + begin_test("DisabledFlagLoadsOriginal", + "Unset, empty, and 0 trampoline flags must leave non-A0 gfx1250 " + "on the original reader."); + struct FlagCase { + const char *flag_value; + const char *expectation; + }; + const FlagCase cases[] = { + {nullptr, "unset flag skips non-A0 gfx1250 rewrite"}, + {"", "empty flag skips non-A0 gfx1250 rewrite"}, + {"0", "flag value 0 skips non-A0 gfx1250 rewrite"}, + }; + for (const FlagCase &c : cases) { + const LoadResult result = + load_once(kGfx1250Isa, kGfx1250Isa, c.flag_value); + check_original_load(result, c.expectation); + } +} + +void test_FlagRoutesGfx1250B0() { + begin_test("FlagRoutesGfx1250B0", + "The trampoline flag must route non-A0 gfx1250 through COMGR " + "without selecting the A0 patch path."); + const LoadResult result = load_once(kGfx1250Isa, kGfx1250Isa, "1"); + check(result.status == HSA_STATUS_SUCCESS, "load succeeds"); + check(result.retarget_calls == 1, "flag routes B0 gfx1250 through COMGR"); + check(result.source_isa == kGfx1250B0Isa, "source ISA is tagged as B0"); - check(g_env.retarget_target_isa == kGfx1250B0Isa, + check(result.target_isa == kGfx1250B0Isa, "non-A0 target ISA is tagged as B0"); - check(g_env.last_loaded_reader != reader.handle, + check(result.loaded_reader != result.original_reader, "rewritten reader is loaded instead of original reader"); - check(g_rewritten_elfs.size() == 1, + check(result.retained_elfs == 1, "rewritten ELF is retained after successful load"); } -void test_FlagOneRetargetsGfx125FamilyWithoutSteppingSuffix() { - std::printf("TEST FlagOneRetargetsGfx125FamilyWithoutSteppingSuffix...\n"); - reset_state(); - set_entry_trampoline_env("1"); - g_env.agent_isa = kGfx1251Isa; - g_env.asic_revision = 1; - CoreApiTable core = install_tool(); - std::vector elf = make_code_object(kGfx1251Isa); - const hsa_code_object_reader_t reader = create_memory_reader(core, elf); - check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); - check(g_env.retarget_calls == 1, "flag 1 routes gfx1251 through COMGR"); - check(g_env.retarget_source_isa == kGfx1251Isa, - "gfx1251 source ISA is not tagged with gfx1250 stepping feature"); - check(g_env.retarget_target_isa == kGfx1251Isa, - "gfx1251 target ISA is not tagged with gfx1250 stepping feature"); -} - -void test_FlagOneRetargetsGfx12_5GenericWithoutSteppingSuffix() { - std::printf("TEST FlagOneRetargetsGfx12_5GenericWithoutSteppingSuffix...\n"); - reset_state(); - set_entry_trampoline_env("1"); - g_env.agent_isa = kGfx12_5GenericIsa; - g_env.asic_revision = 1; - CoreApiTable core = install_tool(); - std::vector elf = make_code_object(kGfx12_5GenericIsa); - const hsa_code_object_reader_t reader = create_memory_reader(core, elf); - check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); - check(g_env.retarget_calls == 1, - "flag 1 routes gfx12-5-generic through COMGR"); - check(g_env.retarget_source_isa == kGfx12_5GenericIsa, - "generic source ISA is not tagged with gfx1250 stepping feature"); - check(g_env.retarget_target_isa == kGfx12_5GenericIsa, - "generic target ISA is not tagged with gfx1250 stepping feature"); +void test_FlagRoutesGfx12_5Family() { + begin_test("FlagRoutesGfx12_5Family", + "The trampoline flag must route gfx125* and gfx12-5-generic " + "without adding gfx1250 stepping features."); + const char *cases[] = {kGfx1251Isa, kGfx12_5GenericIsa}; + for (const char *isa : cases) { + const LoadResult result = load_once(isa, isa, "1"); + check(result.status == HSA_STATUS_SUCCESS, "load succeeds"); + check(result.retarget_calls == 1, "flag routes gfx12.5 target through COMGR"); + check(result.source_isa == isa, "source ISA is preserved"); + check(result.target_isa == isa, "target ISA is preserved"); + } } -void test_FlagOneRetargetsGenericSourceOnConcreteGfx125Agent() { - std::printf( - "TEST FlagOneRetargetsGenericSourceOnConcreteGfx125Agent...\n"); - reset_state(); - set_entry_trampoline_env("1"); - g_env.agent_isa = kGfx1251Isa; - g_env.asic_revision = 1; - CoreApiTable core = install_tool(); - std::vector elf = make_code_object(kGfx12_5GenericIsa); - const hsa_code_object_reader_t reader = create_memory_reader(core, elf); - check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); - check(g_env.retarget_calls == 1, - "flag 1 routes generic source on concrete gfx125 agent through COMGR"); - check(g_env.retarget_source_isa == kGfx12_5GenericIsa, +void test_GenericSourceUsesGenericTarget() { + begin_test("GenericSourceUsesGenericTarget", + "A gfx12-5-generic source loaded on a concrete gfx125* agent " + "must stay generic to avoid processor retargeting."); + const LoadResult result = load_once(kGfx12_5GenericIsa, kGfx1251Isa, "1"); + check(result.status == HSA_STATUS_SUCCESS, "load succeeds"); + check(result.retarget_calls == 1, + "generic source on concrete gfx125 agent routes through COMGR"); + check(result.source_isa == kGfx12_5GenericIsa, "generic source ISA is preserved"); - check(g_env.retarget_target_isa == kGfx12_5GenericIsa, + check(result.target_isa == kGfx12_5GenericIsa, "generic target ISA is preserved to avoid processor retargeting"); } void test_A0RetargetsWithoutFlag() { - std::printf("TEST A0RetargetsWithoutFlag...\n"); - reset_state(); - g_env.agent_isa = kGfx1250Isa; - g_env.asic_revision = 0; - CoreApiTable core = install_tool(); - std::vector elf = make_code_object(kGfx1250Isa); - const hsa_code_object_reader_t reader = create_memory_reader(core, elf); - check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); - check(g_env.retarget_calls == 1, "A0 gfx1250 keeps default rewrite path"); - check(g_env.retarget_source_isa == kGfx1250B0Isa, + begin_test("A0RetargetsWithoutFlag", + "The existing gfx1250 A0 rewrite path must still run without " + "the trampoline flag."); + const LoadResult result = load_once(kGfx1250Isa, kGfx1250Isa, nullptr, 0); + check(result.status == HSA_STATUS_SUCCESS, "load succeeds"); + check(result.retarget_calls == 1, "A0 gfx1250 keeps default rewrite path"); + check(result.source_isa == kGfx1250B0Isa, "source code object ISA is tagged as B0"); - check(g_env.retarget_target_isa == kGfx1250A0Isa, + check(result.target_isa == kGfx1250A0Isa, "A0 agent ISA is tagged as A0"); } -void test_FlagOneRetargetsUnknownRevisionAsB0Target() { - std::printf("TEST FlagOneRetargetsUnknownRevisionAsB0Target...\n"); - reset_state(); - set_entry_trampoline_env("1"); - g_env.agent_isa = kGfx1250Isa; - g_env.asic_rev_ok = false; - CoreApiTable core = install_tool(); - std::vector elf = make_code_object(kGfx1250Isa); - const hsa_code_object_reader_t reader = create_memory_reader(core, elf); - check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); - check(g_env.retarget_calls == 1, - "flag 1 routes unknown-revision gfx1250 through COMGR"); - check(g_env.retarget_target_isa == kGfx1250B0Isa, - "unknown revision is treated as non-A0 for target suffix"); -} - void test_FlagOneBlocksNonGfx12_5() { - std::printf("TEST FlagOneBlocksNonGfx12_5...\n"); - reset_state(); - set_entry_trampoline_env("1"); - g_env.agent_isa = kGfx942Isa; - g_env.asic_revision = 0; - CoreApiTable core = install_tool(); - std::vector elf = make_code_object(kGfx942Isa); - const hsa_code_object_reader_t reader = create_memory_reader(core, elf); - check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "load succeeds"); - check(g_env.retarget_calls == 0, - "entry trampoline flag does not enable non-gfx12.5 rewrite"); - check(g_env.last_loaded_reader == reader.handle, - "non-gfx12.5 uses original reader"); + begin_test("FlagOneBlocksNonGfx12_5", + "The trampoline flag must not become a global rewrite enable."); + const LoadResult result = load_once(kGfx942Isa, kGfx942Isa, "1", 0); + check_original_load(result, + "entry trampoline flag does not route non-gfx12.5"); } void test_RetargetFailureFallsBackToOriginalReader() { - std::printf("TEST RetargetFailureFallsBackToOriginalReader...\n"); - reset_state(); - set_entry_trampoline_env("1"); - g_env.agent_isa = kGfx1250Isa; - g_env.asic_revision = 1; - g_env.retarget_status = -1; - CoreApiTable core = install_tool(); - std::vector elf = make_code_object(kGfx1250Isa); - const hsa_code_object_reader_t reader = create_memory_reader(core, elf); - check(load_reader(core, reader) == HSA_STATUS_SUCCESS, "fallback load succeeds"); - check(g_env.retarget_calls == 1, "COMGR retarget was attempted"); - check(g_env.last_loaded_reader == reader.handle, + begin_test("RetargetFailureFallsBackToOriginalReader", + "If COMGR rejects a gated rewrite, the loader must still load " + "the original reader."); + const LoadResult result = load_once(kGfx1250Isa, kGfx1250Isa, "1", 1, -1); + check(result.status == HSA_STATUS_SUCCESS, "fallback load succeeds"); + check(result.retarget_calls == 1, "COMGR retarget was attempted"); + check(result.loaded_reader == result.original_reader, "retarget failure falls back to original reader"); } } // namespace int main() { - test_FlagUnsetLoadsOriginalForNonA0Gfx1250(); - test_FlagZeroLoadsOriginalForNonA0Gfx1250(); - test_FlagOneRetargetsB0ToB0(); - test_FlagOneRetargetsGfx125FamilyWithoutSteppingSuffix(); - test_FlagOneRetargetsGfx12_5GenericWithoutSteppingSuffix(); - test_FlagOneRetargetsGenericSourceOnConcreteGfx125Agent(); + test_DisabledFlagLoadsOriginal(); + test_FlagRoutesGfx1250B0(); + test_FlagRoutesGfx12_5Family(); + test_GenericSourceUsesGenericTarget(); test_A0RetargetsWithoutFlag(); - test_FlagOneRetargetsUnknownRevisionAsB0Target(); test_FlagOneBlocksNonGfx12_5(); test_RetargetFailureFallsBackToOriginalReader(); reset_state(); From 738ce56fd12e33d01c44d432e3e45a1915af29e0 Mon Sep 17 00:00:00 2001 From: harsh-amd <149193359+harsh-amd@users.noreply.github.com> Date: Sat, 27 Jun 2026 11:31:10 -0700 Subject: [PATCH 08/16] hotswap: clarify trampoline loader policy --- projects/hotswap/hotswap_tool.cpp | 3 +++ projects/hotswap/tests/hotswap_loader_test.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/projects/hotswap/hotswap_tool.cpp b/projects/hotswap/hotswap_tool.cpp index 791102e5fe9f..9b2f81292d01 100644 --- a/projects/hotswap/hotswap_tool.cpp +++ b/projects/hotswap/hotswap_tool.cpp @@ -281,6 +281,9 @@ hsa_status_t try_retarget_and_load(hsa_executable_t executable, hsa_agent_t agen std::string source_gfx = extract_gfx_target(source_isa); std::string target_gfx = extract_gfx_target(target_isa); + // COMGR's entry-trampoline pass accepts gfx12-5-generic. Keep a generic + // source generic even on a concrete gfx125* agent so this path does not turn + // into processor retargeting. if (entry_trampolines && source_gfx != target_gfx && (source_gfx == "gfx12-5-generic" || target_gfx == "gfx12-5-generic") && is_gfx12_5_entry_trampoline_target(source_gfx) && diff --git a/projects/hotswap/tests/hotswap_loader_test.cpp b/projects/hotswap/tests/hotswap_loader_test.cpp index 4e5373bba4d0..ccbd6778b2e9 100644 --- a/projects/hotswap/tests/hotswap_loader_test.cpp +++ b/projects/hotswap/tests/hotswap_loader_test.cpp @@ -45,6 +45,9 @@ FakeEnv g_env; namespace rocr::hotswap { +// The production helper uses COMGR to read a real code object's ISA metadata. +// This test double only supplies a source ISA so the test can stay focused on +// loader policy without linking COMGR. std::string GetCodeObjectIsaName(const void *elf_data, size_t elf_size) { if (!elf_data || elf_size == 0) { return {}; From 605a93f10a952a9a9050f7aab490cdf842287d92 Mon Sep 17 00:00:00 2001 From: harsh-amd <149193359+harsh-amd@users.noreply.github.com> Date: Sat, 27 Jun 2026 11:38:44 -0700 Subject: [PATCH 09/16] hotswap: centralize rewrite policy decisions --- projects/hotswap/CMakeLists.txt | 12 +- projects/hotswap/hotswap_gfx_query.cpp | 39 ------ projects/hotswap/hotswap_gfx_query.hpp | 23 +--- projects/hotswap/hotswap_rewrite_policy.cpp | 114 ++++++++++++++++++ projects/hotswap/hotswap_rewrite_policy.hpp | 71 +++++++++++ projects/hotswap/hotswap_tool.cpp | 75 +++++------- .../hotswap/tests/hotswap_loader_test.cpp | 11 +- projects/hotswap/tests/hotswap_tool_test.cpp | 101 ++++++++++++---- 8 files changed, 308 insertions(+), 138 deletions(-) create mode 100644 projects/hotswap/hotswap_rewrite_policy.cpp create mode 100644 projects/hotswap/hotswap_rewrite_policy.hpp diff --git a/projects/hotswap/CMakeLists.txt b/projects/hotswap/CMakeLists.txt index a4de359351e4..5934ffa3a705 100644 --- a/projects/hotswap/CMakeLists.txt +++ b/projects/hotswap/CMakeLists.txt @@ -30,6 +30,7 @@ add_library(hsa-hotswap SHARED hotswap_tool.cpp hotswap.cpp hotswap_gfx_query.cpp + hotswap_rewrite_policy.cpp ${HOTSWAP_PLATFORM_IO_SRC} ) @@ -68,14 +69,14 @@ file(WRITE "${_hotswap_co_hdr}" "static const unsigned char kGfx1250MinCo[] = {${_hotswap_co_arr}};\n") target_include_directories(hotswap_test PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) -# Unit tests for the gfx-target / ASIC-revision query logic. The test compiles -# the portable hotswap_gfx_query.cpp unit alongside the test translation unit -# and supplies its own stubs for the HSA entry points, so this target needs -# the relevant headers but must NOT link the real HSA library (doing so would -# clash with the in-file stubs). +# Unit tests for gfx-target / ASIC-revision query logic and rewrite-policy +# decisions. The test supplies its own HSA stubs, so this target needs the +# relevant headers but must NOT link the real HSA library (doing so would clash +# with the in-file stubs). add_executable(hotswap_tool_test tests/hotswap_tool_test.cpp hotswap_gfx_query.cpp + hotswap_rewrite_policy.cpp ) target_include_directories(hotswap_tool_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} @@ -89,6 +90,7 @@ target_include_directories(hotswap_tool_test PRIVATE add_executable(hotswap_loader_test tests/hotswap_loader_test.cpp hotswap_gfx_query.cpp + hotswap_rewrite_policy.cpp ${HOTSWAP_PLATFORM_IO_SRC} ) target_include_directories(hotswap_loader_test PRIVATE diff --git a/projects/hotswap/hotswap_gfx_query.cpp b/projects/hotswap/hotswap_gfx_query.cpp index 5432e35bcf1a..456c9458ed99 100644 --- a/projects/hotswap/hotswap_gfx_query.cpp +++ b/projects/hotswap/hotswap_gfx_query.cpp @@ -8,7 +8,6 @@ #include #include -#include #include #include #include @@ -60,20 +59,6 @@ std::mutex g_cache_mutex; std::unordered_map g_cache; } // namespace -bool is_gfx12_5_entry_trampoline_target(const std::string &gfx_target) { - constexpr char Gfx125Prefix[] = "gfx125"; - constexpr size_t Gfx125PrefixLen = sizeof(Gfx125Prefix) - 1; - if (gfx_target == "gfx12-5-generic") { - return true; - } - if (gfx_target.size() <= Gfx125PrefixLen || - gfx_target.compare(0, Gfx125PrefixLen, Gfx125Prefix) != 0) { - return false; - } - return std::all_of(gfx_target.begin() + Gfx125PrefixLen, gfx_target.end(), - [](unsigned char c) { return std::isdigit(c); }); -} - AgentGfxRevision query_agent_gfx_revision(hsa_agent_t agent) { { std::scoped_lock lock(g_cache_mutex); @@ -111,28 +96,4 @@ void reset_gfx_revision_cache() { g_cache.clear(); } -bool gate_allows_hotswap(const AgentGfxRevision &gfx) { - return gfx.revision_valid && gfx.gfx_target == "gfx1250" && - gfx.asic_revision == 0; // A0 -} - -bool gate_allows_hotswap_rewrite(const AgentGfxRevision &gfx, - bool entry_trampolines_requested) { - if (gate_allows_hotswap(gfx)) { - return true; - } - return entry_trampolines_requested && - is_gfx12_5_entry_trampoline_target(gfx.gfx_target); -} - -std::string add_gfx1250_stepping_feature(const std::string &isa_name, - bool is_b0) { - if (extract_gfx_target(isa_name) != "gfx1250" || - isa_name.find(":gfx1250-b0-specific+") != std::string::npos || - isa_name.find(":gfx1250-b0-specific-") != std::string::npos) { - return isa_name; - } - return isa_name + (is_b0 ? ":gfx1250-b0-specific+" : ":gfx1250-b0-specific-"); -} - } // namespace rocr::hotswap diff --git a/projects/hotswap/hotswap_gfx_query.hpp b/projects/hotswap/hotswap_gfx_query.hpp index 959d203ed350..3d2961802ed0 100644 --- a/projects/hotswap/hotswap_gfx_query.hpp +++ b/projects/hotswap/hotswap_gfx_query.hpp @@ -41,32 +41,13 @@ std::string extract_gfx_target(const std::string &isa_name); // Queries the agent's gfx target and ASIC revision via the HSA runtime. The // result is cached per agent handle, since code-object loads can be frequent. -// This function intentionally encodes no gating policy; callers apply -// gate_allows_hotswap() (below) to decide whether to act. +// This function intentionally encodes no rewrite policy; callers apply the +// policy in hotswap_rewrite_policy.{hpp,cpp}. AgentGfxRevision query_agent_gfx_revision(hsa_agent_t agent); // Clears the per-agent-handle cache used by query_agent_gfx_revision(). void reset_gfx_revision_cache(); -// HotSwap's default activation policy: B0-to-A0 rewriting is performed only -// for gfx1250 silicon at ASIC revision A0 (and only when the revision was -// successfully queried). -bool gate_allows_hotswap(const AgentGfxRevision &gfx); - -// True for COMGR entry-trampoline targets: gfx12-5-generic or concrete gfx125 -// processor names with a numeric suffix (for example, gfx1250 or gfx1251). -bool is_gfx12_5_entry_trampoline_target(const std::string &gfx_target); - -// Combined activation policy for libhsa-hotswap.so. -bool gate_allows_hotswap_rewrite(const AgentGfxRevision &gfx, - bool entry_trampolines_requested); - -// Adds COMGR's hotswap-local gfx1250 stepping feature to an ISA name while -// preserving any existing feature suffixes. Non-gfx1250 ISA names and ISA -// names that already carry the stepping feature are returned unchanged. -std::string add_gfx1250_stepping_feature(const std::string &isa_name, - bool is_b0); - } // namespace rocr::hotswap #endif // ROCR_HOTSWAP_GFX_QUERY_HPP diff --git a/projects/hotswap/hotswap_rewrite_policy.cpp b/projects/hotswap/hotswap_rewrite_policy.cpp new file mode 100644 index 000000000000..783b9e7cf641 --- /dev/null +++ b/projects/hotswap/hotswap_rewrite_policy.cpp @@ -0,0 +1,114 @@ +//===- hotswap_rewrite_policy.cpp - HotSwap rewrite decision policy -------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hotswap_rewrite_policy.hpp" + +#include +#include +#include + +namespace rocr::hotswap { + +const char *rewrite_kind_name(RewriteKind kind) { + switch (kind) { + case RewriteKind::None: + return "none"; + case RewriteKind::Gfx1250A0Patch: + return "gfx1250-a0-patch"; + case RewriteKind::Gfx12_5EntryTrampoline: + return "gfx12.5-entry-trampoline"; + } + return "unknown"; +} + +bool gate_allows_hotswap(const AgentGfxRevision &gfx) { + return gfx.revision_valid && gfx.gfx_target == "gfx1250" && + gfx.asic_revision == 0; // A0 +} + +bool is_gfx12_5_entry_trampoline_target(const std::string &gfx_target) { + constexpr char Gfx125Prefix[] = "gfx125"; + constexpr size_t Gfx125PrefixLen = sizeof(Gfx125Prefix) - 1; + if (gfx_target == "gfx12-5-generic") { + return true; + } + if (gfx_target.size() <= Gfx125PrefixLen || + gfx_target.compare(0, Gfx125PrefixLen, Gfx125Prefix) != 0) { + return false; + } + return std::all_of(gfx_target.begin() + Gfx125PrefixLen, gfx_target.end(), + [](unsigned char c) { return std::isdigit(c); }); +} + +bool has_candidate_hotswap_rewrite(const AgentGfxRevision &gfx, + const RewriteOptions &options) { + return gate_allows_hotswap(gfx) || + (options.entry_trampolines_requested && + is_gfx12_5_entry_trampoline_target(gfx.gfx_target)); +} + +std::string add_gfx1250_stepping_feature(const std::string &isa_name, + bool is_b0) { + if (extract_gfx_target(isa_name) != "gfx1250" || + isa_name.find(":gfx1250-b0-specific+") != std::string::npos || + isa_name.find(":gfx1250-b0-specific-") != std::string::npos) { + return isa_name; + } + return isa_name + (is_b0 ? ":gfx1250-b0-specific+" : ":gfx1250-b0-specific-"); +} + +RewriteDecision decide_hotswap_rewrite(const AgentGfxRevision &gfx, + const std::string &source_isa, + const std::string &target_isa, + const RewriteOptions &options) { + RewriteDecision decision; + decision.source_isa = source_isa; + decision.target_isa = target_isa; + + if (source_isa.empty() || target_isa.empty()) { + return decision; + } + + std::string source_gfx = extract_gfx_target(source_isa); + std::string target_gfx = extract_gfx_target(target_isa); + + if (gate_allows_hotswap(gfx) && source_gfx == "gfx1250" && + target_gfx == "gfx1250") { + decision.kind = RewriteKind::Gfx1250A0Patch; + decision.source_isa = add_gfx1250_stepping_feature(source_isa, true); + decision.target_isa = add_gfx1250_stepping_feature(target_isa, false); + return decision; + } + + if (!options.entry_trampolines_requested || + !is_gfx12_5_entry_trampoline_target(gfx.gfx_target) || + !is_gfx12_5_entry_trampoline_target(source_gfx) || + !is_gfx12_5_entry_trampoline_target(target_gfx)) { + return decision; + } + + decision.kind = RewriteKind::Gfx12_5EntryTrampoline; + + // COMGR's entry-trampoline pass accepts gfx12-5-generic. When one side is + // generic, keep the COMGR target on the source processor so this path does not + // become processor retargeting. + if (source_gfx != target_gfx && + (source_gfx == "gfx12-5-generic" || target_gfx == "gfx12-5-generic")) { + decision.target_isa = source_isa; + target_gfx = source_gfx; + } + + if (source_gfx == "gfx1250" && target_gfx == "gfx1250") { + decision.source_isa = add_gfx1250_stepping_feature(source_isa, true); + decision.target_isa = + add_gfx1250_stepping_feature(decision.target_isa, + !gate_allows_hotswap(gfx)); + } + + return decision; +} + +} // namespace rocr::hotswap diff --git a/projects/hotswap/hotswap_rewrite_policy.hpp b/projects/hotswap/hotswap_rewrite_policy.hpp new file mode 100644 index 000000000000..368cee39246c --- /dev/null +++ b/projects/hotswap/hotswap_rewrite_policy.hpp @@ -0,0 +1,71 @@ +//===- hotswap_rewrite_policy.hpp - HotSwap rewrite decision policy -------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Centralized policy for selecting HotSwap rewrite modes and the COMGR +// source/target ISA pair. The HSA tools loader owns interception and fallback; +// this unit owns the conditions under which a rewrite is requested. +// +//===----------------------------------------------------------------------===// + +#ifndef ROCR_HOTSWAP_REWRITE_POLICY_HPP +#define ROCR_HOTSWAP_REWRITE_POLICY_HPP + +#include "hotswap_gfx_query.hpp" + +#include + +namespace rocr::hotswap { + +enum class RewriteKind { + None, + Gfx1250A0Patch, + Gfx12_5EntryTrampoline, +}; + +struct RewriteOptions { + bool entry_trampolines_requested = false; +}; + +struct RewriteDecision { + RewriteKind kind = RewriteKind::None; + std::string source_isa; + std::string target_isa; + + bool should_rewrite() const { return kind != RewriteKind::None; } +}; + +const char *rewrite_kind_name(RewriteKind kind); + +// HotSwap's default activation policy: B0-to-A0 rewriting is performed only +// for gfx1250 silicon at ASIC revision A0 (and only when the revision was +// successfully queried). +bool gate_allows_hotswap(const AgentGfxRevision &gfx); + +// True for COMGR entry-trampoline targets: gfx12-5-generic or concrete gfx125 +// processor names with a numeric suffix (for example, gfx1250 or gfx1251). +bool is_gfx12_5_entry_trampoline_target(const std::string &gfx_target); + +// Agent-level precheck used by the loader to avoid source-ISA parsing when no +// rewrite mode can possibly apply. +bool has_candidate_hotswap_rewrite(const AgentGfxRevision &gfx, + const RewriteOptions &options); + +// Adds COMGR's hotswap-local gfx1250 stepping feature to an ISA name while +// preserving any existing feature suffixes. Non-gfx1250 ISA names and ISA +// names that already carry the stepping feature are returned unchanged. +std::string add_gfx1250_stepping_feature(const std::string &isa_name, + bool is_b0); + +// Returns the rewrite mode and COMGR ISA pair for this load, or RewriteKind::None +// when the original code object should be loaded unchanged. +RewriteDecision decide_hotswap_rewrite(const AgentGfxRevision &gfx, + const std::string &source_isa, + const std::string &target_isa, + const RewriteOptions &options); + +} // namespace rocr::hotswap + +#endif // ROCR_HOTSWAP_REWRITE_POLICY_HPP diff --git a/projects/hotswap/hotswap_tool.cpp b/projects/hotswap/hotswap_tool.cpp index 9b2f81292d01..29c2a6498696 100644 --- a/projects/hotswap/hotswap_tool.cpp +++ b/projects/hotswap/hotswap_tool.cpp @@ -16,6 +16,7 @@ #include "hotswap.hpp" #include "hotswap_gfx_query.hpp" #include "hotswap_platform_io.hpp" +#include "hotswap_rewrite_policy.hpp" #include #include #include @@ -41,14 +42,14 @@ namespace { namespace hotswap_io = rocr::hotswap::platform_io; -using rocr::hotswap::add_gfx1250_stepping_feature; using rocr::hotswap::AgentGfxRevision; -using rocr::hotswap::extract_gfx_target; -using rocr::hotswap::gate_allows_hotswap; -using rocr::hotswap::gate_allows_hotswap_rewrite; +using rocr::hotswap::decide_hotswap_rewrite; using rocr::hotswap::get_agent_isa_name; -using rocr::hotswap::is_gfx12_5_entry_trampoline_target; +using rocr::hotswap::has_candidate_hotswap_rewrite; using rocr::hotswap::query_agent_gfx_revision; +using rocr::hotswap::RewriteDecision; +using rocr::hotswap::RewriteOptions; +using rocr::hotswap::rewrite_kind_name; using ByteVec = std::shared_ptr>; using OwnedElf = std::unique_ptr; @@ -267,49 +268,21 @@ hsa_status_t try_retarget_and_load(hsa_executable_t executable, hsa_agent_t agen const char *options, hsa_loaded_code_object_t *loaded_code_object, const ByteVec &local_bytes, - const AgentGfxRevision &gfx, - bool entry_trampolines) { - // Source ISA from the code object, target ISA from the running GPU. - std::string source_isa = rocr::hotswap::GetCodeObjectIsaName( - local_bytes->data(), local_bytes->size()); - std::string target_isa = get_agent_isa_name(agent); - if (source_isa.empty() || target_isa.empty()) { - HOTSWAP_LOG("hotswap: rewrite SKIP empty isa (src='%s' tgt='%s' size=%zu)\n", - source_isa.c_str(), target_isa.c_str(), local_bytes->size()); - return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; - } - - std::string source_gfx = extract_gfx_target(source_isa); - std::string target_gfx = extract_gfx_target(target_isa); - // COMGR's entry-trampoline pass accepts gfx12-5-generic. Keep a generic - // source generic even on a concrete gfx125* agent so this path does not turn - // into processor retargeting. - if (entry_trampolines && source_gfx != target_gfx && - (source_gfx == "gfx12-5-generic" || target_gfx == "gfx12-5-generic") && - is_gfx12_5_entry_trampoline_target(source_gfx) && - is_gfx12_5_entry_trampoline_target(target_gfx)) { - target_isa = source_isa; - target_gfx = source_gfx; - } - - if (source_gfx == "gfx1250" && target_gfx == "gfx1250") { - source_isa = add_gfx1250_stepping_feature(source_isa, true); - target_isa = - add_gfx1250_stepping_feature(target_isa, !gate_allows_hotswap(gfx)); - } - + const RewriteDecision &decision) { // Route through RetargetCodeObject for unified logging, validation, // and COMGR interaction. Do NOT skip when source == target: B0-to-A0 // patching uses the same ISA name on both sides. void *out_elf = nullptr; size_t out_elf_size = 0; const int rc = rocr::hotswap::RetargetCodeObject( - local_bytes->data(), local_bytes->size(), source_isa.c_str(), - target_isa.c_str(), &out_elf, &out_elf_size); + local_bytes->data(), local_bytes->size(), decision.source_isa.c_str(), + decision.target_isa.c_str(), &out_elf, &out_elf_size); - HOTSWAP_LOG("hotswap: rewrite src=%s tgt=%s in=%zu rc=%d out=%zu changed=%d\n", - source_isa.c_str(), target_isa.c_str(), local_bytes->size(), rc, - out_elf_size, out_elf != local_bytes->data()); + HOTSWAP_LOG("hotswap: rewrite kind=%s src=%s tgt=%s in=%zu rc=%d out=%zu " + "changed=%d\n", + rewrite_kind_name(decision.kind), decision.source_isa.c_str(), + decision.target_isa.c_str(), local_bytes->size(), rc, out_elf_size, + out_elf != local_bytes->data()); if (rc != 0 || out_elf == local_bytes->data()) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; @@ -345,8 +318,8 @@ hsa_status_t HSA_API hotswap_load_agent_code_object( } const AgentGfxRevision gfx = query_agent_gfx_revision(agent); - const bool entry_trampolines = entry_trampolines_requested(); - if (!gate_allows_hotswap_rewrite(gfx, entry_trampolines)) { + const RewriteOptions rewrite_options{entry_trampolines_requested()}; + if (!has_candidate_hotswap_rewrite(gfx, rewrite_options)) { HOTSWAP_LOG("hotswap: gate BLOCKED (gfx=%s rev=%u valid=%d)\n", gfx.gfx_target.c_str(), gfx.asic_revision, gfx.revision_valid); return load_original_reader(executable, agent, code_object_reader, @@ -354,9 +327,21 @@ hsa_status_t HSA_API hotswap_load_agent_code_object( reader_from_file); } + const std::string source_isa = rocr::hotswap::GetCodeObjectIsaName( + local_bytes->data(), local_bytes->size()); + const std::string target_isa = get_agent_isa_name(agent); + const RewriteDecision decision = + decide_hotswap_rewrite(gfx, source_isa, target_isa, rewrite_options); + if (!decision.should_rewrite()) { + HOTSWAP_LOG("hotswap: decision NONE (gfx=%s src='%s' tgt='%s')\n", + gfx.gfx_target.c_str(), source_isa.c_str(), target_isa.c_str()); + return load_original_reader(executable, agent, code_object_reader, + options, loaded_code_object, + reader_from_file); + } + const hsa_status_t status = try_retarget_and_load( - executable, agent, options, loaded_code_object, local_bytes, gfx, - entry_trampolines); + executable, agent, options, loaded_code_object, local_bytes, decision); if (status == HSA_STATUS_SUCCESS) { return status; } diff --git a/projects/hotswap/tests/hotswap_loader_test.cpp b/projects/hotswap/tests/hotswap_loader_test.cpp index ccbd6778b2e9..acf6b7bb45d9 100644 --- a/projects/hotswap/tests/hotswap_loader_test.cpp +++ b/projects/hotswap/tests/hotswap_loader_test.cpp @@ -393,10 +393,13 @@ void test_A0RetargetsWithoutFlag() { void test_FlagOneBlocksNonGfx12_5() { begin_test("FlagOneBlocksNonGfx12_5", - "The trampoline flag must not become a global rewrite enable."); - const LoadResult result = load_once(kGfx942Isa, kGfx942Isa, "1", 0); - check_original_load(result, - "entry trampoline flag does not route non-gfx12.5"); + "The trampoline flag must not become a global rewrite enable for " + "unsupported agents or source code objects."); + LoadResult result = load_once(kGfx942Isa, kGfx942Isa, "1", 0); + check_original_load(result, "non-gfx12.5 agent does not route"); + + result = load_once(kGfx942Isa, kGfx1251Isa, "1"); + check_original_load(result, "non-gfx12.5 source does not route"); } void test_RetargetFailureFallsBackToOriginalReader() { diff --git a/projects/hotswap/tests/hotswap_tool_test.cpp b/projects/hotswap/tests/hotswap_tool_test.cpp index 3977b45cbed6..09ad81543ff2 100644 --- a/projects/hotswap/tests/hotswap_tool_test.cpp +++ b/projects/hotswap/tests/hotswap_tool_test.cpp @@ -4,16 +4,12 @@ // //===----------------------------------------------------------------------===// // -// Unit tests for query_agent_gfx_revision() in hotswap_gfx_query.cpp, plus the -// gfx1250-A0 gate that hotswap_load_agent_code_object() applies on top of it. +// Unit tests for query_agent_gfx_revision() in hotswap_gfx_query.cpp and the +// rewrite decisions selected by hotswap_rewrite_policy.cpp. // -// The query helpers — query_agent_gfx_revision(), extract_gfx_target() and the -// AgentGfxRevision type — live in their own translation unit (compiled in -// alongside this test), so the test includes only the small -// hotswap_gfx_query.hpp header. The HSA entry -// points the query calls are replaced with in-file stubs (linked in place of -// the real libraries) so the query can be driven entirely from the test without -// GPU hardware: +// The HSA entry points used by the query helper are replaced with in-file stubs +// (linked in place of the real libraries) so query and policy behavior can be +// driven entirely from the test without GPU hardware: // // * ISA name <- hsa_agent_iterate_isas / hsa_isa_get_info_alt // * ASIC revision <- hsa_agent_get_info(HSA_AMD_AGENT_INFO_ASIC_REVISION) @@ -45,16 +41,21 @@ struct FakeEnv { FakeEnv g_env; } // namespace -// The unit under test (brings in hsa.h and the query helper declarations). +// The units under test (bring in HSA query helpers and rewrite policy). #include "hotswap_gfx_query.hpp" +#include "hotswap_rewrite_policy.hpp" using rocr::hotswap::AgentGfxRevision; using rocr::hotswap::add_gfx1250_stepping_feature; +using rocr::hotswap::decide_hotswap_rewrite; using rocr::hotswap::gate_allows_hotswap; -using rocr::hotswap::gate_allows_hotswap_rewrite; +using rocr::hotswap::has_candidate_hotswap_rewrite; using rocr::hotswap::is_gfx12_5_entry_trampoline_target; using rocr::hotswap::query_agent_gfx_revision; using rocr::hotswap::reset_gfx_revision_cache; +using rocr::hotswap::RewriteDecision; +using rocr::hotswap::RewriteKind; +using rocr::hotswap::RewriteOptions; // --------------------------------------------------------------------------- // Stubs replacing the real HSA symbols referenced by the tool. @@ -146,9 +147,8 @@ const char *kGfx12_5GenericIsa = "amdgcn-amd-amdhsa--gfx12-5-generic"; const char *kGfx12_5GenericIsaWithFeatures = "amdgcn-amd-amdhsa--gfx12-5-generic:sramecc+"; -// The gate applied in hotswap_load_agent_code_object() is the shared -// rocr::hotswap::gate_allows_hotswap(), exercised directly below so the tests -// and the tool can never drift apart. +// The rewrite-policy helpers are exercised directly below so the tests and the +// loader can never drift apart. // gfx1250 silicon at ASIC revision A0 -> parsed target + revision, gate passes. void test_Gfx1250A0Passes() { @@ -230,9 +230,9 @@ void test_EntryTrampolineFlagAllowsGfx1250NonA0() { g_env.asic_revision = 1; // A1/B0-side path, not A0. const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); run("default gate blocks non-A0", - gate_allows_hotswap_rewrite(g, false) == false); + has_candidate_hotswap_rewrite(g, RewriteOptions{false}) == false); run("trampoline gate allows gfx1250 non-A0", - gate_allows_hotswap_rewrite(g, true) == true); + has_candidate_hotswap_rewrite(g, RewriteOptions{true}) == true); } // The explicit trampoline gate covers the broader gfx125* family, while the @@ -244,9 +244,9 @@ void test_EntryTrampolineFlagAllowsGfx125Family() { g_env.asic_revision = 1; const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); run("default gate blocks gfx1251", - gate_allows_hotswap_rewrite(g, false) == false); + has_candidate_hotswap_rewrite(g, RewriteOptions{false}) == false); run("trampoline gate allows gfx1251", - gate_allows_hotswap_rewrite(g, true) == true); + has_candidate_hotswap_rewrite(g, RewriteOptions{true}) == true); } void test_EntryTrampolineFlagRejectsMalformedGfx125Prefix() { @@ -260,7 +260,7 @@ void test_EntryTrampolineFlagRejectsMalformedGfx125Prefix() { run("trampoline family predicate rejects malformed suffix", is_gfx12_5_entry_trampoline_target(g.gfx_target) == false); run("trampoline gate rejects malformed gfx125 prefix", - gate_allows_hotswap_rewrite(g, true) == false); + has_candidate_hotswap_rewrite(g, RewriteOptions{true}) == false); } // COMGR's entry-trampoline pass also accepts the gfx12.5 generic processor @@ -272,9 +272,9 @@ void test_EntryTrampolineFlagAllowsGfx12_5Generic() { g_env.asic_revision = 1; const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); run("default gate blocks gfx12-5-generic", - gate_allows_hotswap_rewrite(g, false) == false); + has_candidate_hotswap_rewrite(g, RewriteOptions{false}) == false); run("trampoline gate allows gfx12-5-generic", - gate_allows_hotswap_rewrite(g, true) == true); + has_candidate_hotswap_rewrite(g, RewriteOptions{true}) == true); } // If ASIC revision cannot be queried, the explicit trampoline flag can still @@ -288,9 +288,9 @@ void test_EntryTrampolineFlagAllowsGfx1250UnknownRevision() { g_env.asic_rev_ok = false; const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); run("default gate blocks unknown revision", - gate_allows_hotswap_rewrite(g, false) == false); + has_candidate_hotswap_rewrite(g, RewriteOptions{false}) == false); run("trampoline gate allows gfx1250 unknown revision", - gate_allows_hotswap_rewrite(g, true) == true); + has_candidate_hotswap_rewrite(g, RewriteOptions{true}) == true); } // The trampoline flag is not a global rewrite enable; non-gfx12.5 targets still @@ -302,7 +302,59 @@ void test_EntryTrampolineFlagBlocksOtherTargets() { g_env.asic_revision = 0; const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); run("trampoline gate blocks gfx942", - gate_allows_hotswap_rewrite(g, true) == false); + has_candidate_hotswap_rewrite(g, RewriteOptions{true}) == false); +} + +void test_RewriteDecisionSelectsModeAndIsaPair() { + printf("TEST RewriteDecisionSelectsModeAndIsaPair...\n"); + + AgentGfxRevision gfx1250_a0; + gfx1250_a0.gfx_target = "gfx1250"; + gfx1250_a0.revision_valid = true; + gfx1250_a0.asic_revision = 0; + RewriteDecision d = + decide_hotswap_rewrite(gfx1250_a0, kGfx1250Isa, kGfx1250Isa, + RewriteOptions{false}); + run("A0 gfx1250 selects default patch", + d.kind == RewriteKind::Gfx1250A0Patch); + run("A0 patch uses B0 source", + d.source_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific+"); + run("A0 patch uses A0 target", + d.target_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific-"); + + AgentGfxRevision gfx1250_b0; + gfx1250_b0.gfx_target = "gfx1250"; + gfx1250_b0.revision_valid = true; + gfx1250_b0.asic_revision = 1; + d = decide_hotswap_rewrite(gfx1250_b0, kGfx1250Isa, kGfx1250Isa, + RewriteOptions{true}); + run("flagged non-A0 gfx1250 selects trampoline", + d.kind == RewriteKind::Gfx12_5EntryTrampoline); + run("trampoline keeps gfx1250 target B0", + d.target_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific+"); + + AgentGfxRevision gfx1251; + gfx1251.gfx_target = "gfx1251"; + gfx1251.revision_valid = true; + gfx1251.asic_revision = 1; + d = decide_hotswap_rewrite(gfx1251, kGfx1251Isa, kGfx1251Isa, + RewriteOptions{true}); + run("flagged gfx1251 selects trampoline", + d.kind == RewriteKind::Gfx12_5EntryTrampoline); + run("gfx1251 ISA pair is preserved", + d.source_isa == kGfx1251Isa && d.target_isa == kGfx1251Isa); + + d = decide_hotswap_rewrite(gfx1251, kGfx12_5GenericIsa, kGfx1251Isa, + RewriteOptions{true}); + run("generic source on concrete gfx125 agent selects trampoline", + d.kind == RewriteKind::Gfx12_5EntryTrampoline); + run("generic source keeps generic target", + d.source_isa == kGfx12_5GenericIsa && d.target_isa == kGfx12_5GenericIsa); + + d = decide_hotswap_rewrite(gfx1251, kGfx942Isa, kGfx1251Isa, + RewriteOptions{true}); + run("non-gfx12.5 source on gfx125 agent is not rewritten", + d.kind == RewriteKind::None); } void test_AddGfx1250SteppingFeature() { @@ -386,6 +438,7 @@ int main() { test_EntryTrampolineFlagAllowsGfx12_5Generic(); test_EntryTrampolineFlagAllowsGfx1250UnknownRevision(); test_EntryTrampolineFlagBlocksOtherTargets(); + test_RewriteDecisionSelectsModeAndIsaPair(); test_AddGfx1250SteppingFeature(); test_AsicRevisionQueryFailure(); test_ResultIsCachedPerHandle(); From 3a66961f5d8550ecc18bc0ee9422a0729b336699 Mon Sep 17 00:00:00 2001 From: harsh-amd <149193359+harsh-amd@users.noreply.github.com> Date: Sat, 27 Jun 2026 11:47:30 -0700 Subject: [PATCH 10/16] hotswap: avoid trampoline processor retargeting --- projects/hotswap/hotswap_rewrite_policy.cpp | 14 ++++++-------- projects/hotswap/tests/hotswap_loader_test.cpp | 15 +++++++++++++++ projects/hotswap/tests/hotswap_tool_test.cpp | 8 ++++++++ 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/projects/hotswap/hotswap_rewrite_policy.cpp b/projects/hotswap/hotswap_rewrite_policy.cpp index 783b9e7cf641..328dbe3c354d 100644 --- a/projects/hotswap/hotswap_rewrite_policy.cpp +++ b/projects/hotswap/hotswap_rewrite_policy.cpp @@ -92,14 +92,12 @@ RewriteDecision decide_hotswap_rewrite(const AgentGfxRevision &gfx, decision.kind = RewriteKind::Gfx12_5EntryTrampoline; - // COMGR's entry-trampoline pass accepts gfx12-5-generic. When one side is - // generic, keep the COMGR target on the source processor so this path does not - // become processor retargeting. - if (source_gfx != target_gfx && - (source_gfx == "gfx12-5-generic" || target_gfx == "gfx12-5-generic")) { - decision.target_isa = source_isa; - target_gfx = source_gfx; - } + // ROCm/rocm-systems#7581 installs kernel-entry trampolines after the normal + // loader compatibility checks and keys the work off the code object's ISA, not + // a source->agent retarget. Mirror that here by keeping COMGR's target on the + // source processor for all gfx12.5 entry-trampoline rewrites. + decision.target_isa = source_isa; + target_gfx = source_gfx; if (source_gfx == "gfx1250" && target_gfx == "gfx1250") { decision.source_isa = add_gfx1250_stepping_feature(source_isa, true); diff --git a/projects/hotswap/tests/hotswap_loader_test.cpp b/projects/hotswap/tests/hotswap_loader_test.cpp index acf6b7bb45d9..a9cec9598599 100644 --- a/projects/hotswap/tests/hotswap_loader_test.cpp +++ b/projects/hotswap/tests/hotswap_loader_test.cpp @@ -378,6 +378,20 @@ void test_GenericSourceUsesGenericTarget() { "generic target ISA is preserved to avoid processor retargeting"); } +void test_ConcreteSourceUsesSourceTarget() { + begin_test("ConcreteSourceUsesSourceTarget", + "A concrete gfx125* source loaded on a different gfx125* agent " + "must stay on the source processor to avoid retargeting."); + const LoadResult result = load_once(kGfx1250Isa, kGfx1251Isa, "1"); + check(result.status == HSA_STATUS_SUCCESS, "load succeeds"); + check(result.retarget_calls == 1, + "concrete source on different gfx125 agent routes through COMGR"); + check(result.source_isa == kGfx1250B0Isa, + "source ISA is tagged as B0"); + check(result.target_isa == kGfx1250B0Isa, + "target ISA stays on the source processor"); +} + void test_A0RetargetsWithoutFlag() { begin_test("A0RetargetsWithoutFlag", "The existing gfx1250 A0 rewrite path must still run without " @@ -420,6 +434,7 @@ int main() { test_FlagRoutesGfx1250B0(); test_FlagRoutesGfx12_5Family(); test_GenericSourceUsesGenericTarget(); + test_ConcreteSourceUsesSourceTarget(); test_A0RetargetsWithoutFlag(); test_FlagOneBlocksNonGfx12_5(); test_RetargetFailureFallsBackToOriginalReader(); diff --git a/projects/hotswap/tests/hotswap_tool_test.cpp b/projects/hotswap/tests/hotswap_tool_test.cpp index 09ad81543ff2..cd967dd0e5f5 100644 --- a/projects/hotswap/tests/hotswap_tool_test.cpp +++ b/projects/hotswap/tests/hotswap_tool_test.cpp @@ -351,6 +351,14 @@ void test_RewriteDecisionSelectsModeAndIsaPair() { run("generic source keeps generic target", d.source_isa == kGfx12_5GenericIsa && d.target_isa == kGfx12_5GenericIsa); + d = decide_hotswap_rewrite(gfx1251, kGfx1250Isa, kGfx1251Isa, + RewriteOptions{true}); + run("concrete source on different gfx125 agent selects trampoline", + d.kind == RewriteKind::Gfx12_5EntryTrampoline); + run("concrete mismatch keeps source processor", + d.source_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific+" && + d.target_isa == d.source_isa); + d = decide_hotswap_rewrite(gfx1251, kGfx942Isa, kGfx1251Isa, RewriteOptions{true}); run("non-gfx12.5 source on gfx125 agent is not rewritten", From 18dd66c03b9bda4f638f05646c3d9b067a4822b9 Mon Sep 17 00:00:00 2001 From: harsh-amd <149193359+harsh-amd@users.noreply.github.com> Date: Sat, 27 Jun 2026 11:57:46 -0700 Subject: [PATCH 11/16] hotswap: clarify rewrite decision edge cases --- projects/hotswap/hotswap_rewrite_policy.cpp | 9 +- projects/hotswap/hotswap_rewrite_policy.hpp | 6 +- .../hotswap/tests/hotswap_loader_test.cpp | 14 +++ projects/hotswap/tests/hotswap_tool_test.cpp | 98 +++++++++++++++---- 4 files changed, 102 insertions(+), 25 deletions(-) diff --git a/projects/hotswap/hotswap_rewrite_policy.cpp b/projects/hotswap/hotswap_rewrite_policy.cpp index 328dbe3c354d..da162662d162 100644 --- a/projects/hotswap/hotswap_rewrite_policy.cpp +++ b/projects/hotswap/hotswap_rewrite_policy.cpp @@ -18,6 +18,8 @@ const char *rewrite_kind_name(RewriteKind kind) { return "none"; case RewriteKind::Gfx1250A0Patch: return "gfx1250-a0-patch"; + case RewriteKind::Gfx1250A0PatchWithEntryTrampoline: + return "gfx1250-a0-patch+gfx12.5-entry-trampoline"; case RewriteKind::Gfx12_5EntryTrampoline: return "gfx12.5-entry-trampoline"; } @@ -77,7 +79,9 @@ RewriteDecision decide_hotswap_rewrite(const AgentGfxRevision &gfx, if (gate_allows_hotswap(gfx) && source_gfx == "gfx1250" && target_gfx == "gfx1250") { - decision.kind = RewriteKind::Gfx1250A0Patch; + decision.kind = options.entry_trampolines_requested + ? RewriteKind::Gfx1250A0PatchWithEntryTrampoline + : RewriteKind::Gfx1250A0Patch; decision.source_isa = add_gfx1250_stepping_feature(source_isa, true); decision.target_isa = add_gfx1250_stepping_feature(target_isa, false); return decision; @@ -97,9 +101,8 @@ RewriteDecision decide_hotswap_rewrite(const AgentGfxRevision &gfx, // a source->agent retarget. Mirror that here by keeping COMGR's target on the // source processor for all gfx12.5 entry-trampoline rewrites. decision.target_isa = source_isa; - target_gfx = source_gfx; - if (source_gfx == "gfx1250" && target_gfx == "gfx1250") { + if (source_gfx == "gfx1250") { decision.source_isa = add_gfx1250_stepping_feature(source_isa, true); decision.target_isa = add_gfx1250_stepping_feature(decision.target_isa, diff --git a/projects/hotswap/hotswap_rewrite_policy.hpp b/projects/hotswap/hotswap_rewrite_policy.hpp index 368cee39246c..cf803a56573d 100644 --- a/projects/hotswap/hotswap_rewrite_policy.hpp +++ b/projects/hotswap/hotswap_rewrite_policy.hpp @@ -22,6 +22,7 @@ namespace rocr::hotswap { enum class RewriteKind { None, Gfx1250A0Patch, + Gfx1250A0PatchWithEntryTrampoline, Gfx12_5EntryTrampoline, }; @@ -34,7 +35,10 @@ struct RewriteDecision { std::string source_isa; std::string target_isa; - bool should_rewrite() const { return kind != RewriteKind::None; } + bool should_rewrite() const { + return kind != RewriteKind::None && !source_isa.empty() && + !target_isa.empty(); + } }; const char *rewrite_kind_name(RewriteKind kind); diff --git a/projects/hotswap/tests/hotswap_loader_test.cpp b/projects/hotswap/tests/hotswap_loader_test.cpp index a9cec9598599..4adbe9e6bb58 100644 --- a/projects/hotswap/tests/hotswap_loader_test.cpp +++ b/projects/hotswap/tests/hotswap_loader_test.cpp @@ -405,6 +405,19 @@ void test_A0RetargetsWithoutFlag() { "A0 agent ISA is tagged as A0"); } +void test_A0WithFlagKeepsA0PatchPair() { + begin_test("A0WithFlagKeepsA0PatchPair", + "The trampoline flag on gfx1250 A0 must preserve the B0-to-A0 " + "patch ISA pair while routing through COMGR."); + const LoadResult result = load_once(kGfx1250Isa, kGfx1250Isa, "1", 0); + check(result.status == HSA_STATUS_SUCCESS, "load succeeds"); + check(result.retarget_calls == 1, "A0 gfx1250 routes through COMGR"); + check(result.source_isa == kGfx1250B0Isa, + "source code object ISA is tagged as B0"); + check(result.target_isa == kGfx1250A0Isa, + "A0 agent ISA remains tagged as A0"); +} + void test_FlagOneBlocksNonGfx12_5() { begin_test("FlagOneBlocksNonGfx12_5", "The trampoline flag must not become a global rewrite enable for " @@ -436,6 +449,7 @@ int main() { test_GenericSourceUsesGenericTarget(); test_ConcreteSourceUsesSourceTarget(); test_A0RetargetsWithoutFlag(); + test_A0WithFlagKeepsA0PatchPair(); test_FlagOneBlocksNonGfx12_5(); test_RetargetFailureFallsBackToOriginalReader(); reset_state(); diff --git a/projects/hotswap/tests/hotswap_tool_test.cpp b/projects/hotswap/tests/hotswap_tool_test.cpp index cd967dd0e5f5..93dddc512eb6 100644 --- a/projects/hotswap/tests/hotswap_tool_test.cpp +++ b/projects/hotswap/tests/hotswap_tool_test.cpp @@ -147,6 +147,16 @@ const char *kGfx12_5GenericIsa = "amdgcn-amd-amdhsa--gfx12-5-generic"; const char *kGfx12_5GenericIsaWithFeatures = "amdgcn-amd-amdhsa--gfx12-5-generic:sramecc+"; +AgentGfxRevision make_gfx_revision(const char *gfx_target, + uint32_t asic_revision, + bool revision_valid = true) { + AgentGfxRevision gfx; + gfx.gfx_target = gfx_target; + gfx.revision_valid = revision_valid; + gfx.asic_revision = asic_revision; + return gfx; +} + // The rewrite-policy helpers are exercised directly below so the tests and the // loader can never drift apart. @@ -305,14 +315,11 @@ void test_EntryTrampolineFlagBlocksOtherTargets() { has_candidate_hotswap_rewrite(g, RewriteOptions{true}) == false); } -void test_RewriteDecisionSelectsModeAndIsaPair() { - printf("TEST RewriteDecisionSelectsModeAndIsaPair...\n"); - - AgentGfxRevision gfx1250_a0; - gfx1250_a0.gfx_target = "gfx1250"; - gfx1250_a0.revision_valid = true; - gfx1250_a0.asic_revision = 0; - RewriteDecision d = +// A0 gfx1250 without the trampoline flag is the original B0-to-A0 patch path. +void test_RewriteDecisionSelectsA0Patch() { + printf("TEST RewriteDecisionSelectsA0Patch...\n"); + const AgentGfxRevision gfx1250_a0 = make_gfx_revision("gfx1250", 0); + const RewriteDecision d = decide_hotswap_rewrite(gfx1250_a0, kGfx1250Isa, kGfx1250Isa, RewriteOptions{false}); run("A0 gfx1250 selects default patch", @@ -321,30 +328,57 @@ void test_RewriteDecisionSelectsModeAndIsaPair() { d.source_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific+"); run("A0 patch uses A0 target", d.target_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific-"); +} - AgentGfxRevision gfx1250_b0; - gfx1250_b0.gfx_target = "gfx1250"; - gfx1250_b0.revision_valid = true; - gfx1250_b0.asic_revision = 1; - d = decide_hotswap_rewrite(gfx1250_b0, kGfx1250Isa, kGfx1250Isa, +// A0 gfx1250 with the trampoline flag keeps the A0 patch ISA pair but records +// that COMGR's trampoline path is also requested. +void test_RewriteDecisionSelectsA0PatchWithTrampolines() { + printf("TEST RewriteDecisionSelectsA0PatchWithTrampolines...\n"); + const AgentGfxRevision gfx1250_a0 = make_gfx_revision("gfx1250", 0); + const RewriteDecision d = + decide_hotswap_rewrite(gfx1250_a0, kGfx1250Isa, kGfx1250Isa, + RewriteOptions{true}); + run("A0 gfx1250 with flag selects combined rewrite", + d.kind == RewriteKind::Gfx1250A0PatchWithEntryTrampoline); + run("combined rewrite uses B0 source", + d.source_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific+"); + run("combined rewrite uses A0 target", + d.target_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific-"); +} + +// Non-A0 gfx1250 with the trampoline flag runs trampoline-only B0-to-B0. +void test_RewriteDecisionSelectsGfx1250B0Trampoline() { + printf("TEST RewriteDecisionSelectsGfx1250B0Trampoline...\n"); + const AgentGfxRevision gfx1250_b0 = make_gfx_revision("gfx1250", 1); + const RewriteDecision d = + decide_hotswap_rewrite(gfx1250_b0, kGfx1250Isa, kGfx1250Isa, RewriteOptions{true}); run("flagged non-A0 gfx1250 selects trampoline", d.kind == RewriteKind::Gfx12_5EntryTrampoline); run("trampoline keeps gfx1250 target B0", d.target_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific+"); +} - AgentGfxRevision gfx1251; - gfx1251.gfx_target = "gfx1251"; - gfx1251.revision_valid = true; - gfx1251.asic_revision = 1; - d = decide_hotswap_rewrite(gfx1251, kGfx1251Isa, kGfx1251Isa, +// Concrete gfx125* targets other than gfx1250 use the trampoline path unchanged. +void test_RewriteDecisionSelectsGfx125FamilyTrampoline() { + printf("TEST RewriteDecisionSelectsGfx125FamilyTrampoline...\n"); + const AgentGfxRevision gfx1251 = make_gfx_revision("gfx1251", 1); + const RewriteDecision d = + decide_hotswap_rewrite(gfx1251, kGfx1251Isa, kGfx1251Isa, RewriteOptions{true}); run("flagged gfx1251 selects trampoline", d.kind == RewriteKind::Gfx12_5EntryTrampoline); run("gfx1251 ISA pair is preserved", d.source_isa == kGfx1251Isa && d.target_isa == kGfx1251Isa); +} - d = decide_hotswap_rewrite(gfx1251, kGfx12_5GenericIsa, kGfx1251Isa, +// Entry-trampoline rewrites keep COMGR on the source processor when the agent +// reports a different concrete or generic gfx12.5 ISA. +void test_RewriteDecisionKeepsTrampolineSourceProcessor() { + printf("TEST RewriteDecisionKeepsTrampolineSourceProcessor...\n"); + const AgentGfxRevision gfx1251 = make_gfx_revision("gfx1251", 1); + RewriteDecision d = + decide_hotswap_rewrite(gfx1251, kGfx12_5GenericIsa, kGfx1251Isa, RewriteOptions{true}); run("generic source on concrete gfx125 agent selects trampoline", d.kind == RewriteKind::Gfx12_5EntryTrampoline); @@ -358,11 +392,27 @@ void test_RewriteDecisionSelectsModeAndIsaPair() { run("concrete mismatch keeps source processor", d.source_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific+" && d.target_isa == d.source_isa); +} - d = decide_hotswap_rewrite(gfx1251, kGfx942Isa, kGfx1251Isa, +// The trampoline flag never routes non-gfx12.5 source code objects through COMGR. +void test_RewriteDecisionRejectsNonGfx12_5Source() { + printf("TEST RewriteDecisionRejectsNonGfx12_5Source...\n"); + const AgentGfxRevision gfx1251 = make_gfx_revision("gfx1251", 1); + const RewriteDecision d = + decide_hotswap_rewrite(gfx1251, kGfx942Isa, kGfx1251Isa, RewriteOptions{true}); run("non-gfx12.5 source on gfx125 agent is not rewritten", d.kind == RewriteKind::None); + run("non-gfx12.5 source decision does not rewrite", + d.should_rewrite() == false); +} + +// Public RewriteDecision objects with incomplete ISA state are not actionable. +void test_RewriteDecisionRequiresIsaPair() { + printf("TEST RewriteDecisionRequiresIsaPair...\n"); + RewriteDecision d; + d.kind = RewriteKind::Gfx12_5EntryTrampoline; + run("missing ISA pair does not rewrite", d.should_rewrite() == false); } void test_AddGfx1250SteppingFeature() { @@ -446,7 +496,13 @@ int main() { test_EntryTrampolineFlagAllowsGfx12_5Generic(); test_EntryTrampolineFlagAllowsGfx1250UnknownRevision(); test_EntryTrampolineFlagBlocksOtherTargets(); - test_RewriteDecisionSelectsModeAndIsaPair(); + test_RewriteDecisionSelectsA0Patch(); + test_RewriteDecisionSelectsA0PatchWithTrampolines(); + test_RewriteDecisionSelectsGfx1250B0Trampoline(); + test_RewriteDecisionSelectsGfx125FamilyTrampoline(); + test_RewriteDecisionKeepsTrampolineSourceProcessor(); + test_RewriteDecisionRejectsNonGfx12_5Source(); + test_RewriteDecisionRequiresIsaPair(); test_AddGfx1250SteppingFeature(); test_AsicRevisionQueryFailure(); test_ResultIsCachedPerHandle(); From 02bd7431a2777a1ef3a2fbc3389b0a7c4dc4cb41 Mon Sep 17 00:00:00 2001 From: harsh-amd <149193359+harsh-amd@users.noreply.github.com> Date: Sat, 27 Jun 2026 12:04:33 -0700 Subject: [PATCH 12/16] hotswap: keep rewrite kind scoped to loader policy --- projects/hotswap/hotswap_rewrite_policy.cpp | 6 +----- projects/hotswap/hotswap_rewrite_policy.hpp | 1 - projects/hotswap/tests/hotswap_tool_test.cpp | 18 +++++++++--------- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/projects/hotswap/hotswap_rewrite_policy.cpp b/projects/hotswap/hotswap_rewrite_policy.cpp index da162662d162..1d55dc764b71 100644 --- a/projects/hotswap/hotswap_rewrite_policy.cpp +++ b/projects/hotswap/hotswap_rewrite_policy.cpp @@ -18,8 +18,6 @@ const char *rewrite_kind_name(RewriteKind kind) { return "none"; case RewriteKind::Gfx1250A0Patch: return "gfx1250-a0-patch"; - case RewriteKind::Gfx1250A0PatchWithEntryTrampoline: - return "gfx1250-a0-patch+gfx12.5-entry-trampoline"; case RewriteKind::Gfx12_5EntryTrampoline: return "gfx12.5-entry-trampoline"; } @@ -79,9 +77,7 @@ RewriteDecision decide_hotswap_rewrite(const AgentGfxRevision &gfx, if (gate_allows_hotswap(gfx) && source_gfx == "gfx1250" && target_gfx == "gfx1250") { - decision.kind = options.entry_trampolines_requested - ? RewriteKind::Gfx1250A0PatchWithEntryTrampoline - : RewriteKind::Gfx1250A0Patch; + decision.kind = RewriteKind::Gfx1250A0Patch; decision.source_isa = add_gfx1250_stepping_feature(source_isa, true); decision.target_isa = add_gfx1250_stepping_feature(target_isa, false); return decision; diff --git a/projects/hotswap/hotswap_rewrite_policy.hpp b/projects/hotswap/hotswap_rewrite_policy.hpp index cf803a56573d..c927876df8d2 100644 --- a/projects/hotswap/hotswap_rewrite_policy.hpp +++ b/projects/hotswap/hotswap_rewrite_policy.hpp @@ -22,7 +22,6 @@ namespace rocr::hotswap { enum class RewriteKind { None, Gfx1250A0Patch, - Gfx1250A0PatchWithEntryTrampoline, Gfx12_5EntryTrampoline, }; diff --git a/projects/hotswap/tests/hotswap_tool_test.cpp b/projects/hotswap/tests/hotswap_tool_test.cpp index 93dddc512eb6..8864be96e372 100644 --- a/projects/hotswap/tests/hotswap_tool_test.cpp +++ b/projects/hotswap/tests/hotswap_tool_test.cpp @@ -330,19 +330,19 @@ void test_RewriteDecisionSelectsA0Patch() { d.target_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific-"); } -// A0 gfx1250 with the trampoline flag keeps the A0 patch ISA pair but records -// that COMGR's trampoline path is also requested. -void test_RewriteDecisionSelectsA0PatchWithTrampolines() { - printf("TEST RewriteDecisionSelectsA0PatchWithTrampolines...\n"); +// A0 gfx1250 with the trampoline flag still selects the A0 patch ISA pair. +// COMGR independently reads the trampoline flag and runs that pass as requested. +void test_RewriteDecisionA0WithFlagKeepsA0Patch() { + printf("TEST RewriteDecisionA0WithFlagKeepsA0Patch...\n"); const AgentGfxRevision gfx1250_a0 = make_gfx_revision("gfx1250", 0); const RewriteDecision d = decide_hotswap_rewrite(gfx1250_a0, kGfx1250Isa, kGfx1250Isa, RewriteOptions{true}); - run("A0 gfx1250 with flag selects combined rewrite", - d.kind == RewriteKind::Gfx1250A0PatchWithEntryTrampoline); - run("combined rewrite uses B0 source", + run("A0 gfx1250 with flag selects default patch", + d.kind == RewriteKind::Gfx1250A0Patch); + run("A0 patch with flag uses B0 source", d.source_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific+"); - run("combined rewrite uses A0 target", + run("A0 patch with flag uses A0 target", d.target_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific-"); } @@ -497,7 +497,7 @@ int main() { test_EntryTrampolineFlagAllowsGfx1250UnknownRevision(); test_EntryTrampolineFlagBlocksOtherTargets(); test_RewriteDecisionSelectsA0Patch(); - test_RewriteDecisionSelectsA0PatchWithTrampolines(); + test_RewriteDecisionA0WithFlagKeepsA0Patch(); test_RewriteDecisionSelectsGfx1250B0Trampoline(); test_RewriteDecisionSelectsGfx125FamilyTrampoline(); test_RewriteDecisionKeepsTrampolineSourceProcessor(); From 4b796a67462879ae1199b245ba34fec668c92c47 Mon Sep 17 00:00:00 2001 From: harsh-amd <149193359+harsh-amd@users.noreply.github.com> Date: Sat, 27 Jun 2026 12:10:48 -0700 Subject: [PATCH 13/16] hotswap: simplify COMGR rewrite decisions --- projects/hotswap/hotswap_rewrite_policy.cpp | 46 +++-------- projects/hotswap/hotswap_rewrite_policy.hpp | 35 +++------ projects/hotswap/hotswap_tool.cpp | 21 ++--- projects/hotswap/tests/hotswap_tool_test.cpp | 82 ++++++++------------ 4 files changed, 68 insertions(+), 116 deletions(-) diff --git a/projects/hotswap/hotswap_rewrite_policy.cpp b/projects/hotswap/hotswap_rewrite_policy.cpp index 1d55dc764b71..cf990241c690 100644 --- a/projects/hotswap/hotswap_rewrite_policy.cpp +++ b/projects/hotswap/hotswap_rewrite_policy.cpp @@ -12,18 +12,6 @@ namespace rocr::hotswap { -const char *rewrite_kind_name(RewriteKind kind) { - switch (kind) { - case RewriteKind::None: - return "none"; - case RewriteKind::Gfx1250A0Patch: - return "gfx1250-a0-patch"; - case RewriteKind::Gfx12_5EntryTrampoline: - return "gfx12.5-entry-trampoline"; - } - return "unknown"; -} - bool gate_allows_hotswap(const AgentGfxRevision &gfx) { return gfx.revision_valid && gfx.gfx_target == "gfx1250" && gfx.asic_revision == 0; // A0 @@ -60,16 +48,13 @@ std::string add_gfx1250_stepping_feature(const std::string &isa_name, return isa_name + (is_b0 ? ":gfx1250-b0-specific+" : ":gfx1250-b0-specific-"); } -RewriteDecision decide_hotswap_rewrite(const AgentGfxRevision &gfx, - const std::string &source_isa, - const std::string &target_isa, - const RewriteOptions &options) { - RewriteDecision decision; - decision.source_isa = source_isa; - decision.target_isa = target_isa; - +std::optional +decide_hotswap_rewrite(const AgentGfxRevision &gfx, + const std::string &source_isa, + const std::string &target_isa, + const RewriteOptions &options) { if (source_isa.empty() || target_isa.empty()) { - return decision; + return std::nullopt; } std::string source_gfx = extract_gfx_target(source_isa); @@ -77,32 +62,25 @@ RewriteDecision decide_hotswap_rewrite(const AgentGfxRevision &gfx, if (gate_allows_hotswap(gfx) && source_gfx == "gfx1250" && target_gfx == "gfx1250") { - decision.kind = RewriteKind::Gfx1250A0Patch; - decision.source_isa = add_gfx1250_stepping_feature(source_isa, true); - decision.target_isa = add_gfx1250_stepping_feature(target_isa, false); - return decision; + return RewriteDecision{add_gfx1250_stepping_feature(source_isa, true), + add_gfx1250_stepping_feature(target_isa, false)}; } if (!options.entry_trampolines_requested || !is_gfx12_5_entry_trampoline_target(gfx.gfx_target) || - !is_gfx12_5_entry_trampoline_target(source_gfx) || - !is_gfx12_5_entry_trampoline_target(target_gfx)) { - return decision; + !is_gfx12_5_entry_trampoline_target(source_gfx)) { + return std::nullopt; } - decision.kind = RewriteKind::Gfx12_5EntryTrampoline; - // ROCm/rocm-systems#7581 installs kernel-entry trampolines after the normal // loader compatibility checks and keys the work off the code object's ISA, not // a source->agent retarget. Mirror that here by keeping COMGR's target on the // source processor for all gfx12.5 entry-trampoline rewrites. - decision.target_isa = source_isa; + RewriteDecision decision{source_isa, source_isa}; if (source_gfx == "gfx1250") { decision.source_isa = add_gfx1250_stepping_feature(source_isa, true); - decision.target_isa = - add_gfx1250_stepping_feature(decision.target_isa, - !gate_allows_hotswap(gfx)); + decision.target_isa = add_gfx1250_stepping_feature(source_isa, true); } return decision; diff --git a/projects/hotswap/hotswap_rewrite_policy.hpp b/projects/hotswap/hotswap_rewrite_policy.hpp index c927876df8d2..48a7b79a335d 100644 --- a/projects/hotswap/hotswap_rewrite_policy.hpp +++ b/projects/hotswap/hotswap_rewrite_policy.hpp @@ -4,9 +4,8 @@ // //===----------------------------------------------------------------------===// // -// Centralized policy for selecting HotSwap rewrite modes and the COMGR -// source/target ISA pair. The HSA tools loader owns interception and fallback; -// this unit owns the conditions under which a rewrite is requested. +// Centralized policy for selecting whether HotSwap should call COMGR and which +// source/target ISA pair it should pass. COMGR owns the enabled rewrite passes. // //===----------------------------------------------------------------------===// @@ -15,33 +14,20 @@ #include "hotswap_gfx_query.hpp" +#include #include namespace rocr::hotswap { -enum class RewriteKind { - None, - Gfx1250A0Patch, - Gfx12_5EntryTrampoline, -}; - struct RewriteOptions { bool entry_trampolines_requested = false; }; struct RewriteDecision { - RewriteKind kind = RewriteKind::None; std::string source_isa; std::string target_isa; - - bool should_rewrite() const { - return kind != RewriteKind::None && !source_isa.empty() && - !target_isa.empty(); - } }; -const char *rewrite_kind_name(RewriteKind kind); - // HotSwap's default activation policy: B0-to-A0 rewriting is performed only // for gfx1250 silicon at ASIC revision A0 (and only when the revision was // successfully queried). @@ -52,7 +38,7 @@ bool gate_allows_hotswap(const AgentGfxRevision &gfx); bool is_gfx12_5_entry_trampoline_target(const std::string &gfx_target); // Agent-level precheck used by the loader to avoid source-ISA parsing when no -// rewrite mode can possibly apply. +// COMGR rewrite can possibly apply. bool has_candidate_hotswap_rewrite(const AgentGfxRevision &gfx, const RewriteOptions &options); @@ -62,12 +48,13 @@ bool has_candidate_hotswap_rewrite(const AgentGfxRevision &gfx, std::string add_gfx1250_stepping_feature(const std::string &isa_name, bool is_b0); -// Returns the rewrite mode and COMGR ISA pair for this load, or RewriteKind::None -// when the original code object should be loaded unchanged. -RewriteDecision decide_hotswap_rewrite(const AgentGfxRevision &gfx, - const std::string &source_isa, - const std::string &target_isa, - const RewriteOptions &options); +// Returns the COMGR ISA pair for this load, or std::nullopt when the original +// code object should be loaded unchanged. +std::optional +decide_hotswap_rewrite(const AgentGfxRevision &gfx, + const std::string &source_isa, + const std::string &target_isa, + const RewriteOptions &options); } // namespace rocr::hotswap diff --git a/projects/hotswap/hotswap_tool.cpp b/projects/hotswap/hotswap_tool.cpp index 29c2a6498696..f004fae481c3 100644 --- a/projects/hotswap/hotswap_tool.cpp +++ b/projects/hotswap/hotswap_tool.cpp @@ -49,7 +49,6 @@ using rocr::hotswap::has_candidate_hotswap_rewrite; using rocr::hotswap::query_agent_gfx_revision; using rocr::hotswap::RewriteDecision; using rocr::hotswap::RewriteOptions; -using rocr::hotswap::rewrite_kind_name; using ByteVec = std::shared_ptr>; using OwnedElf = std::unique_ptr; @@ -268,7 +267,8 @@ hsa_status_t try_retarget_and_load(hsa_executable_t executable, hsa_agent_t agen const char *options, hsa_loaded_code_object_t *loaded_code_object, const ByteVec &local_bytes, - const RewriteDecision &decision) { + const RewriteDecision &decision, + const RewriteOptions &rewrite_options) { // Route through RetargetCodeObject for unified logging, validation, // and COMGR interaction. Do NOT skip when source == target: B0-to-A0 // patching uses the same ISA name on both sides. @@ -278,11 +278,11 @@ hsa_status_t try_retarget_and_load(hsa_executable_t executable, hsa_agent_t agen local_bytes->data(), local_bytes->size(), decision.source_isa.c_str(), decision.target_isa.c_str(), &out_elf, &out_elf_size); - HOTSWAP_LOG("hotswap: rewrite kind=%s src=%s tgt=%s in=%zu rc=%d out=%zu " - "changed=%d\n", - rewrite_kind_name(decision.kind), decision.source_isa.c_str(), - decision.target_isa.c_str(), local_bytes->size(), rc, out_elf_size, - out_elf != local_bytes->data()); + HOTSWAP_LOG("hotswap: rewrite src=%s tgt=%s entry_trampolines=%d in=%zu " + "rc=%d out=%zu changed=%d\n", + decision.source_isa.c_str(), decision.target_isa.c_str(), + rewrite_options.entry_trampolines_requested, local_bytes->size(), + rc, out_elf_size, out_elf != local_bytes->data()); if (rc != 0 || out_elf == local_bytes->data()) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; @@ -330,9 +330,9 @@ hsa_status_t HSA_API hotswap_load_agent_code_object( const std::string source_isa = rocr::hotswap::GetCodeObjectIsaName( local_bytes->data(), local_bytes->size()); const std::string target_isa = get_agent_isa_name(agent); - const RewriteDecision decision = + const auto decision = decide_hotswap_rewrite(gfx, source_isa, target_isa, rewrite_options); - if (!decision.should_rewrite()) { + if (!decision) { HOTSWAP_LOG("hotswap: decision NONE (gfx=%s src='%s' tgt='%s')\n", gfx.gfx_target.c_str(), source_isa.c_str(), target_isa.c_str()); return load_original_reader(executable, agent, code_object_reader, @@ -341,7 +341,8 @@ hsa_status_t HSA_API hotswap_load_agent_code_object( } const hsa_status_t status = try_retarget_and_load( - executable, agent, options, loaded_code_object, local_bytes, decision); + executable, agent, options, loaded_code_object, local_bytes, *decision, + rewrite_options); if (status == HSA_STATUS_SUCCESS) { return status; } diff --git a/projects/hotswap/tests/hotswap_tool_test.cpp b/projects/hotswap/tests/hotswap_tool_test.cpp index 8864be96e372..2e670fd771cf 100644 --- a/projects/hotswap/tests/hotswap_tool_test.cpp +++ b/projects/hotswap/tests/hotswap_tool_test.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include // --------------------------------------------------------------------------- @@ -54,7 +55,6 @@ using rocr::hotswap::is_gfx12_5_entry_trampoline_target; using rocr::hotswap::query_agent_gfx_revision; using rocr::hotswap::reset_gfx_revision_cache; using rocr::hotswap::RewriteDecision; -using rocr::hotswap::RewriteKind; using rocr::hotswap::RewriteOptions; // --------------------------------------------------------------------------- @@ -157,6 +157,14 @@ AgentGfxRevision make_gfx_revision(const char *gfx_target, return gfx; } +void run_decision_pair(const char *name, + const std::optional &decision, + const std::string &source_isa, + const std::string &target_isa) { + run(name, decision && decision->source_isa == source_isa && + decision->target_isa == target_isa); +} + // The rewrite-policy helpers are exercised directly below so the tests and the // loader can never drift apart. @@ -319,15 +327,12 @@ void test_EntryTrampolineFlagBlocksOtherTargets() { void test_RewriteDecisionSelectsA0Patch() { printf("TEST RewriteDecisionSelectsA0Patch...\n"); const AgentGfxRevision gfx1250_a0 = make_gfx_revision("gfx1250", 0); - const RewriteDecision d = + const auto d = decide_hotswap_rewrite(gfx1250_a0, kGfx1250Isa, kGfx1250Isa, RewriteOptions{false}); - run("A0 gfx1250 selects default patch", - d.kind == RewriteKind::Gfx1250A0Patch); - run("A0 patch uses B0 source", - d.source_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific+"); - run("A0 patch uses A0 target", - d.target_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific-"); + run_decision_pair("A0 patch uses B0 source and A0 target", d, + std::string(kGfx1250Isa) + ":gfx1250-b0-specific+", + std::string(kGfx1250Isa) + ":gfx1250-b0-specific-"); } // A0 gfx1250 with the trampoline flag still selects the A0 patch ISA pair. @@ -335,41 +340,36 @@ void test_RewriteDecisionSelectsA0Patch() { void test_RewriteDecisionA0WithFlagKeepsA0Patch() { printf("TEST RewriteDecisionA0WithFlagKeepsA0Patch...\n"); const AgentGfxRevision gfx1250_a0 = make_gfx_revision("gfx1250", 0); - const RewriteDecision d = + const auto d = decide_hotswap_rewrite(gfx1250_a0, kGfx1250Isa, kGfx1250Isa, RewriteOptions{true}); - run("A0 gfx1250 with flag selects default patch", - d.kind == RewriteKind::Gfx1250A0Patch); - run("A0 patch with flag uses B0 source", - d.source_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific+"); - run("A0 patch with flag uses A0 target", - d.target_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific-"); + run_decision_pair("A0 patch with flag keeps B0 source and A0 target", d, + std::string(kGfx1250Isa) + ":gfx1250-b0-specific+", + std::string(kGfx1250Isa) + ":gfx1250-b0-specific-"); } // Non-A0 gfx1250 with the trampoline flag runs trampoline-only B0-to-B0. void test_RewriteDecisionSelectsGfx1250B0Trampoline() { printf("TEST RewriteDecisionSelectsGfx1250B0Trampoline...\n"); const AgentGfxRevision gfx1250_b0 = make_gfx_revision("gfx1250", 1); - const RewriteDecision d = + const auto d = decide_hotswap_rewrite(gfx1250_b0, kGfx1250Isa, kGfx1250Isa, RewriteOptions{true}); - run("flagged non-A0 gfx1250 selects trampoline", - d.kind == RewriteKind::Gfx12_5EntryTrampoline); - run("trampoline keeps gfx1250 target B0", - d.target_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific+"); + const std::string b0_isa = + std::string(kGfx1250Isa) + ":gfx1250-b0-specific+"; + run_decision_pair("trampoline-only gfx1250 uses B0 source and B0 target", d, + b0_isa, b0_isa); } // Concrete gfx125* targets other than gfx1250 use the trampoline path unchanged. void test_RewriteDecisionSelectsGfx125FamilyTrampoline() { printf("TEST RewriteDecisionSelectsGfx125FamilyTrampoline...\n"); const AgentGfxRevision gfx1251 = make_gfx_revision("gfx1251", 1); - const RewriteDecision d = + const auto d = decide_hotswap_rewrite(gfx1251, kGfx1251Isa, kGfx1251Isa, RewriteOptions{true}); - run("flagged gfx1251 selects trampoline", - d.kind == RewriteKind::Gfx12_5EntryTrampoline); - run("gfx1251 ISA pair is preserved", - d.source_isa == kGfx1251Isa && d.target_isa == kGfx1251Isa); + run_decision_pair("gfx1251 ISA pair is preserved", d, kGfx1251Isa, + kGfx1251Isa); } // Entry-trampoline rewrites keep COMGR on the source processor when the agent @@ -377,42 +377,29 @@ void test_RewriteDecisionSelectsGfx125FamilyTrampoline() { void test_RewriteDecisionKeepsTrampolineSourceProcessor() { printf("TEST RewriteDecisionKeepsTrampolineSourceProcessor...\n"); const AgentGfxRevision gfx1251 = make_gfx_revision("gfx1251", 1); - RewriteDecision d = + auto d = decide_hotswap_rewrite(gfx1251, kGfx12_5GenericIsa, kGfx1251Isa, RewriteOptions{true}); - run("generic source on concrete gfx125 agent selects trampoline", - d.kind == RewriteKind::Gfx12_5EntryTrampoline); - run("generic source keeps generic target", - d.source_isa == kGfx12_5GenericIsa && d.target_isa == kGfx12_5GenericIsa); + run_decision_pair("generic source keeps generic target", d, + kGfx12_5GenericIsa, kGfx12_5GenericIsa); d = decide_hotswap_rewrite(gfx1251, kGfx1250Isa, kGfx1251Isa, RewriteOptions{true}); - run("concrete source on different gfx125 agent selects trampoline", - d.kind == RewriteKind::Gfx12_5EntryTrampoline); - run("concrete mismatch keeps source processor", - d.source_isa == std::string(kGfx1250Isa) + ":gfx1250-b0-specific+" && - d.target_isa == d.source_isa); + const std::string b0_isa = + std::string(kGfx1250Isa) + ":gfx1250-b0-specific+"; + run_decision_pair("concrete mismatch keeps source processor", d, b0_isa, + b0_isa); } // The trampoline flag never routes non-gfx12.5 source code objects through COMGR. void test_RewriteDecisionRejectsNonGfx12_5Source() { printf("TEST RewriteDecisionRejectsNonGfx12_5Source...\n"); const AgentGfxRevision gfx1251 = make_gfx_revision("gfx1251", 1); - const RewriteDecision d = + const auto d = decide_hotswap_rewrite(gfx1251, kGfx942Isa, kGfx1251Isa, RewriteOptions{true}); run("non-gfx12.5 source on gfx125 agent is not rewritten", - d.kind == RewriteKind::None); - run("non-gfx12.5 source decision does not rewrite", - d.should_rewrite() == false); -} - -// Public RewriteDecision objects with incomplete ISA state are not actionable. -void test_RewriteDecisionRequiresIsaPair() { - printf("TEST RewriteDecisionRequiresIsaPair...\n"); - RewriteDecision d; - d.kind = RewriteKind::Gfx12_5EntryTrampoline; - run("missing ISA pair does not rewrite", d.should_rewrite() == false); + !d.has_value()); } void test_AddGfx1250SteppingFeature() { @@ -502,7 +489,6 @@ int main() { test_RewriteDecisionSelectsGfx125FamilyTrampoline(); test_RewriteDecisionKeepsTrampolineSourceProcessor(); test_RewriteDecisionRejectsNonGfx12_5Source(); - test_RewriteDecisionRequiresIsaPair(); test_AddGfx1250SteppingFeature(); test_AsicRevisionQueryFailure(); test_ResultIsCachedPerHandle(); From 7939f87f4b72a2cbe9ca7a39d6174c7adb0e6801 Mon Sep 17 00:00:00 2001 From: harsh-amd <149193359+harsh-amd@users.noreply.github.com> Date: Sat, 27 Jun 2026 12:18:38 -0700 Subject: [PATCH 14/16] hotswap: clarify loader COMGR boundary --- projects/hotswap/CMakeLists.txt | 6 +- projects/hotswap/hotswap.hpp | 8 +- projects/hotswap/hotswap_gfx_query.hpp | 2 +- ...e_policy.cpp => hotswap_loader_policy.cpp} | 47 +++--- projects/hotswap/hotswap_loader_policy.hpp | 53 ++++++ projects/hotswap/hotswap_rewrite_policy.hpp | 61 ------- projects/hotswap/hotswap_tool.cpp | 15 +- .../hotswap/tests/hotswap_loader_test.cpp | 76 ++++----- projects/hotswap/tests/hotswap_tool_test.cpp | 155 ++++++++---------- 9 files changed, 195 insertions(+), 228 deletions(-) rename projects/hotswap/{hotswap_rewrite_policy.cpp => hotswap_loader_policy.cpp} (62%) create mode 100644 projects/hotswap/hotswap_loader_policy.hpp delete mode 100644 projects/hotswap/hotswap_rewrite_policy.hpp diff --git a/projects/hotswap/CMakeLists.txt b/projects/hotswap/CMakeLists.txt index 5934ffa3a705..dbc24c97de44 100644 --- a/projects/hotswap/CMakeLists.txt +++ b/projects/hotswap/CMakeLists.txt @@ -30,7 +30,7 @@ add_library(hsa-hotswap SHARED hotswap_tool.cpp hotswap.cpp hotswap_gfx_query.cpp - hotswap_rewrite_policy.cpp + hotswap_loader_policy.cpp ${HOTSWAP_PLATFORM_IO_SRC} ) @@ -76,7 +76,7 @@ target_include_directories(hotswap_test PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) add_executable(hotswap_tool_test tests/hotswap_tool_test.cpp hotswap_gfx_query.cpp - hotswap_rewrite_policy.cpp + hotswap_loader_policy.cpp ) target_include_directories(hotswap_tool_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} @@ -90,7 +90,7 @@ target_include_directories(hotswap_tool_test PRIVATE add_executable(hotswap_loader_test tests/hotswap_loader_test.cpp hotswap_gfx_query.cpp - hotswap_rewrite_policy.cpp + hotswap_loader_policy.cpp ${HOTSWAP_PLATFORM_IO_SRC} ) target_include_directories(hotswap_loader_test PRIVATE diff --git a/projects/hotswap/hotswap.hpp b/projects/hotswap/hotswap.hpp index 4034723b25bb..3a495566a307 100644 --- a/projects/hotswap/hotswap.hpp +++ b/projects/hotswap/hotswap.hpp @@ -22,11 +22,9 @@ std::string GetCodeObjectIsaName(const void *elf_data, size_t elf_size); /// /// Both ISA names are supplied by the caller: source_isa typically comes from /// the code object (see GetCodeObjectIsaName) and target_isa from the running -/// GPU (e.g. the HSA agent), but either may be overridden. COMGR's -/// amd_comgr_hotswap_rewrite (linked directly) applies whatever transformation -/// the source/target pair calls for -- same-ISA stepping patches (e.g. gfx1250 -/// B0 to A0) or cross-family transpilation -- and returns the rewritten code -/// object. If no transformation is needed, the output is a copy of the input. +/// GPU (e.g. the HSA agent), but either may be overridden. This wrapper passes +/// the request to COMGR's amd_comgr_hotswap_rewrite and returns COMGR's output +/// bytes to the caller. /// /// On success, *out_data and *out_size describe the rewritten code object. /// If *out_data differs from elf_data, it was allocated by this function diff --git a/projects/hotswap/hotswap_gfx_query.hpp b/projects/hotswap/hotswap_gfx_query.hpp index 3d2961802ed0..db95997e4819 100644 --- a/projects/hotswap/hotswap_gfx_query.hpp +++ b/projects/hotswap/hotswap_gfx_query.hpp @@ -42,7 +42,7 @@ std::string extract_gfx_target(const std::string &isa_name); // Queries the agent's gfx target and ASIC revision via the HSA runtime. The // result is cached per agent handle, since code-object loads can be frequent. // This function intentionally encodes no rewrite policy; callers apply the -// policy in hotswap_rewrite_policy.{hpp,cpp}. +// policy in hotswap_loader_policy.{hpp,cpp}. AgentGfxRevision query_agent_gfx_revision(hsa_agent_t agent); // Clears the per-agent-handle cache used by query_agent_gfx_revision(). diff --git a/projects/hotswap/hotswap_rewrite_policy.cpp b/projects/hotswap/hotswap_loader_policy.cpp similarity index 62% rename from projects/hotswap/hotswap_rewrite_policy.cpp rename to projects/hotswap/hotswap_loader_policy.cpp index cf990241c690..e77a15132652 100644 --- a/projects/hotswap/hotswap_rewrite_policy.cpp +++ b/projects/hotswap/hotswap_loader_policy.cpp @@ -1,10 +1,10 @@ -//===- hotswap_rewrite_policy.cpp - HotSwap rewrite decision policy -------===// +//===- hotswap_loader_policy.cpp - HotSwap loader decision policy ---------===// // // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "hotswap_rewrite_policy.hpp" +#include "hotswap_loader_policy.hpp" #include #include @@ -17,7 +17,9 @@ bool gate_allows_hotswap(const AgentGfxRevision &gfx) { gfx.asic_revision == 0; // A0 } -bool is_gfx12_5_entry_trampoline_target(const std::string &gfx_target) { +namespace { + +bool is_gfx12_5_target(const std::string &gfx_target) { constexpr char Gfx125Prefix[] = "gfx125"; constexpr size_t Gfx125PrefixLen = sizeof(Gfx125Prefix) - 1; if (gfx_target == "gfx12-5-generic") { @@ -31,15 +33,8 @@ bool is_gfx12_5_entry_trampoline_target(const std::string &gfx_target) { [](unsigned char c) { return std::isdigit(c); }); } -bool has_candidate_hotswap_rewrite(const AgentGfxRevision &gfx, - const RewriteOptions &options) { - return gate_allows_hotswap(gfx) || - (options.entry_trampolines_requested && - is_gfx12_5_entry_trampoline_target(gfx.gfx_target)); -} - -std::string add_gfx1250_stepping_feature(const std::string &isa_name, - bool is_b0) { +std::string with_gfx1250_stepping_feature(const std::string &isa_name, + bool is_b0) { if (extract_gfx_target(isa_name) != "gfx1250" || isa_name.find(":gfx1250-b0-specific+") != std::string::npos || isa_name.find(":gfx1250-b0-specific-") != std::string::npos) { @@ -48,6 +43,15 @@ std::string add_gfx1250_stepping_feature(const std::string &isa_name, return isa_name + (is_b0 ? ":gfx1250-b0-specific+" : ":gfx1250-b0-specific-"); } +} // namespace + +bool has_candidate_hotswap_rewrite(const AgentGfxRevision &gfx, + const RewriteOptions &options) { + return gate_allows_hotswap(gfx) || + (options.gfx12_5_rewrite_requested && + is_gfx12_5_target(gfx.gfx_target)); +} + std::optional decide_hotswap_rewrite(const AgentGfxRevision &gfx, const std::string &source_isa, @@ -62,25 +66,22 @@ decide_hotswap_rewrite(const AgentGfxRevision &gfx, if (gate_allows_hotswap(gfx) && source_gfx == "gfx1250" && target_gfx == "gfx1250") { - return RewriteDecision{add_gfx1250_stepping_feature(source_isa, true), - add_gfx1250_stepping_feature(target_isa, false)}; + return RewriteDecision{with_gfx1250_stepping_feature(source_isa, true), + with_gfx1250_stepping_feature(target_isa, false)}; } - if (!options.entry_trampolines_requested || - !is_gfx12_5_entry_trampoline_target(gfx.gfx_target) || - !is_gfx12_5_entry_trampoline_target(source_gfx)) { + if (!options.gfx12_5_rewrite_requested || + !is_gfx12_5_target(gfx.gfx_target) || !is_gfx12_5_target(source_gfx)) { return std::nullopt; } - // ROCm/rocm-systems#7581 installs kernel-entry trampolines after the normal - // loader compatibility checks and keys the work off the code object's ISA, not - // a source->agent retarget. Mirror that here by keeping COMGR's target on the - // source processor for all gfx12.5 entry-trampoline rewrites. + // ROCm/rocm-systems#7581 established the loader-side invariant that this + // opt-in path uses the code object's processor, not a source->agent retarget. RewriteDecision decision{source_isa, source_isa}; if (source_gfx == "gfx1250") { - decision.source_isa = add_gfx1250_stepping_feature(source_isa, true); - decision.target_isa = add_gfx1250_stepping_feature(source_isa, true); + decision.source_isa = with_gfx1250_stepping_feature(source_isa, true); + decision.target_isa = with_gfx1250_stepping_feature(source_isa, true); } return decision; diff --git a/projects/hotswap/hotswap_loader_policy.hpp b/projects/hotswap/hotswap_loader_policy.hpp new file mode 100644 index 000000000000..c0c5b8833ab3 --- /dev/null +++ b/projects/hotswap/hotswap_loader_policy.hpp @@ -0,0 +1,53 @@ +//===- hotswap_loader_policy.hpp - HotSwap loader decision policy ---------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Centralized loader policy for selecting whether HotSwap should call COMGR and +// which source/target ISA pair it should pass. COMGR owns validation and all +// code-object transformations after the call crosses this boundary. +// +//===----------------------------------------------------------------------===// + +#ifndef ROCR_HOTSWAP_LOADER_POLICY_HPP +#define ROCR_HOTSWAP_LOADER_POLICY_HPP + +#include "hotswap_gfx_query.hpp" + +#include +#include + +namespace rocr::hotswap { + +struct RewriteOptions { + bool gfx12_5_rewrite_requested = false; +}; + +struct RewriteDecision { + std::string source_isa; + std::string target_isa; +}; + +// HotSwap's baseline gfx1250 route is active only for gfx1250 silicon at ASIC +// revision A0 (and only when the revision was successfully queried). +bool gate_allows_hotswap(const AgentGfxRevision &gfx); + +// Agent-level precheck used by the loader to avoid source-ISA parsing when no +// local routing condition can possibly apply. +bool has_candidate_hotswap_rewrite(const AgentGfxRevision &gfx, + const RewriteOptions &options); + +// Returns the COMGR ISA pair for this load, or std::nullopt when the original +// code object should be loaded unchanged. The decision is limited to loader +// routing and ISA-pair construction; COMGR decides which rewrite work, if any, +// is enabled for the request. +std::optional +decide_hotswap_rewrite(const AgentGfxRevision &gfx, + const std::string &source_isa, + const std::string &target_isa, + const RewriteOptions &options); + +} // namespace rocr::hotswap + +#endif // ROCR_HOTSWAP_LOADER_POLICY_HPP diff --git a/projects/hotswap/hotswap_rewrite_policy.hpp b/projects/hotswap/hotswap_rewrite_policy.hpp deleted file mode 100644 index 48a7b79a335d..000000000000 --- a/projects/hotswap/hotswap_rewrite_policy.hpp +++ /dev/null @@ -1,61 +0,0 @@ -//===- hotswap_rewrite_policy.hpp - HotSwap rewrite decision policy -------===// -// -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Centralized policy for selecting whether HotSwap should call COMGR and which -// source/target ISA pair it should pass. COMGR owns the enabled rewrite passes. -// -//===----------------------------------------------------------------------===// - -#ifndef ROCR_HOTSWAP_REWRITE_POLICY_HPP -#define ROCR_HOTSWAP_REWRITE_POLICY_HPP - -#include "hotswap_gfx_query.hpp" - -#include -#include - -namespace rocr::hotswap { - -struct RewriteOptions { - bool entry_trampolines_requested = false; -}; - -struct RewriteDecision { - std::string source_isa; - std::string target_isa; -}; - -// HotSwap's default activation policy: B0-to-A0 rewriting is performed only -// for gfx1250 silicon at ASIC revision A0 (and only when the revision was -// successfully queried). -bool gate_allows_hotswap(const AgentGfxRevision &gfx); - -// True for COMGR entry-trampoline targets: gfx12-5-generic or concrete gfx125 -// processor names with a numeric suffix (for example, gfx1250 or gfx1251). -bool is_gfx12_5_entry_trampoline_target(const std::string &gfx_target); - -// Agent-level precheck used by the loader to avoid source-ISA parsing when no -// COMGR rewrite can possibly apply. -bool has_candidate_hotswap_rewrite(const AgentGfxRevision &gfx, - const RewriteOptions &options); - -// Adds COMGR's hotswap-local gfx1250 stepping feature to an ISA name while -// preserving any existing feature suffixes. Non-gfx1250 ISA names and ISA -// names that already carry the stepping feature are returned unchanged. -std::string add_gfx1250_stepping_feature(const std::string &isa_name, - bool is_b0); - -// Returns the COMGR ISA pair for this load, or std::nullopt when the original -// code object should be loaded unchanged. -std::optional -decide_hotswap_rewrite(const AgentGfxRevision &gfx, - const std::string &source_isa, - const std::string &target_isa, - const RewriteOptions &options); - -} // namespace rocr::hotswap - -#endif // ROCR_HOTSWAP_REWRITE_POLICY_HPP diff --git a/projects/hotswap/hotswap_tool.cpp b/projects/hotswap/hotswap_tool.cpp index f004fae481c3..7199bfde9912 100644 --- a/projects/hotswap/hotswap_tool.cpp +++ b/projects/hotswap/hotswap_tool.cpp @@ -16,7 +16,7 @@ #include "hotswap.hpp" #include "hotswap_gfx_query.hpp" #include "hotswap_platform_io.hpp" -#include "hotswap_rewrite_policy.hpp" +#include "hotswap_loader_policy.hpp" #include #include #include @@ -112,7 +112,7 @@ void stash_bytes(uint64_t handle, const uint8_t *data, size_t size) { g_reader_map[handle] = ReaderEntry{std::move(vec), false, false}; } -bool entry_trampolines_requested() { +bool gfx12_5_rewrite_requested() { const char *value = std::getenv("AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES"); return value && value[0] != '\0' && std::strcmp(value, "0") != 0; } @@ -269,19 +269,18 @@ hsa_status_t try_retarget_and_load(hsa_executable_t executable, hsa_agent_t agen const ByteVec &local_bytes, const RewriteDecision &decision, const RewriteOptions &rewrite_options) { - // Route through RetargetCodeObject for unified logging, validation, - // and COMGR interaction. Do NOT skip when source == target: B0-to-A0 - // patching uses the same ISA name on both sides. + // Route through RetargetCodeObject once policy has selected a request. A + // same-processor source/target pair can still be meaningful to COMGR. void *out_elf = nullptr; size_t out_elf_size = 0; const int rc = rocr::hotswap::RetargetCodeObject( local_bytes->data(), local_bytes->size(), decision.source_isa.c_str(), decision.target_isa.c_str(), &out_elf, &out_elf_size); - HOTSWAP_LOG("hotswap: rewrite src=%s tgt=%s entry_trampolines=%d in=%zu " + HOTSWAP_LOG("hotswap: rewrite src=%s tgt=%s gfx12_5_opt_in=%d in=%zu " "rc=%d out=%zu changed=%d\n", decision.source_isa.c_str(), decision.target_isa.c_str(), - rewrite_options.entry_trampolines_requested, local_bytes->size(), + rewrite_options.gfx12_5_rewrite_requested, local_bytes->size(), rc, out_elf_size, out_elf != local_bytes->data()); if (rc != 0 || out_elf == local_bytes->data()) { @@ -318,7 +317,7 @@ hsa_status_t HSA_API hotswap_load_agent_code_object( } const AgentGfxRevision gfx = query_agent_gfx_revision(agent); - const RewriteOptions rewrite_options{entry_trampolines_requested()}; + const RewriteOptions rewrite_options{gfx12_5_rewrite_requested()}; if (!has_candidate_hotswap_rewrite(gfx, rewrite_options)) { HOTSWAP_LOG("hotswap: gate BLOCKED (gfx=%s rev=%u valid=%d)\n", gfx.gfx_target.c_str(), gfx.asic_revision, gfx.revision_valid); diff --git a/projects/hotswap/tests/hotswap_loader_test.cpp b/projects/hotswap/tests/hotswap_loader_test.cpp index 4adbe9e6bb58..5370f2cb86cb 100644 --- a/projects/hotswap/tests/hotswap_loader_test.cpp +++ b/projects/hotswap/tests/hotswap_loader_test.cpp @@ -166,7 +166,7 @@ void check(bool cond, const char *name) { } } -void set_entry_trampoline_env(const char *value) { +void set_gfx12_5_rewrite_env(const char *value) { #ifdef _WIN32 _putenv_s("AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES", value ? value : ""); #else @@ -193,7 +193,7 @@ void reset_state() { g_orig_reader_destroy = nullptr; g_orig_load_agent_code_object = nullptr; rocr::hotswap::reset_gfx_revision_cache(); - set_entry_trampoline_env(nullptr); + set_gfx12_5_rewrite_env(nullptr); g_env = FakeEnv{}; } @@ -282,11 +282,11 @@ struct LoadResult { }; LoadResult load_once(const char *code_object_isa, const char *agent_isa, - const char *entry_trampoline_flag, + const char *gfx12_5_rewrite_flag, uint32_t asic_revision = 1, int retarget_status = 0) { reset_state(); - set_entry_trampoline_env(entry_trampoline_flag); + set_gfx12_5_rewrite_env(gfx12_5_rewrite_flag); g_env.agent_isa = agent_isa; g_env.asic_revision = asic_revision; g_env.retarget_status = retarget_status; @@ -313,18 +313,18 @@ void check_original_load(const LoadResult &result, const char *name) { "original reader is loaded"); } -void test_DisabledFlagLoadsOriginal() { - begin_test("DisabledFlagLoadsOriginal", - "Unset, empty, and 0 trampoline flags must leave non-A0 gfx1250 " +void test_OptInDisabledLoadsOriginal() { + begin_test("OptInDisabledLoadsOriginal", + "Unset, empty, and 0 opt-in values must leave non-A0 gfx1250 " "on the original reader."); struct FlagCase { const char *flag_value; const char *expectation; }; const FlagCase cases[] = { - {nullptr, "unset flag skips non-A0 gfx1250 rewrite"}, - {"", "empty flag skips non-A0 gfx1250 rewrite"}, - {"0", "flag value 0 skips non-A0 gfx1250 rewrite"}, + {nullptr, "unset opt-in skips non-A0 gfx1250 rewrite"}, + {"", "empty opt-in skips non-A0 gfx1250 rewrite"}, + {"0", "opt-in value 0 skips non-A0 gfx1250 rewrite"}, }; for (const FlagCase &c : cases) { const LoadResult result = @@ -333,13 +333,14 @@ void test_DisabledFlagLoadsOriginal() { } } -void test_FlagRoutesGfx1250B0() { - begin_test("FlagRoutesGfx1250B0", - "The trampoline flag must route non-A0 gfx1250 through COMGR " - "without selecting the A0 patch path."); +void test_OptInRoutesGfx1250SameProcessor() { + begin_test("OptInRoutesGfx1250SameProcessor", + "The opt-in must route non-A0 gfx1250 through COMGR with a " + "same-processor request."); const LoadResult result = load_once(kGfx1250Isa, kGfx1250Isa, "1"); check(result.status == HSA_STATUS_SUCCESS, "load succeeds"); - check(result.retarget_calls == 1, "flag routes B0 gfx1250 through COMGR"); + check(result.retarget_calls == 1, + "opt-in routes non-A0 gfx1250 through COMGR"); check(result.source_isa == kGfx1250B0Isa, "source ISA is tagged as B0"); check(result.target_isa == kGfx1250B0Isa, @@ -350,15 +351,16 @@ void test_FlagRoutesGfx1250B0() { "rewritten ELF is retained after successful load"); } -void test_FlagRoutesGfx12_5Family() { - begin_test("FlagRoutesGfx12_5Family", - "The trampoline flag must route gfx125* and gfx12-5-generic " +void test_OptInRoutesGfx12_5Family() { + begin_test("OptInRoutesGfx12_5Family", + "The opt-in must route gfx125* and gfx12-5-generic " "without adding gfx1250 stepping features."); const char *cases[] = {kGfx1251Isa, kGfx12_5GenericIsa}; for (const char *isa : cases) { const LoadResult result = load_once(isa, isa, "1"); check(result.status == HSA_STATUS_SUCCESS, "load succeeds"); - check(result.retarget_calls == 1, "flag routes gfx12.5 target through COMGR"); + check(result.retarget_calls == 1, + "opt-in routes gfx12.5 target through COMGR"); check(result.source_isa == isa, "source ISA is preserved"); check(result.target_isa == isa, "target ISA is preserved"); } @@ -392,23 +394,23 @@ void test_ConcreteSourceUsesSourceTarget() { "target ISA stays on the source processor"); } -void test_A0RetargetsWithoutFlag() { - begin_test("A0RetargetsWithoutFlag", - "The existing gfx1250 A0 rewrite path must still run without " - "the trampoline flag."); +void test_A0UsesBaselineRouteWithoutOptIn() { + begin_test("A0UsesBaselineRouteWithoutOptIn", + "The existing gfx1250 A0 route must still call COMGR without " + "the opt-in."); const LoadResult result = load_once(kGfx1250Isa, kGfx1250Isa, nullptr, 0); check(result.status == HSA_STATUS_SUCCESS, "load succeeds"); - check(result.retarget_calls == 1, "A0 gfx1250 keeps default rewrite path"); + check(result.retarget_calls == 1, "A0 gfx1250 keeps baseline route"); check(result.source_isa == kGfx1250B0Isa, "source code object ISA is tagged as B0"); check(result.target_isa == kGfx1250A0Isa, "A0 agent ISA is tagged as A0"); } -void test_A0WithFlagKeepsA0PatchPair() { - begin_test("A0WithFlagKeepsA0PatchPair", - "The trampoline flag on gfx1250 A0 must preserve the B0-to-A0 " - "patch ISA pair while routing through COMGR."); +void test_A0WithOptInKeepsBaselinePair() { + begin_test("A0WithOptInKeepsBaselinePair", + "The opt-in on gfx1250 A0 must preserve the baseline ISA pair " + "while routing through COMGR."); const LoadResult result = load_once(kGfx1250Isa, kGfx1250Isa, "1", 0); check(result.status == HSA_STATUS_SUCCESS, "load succeeds"); check(result.retarget_calls == 1, "A0 gfx1250 routes through COMGR"); @@ -418,9 +420,9 @@ void test_A0WithFlagKeepsA0PatchPair() { "A0 agent ISA remains tagged as A0"); } -void test_FlagOneBlocksNonGfx12_5() { - begin_test("FlagOneBlocksNonGfx12_5", - "The trampoline flag must not become a global rewrite enable for " +void test_OptInBlocksNonGfx12_5() { + begin_test("OptInBlocksNonGfx12_5", + "The opt-in must not become a global rewrite enable for " "unsupported agents or source code objects."); LoadResult result = load_once(kGfx942Isa, kGfx942Isa, "1", 0); check_original_load(result, "non-gfx12.5 agent does not route"); @@ -443,14 +445,14 @@ void test_RetargetFailureFallsBackToOriginalReader() { } // namespace int main() { - test_DisabledFlagLoadsOriginal(); - test_FlagRoutesGfx1250B0(); - test_FlagRoutesGfx12_5Family(); + test_OptInDisabledLoadsOriginal(); + test_OptInRoutesGfx1250SameProcessor(); + test_OptInRoutesGfx12_5Family(); test_GenericSourceUsesGenericTarget(); test_ConcreteSourceUsesSourceTarget(); - test_A0RetargetsWithoutFlag(); - test_A0WithFlagKeepsA0PatchPair(); - test_FlagOneBlocksNonGfx12_5(); + test_A0UsesBaselineRouteWithoutOptIn(); + test_A0WithOptInKeepsBaselinePair(); + test_OptInBlocksNonGfx12_5(); test_RetargetFailureFallsBackToOriginalReader(); reset_state(); diff --git a/projects/hotswap/tests/hotswap_tool_test.cpp b/projects/hotswap/tests/hotswap_tool_test.cpp index 2e670fd771cf..7c9e95bd1673 100644 --- a/projects/hotswap/tests/hotswap_tool_test.cpp +++ b/projects/hotswap/tests/hotswap_tool_test.cpp @@ -5,7 +5,7 @@ //===----------------------------------------------------------------------===// // // Unit tests for query_agent_gfx_revision() in hotswap_gfx_query.cpp and the -// rewrite decisions selected by hotswap_rewrite_policy.cpp. +// COMGR requests selected by hotswap_loader_policy.cpp. // // The HSA entry points used by the query helper are replaced with in-file stubs // (linked in place of the real libraries) so query and policy behavior can be @@ -44,14 +44,12 @@ FakeEnv g_env; // The units under test (bring in HSA query helpers and rewrite policy). #include "hotswap_gfx_query.hpp" -#include "hotswap_rewrite_policy.hpp" +#include "hotswap_loader_policy.hpp" using rocr::hotswap::AgentGfxRevision; -using rocr::hotswap::add_gfx1250_stepping_feature; using rocr::hotswap::decide_hotswap_rewrite; using rocr::hotswap::gate_allows_hotswap; using rocr::hotswap::has_candidate_hotswap_rewrite; -using rocr::hotswap::is_gfx12_5_entry_trampoline_target; using rocr::hotswap::query_agent_gfx_revision; using rocr::hotswap::reset_gfx_revision_cache; using rocr::hotswap::RewriteDecision; @@ -215,7 +213,7 @@ void test_NearMissTargetBlocks() { } // Hyphenated generic processor names must be preserved while stripping feature -// suffixes; otherwise the gfx12.5 trampoline gate would see only "gfx12". +// suffixes; otherwise gfx12.5 opt-in routing would see only "gfx12". void test_Gfx12_5GenericFeatureSuffixParsed() { printf("TEST Gfx12_5GenericFeatureSuffixParsed...\n"); reset_env(); @@ -224,7 +222,7 @@ void test_Gfx12_5GenericFeatureSuffixParsed() { const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); run("feature suffix stripped -> gfx12-5-generic", g.gfx_target == "gfx12-5-generic"); - run("default B0/A0 gate blocks gfx12-5-generic", + run("baseline route blocks gfx12-5-generic", gate_allows_hotswap(g) == false); } @@ -239,131 +237,125 @@ void test_Gfx1250NonA0Blocks() { run("gate blocks gfx1250 A1", gate_allows_hotswap(g) == false); } -// The entry-trampoline flag opens an explicit gfx1250 path independent of the -// B0/A0 retargeting gate. -void test_EntryTrampolineFlagAllowsGfx1250NonA0() { - printf("TEST EntryTrampolineFlagAllowsGfx1250NonA0...\n"); +// The explicit opt-in opens a gfx1250 route independent of the baseline A0 +// agent route. +void test_OptInAllowsGfx1250NonA0() { + printf("TEST OptInAllowsGfx1250NonA0...\n"); reset_env(); g_env.isa_name = kGfx1250Isa; g_env.asic_revision = 1; // A1/B0-side path, not A0. const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); - run("default gate blocks non-A0", + run("baseline route blocks non-A0", has_candidate_hotswap_rewrite(g, RewriteOptions{false}) == false); - run("trampoline gate allows gfx1250 non-A0", + run("opt-in route allows gfx1250 non-A0", has_candidate_hotswap_rewrite(g, RewriteOptions{true}) == true); } -// The explicit trampoline gate covers the broader gfx125* family, while the -// default B0/A0 patch gate remains exact gfx1250 A0 only. -void test_EntryTrampolineFlagAllowsGfx125Family() { - printf("TEST EntryTrampolineFlagAllowsGfx125Family...\n"); +// The explicit opt-in covers the broader gfx125* family, while the baseline +// route remains exact gfx1250 A0 only. +void test_OptInAllowsGfx125Family() { + printf("TEST OptInAllowsGfx125Family...\n"); reset_env(); g_env.isa_name = kGfx1251Isa; g_env.asic_revision = 1; const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); - run("default gate blocks gfx1251", + run("baseline route blocks gfx1251", has_candidate_hotswap_rewrite(g, RewriteOptions{false}) == false); - run("trampoline gate allows gfx1251", + run("opt-in route allows gfx1251", has_candidate_hotswap_rewrite(g, RewriteOptions{true}) == true); } -void test_EntryTrampolineFlagRejectsMalformedGfx125Prefix() { - printf("TEST EntryTrampolineFlagRejectsMalformedGfx125Prefix...\n"); +void test_OptInRejectsMalformedGfx125Prefix() { + printf("TEST OptInRejectsMalformedGfx125Prefix...\n"); reset_env(); g_env.isa_name = kGfx125MalformedIsa; g_env.asic_revision = 1; const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); run("malformed gfx125 prefix is parsed", g.gfx_target == "gfx125foo"); - run("trampoline family predicate rejects malformed suffix", - is_gfx12_5_entry_trampoline_target(g.gfx_target) == false); - run("trampoline gate rejects malformed gfx125 prefix", + run("opt-in route rejects malformed gfx125 prefix", has_candidate_hotswap_rewrite(g, RewriteOptions{true}) == false); } -// COMGR's entry-trampoline pass also accepts the gfx12.5 generic processor -// name, so the HSA tool must route that target when explicitly requested. -void test_EntryTrampolineFlagAllowsGfx12_5Generic() { - printf("TEST EntryTrampolineFlagAllowsGfx12_5Generic...\n"); +// The HSA tool routes gfx12-5-generic when explicitly requested. +void test_OptInAllowsGfx12_5Generic() { + printf("TEST OptInAllowsGfx12_5Generic...\n"); reset_env(); g_env.isa_name = kGfx12_5GenericIsa; g_env.asic_revision = 1; const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); - run("default gate blocks gfx12-5-generic", + run("baseline route blocks gfx12-5-generic", has_candidate_hotswap_rewrite(g, RewriteOptions{false}) == false); - run("trampoline gate allows gfx12-5-generic", + run("opt-in route allows gfx12-5-generic", has_candidate_hotswap_rewrite(g, RewriteOptions{true}) == true); } -// If ASIC revision cannot be queried, the explicit trampoline flag can still -// route gfx12.5 targets through COMGR. The exact gfx1250 target is treated as -// non-A0 for B0/A0 patch selection so the trampoline pass does not imply -// A0-specific fixes. -void test_EntryTrampolineFlagAllowsGfx1250UnknownRevision() { - printf("TEST EntryTrampolineFlagAllowsGfx1250UnknownRevision...\n"); +// If ASIC revision cannot be queried, the explicit opt-in can still route +// gfx12.5 targets through COMGR. The baseline A0 route remains disabled. +void test_OptInAllowsGfx1250UnknownRevision() { + printf("TEST OptInAllowsGfx1250UnknownRevision...\n"); reset_env(); g_env.isa_name = kGfx1250Isa; g_env.asic_rev_ok = false; const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); - run("default gate blocks unknown revision", + run("baseline route blocks unknown revision", has_candidate_hotswap_rewrite(g, RewriteOptions{false}) == false); - run("trampoline gate allows gfx1250 unknown revision", + run("opt-in route allows gfx1250 unknown revision", has_candidate_hotswap_rewrite(g, RewriteOptions{true}) == true); } -// The trampoline flag is not a global rewrite enable; non-gfx12.5 targets still +// The opt-in is not a global rewrite enable; non-gfx12.5 targets still // load unchanged. -void test_EntryTrampolineFlagBlocksOtherTargets() { - printf("TEST EntryTrampolineFlagBlocksOtherTargets...\n"); +void test_OptInBlocksOtherTargets() { + printf("TEST OptInBlocksOtherTargets...\n"); reset_env(); g_env.isa_name = kGfx942Isa; g_env.asic_revision = 0; const AgentGfxRevision g = query_agent_gfx_revision(fresh_agent()); - run("trampoline gate blocks gfx942", + run("opt-in route blocks gfx942", has_candidate_hotswap_rewrite(g, RewriteOptions{true}) == false); } -// A0 gfx1250 without the trampoline flag is the original B0-to-A0 patch path. -void test_RewriteDecisionSelectsA0Patch() { - printf("TEST RewriteDecisionSelectsA0Patch...\n"); +// A gfx1250 A0 agent uses the baseline COMGR request without the opt-in. +void test_RewriteDecisionSelectsBaselineA0Request() { + printf("TEST RewriteDecisionSelectsBaselineA0Request...\n"); const AgentGfxRevision gfx1250_a0 = make_gfx_revision("gfx1250", 0); const auto d = decide_hotswap_rewrite(gfx1250_a0, kGfx1250Isa, kGfx1250Isa, RewriteOptions{false}); - run_decision_pair("A0 patch uses B0 source and A0 target", d, + run_decision_pair("baseline request uses expected source and target ISA", d, std::string(kGfx1250Isa) + ":gfx1250-b0-specific+", std::string(kGfx1250Isa) + ":gfx1250-b0-specific-"); } -// A0 gfx1250 with the trampoline flag still selects the A0 patch ISA pair. -// COMGR independently reads the trampoline flag and runs that pass as requested. -void test_RewriteDecisionA0WithFlagKeepsA0Patch() { - printf("TEST RewriteDecisionA0WithFlagKeepsA0Patch...\n"); +// The explicit opt-in does not change the baseline gfx1250 A0 request. +void test_RewriteDecisionA0WithOptInKeepsBaselineRequest() { + printf("TEST RewriteDecisionA0WithOptInKeepsBaselineRequest...\n"); const AgentGfxRevision gfx1250_a0 = make_gfx_revision("gfx1250", 0); const auto d = decide_hotswap_rewrite(gfx1250_a0, kGfx1250Isa, kGfx1250Isa, RewriteOptions{true}); - run_decision_pair("A0 patch with flag keeps B0 source and A0 target", d, + run_decision_pair("opt-in keeps baseline source and target ISA", d, std::string(kGfx1250Isa) + ":gfx1250-b0-specific+", std::string(kGfx1250Isa) + ":gfx1250-b0-specific-"); } -// Non-A0 gfx1250 with the trampoline flag runs trampoline-only B0-to-B0. -void test_RewriteDecisionSelectsGfx1250B0Trampoline() { - printf("TEST RewriteDecisionSelectsGfx1250B0Trampoline...\n"); +// A non-A0 gfx1250 opt-in uses a same-processor COMGR request. +void test_RewriteDecisionOptInSelectsGfx1250SameProcessorRequest() { + printf("TEST RewriteDecisionOptInSelectsGfx1250SameProcessorRequest...\n"); const AgentGfxRevision gfx1250_b0 = make_gfx_revision("gfx1250", 1); const auto d = decide_hotswap_rewrite(gfx1250_b0, kGfx1250Isa, kGfx1250Isa, RewriteOptions{true}); const std::string b0_isa = std::string(kGfx1250Isa) + ":gfx1250-b0-specific+"; - run_decision_pair("trampoline-only gfx1250 uses B0 source and B0 target", d, + run_decision_pair("gfx1250 request keeps source and target on source ISA", d, b0_isa, b0_isa); } -// Concrete gfx125* targets other than gfx1250 use the trampoline path unchanged. -void test_RewriteDecisionSelectsGfx125FamilyTrampoline() { - printf("TEST RewriteDecisionSelectsGfx125FamilyTrampoline...\n"); +// Concrete gfx125* targets other than gfx1250 use the original ISA pair. +void test_RewriteDecisionOptInSelectsGfx125FamilyRequest() { + printf("TEST RewriteDecisionOptInSelectsGfx125FamilyRequest...\n"); const AgentGfxRevision gfx1251 = make_gfx_revision("gfx1251", 1); const auto d = decide_hotswap_rewrite(gfx1251, kGfx1251Isa, kGfx1251Isa, @@ -372,10 +364,10 @@ void test_RewriteDecisionSelectsGfx125FamilyTrampoline() { kGfx1251Isa); } -// Entry-trampoline rewrites keep COMGR on the source processor when the agent -// reports a different concrete or generic gfx12.5 ISA. -void test_RewriteDecisionKeepsTrampolineSourceProcessor() { - printf("TEST RewriteDecisionKeepsTrampolineSourceProcessor...\n"); +// The opt-in path keeps COMGR on the source processor when the agent reports a +// different concrete or generic gfx12.5 ISA. +void test_RewriteDecisionOptInKeepsSourceProcessor() { + printf("TEST RewriteDecisionOptInKeepsSourceProcessor...\n"); const AgentGfxRevision gfx1251 = make_gfx_revision("gfx1251", 1); auto d = decide_hotswap_rewrite(gfx1251, kGfx12_5GenericIsa, kGfx1251Isa, @@ -391,7 +383,7 @@ void test_RewriteDecisionKeepsTrampolineSourceProcessor() { b0_isa); } -// The trampoline flag never routes non-gfx12.5 source code objects through COMGR. +// The opt-in never routes non-gfx12.5 source code objects through COMGR. void test_RewriteDecisionRejectsNonGfx12_5Source() { printf("TEST RewriteDecisionRejectsNonGfx12_5Source...\n"); const AgentGfxRevision gfx1251 = make_gfx_revision("gfx1251", 1); @@ -402,22 +394,6 @@ void test_RewriteDecisionRejectsNonGfx12_5Source() { !d.has_value()); } -void test_AddGfx1250SteppingFeature() { - printf("TEST AddGfx1250SteppingFeature...\n"); - run("B0 feature appended to bare gfx1250", - add_gfx1250_stepping_feature(kGfx1250Isa, true) == - std::string(kGfx1250Isa) + ":gfx1250-b0-specific+"); - run("A0 feature appended after existing ISA features", - add_gfx1250_stepping_feature(kGfx1250IsaWithFeatures, false) == - std::string(kGfx1250IsaWithFeatures) + ":gfx1250-b0-specific-"); - run("existing B0 feature is preserved", - add_gfx1250_stepping_feature( - "amdgcn-amd-amdhsa--gfx1250:gfx1250-b0-specific+", false) == - "amdgcn-amd-amdhsa--gfx1250:gfx1250-b0-specific+"); - run("non-gfx1250 ISA is unchanged", - add_gfx1250_stepping_feature(kGfx942Isa, true) == kGfx942Isa); -} - // ASIC revision query failure -> revision_valid false and gate blocks, even for // gfx1250. The query is still attempted exactly once. void test_AsicRevisionQueryFailure() { @@ -477,19 +453,18 @@ int main() { test_NearMissTargetBlocks(); test_Gfx12_5GenericFeatureSuffixParsed(); test_Gfx1250NonA0Blocks(); - test_EntryTrampolineFlagAllowsGfx1250NonA0(); - test_EntryTrampolineFlagAllowsGfx125Family(); - test_EntryTrampolineFlagRejectsMalformedGfx125Prefix(); - test_EntryTrampolineFlagAllowsGfx12_5Generic(); - test_EntryTrampolineFlagAllowsGfx1250UnknownRevision(); - test_EntryTrampolineFlagBlocksOtherTargets(); - test_RewriteDecisionSelectsA0Patch(); - test_RewriteDecisionA0WithFlagKeepsA0Patch(); - test_RewriteDecisionSelectsGfx1250B0Trampoline(); - test_RewriteDecisionSelectsGfx125FamilyTrampoline(); - test_RewriteDecisionKeepsTrampolineSourceProcessor(); + test_OptInAllowsGfx1250NonA0(); + test_OptInAllowsGfx125Family(); + test_OptInRejectsMalformedGfx125Prefix(); + test_OptInAllowsGfx12_5Generic(); + test_OptInAllowsGfx1250UnknownRevision(); + test_OptInBlocksOtherTargets(); + test_RewriteDecisionSelectsBaselineA0Request(); + test_RewriteDecisionA0WithOptInKeepsBaselineRequest(); + test_RewriteDecisionOptInSelectsGfx1250SameProcessorRequest(); + test_RewriteDecisionOptInSelectsGfx125FamilyRequest(); + test_RewriteDecisionOptInKeepsSourceProcessor(); test_RewriteDecisionRejectsNonGfx12_5Source(); - test_AddGfx1250SteppingFeature(); test_AsicRevisionQueryFailure(); test_ResultIsCachedPerHandle(); test_DistinctHandlesIndependent(); From b5f054ac6d3ac8902f7f35cd86c3e838846df53b Mon Sep 17 00:00:00 2001 From: harsh-amd <149193359+harsh-amd@users.noreply.github.com> Date: Sat, 27 Jun 2026 12:25:37 -0700 Subject: [PATCH 15/16] hotswap: split loader test support --- projects/hotswap/hotswap_loader_policy.cpp | 56 ++-- .../hotswap/tests/hotswap_loader_test.cpp | 304 +---------------- .../tests/hotswap_loader_test_utils.hpp | 317 ++++++++++++++++++ 3 files changed, 357 insertions(+), 320 deletions(-) create mode 100644 projects/hotswap/tests/hotswap_loader_test_utils.hpp diff --git a/projects/hotswap/hotswap_loader_policy.cpp b/projects/hotswap/hotswap_loader_policy.cpp index e77a15132652..6546a2ea09b8 100644 --- a/projects/hotswap/hotswap_loader_policy.cpp +++ b/projects/hotswap/hotswap_loader_policy.cpp @@ -12,17 +12,26 @@ namespace rocr::hotswap { -bool gate_allows_hotswap(const AgentGfxRevision &gfx) { - return gfx.revision_valid && gfx.gfx_target == "gfx1250" && - gfx.asic_revision == 0; // A0 -} - namespace { +constexpr char Gfx1250[] = "gfx1250"; +constexpr char Gfx12_5Generic[] = "gfx12-5-generic"; +constexpr char Gfx125Prefix[] = "gfx125"; +constexpr char Gfx1250B0Feature[] = ":gfx1250-b0-specific+"; +constexpr char Gfx1250A0Feature[] = ":gfx1250-b0-specific-"; + +enum class Gfx1250Stepping { + B0, + A0, +}; + +const char *gfx1250_stepping_feature(Gfx1250Stepping stepping) { + return stepping == Gfx1250Stepping::B0 ? Gfx1250B0Feature : Gfx1250A0Feature; +} + bool is_gfx12_5_target(const std::string &gfx_target) { - constexpr char Gfx125Prefix[] = "gfx125"; constexpr size_t Gfx125PrefixLen = sizeof(Gfx125Prefix) - 1; - if (gfx_target == "gfx12-5-generic") { + if (gfx_target == Gfx12_5Generic) { return true; } if (gfx_target.size() <= Gfx125PrefixLen || @@ -34,17 +43,22 @@ bool is_gfx12_5_target(const std::string &gfx_target) { } std::string with_gfx1250_stepping_feature(const std::string &isa_name, - bool is_b0) { - if (extract_gfx_target(isa_name) != "gfx1250" || - isa_name.find(":gfx1250-b0-specific+") != std::string::npos || - isa_name.find(":gfx1250-b0-specific-") != std::string::npos) { + Gfx1250Stepping stepping) { + if (extract_gfx_target(isa_name) != Gfx1250 || + isa_name.find(Gfx1250B0Feature) != std::string::npos || + isa_name.find(Gfx1250A0Feature) != std::string::npos) { return isa_name; } - return isa_name + (is_b0 ? ":gfx1250-b0-specific+" : ":gfx1250-b0-specific-"); + return isa_name + gfx1250_stepping_feature(stepping); } } // namespace +bool gate_allows_hotswap(const AgentGfxRevision &gfx) { + return gfx.revision_valid && gfx.gfx_target == Gfx1250 && + gfx.asic_revision == 0; // A0 +} + bool has_candidate_hotswap_rewrite(const AgentGfxRevision &gfx, const RewriteOptions &options) { return gate_allows_hotswap(gfx) || @@ -62,12 +76,12 @@ decide_hotswap_rewrite(const AgentGfxRevision &gfx, } std::string source_gfx = extract_gfx_target(source_isa); - std::string target_gfx = extract_gfx_target(target_isa); - if (gate_allows_hotswap(gfx) && source_gfx == "gfx1250" && - target_gfx == "gfx1250") { - return RewriteDecision{with_gfx1250_stepping_feature(source_isa, true), - with_gfx1250_stepping_feature(target_isa, false)}; + if (gate_allows_hotswap(gfx) && source_gfx == Gfx1250 && + extract_gfx_target(target_isa) == Gfx1250) { + return RewriteDecision{ + with_gfx1250_stepping_feature(source_isa, Gfx1250Stepping::B0), + with_gfx1250_stepping_feature(target_isa, Gfx1250Stepping::A0)}; } if (!options.gfx12_5_rewrite_requested || @@ -79,9 +93,11 @@ decide_hotswap_rewrite(const AgentGfxRevision &gfx, // opt-in path uses the code object's processor, not a source->agent retarget. RewriteDecision decision{source_isa, source_isa}; - if (source_gfx == "gfx1250") { - decision.source_isa = with_gfx1250_stepping_feature(source_isa, true); - decision.target_isa = with_gfx1250_stepping_feature(source_isa, true); + if (source_gfx == Gfx1250) { + decision.source_isa = + with_gfx1250_stepping_feature(source_isa, Gfx1250Stepping::B0); + decision.target_isa = + with_gfx1250_stepping_feature(source_isa, Gfx1250Stepping::B0); } return decision; diff --git a/projects/hotswap/tests/hotswap_loader_test.cpp b/projects/hotswap/tests/hotswap_loader_test.cpp index 5370f2cb86cb..6dca51cd7f4d 100644 --- a/projects/hotswap/tests/hotswap_loader_test.cpp +++ b/projects/hotswap/tests/hotswap_loader_test.cpp @@ -4,315 +4,19 @@ // //===----------------------------------------------------------------------===// // -// This test includes hotswap_tool.cpp directly so it can drive the wrapped HSA -// API-table entry points without a GPU or a real HSA runtime. The original HSA -// functions and HotSwap COMGR calls are replaced with stubs below. +// The test support header includes hotswap_tool.cpp directly so these tests can +// drive the wrapped HSA API-table entry points without a GPU, real HSA runtime, +// or real COMGR. // //===----------------------------------------------------------------------===// -#include -#include #include #include -#include -#include -#include -#include +#include "hotswap_loader_test_utils.hpp" namespace { -struct FakeEnv { - std::string agent_isa = "amdgcn-amd-amdhsa--gfx1250"; - uint32_t asic_revision = 0; - - int retarget_calls = 0; - - int retarget_status = 0; - - uint64_t next_reader_handle = 100; - uint64_t last_loaded_reader = 0; - - std::string retarget_source_isa; - std::string retarget_target_isa; -}; - -FakeEnv g_env; - -} // namespace - -#include "../hotswap_tool.cpp" - -namespace rocr::hotswap { - -// The production helper uses COMGR to read a real code object's ISA metadata. -// This test double only supplies a source ISA so the test can stay focused on -// loader policy without linking COMGR. -std::string GetCodeObjectIsaName(const void *elf_data, size_t elf_size) { - if (!elf_data || elf_size == 0) { - return {}; - } - - constexpr char IsaPrefix[] = "amdgcn-amd-amdhsa--"; - const char *begin = static_cast(elf_data); - const char *end = begin + elf_size; - const char *match = std::search(begin, end, IsaPrefix, - IsaPrefix + sizeof(IsaPrefix) - 1); - if (match == end) { - return {}; - } - - const char *limit = match; - while (limit != end) { - const char c = *limit; - if (c == '\0' || c == '\'' || c == '"' || c == '\n' || c == '\r' || - c == ' ' || c == '\t') { - break; - } - ++limit; - } - return std::string(match, limit); -} - -int RetargetCodeObject(const void *elf_data, size_t elf_size, - const char *source_isa, const char *target_isa, - void **out_data, size_t *out_size) { - ++g_env.retarget_calls; - g_env.retarget_source_isa = source_isa ? source_isa : ""; - g_env.retarget_target_isa = target_isa ? target_isa : ""; - - if (!out_data || !out_size) { - return -1; - } - *out_data = const_cast(elf_data); - *out_size = elf_size; - - if (g_env.retarget_status != 0) { - return g_env.retarget_status; - } - - void *copy = std::malloc(elf_size); - if (!copy) { - return -1; - } - std::memcpy(copy, elf_data, elf_size); - *out_data = copy; - *out_size = elf_size; - return 0; -} - -} // namespace rocr::hotswap - -extern "C" { - -hsa_status_t hsa_agent_iterate_isas(hsa_agent_t /*agent*/, - hsa_status_t (*callback)(hsa_isa_t isa, - void *data), - void *data) { - hsa_isa_t isa{}; - isa.handle = 1; - const hsa_status_t status = callback(isa, data); - return status == HSA_STATUS_INFO_BREAK ? HSA_STATUS_SUCCESS : status; -} - -hsa_status_t hsa_isa_get_info_alt(hsa_isa_t /*isa*/, hsa_isa_info_t attribute, - void *value) { - if (attribute == HSA_ISA_INFO_NAME_LENGTH) { - *static_cast(value) = - static_cast(g_env.agent_isa.size() + 1); - return HSA_STATUS_SUCCESS; - } - if (attribute == HSA_ISA_INFO_NAME) { - std::memcpy(value, g_env.agent_isa.c_str(), g_env.agent_isa.size() + 1); - return HSA_STATUS_SUCCESS; - } - return HSA_STATUS_ERROR; -} - -hsa_status_t hsa_agent_get_info(hsa_agent_t /*agent*/, - hsa_agent_info_t attribute, void *value) { - if (attribute == - static_cast(HSA_AMD_AGENT_INFO_ASIC_REVISION)) { - *static_cast(value) = g_env.asic_revision; - return HSA_STATUS_SUCCESS; - } - return HSA_STATUS_ERROR; -} - -} // extern "C" - -namespace { - -constexpr const char *kGfx1250Isa = "amdgcn-amd-amdhsa--gfx1250"; -constexpr const char *kGfx942Isa = "amdgcn-amd-amdhsa--gfx942"; -constexpr const char *kGfx1251Isa = "amdgcn-amd-amdhsa--gfx1251"; -constexpr const char *kGfx12_5GenericIsa = - "amdgcn-amd-amdhsa--gfx12-5-generic"; -constexpr const char *kGfx1250B0Isa = - "amdgcn-amd-amdhsa--gfx1250:gfx1250-b0-specific+"; -constexpr const char *kGfx1250A0Isa = - "amdgcn-amd-amdhsa--gfx1250:gfx1250-b0-specific-"; - -int tests_passed = 0; -int tests_failed = 0; - -void check(bool cond, const char *name) { - if (cond) { - ++tests_passed; - std::printf(" PASS: %s\n", name); - } else { - ++tests_failed; - std::fprintf(stderr, " FAIL: %s\n", name); - } -} - -void set_gfx12_5_rewrite_env(const char *value) { -#ifdef _WIN32 - _putenv_s("AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES", value ? value : ""); -#else - if (value) { - setenv("AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES", value, 1); - } else { - unsetenv("AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES"); - } -#endif -} - -void reset_state() { - { - std::scoped_lock lock(g_reader_map_mutex); - g_reader_map.clear(); - } - { - std::scoped_lock lock(g_rewritten_elfs_mutex); - g_rewritten_elfs.clear(); - } - g_core_table = nullptr; - g_orig_reader_create_from_memory = nullptr; - g_orig_reader_create_from_file = nullptr; - g_orig_reader_destroy = nullptr; - g_orig_load_agent_code_object = nullptr; - rocr::hotswap::reset_gfx_revision_cache(); - set_gfx12_5_rewrite_env(nullptr); - g_env = FakeEnv{}; -} - -std::vector make_code_object(const std::string &isa) { - const std::string metadata = "---\namdhsa.target: '" + isa + "'\n"; - return std::vector(metadata.begin(), metadata.end()); -} - -hsa_status_t HSA_API fake_reader_create_from_memory( - const void * /*code_object*/, size_t /*size*/, - hsa_code_object_reader_t *code_object_reader) { - code_object_reader->handle = g_env.next_reader_handle++; - return HSA_STATUS_SUCCESS; -} - -hsa_status_t HSA_API fake_reader_create_from_file( - hsa_file_t /*file*/, hsa_code_object_reader_t *code_object_reader) { - code_object_reader->handle = g_env.next_reader_handle++; - return HSA_STATUS_SUCCESS; -} - -hsa_status_t HSA_API -fake_reader_destroy(hsa_code_object_reader_t code_object_reader) { - (void)code_object_reader; - return HSA_STATUS_SUCCESS; -} - -hsa_status_t HSA_API fake_load_agent_code_object( - hsa_executable_t /*executable*/, hsa_agent_t /*agent*/, - hsa_code_object_reader_t code_object_reader, const char * /*options*/, - hsa_loaded_code_object_t *loaded_code_object) { - g_env.last_loaded_reader = code_object_reader.handle; - if (loaded_code_object) { - loaded_code_object->handle = 0xC0DE; - } - return HSA_STATUS_SUCCESS; -} - -CoreApiTable install_tool() { - CoreApiTable core{}; - core.hsa_code_object_reader_create_from_memory_fn = - fake_reader_create_from_memory; - core.hsa_code_object_reader_create_from_file_fn = fake_reader_create_from_file; - core.hsa_code_object_reader_destroy_fn = fake_reader_destroy; - core.hsa_executable_load_agent_code_object_fn = fake_load_agent_code_object; - - HsaApiTable table{}; - table.core_ = &core; - check(OnLoad(&table, 0, 0, nullptr), "OnLoad installs with complete table"); - return core; -} - -hsa_agent_t fake_agent() { - hsa_agent_t agent{}; - agent.handle = 42; - return agent; -} - -hsa_code_object_reader_t create_memory_reader(CoreApiTable &core, - const std::vector &elf) { - hsa_code_object_reader_t reader{}; - const hsa_status_t status = core.hsa_code_object_reader_create_from_memory_fn( - elf.data(), elf.size(), &reader); - check(status == HSA_STATUS_SUCCESS, "memory reader creation succeeds"); - return reader; -} - -hsa_status_t load_reader(CoreApiTable &core, hsa_code_object_reader_t reader) { - hsa_loaded_code_object_t loaded{}; - return core.hsa_executable_load_agent_code_object_fn( - hsa_executable_t{}, fake_agent(), reader, nullptr, &loaded); -} - -void begin_test(const char *name, const char *description) { - std::printf("TEST %s...\n %s\n", name, description); -} - -struct LoadResult { - hsa_status_t status = HSA_STATUS_SUCCESS; - uint64_t original_reader = 0; - uint64_t loaded_reader = 0; - int retarget_calls = 0; - std::string source_isa; - std::string target_isa; - size_t retained_elfs = 0; -}; - -LoadResult load_once(const char *code_object_isa, const char *agent_isa, - const char *gfx12_5_rewrite_flag, - uint32_t asic_revision = 1, - int retarget_status = 0) { - reset_state(); - set_gfx12_5_rewrite_env(gfx12_5_rewrite_flag); - g_env.agent_isa = agent_isa; - g_env.asic_revision = asic_revision; - g_env.retarget_status = retarget_status; - - CoreApiTable core = install_tool(); - std::vector elf = make_code_object(code_object_isa); - const hsa_code_object_reader_t reader = create_memory_reader(core, elf); - - LoadResult result; - result.original_reader = reader.handle; - result.status = load_reader(core, reader); - result.loaded_reader = g_env.last_loaded_reader; - result.retarget_calls = g_env.retarget_calls; - result.source_isa = g_env.retarget_source_isa; - result.target_isa = g_env.retarget_target_isa; - result.retained_elfs = g_rewritten_elfs.size(); - return result; -} - -void check_original_load(const LoadResult &result, const char *name) { - check(result.status == HSA_STATUS_SUCCESS, "load succeeds"); - check(result.retarget_calls == 0, name); - check(result.loaded_reader == result.original_reader, - "original reader is loaded"); -} - void test_OptInDisabledLoadsOriginal() { begin_test("OptInDisabledLoadsOriginal", "Unset, empty, and 0 opt-in values must leave non-A0 gfx1250 " diff --git a/projects/hotswap/tests/hotswap_loader_test_utils.hpp b/projects/hotswap/tests/hotswap_loader_test_utils.hpp new file mode 100644 index 000000000000..6fc202e5bff1 --- /dev/null +++ b/projects/hotswap/tests/hotswap_loader_test_utils.hpp @@ -0,0 +1,317 @@ +//===- hotswap_loader_test_utils.hpp - Loader test support ----------------===// +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef ROCR_HOTSWAP_LOADER_TEST_UTILS_HPP +#define ROCR_HOTSWAP_LOADER_TEST_UTILS_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace { + +struct FakeEnv { + std::string agent_isa = "amdgcn-amd-amdhsa--gfx1250"; + uint32_t asic_revision = 0; + int retarget_calls = 0; + int retarget_status = 0; + uint64_t next_reader_handle = 100; + uint64_t last_loaded_reader = 0; + std::string retarget_source_isa; + std::string retarget_target_isa; +}; + +FakeEnv g_env; +CoreApiTable g_test_core; + +} // namespace + +#include "../hotswap_tool.cpp" + +namespace rocr::hotswap { + +// The production helper uses COMGR to read a real code object's ISA metadata. +// This test double only supplies a source ISA so the test can stay focused on +// loader policy without linking COMGR. +std::string GetCodeObjectIsaName(const void *elf_data, size_t elf_size) { + if (!elf_data || elf_size == 0) { + return {}; + } + + constexpr char IsaPrefix[] = "amdgcn-amd-amdhsa--"; + const char *begin = static_cast(elf_data); + const char *end = begin + elf_size; + const char *match = std::search(begin, end, IsaPrefix, + IsaPrefix + sizeof(IsaPrefix) - 1); + if (match == end) { + return {}; + } + + const char *limit = match; + while (limit != end) { + const char c = *limit; + if (c == '\0' || c == '\'' || c == '"' || c == '\n' || c == '\r' || + c == ' ' || c == '\t') { + break; + } + ++limit; + } + return std::string(match, limit); +} + +int RetargetCodeObject(const void *elf_data, size_t elf_size, + const char *source_isa, const char *target_isa, + void **out_data, size_t *out_size) { + ++g_env.retarget_calls; + g_env.retarget_source_isa = source_isa ? source_isa : ""; + g_env.retarget_target_isa = target_isa ? target_isa : ""; + + if (!out_data || !out_size) { + return -1; + } + *out_data = const_cast(elf_data); + *out_size = elf_size; + + if (g_env.retarget_status != 0) { + return g_env.retarget_status; + } + + void *copy = std::malloc(elf_size); + if (!copy) { + return -1; + } + std::memcpy(copy, elf_data, elf_size); + *out_data = copy; + *out_size = elf_size; + return 0; +} + +} // namespace rocr::hotswap + +extern "C" { + +hsa_status_t hsa_agent_iterate_isas(hsa_agent_t /*agent*/, + hsa_status_t (*callback)(hsa_isa_t isa, + void *data), + void *data) { + hsa_isa_t isa{}; + isa.handle = 1; + const hsa_status_t status = callback(isa, data); + return status == HSA_STATUS_INFO_BREAK ? HSA_STATUS_SUCCESS : status; +} + +hsa_status_t hsa_isa_get_info_alt(hsa_isa_t /*isa*/, hsa_isa_info_t attribute, + void *value) { + if (attribute == HSA_ISA_INFO_NAME_LENGTH) { + *static_cast(value) = + static_cast(g_env.agent_isa.size() + 1); + return HSA_STATUS_SUCCESS; + } + if (attribute == HSA_ISA_INFO_NAME) { + std::memcpy(value, g_env.agent_isa.c_str(), g_env.agent_isa.size() + 1); + return HSA_STATUS_SUCCESS; + } + return HSA_STATUS_ERROR; +} + +hsa_status_t hsa_agent_get_info(hsa_agent_t /*agent*/, + hsa_agent_info_t attribute, void *value) { + if (attribute == + static_cast(HSA_AMD_AGENT_INFO_ASIC_REVISION)) { + *static_cast(value) = g_env.asic_revision; + return HSA_STATUS_SUCCESS; + } + return HSA_STATUS_ERROR; +} + +} // extern "C" + +namespace { + +constexpr const char *kGfx1250Isa = "amdgcn-amd-amdhsa--gfx1250"; +constexpr const char *kGfx942Isa = "amdgcn-amd-amdhsa--gfx942"; +constexpr const char *kGfx1251Isa = "amdgcn-amd-amdhsa--gfx1251"; +constexpr const char *kGfx12_5GenericIsa = + "amdgcn-amd-amdhsa--gfx12-5-generic"; +constexpr const char *kGfx1250B0Isa = + "amdgcn-amd-amdhsa--gfx1250:gfx1250-b0-specific+"; +constexpr const char *kGfx1250A0Isa = + "amdgcn-amd-amdhsa--gfx1250:gfx1250-b0-specific-"; + +int tests_passed = 0; +int tests_failed = 0; + +void check(bool cond, const char *name) { + if (cond) { + ++tests_passed; + std::printf(" PASS: %s\n", name); + } else { + ++tests_failed; + std::fprintf(stderr, " FAIL: %s\n", name); + } +} + +void begin_test(const char *name, const char *description) { + std::printf("TEST %s...\n %s\n", name, description); +} + +void set_gfx12_5_rewrite_env(const char *value) { +#ifdef _WIN32 + _putenv_s("AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES", value ? value : ""); +#else + if (value) { + setenv("AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES", value, 1); + } else { + unsetenv("AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES"); + } +#endif +} + +void reset_state() { + { + std::scoped_lock lock(g_reader_map_mutex); + g_reader_map.clear(); + } + { + std::scoped_lock lock(g_rewritten_elfs_mutex); + g_rewritten_elfs.clear(); + } + g_core_table = nullptr; + g_orig_reader_create_from_memory = nullptr; + g_orig_reader_create_from_file = nullptr; + g_orig_reader_destroy = nullptr; + g_orig_load_agent_code_object = nullptr; + rocr::hotswap::reset_gfx_revision_cache(); + set_gfx12_5_rewrite_env(nullptr); + g_env = FakeEnv{}; +} + +std::vector make_code_object(const std::string &isa) { + const std::string metadata = "---\namdhsa.target: '" + isa + "'\n"; + return std::vector(metadata.begin(), metadata.end()); +} + +hsa_status_t HSA_API fake_reader_create_from_memory( + const void * /*code_object*/, size_t /*size*/, + hsa_code_object_reader_t *code_object_reader) { + code_object_reader->handle = g_env.next_reader_handle++; + return HSA_STATUS_SUCCESS; +} + +hsa_status_t HSA_API fake_reader_create_from_file( + hsa_file_t /*file*/, hsa_code_object_reader_t *code_object_reader) { + code_object_reader->handle = g_env.next_reader_handle++; + return HSA_STATUS_SUCCESS; +} + +hsa_status_t HSA_API +fake_reader_destroy(hsa_code_object_reader_t code_object_reader) { + (void)code_object_reader; + return HSA_STATUS_SUCCESS; +} + +hsa_status_t HSA_API fake_load_agent_code_object( + hsa_executable_t /*executable*/, hsa_agent_t /*agent*/, + hsa_code_object_reader_t code_object_reader, const char * /*options*/, + hsa_loaded_code_object_t *loaded_code_object) { + g_env.last_loaded_reader = code_object_reader.handle; + if (loaded_code_object) { + loaded_code_object->handle = 0xC0DE; + } + return HSA_STATUS_SUCCESS; +} + +CoreApiTable &install_tool() { + g_test_core = CoreApiTable{}; + g_test_core.hsa_code_object_reader_create_from_memory_fn = + fake_reader_create_from_memory; + g_test_core.hsa_code_object_reader_create_from_file_fn = + fake_reader_create_from_file; + g_test_core.hsa_code_object_reader_destroy_fn = fake_reader_destroy; + g_test_core.hsa_executable_load_agent_code_object_fn = + fake_load_agent_code_object; + + HsaApiTable table{}; + table.core_ = &g_test_core; + check(OnLoad(&table, 0, 0, nullptr), "OnLoad installs with complete table"); + return g_test_core; +} + +hsa_agent_t fake_agent() { + hsa_agent_t agent{}; + agent.handle = 42; + return agent; +} + +hsa_code_object_reader_t create_memory_reader(CoreApiTable &core, + const std::vector &elf) { + hsa_code_object_reader_t reader{}; + const hsa_status_t status = core.hsa_code_object_reader_create_from_memory_fn( + elf.data(), elf.size(), &reader); + check(status == HSA_STATUS_SUCCESS, "memory reader creation succeeds"); + return reader; +} + +hsa_status_t load_reader(CoreApiTable &core, hsa_code_object_reader_t reader) { + hsa_loaded_code_object_t loaded{}; + return core.hsa_executable_load_agent_code_object_fn( + hsa_executable_t{}, fake_agent(), reader, nullptr, &loaded); +} + +struct LoadResult { + hsa_status_t status = HSA_STATUS_SUCCESS; + uint64_t original_reader = 0; + uint64_t loaded_reader = 0; + int retarget_calls = 0; + std::string source_isa; + std::string target_isa; + size_t retained_elfs = 0; +}; + +LoadResult load_once(const char *code_object_isa, const char *agent_isa, + const char *gfx12_5_rewrite_flag, + uint32_t asic_revision = 1, + int retarget_status = 0) { + reset_state(); + set_gfx12_5_rewrite_env(gfx12_5_rewrite_flag); + g_env.agent_isa = agent_isa; + g_env.asic_revision = asic_revision; + g_env.retarget_status = retarget_status; + + CoreApiTable &core = install_tool(); + std::vector elf = make_code_object(code_object_isa); + const hsa_code_object_reader_t reader = create_memory_reader(core, elf); + + LoadResult result; + result.original_reader = reader.handle; + result.status = load_reader(core, reader); + result.loaded_reader = g_env.last_loaded_reader; + result.retarget_calls = g_env.retarget_calls; + result.source_isa = g_env.retarget_source_isa; + result.target_isa = g_env.retarget_target_isa; + result.retained_elfs = g_rewritten_elfs.size(); + return result; +} + +void check_original_load(const LoadResult &result, const char *name) { + check(result.status == HSA_STATUS_SUCCESS, "load succeeds"); + check(result.retarget_calls == 0, name); + check(result.loaded_reader == result.original_reader, + "original reader is loaded"); +} + +} // namespace + +#endif // ROCR_HOTSWAP_LOADER_TEST_UTILS_HPP From f1ab65def9d18648d62d2843b7dbc45e5e99f881 Mon Sep 17 00:00:00 2001 From: harsh-amd <149193359+harsh-amd@users.noreply.github.com> Date: Mon, 22 Jun 2026 06:02:19 -0700 Subject: [PATCH 16/16] rocr: remove loader-side gfx125x entry trampolines --- .../runtime/hsa-runtime/loader/executable.cpp | 145 ------------------ .../runtime/hsa-runtime/loader/executable.hpp | 12 -- 2 files changed, 157 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp b/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp index 6e1a8efac1ed..6b37c3a6b2f8 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp @@ -175,67 +175,6 @@ void LoaderOptions::PrintHelp(std::ostream& out) const static const char *LOADER_DUMP_PREFIX = "amdcode"; -// Kernel-entry trampoline (gfx125x / RDNA4). -// -// We cannot reserve space immediately in front of each kernel entry: that would -// require a non-uniform relayout of the loaded code segment, which breaks every -// intra-segment PC-relative reference the compiler baked in. Instead we allocate -// a separate *executable* region (AMDGPU_HSA_SEGMENT_CODE_AGENT, which carries -// AllocateExecutable in the loader context) and, for each kernel, emit a stub -// that jumps to the real entry; the kernel descriptor's entry offset is then -// rewritten so dispatch lands in the stub first. -// -// The jump is absolute (the pool is not within S_BRANCH range of the code), so -// the stub does a global cache writeback (SCOPE_CU) and a v_nop, then loads the -// 64-bit entry address into a scratch SGPR pair and sets PC. -// s[100:101] is a safe fixed scratch: RDNA gives every wave 128 physical SGPRs and -// these indices are well above the preloaded user+system SGPRs (<= ~20), so they -// are never a live kernel input -- the kernel writes them before it reads them. -// -// gfx1250 encodings verified with: llvm-mc --arch=amdgcn --mcpu=gfx1250 --show-encoding -// global_wb -> 0xEE0B007C, 0x00000000, 0x00000000 -// v_nop (padding) -> 0x7E000000 -// s_mov_b32 s100, + literal -> 0xBEE400FF -// s_mov_b32 s101, + literal -> 0xBEE500FF -// s_set_pc_i64 s[100:101] -> 0xBE804864 -// s_code_end (padding) -> 0xBF9F0000 -static constexpr size_t kTrampolineStubStride = AMD_ISA_ALIGN_BYTES; // 256: one stub, entry-aligned - -// The CP (CPC) instruction-prefetches forward from a kernel's entry PC when it -// dispatches. Because dispatch now lands on a stub inside our pool, that prefetch -// reads ahead from the stub and would run off the end of the pool into the next, -// unmapped page -- a CPC read page/permission fault (observed on gfx1250). The -// prefetch length is per-kernel: COMPUTE_PGM_RSRC3.INST_PREF_SIZE (6 bits, GFX11+) -// counts 128-byte instruction-cache lines to prefetch ahead of the entry. We size -// a trailing guard from the largest INST_PREF_SIZE in the pool so the prefetch from -// any stub always lands in mapped, readable memory inside this same allocation. The -// guard is never executed (the stub sets PC away first); it only needs to be present -// and readable, which the allocation's zero-fill already guarantees. -static constexpr size_t kInstPrefUnitBytes = 128; // GFX11+ CP I$ prefetch line size - -static void BuildTrampolineGfx1250(uint8_t* buf, uint64_t target) { - auto* w = reinterpret_cast(buf); - - w[0] = 0xEE0B007C; // global_wb - w[1] = 0x00000000; // : - w[2] = 0x00000000; // : - w[3] = 0x7E000000; // v_nop (padding) - w[4] = 0xBEE400FF; // s_mov_b32 s100, target_lo - w[5] = static_cast(target); - w[6] = 0xBEE500FF; // s_mov_b32 s101, target_hi - w[7] = static_cast(target >> 32); - w[8] = 0xBE804864; // s_set_pc_i64 s[100:101] - for (size_t i = 9; i < kTrampolineStubStride / sizeof(uint32_t); ++i) - w[i] = 0xBF9F0000; // s_code_end (prefetch-safe padding) -} - -// gfx12.5 family: CO v3+ reports either a generic mach name (gfx12-5-generic) or -// discrete targets (gfx1250, gfx1251, …) in the amdgcn-amd-amdhsa-- ISA string. -static bool CodeObjectIsaIsGfx125Family(const std::string& codeIsa) { - if (codeIsa.find("gfx12-5-generic") != std::string::npos) return true; - return codeIsa.find("gfx125") != std::string::npos; -} - Loader* Loader::Create(Context* context) { return new AmdHsaCodeLoader(context); @@ -1316,11 +1255,6 @@ hsa_status_t ExecutableImpl::LoadCodeObject( return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; } - // Kernel-entry trampolines (gfx125x). Gate on this code object's ISA and reset - // the per-object fixup list collected by LoadDefinitionSymbol. - trampoline_enabled_gfx125x_ = CodeObjectIsaIsGfx125Family(codeIsa); - kd_fixups_.clear(); - uint32_t majorVersion, minorVersion; if (!code->GetCodeObjectVersion(&majorVersion, &minorVersion)) { logger_ << "LoaderError: failed to determine code object's version\n"; @@ -1381,14 +1315,6 @@ hsa_status_t ExecutableImpl::LoadCodeObject( status = ApplyRelocations(agent, code.get()); if (status != HSA_STATUS_SUCCESS) { return status; } - // Emit kernel-entry trampolines into the host shadow now that the image is - // final (post-relocation) and still unfrozen. The single Freeze DMA carries - // them to device along with the rewritten descriptors. - if (trampoline_enabled_gfx125x_ && !kd_fixups_.empty()) { - status = InstallTrampolinesGfx125x(agent); - if (status != HSA_STATUS_SUCCESS) { return status; } - } - code.reset(); if (loaderOptions.DumpAll()->is_set() || loaderOptions.DumpExec()->is_set()) { @@ -1499,59 +1425,6 @@ hsa_status_t ExecutableImpl::LoadSegmentV2(const code::Segment *data_segment, return HSA_STATUS_SUCCESS; } -hsa_status_t ExecutableImpl::InstallTrampolinesGfx125x(hsa_agent_t agent) { - const size_t n = kd_fixups_.size(); - - // Size the trailing prefetch guard from the largest CP instruction-prefetch - // window among this pool's kernels (INST_PREF_SIZE lines * 128 B). The forward - // prefetch from the last stub reaches its_entry + INST_PREF_SIZE*128; since that - // stub's own slot (one stub stride) already lies inside the pool, only the - // remainder, (INST_PREF_SIZE*128 - stub_size), can spill past the pool and needs - // a guard. (Clamp to 0 when the window fits within a stub slot.) - uint32_t max_pref_lines = 0; - for (const auto& f : kd_fixups_) - max_pref_lines = std::max(max_pref_lines, f.inst_pref); - const size_t pref_bytes = static_cast(max_pref_lines) * kInstPrefUnitBytes; - const size_t guard = - pref_bytes > kTrampolineStubStride ? pref_bytes - kTrampolineStubStride : 0; - const size_t pool = n * kTrampolineStubStride + guard; - - // AMDGPU_HSA_SEGMENT_CODE_AGENT yields *executable* device memory: the loader - // context backs it with RegionMemory(..., is_code=true), which sets - // core::MemoryRegion::AllocateExecutable (see amd_loader_context.cpp). - void* ptr = context_->SegmentAlloc(AMDGPU_HSA_SEGMENT_CODE_AGENT, agent, pool, - AMD_ISA_ALIGN_BYTES, /*zero=*/true); - if (!ptr) return HSA_STATUS_ERROR_OUT_OF_RESOURCES; - - // vaddr == 0: Address()/Copy() index by raw byte offset into the pool. - auto tramp = std::make_shared(this, agent, AMDGPU_HSA_SEGMENT_CODE_AGENT, - ptr, pool, /*vaddr=*/0, /*storage_offset=*/0); - objects.push_back(tramp); // freed via Destroy() in ~ExecutableImpl - trampoline_segments_.push_back(tramp); // frozen in ExecutableImpl::Freeze - - for (size_t i = 0; i < n; ++i) { - const KdFixup& f = kd_fixups_[i]; - const uint64_t stub_off = i * kTrampolineStubStride; - // Device addresses are valid pre-Freeze (RegionMemory::ptr_ is set at alloc). - const uint64_t kd_dev = reinterpret_cast(f.code_seg->Address(f.kd_vaddr)); - const uint64_t entry_dev = reinterpret_cast(f.code_seg->Address(f.kd_vaddr + f.entry_off)); - const uint64_t stub_dev = reinterpret_cast(tramp->Address(stub_off)); - - uint8_t blob[kTrampolineStubStride]; - BuildTrampolineGfx1250(blob, entry_dev); // stub jumps to the real entry - tramp->Copy(stub_off, blob, sizeof(blob)); // -> trampoline host shadow - - // Redirect dispatch onto the stub: kernel_object(kd_dev) + new_off == stub. - int64_t new_off = static_cast(stub_dev) - static_cast(kd_dev); - f.code_seg->Copy(f.kd_vaddr + llvm::amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET, - &new_off, sizeof(new_off)); // -> code host shadow - } - - // The prefetch guard is left as the allocation's zero-fill (zero=true): it is - // committed and readable -- all the CP prefetch needs -- and is never executed. - return HSA_STATUS_SUCCESS; -} - hsa_status_t ExecutableImpl::LoadSymbol(hsa_agent_t agent, code::Symbol* sym, uint32_t majorVersion) @@ -1600,17 +1473,6 @@ hsa_status_t ExecutableImpl::LoadDefinitionSymbol(hsa_agent_t agent, llvm::amdhsa::kernel_descriptor_t kd; sym->GetSection()->getData(sym->SectionOffset(), &kd, sizeof(kd)); - if (trampoline_enabled_gfx125x_) { - // Record this descriptor; the trampoline is installed after relocations. - // sym->VAddr() is the descriptor's ELF vaddr (matches SymbolAddress below). - // INST_PREF_SIZE (GFX11+) = number of 128B I$ lines the CP prefetches ahead - // of the entry; captured here to size the trampoline's prefetch guard. - uint32_t inst_pref = AMDHSA_BITS_GET(kd.compute_pgm_rsrc3, - rocr::llvm::amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE); - kd_fixups_.push_back({ SymbolSegment(agent, sym), sym->VAddr(), - kd.kernel_code_entry_byte_offset, inst_pref }); - } - uint32_t kernarg_segment_size = kd.kernarg_size; // FIXME: If 0 then the compiler is not specifying the size. uint32_t kernarg_segment_alignment = 16; // FIXME: Use the minumum HSA required alignment. uint32_t group_segment_size = kd.group_segment_fixed_size; @@ -2092,13 +1954,6 @@ hsa_status_t ExecutableImpl::Freeze(const char *options) { } } - // Trampoline pools are not part of any LoadedCodeObject's segment list - // (that must stay size==1 for v2+); freeze them explicitly so their host->device - // DMA and code-cache invalidation happen alongside the code segments. - for (auto &ts : trampoline_segments_) { - ts->Freeze(); - } - state_ = HSA_EXECUTABLE_STATE_FROZEN; return HSA_STATUS_SUCCESS; } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.hpp b/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.hpp index b17c2da5410c..9d8a238fb1f5 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.hpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.hpp @@ -623,10 +623,6 @@ friend class AmdHsaCodeLoader; Segment* SymbolSegment(hsa_agent_t agent, amd::hsa::code::Symbol* sym); Segment* SectionSegment(hsa_agent_t agent, amd::hsa::code::Section* sec); - // gfx125x: allocate a separate executable region and emit, per kernel, a stub - // that jumps to the real entry, then redirect the kernel descriptor to it. - hsa_status_t InstallTrampolinesGfx125x(hsa_agent_t agent); - amd::hsa::common::ReaderWriterLock rw_lock_; hsa_profile_t profile_; Context *context_; @@ -641,14 +637,6 @@ friend class AmdHsaCodeLoader; std::vector> objects; std::shared_ptr program_allocation_segment; std::vector> loaded_code_objects; - - // Kernel-entry trampolines (gfx125x). - // kd_fixups_ is collected per-LoadCodeObject; trampoline_segments_ persists for - // the lifetime of the executable so it can be frozen and destroyed normally. - struct KdFixup { Segment* code_seg; uint64_t kd_vaddr; int64_t entry_off; uint32_t inst_pref; }; - bool trampoline_enabled_gfx125x_ = false; - std::vector kd_fixups_; - std::vector> trampoline_segments_; }; class AmdHsaCodeLoader : public Loader {