diff --git a/projects/clr/rocclr/device/hotswap.hpp b/projects/clr/rocclr/device/hotswap.hpp index 09366a64848..5eae921eae1 100644 --- a/projects/clr/rocclr/device/hotswap.hpp +++ b/projects/clr/rocclr/device/hotswap.hpp @@ -27,7 +27,7 @@ namespace amd { namespace hotswap { // On when this tool is loaded via HSA_TOOLS_LIB (name must match ROCR LoadTools). -inline constexpr const char* kHotswapToolLib = "libamd_comgr_hotswap_tool.so"; +inline constexpr const char* kHotswapToolLib = "libhsa-hotswap.so"; inline bool Enabled() { const char* tools_lib = std::getenv("HSA_TOOLS_LIB"); diff --git a/projects/hotswap/CMakeLists.txt b/projects/hotswap/CMakeLists.txt index 6710d980717..d71ae9143be 100644 --- a/projects/hotswap/CMakeLists.txt +++ b/projects/hotswap/CMakeLists.txt @@ -1,6 +1,8 @@ cmake_minimum_required(VERSION 3.16) project(hotswap LANGUAGES CXX) +include(GNUInstallDirs) + set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -10,6 +12,7 @@ set(HSA_RUNTIME_INC "${CMAKE_CURRENT_SOURCE_DIR}/../rocr-runtime/runtime/hsa-run # COMGR is required — provides amd_comgr_hotswap_rewrite. find_package(amd_comgr CONFIG REQUIRED) +find_package(hsa-runtime64 CONFIG REQUIRED) find_path(HSA_INCLUDE_DIR hsa.h PATHS ${HSA_RUNTIME_INC} NO_DEFAULT_PATH REQUIRED) @@ -36,15 +39,35 @@ target_include_directories(hsa-hotswap PRIVATE ${HSA_RUNTIME_INC}/.. ) -target_link_libraries(hsa-hotswap PRIVATE amd_comgr) +target_link_libraries(hsa-hotswap PRIVATE + amd_comgr + hsa-runtime64::hsa-runtime64 +) set_target_properties(hsa-hotswap PROPERTIES POSITION_INDEPENDENT_CODE ON) +install(TARGETS hsa-hotswap + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + # Test executable add_executable(hotswap_test tests/hotswap_test.cpp hotswap.cpp) target_include_directories(hotswap_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(hotswap_test PRIVATE amd_comgr) +# Embed the gfx1250 fixture code object as a byte array so the test exercises the +# real parse + rewrite path with no GPU and no runtime file dependency (works in +# any CI). Regenerate the .hsaco per tests/fixtures/README.md. +set(_hotswap_co "${CMAKE_CURRENT_SOURCE_DIR}/tests/fixtures/gfx1250_min.hsaco") +set(_hotswap_co_hdr "${CMAKE_CURRENT_BINARY_DIR}/gfx1250_min_hsaco.h") +file(READ "${_hotswap_co}" _hotswap_co_hex HEX) +string(REGEX REPLACE "(..)" "0x\\1," _hotswap_co_arr "${_hotswap_co_hex}") +file(WRITE "${_hotswap_co_hdr}" + "// Generated from tests/fixtures/gfx1250_min.hsaco. Do not edit.\n" + "static const unsigned char kGfx1250MinCo[] = {${_hotswap_co_arr}};\n") +target_include_directories(hotswap_test PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) + # Unit tests for the gfx-target / ASIC-revision query logic. The test compiles # the portable hotswap_gfx_query.cpp unit alongside the test translation unit # and supplies its own stubs for the HSA entry points, so this target needs diff --git a/projects/hotswap/hotswap.cpp b/projects/hotswap/hotswap.cpp index 5267e3998fa..740d4f0f224 100644 --- a/projects/hotswap/hotswap.cpp +++ b/projects/hotswap/hotswap.cpp @@ -5,14 +5,51 @@ //===----------------------------------------------------------------------===// #include "hotswap.hpp" -#include +#include "amd_comgr/amd_comgr.h" #include #include #include #include +#include namespace rocr::hotswap { +std::string GetCodeObjectIsaName(const void *elf_data, size_t elf_size) { + if (!elf_data || elf_size == 0) { + return {}; + } + + amd_comgr_data_t data = {0}; + if (amd_comgr_create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &data) != + AMD_COMGR_STATUS_SUCCESS) { + return {}; + } + + std::string isa; + if (amd_comgr_set_data(data, elf_size, + static_cast(elf_data)) == + AMD_COMGR_STATUS_SUCCESS) { + size_t isa_len = 0; + if (amd_comgr_get_data_isa_name(data, &isa_len, nullptr) == + AMD_COMGR_STATUS_SUCCESS && + isa_len > 0) { + isa.resize(isa_len); + if (amd_comgr_get_data_isa_name(data, &isa_len, isa.data()) == + AMD_COMGR_STATUS_SUCCESS) { + // Reported size includes the terminating NUL. + if (!isa.empty() && isa.back() == '\0') { + isa.pop_back(); + } + } else { + isa.clear(); + } + } + } + + amd_comgr_release_data(data); + return isa; +} + int RetargetCodeObject(const void *elf_data, size_t elf_size, const char *source_isa, const char *target_isa, void **out_data, size_t *out_size) { @@ -47,7 +84,6 @@ int RetargetCodeObject(const void *elf_data, size_t elf_size, return static_cast(status); } - // Call the hotswap rewrite API. amd_comgr_data_t output = {0}; status = amd_comgr_hotswap_rewrite(input, source_isa, target_isa, &output); amd_comgr_release_data(input); diff --git a/projects/hotswap/hotswap.hpp b/projects/hotswap/hotswap.hpp index a730b0ae670..4034723b25b 100644 --- a/projects/hotswap/hotswap.hpp +++ b/projects/hotswap/hotswap.hpp @@ -8,14 +8,25 @@ #define ROCR_HOTSWAP_HPP #include +#include namespace rocr::hotswap { -/// Rewrite a code object from source_isa to target_isa via COMGR. +/// Read a code object's own ISA name via COMGR (amd_comgr_get_data_isa_name). /// -/// Called by the hotswap tools lib when the code object's ISA differs from -/// the agent's ISA, or when stepping patches are needed (e.g., B0-to-A0). -/// Delegates to COMGR's amd_comgr_hotswap_rewrite (linked directly). +/// Uses COMGR's LLVM-canonical parser, so it tracks triple normalization +/// without hand-rolled metadata parsing. Returns an empty string on failure. +std::string GetCodeObjectIsaName(const void *elf_data, size_t elf_size); + +/// Retarget a code object from source_isa to target_isa via COMGR. +/// +/// Both ISA names are supplied by the caller: source_isa typically comes from +/// the code object (see GetCodeObjectIsaName) and target_isa from the running +/// GPU (e.g. the HSA agent), but either may be overridden. COMGR's +/// amd_comgr_hotswap_rewrite (linked directly) applies whatever transformation +/// the source/target pair calls for -- same-ISA stepping patches (e.g. gfx1250 +/// B0 to A0) or cross-family transpilation -- and returns the rewritten code +/// object. If no transformation is needed, the output is a copy of the input. /// /// On success, *out_data and *out_size describe the rewritten code object. /// If *out_data differs from elf_data, it was allocated by this function diff --git a/projects/hotswap/hotswap_tool.cpp b/projects/hotswap/hotswap_tool.cpp index 444bd1caaa8..ab709d5b57b 100644 --- a/projects/hotswap/hotswap_tool.cpp +++ b/projects/hotswap/hotswap_tool.cpp @@ -21,14 +21,11 @@ #include #include #include -#include -#include #include #include #include #include #include -#include #include #include #include @@ -57,16 +54,43 @@ struct ReaderEntry { bool keepalive_after_load = false; }; +// State for the HSA_TOOLS_LIB callbacks. Ordinary file-scope statics: OnUnload +// is driven late (from HIP's atexit -> hsa_shut_down -> UnloadTools), and it +// deliberately does NOT touch any of this heap-owning state -- it only restores +// the API-table function pointers (POD). That keeps teardown safe regardless of +// C++ static-destruction order; the maps/buffers are freed by their normal +// static destructors at exit. (An earlier version clear()ed these in OnUnload, +// which crashed: by the time OnUnload runs, the static dtors have already freed +// them -> use-after-free SIGSEGV.) std::mutex g_reader_map_mutex; std::unordered_map g_reader_map; // Rewritten ELF buffers must outlive the executable because ROCR's -// LoadedCodeObjectImpl stores a raw pointer to the ELF data (used by -// debuggers, profilers, and hsa_ven_amd_loader queries). We keep them -// alive until OnUnload. +// LoadedCodeObjectImpl stores a raw pointer to the ELF data (used by debuggers, +// profilers, and hsa_ven_amd_loader queries). Those queries happen during the +// executable's life, not during teardown, so freeing them at process exit (via +// the normal static destructor) is safe. std::mutex g_rewritten_elfs_mutex; std::vector g_rewritten_elfs; +// Opt-in diagnostic logging (HSA_HOTSWAP_VERBOSE=1). Off by default so +// production stays quiet; lets us confirm which code objects are intercepted, +// gated, and rewritten. Runtime-gated (no rebuild to toggle); when off the cost +// is a single cached bool load per call site. +bool verbose() { + static const bool v = [] { + const char *e = std::getenv("HSA_HOTSWAP_VERBOSE"); + return e && e[0] && e[0] != '0'; + }(); + return v; +} + +#define HOTSWAP_LOG(...) \ + do { \ + if (verbose()) \ + fprintf(stderr, __VA_ARGS__); \ + } while (0) + CoreApiTable *g_core_table = nullptr; decltype(hsa_code_object_reader_create_from_memory) @@ -83,22 +107,6 @@ void stash_bytes(uint64_t handle, const uint8_t *data, size_t size) { g_reader_map[handle] = ReaderEntry{std::move(vec), false, false}; } -bool checked_add(size_t lhs, size_t rhs, size_t *out) { - if (lhs > std::numeric_limits::max() - rhs) { - return false; - } - *out = lhs + rhs; - return true; -} - -bool checked_mul(size_t lhs, size_t rhs, size_t *out) { - if (lhs != 0 && rhs > std::numeric_limits::max() / lhs) { - return false; - } - *out = lhs * rhs; - return true; -} - bool try_get_reader_entry(uint64_t handle, ByteVec *bytes, bool *from_file) { std::scoped_lock lock(g_reader_map_mutex); const auto it = g_reader_map.find(handle); @@ -129,145 +137,6 @@ void mark_reader_keepalive(uint64_t handle) { } } -// Validate ELF64 header and return pointer, or nullptr on failure. -const Elf64_Ehdr *validate_elf64(const uint8_t *elf, size_t size) { - if (size < sizeof(Elf64_Ehdr)) { - return nullptr; - } - const auto *ehdr = reinterpret_cast(elf); - if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) { - return nullptr; - } - if (ehdr->e_ident[EI_CLASS] != ELFCLASS64) { - return nullptr; - } - return ehdr; -} - -bool validate_program_header_table(const Elf64_Ehdr *ehdr, size_t size) { - return ehdr->e_phoff != 0 && ehdr->e_phoff <= size && ehdr->e_phnum != 0 && - ehdr->e_phentsize >= sizeof(Elf64_Phdr); -} - -bool compute_program_header_offset(const Elf64_Ehdr *ehdr, size_t size, - uint16_t index, size_t *hdr_offset) { - size_t hdr_index_offset = 0; - if (!checked_mul(static_cast(index), - static_cast(ehdr->e_phentsize), - &hdr_index_offset) || - !checked_add(ehdr->e_phoff, hdr_index_offset, hdr_offset) || - *hdr_offset > size || sizeof(Elf64_Phdr) > size - *hdr_offset) { - return false; - } - return true; -} - -bool compute_note_segment_bounds(const Elf64_Phdr *phdr, size_t size, - size_t *note_offset, size_t *note_end) { - *note_offset = phdr->p_offset; - return *note_offset <= size && phdr->p_filesz <= size - *note_offset && - checked_add(*note_offset, phdr->p_filesz, note_end); -} - -bool compute_note_layout(size_t note_offset, size_t note_end, - const Elf64_Nhdr *nhdr, size_t *desc_off, - size_t *next_note) { - size_t raw_name_size = 0; - size_t raw_desc_size = 0; - size_t name_sz_aligned = 0; - size_t desc_sz_aligned = 0; - if (!checked_add(static_cast(nhdr->n_namesz), 3, &raw_name_size) || - !checked_add(static_cast(nhdr->n_descsz), 3, &raw_desc_size)) { - return false; - } - - name_sz_aligned = raw_name_size & ~size_t{3}; - desc_sz_aligned = raw_desc_size & ~size_t{3}; - return checked_add(note_offset, sizeof(Elf64_Nhdr), desc_off) && - checked_add(*desc_off, name_sz_aligned, desc_off) && - checked_add(*desc_off, desc_sz_aligned, next_note) && - *next_note <= note_end; -} - -// Search a single NT_AMDGPU_METADATA note descriptor for the ISA triple. -std::string find_isa_in_metadata(const char *desc, size_t desc_size) { - const char prefix[] = "amdgcn-amd-amdhsa--"; - const size_t prefix_len = sizeof(prefix) - 1; - for (size_t j = 0; j + prefix_len <= desc_size; ++j) { - if (memcmp(desc + j, prefix, prefix_len) == 0) { - size_t len = 0; - while (j + len < desc_size && desc[j + len] != '\0' && - desc[j + len] != '\n' && desc[j + len] != '\'' && - desc[j + len] != '"' && desc[j + len] != ' ') { - ++len; - } - return std::string(desc + j, len); - } - } - return {}; -} - -std::string read_elf_isa_from_note_segment(const uint8_t *elf, size_t note_offset, - size_t note_end) { - while (note_offset <= note_end && - sizeof(Elf64_Nhdr) <= note_end - note_offset) { - const auto *nhdr = reinterpret_cast(elf + note_offset); - size_t desc_off = 0; - size_t next_note = 0; - if (!compute_note_layout(note_offset, note_end, nhdr, &desc_off, - &next_note)) { - break; - } - - constexpr uint32_t NT_AMDGPU_METADATA = 32; - if (nhdr->n_type == NT_AMDGPU_METADATA && nhdr->n_descsz > 0 && - desc_off + nhdr->n_descsz <= note_end) { - const char *desc = reinterpret_cast(elf + desc_off); - std::string result = find_isa_in_metadata(desc, nhdr->n_descsz); - if (!result.empty()) { - return result; - } - } - - note_offset = next_note; - } - return {}; -} - -// Parse ELF PT_NOTE segments to find the AMDGPU ISA name from -// NT_AMDGPU_METADATA (type 32) notes in v3+ code objects. -std::string read_elf_isa_note(const uint8_t *elf, size_t size) { - const Elf64_Ehdr *ehdr = validate_elf64(elf, size); - if (!ehdr || !validate_program_header_table(ehdr, size)) { - return {}; - } - - for (uint16_t i = 0; i < ehdr->e_phnum; ++i) { - size_t hdr_offset = 0; - if (!compute_program_header_offset(ehdr, size, i, &hdr_offset)) { - break; - } - const auto *phdr = - reinterpret_cast(elf + hdr_offset); - if (phdr->p_type != PT_NOTE) { - continue; - } - - size_t note_offset = 0; - size_t note_end = 0; - if (!compute_note_segment_bounds(phdr, size, ¬e_offset, ¬e_end)) { - continue; - } - - std::string result = read_elf_isa_from_note_segment(elf, note_offset, - note_end); - if (!result.empty()) { - return result; - } - } - return {}; -} - hsa_status_t HSA_API hotswap_reader_create_from_memory( const void *code_object, size_t size, hsa_code_object_reader_t *code_object_reader) { @@ -280,6 +149,8 @@ hsa_status_t HSA_API hotswap_reader_create_from_memory( try { stash_bytes(reader.handle, static_cast(code_object), size); + HOTSWAP_LOG("hotswap: reader_create_from_memory handle=%lu size=%zu\n", + static_cast(reader.handle), size); } catch (const std::bad_alloc &) { // Fall back to the original load path without rewrite support. } @@ -318,6 +189,9 @@ hsa_status_t HSA_API hotswap_reader_create_from_file( std::scoped_lock lock(g_reader_map_mutex); g_reader_map[reader.handle] = ReaderEntry{std::move(vec), true, false}; } + HOTSWAP_LOG("hotswap: reader_create_from_file handle=%lu size=%lld\n", + static_cast(reader.handle), + static_cast(file_size)); *code_object_reader = reader; return HSA_STATUS_SUCCESS; } catch (const std::bad_alloc &) { @@ -384,23 +258,26 @@ hsa_status_t try_retarget_and_load(hsa_executable_t executable, hsa_agent_t agen const char *options, hsa_loaded_code_object_t *loaded_code_object, const ByteVec &local_bytes) { - const std::string source_isa = - read_elf_isa_note(local_bytes->data(), local_bytes->size()); + // Source ISA from the code object, target ISA from the running GPU. + const std::string source_isa = rocr::hotswap::GetCodeObjectIsaName( + local_bytes->data(), local_bytes->size()); const std::string target_isa = get_agent_isa_name(agent); - if (source_isa.empty() || target_isa.empty()) { + HOTSWAP_LOG("hotswap: rewrite SKIP empty isa (src='%s' tgt='%s' size=%zu)\n", + source_isa.c_str(), target_isa.c_str(), local_bytes->size()); return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; } - // Route through RetargetCodeObject for unified logging, validation, - // and COMGR interaction. Do NOT skip when source == target: B0-to-A0 - // patching uses the same ISA name on both sides. void *out_elf = nullptr; size_t out_elf_size = 0; const int rc = rocr::hotswap::RetargetCodeObject( local_bytes->data(), local_bytes->size(), source_isa.c_str(), target_isa.c_str(), &out_elf, &out_elf_size); + HOTSWAP_LOG("hotswap: rewrite src=%s tgt=%s in=%zu rc=%d out=%zu changed=%d\n", + source_isa.c_str(), target_isa.c_str(), local_bytes->size(), rc, + out_elf_size, out_elf != local_bytes->data()); + if (rc != 0 || out_elf == local_bytes->data()) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; } @@ -424,6 +301,10 @@ hsa_status_t HSA_API hotswap_load_agent_code_object( try_get_reader_entry(code_object_reader.handle, &local_bytes, &reader_from_file); + HOTSWAP_LOG("hotswap: load_agent_code_object handle=%lu has_bytes=%d\n", + static_cast(code_object_reader.handle), + local_bytes ? 1 : 0); + if (!local_bytes) { return load_original_reader(executable, agent, code_object_reader, options, loaded_code_object, @@ -434,6 +315,8 @@ hsa_status_t HSA_API hotswap_load_agent_code_object( // the original code object unchanged instead of routing through COMGR. const AgentGfxRevision gfx = query_agent_gfx_revision(agent); if (!gate_allows_hotswap(gfx)) { + HOTSWAP_LOG("hotswap: gate BLOCKED (gfx=%s rev=%u valid=%d)\n", + gfx.gfx_target.c_str(), gfx.asic_revision, gfx.revision_valid); return load_original_reader(executable, agent, code_object_reader, options, loaded_code_object, reader_from_file); @@ -523,16 +406,10 @@ void OnUnload() { g_orig_reader_destroy = nullptr; g_orig_load_agent_code_object = nullptr; - { - std::scoped_lock lock(g_reader_map_mutex); - g_reader_map.clear(); - } - - { - std::scoped_lock lock(g_rewritten_elfs_mutex); - g_rewritten_elfs.clear(); - } - + // Intentionally do NOT clear g_reader_map / g_rewritten_elfs here. OnUnload is + // driven from HIP's atexit hsa_shut_down, which runs AFTER this library's + // static destructors have already freed those containers; touching them here + // is a use-after-free. They are freed exactly once, by their static dtors. fprintf(stderr, "hotswap: tool unloaded\n"); } diff --git a/projects/hotswap/tests/fixtures/README.md b/projects/hotswap/tests/fixtures/README.md new file mode 100644 index 00000000000..0756c21e4c8 --- /dev/null +++ b/projects/hotswap/tests/fixtures/README.md @@ -0,0 +1,16 @@ +# HotSwap test fixtures + +## gfx1250_min.hsaco + +A minimal gfx1250 code object (empty kernel) used by `hotswap_test` to exercise +the real parse -> ISA-derivation -> rewrite path. It carries a valid +`NT_AMDGPU_METADATA` note (`amdhsa.target: amdgcn-amd-amdhsa--gfx1250`, +`.gfx1250_revision: B0`), which is what `amd_comgr_get_data_isa_name` reads. + +Regenerate with the ROCm clang: + +```bash +echo 'kernel void k(){}' > k.cl +clang -target amdgcn-amd-amdhsa -mcpu=gfx1250 -mcode-object-version=5 \ + -nogpulib -x cl -cl-std=CL2.0 -O2 k.cl -o gfx1250_min.hsaco +``` diff --git a/projects/hotswap/tests/fixtures/gfx1250_min.hsaco b/projects/hotswap/tests/fixtures/gfx1250_min.hsaco new file mode 100644 index 00000000000..edb84446d9a Binary files /dev/null and b/projects/hotswap/tests/fixtures/gfx1250_min.hsaco differ diff --git a/projects/hotswap/tests/hotswap_test.cpp b/projects/hotswap/tests/hotswap_test.cpp index 32753e2820b..710e8188525 100644 --- a/projects/hotswap/tests/hotswap_test.cpp +++ b/projects/hotswap/tests/hotswap_test.cpp @@ -5,13 +5,17 @@ //===----------------------------------------------------------------------===// #include "hotswap.hpp" +// Generated from tests/fixtures/gfx1250_min.hsaco: kGfx1250MinCo[]. Embedding it +// keeps the test GPU-free and free of any runtime file dependency (CI-portable). +#include "gfx1250_min_hsaco.h" #include #include -#include static int tests_passed = 0; static int tests_failed = 0; +static constexpr const char *kGfx1250Isa = "amdgcn-amd-amdhsa--gfx1250"; + static void check(bool cond, const char *name) { if (cond) { ++tests_passed; @@ -22,35 +26,68 @@ static void check(bool cond, const char *name) { } } -static void test_RetargetPassthrough() { - printf("TEST RetargetPassthrough...\n"); +// A real gfx1250 code object: COMGR parses its ISA name from the metadata note. +static void test_GetIsaNameRealCodeObject() { + printf("TEST GetIsaNameRealCodeObject...\n"); + const std::string isa = + rocr::hotswap::GetCodeObjectIsaName(kGfx1250MinCo, sizeof(kGfx1250MinCo)); + check(isa == kGfx1250Isa, "GetCodeObjectIsaName reads gfx1250 from a real CO"); +} + +// A 16-byte fake ELF has no metadata note: the ISA name cannot be parsed. +static void test_GetIsaNameInvalidCodeObject() { + printf("TEST GetIsaNameInvalidCodeObject...\n"); const unsigned char fake_elf[] = {0x7f, 'E', 'L', 'F', 0x02, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + const std::string isa = + rocr::hotswap::GetCodeObjectIsaName(fake_elf, sizeof(fake_elf)); + check(isa.empty(), "GetCodeObjectIsaName returns empty for an invalid CO"); +} + +// A real gfx1250 CO rewrites successfully and yields a fresh output buffer. +static void test_RetargetRealCodeObject() { + printf("TEST RetargetRealCodeObject...\n"); void *out_data = nullptr; size_t out_size = 0; const int rc = rocr::hotswap::RetargetCodeObject( - fake_elf, sizeof(fake_elf), "amdgcn-amd-amdhsa--gfx1250", - "amdgcn-amd-amdhsa--gfx1250", &out_data, &out_size); - - check(rc == 0, "RetargetCodeObject succeeds"); - check(out_size == sizeof(fake_elf), "output size matches input"); - if (out_data && out_data != fake_elf) { - check(memcmp(out_data, fake_elf, sizeof(fake_elf)) == 0, - "output bytes match input (passthrough)"); + kGfx1250MinCo, sizeof(kGfx1250MinCo), kGfx1250Isa, kGfx1250Isa, &out_data, + &out_size); + + check(rc == 0, "RetargetCodeObject rewrites a real CO"); + check(out_data != nullptr && + out_data != static_cast(kGfx1250MinCo), + "out_data is a fresh allocation"); + check(out_size > 0, "out_size is non-zero"); + if (out_data && out_data != static_cast(kGfx1250MinCo)) { std::free(out_data); - } else { - check(out_data != nullptr && out_data != fake_elf, - "out_data is a new allocation"); } } +// An un-rewritable (fake) CO fails and leaves the original code object intact. +static void test_RetargetInvalidCodeObjectFallsBack() { + printf("TEST RetargetInvalidCodeObjectFallsBack...\n"); + const unsigned char fake_elf[] = {0x7f, 'E', 'L', 'F', 0x02, 0x01, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00}; + void *out_data = nullptr; + size_t out_size = 0; + const int rc = rocr::hotswap::RetargetCodeObject( + fake_elf, sizeof(fake_elf), kGfx1250Isa, kGfx1250Isa, &out_data, + &out_size); + + check(rc != 0, "RetargetCodeObject fails for an un-rewritable CO"); + check(out_data == static_cast(fake_elf), + "out_data still points to original input after failure"); + check(out_size == sizeof(fake_elf), + "out_size still matches original after failure"); +} + static void test_RetargetNullOutputPointers() { printf("TEST RetargetNullOutputPointers...\n"); const unsigned char fake_elf[] = {0x7f, 'E', 'L', 'F'}; const int rc = rocr::hotswap::RetargetCodeObject( - fake_elf, sizeof(fake_elf), "amdgcn-amd-amdhsa--gfx1250", - "amdgcn-amd-amdhsa--gfx1250", nullptr, nullptr); + fake_elf, sizeof(fake_elf), kGfx1250Isa, kGfx1250Isa, nullptr, nullptr); check(rc != 0, "RetargetCodeObject rejects null output pointers"); } @@ -59,66 +96,38 @@ static void test_RetargetNullInputs() { void *out_data = nullptr; size_t out_size = 0; const int rc = rocr::hotswap::RetargetCodeObject( - nullptr, 0, "amdgcn-amd-amdhsa--gfx1250", "amdgcn-amd-amdhsa--gfx1250", - &out_data, &out_size); + nullptr, 0, kGfx1250Isa, kGfx1250Isa, &out_data, &out_size); check(rc != 0, "RetargetCodeObject rejects null elf_data"); } -static void test_RetargetOutputOwnership() { - // Verifies that on success, out_data is a NEW allocation (not the input). - // This catches the bug where out_elf was stashed unconditionally — - // the caller must be able to distinguish "new buffer to free" from - // "original buffer, don't free". - printf("TEST RetargetOutputOwnership...\n"); - const unsigned char fake_elf[] = {0x7f, 'E', 'L', 'F', 0x02, 0x01, 0x01, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00}; - void *out_data = nullptr; - size_t out_size = 0; - const int rc = rocr::hotswap::RetargetCodeObject( - fake_elf, sizeof(fake_elf), "amdgcn-amd-amdhsa--gfx1250", - "amdgcn-amd-amdhsa--gfx1250", &out_data, &out_size); - - check(rc == 0, "RetargetCodeObject succeeds"); - check(out_data != static_cast(fake_elf), - "out_data is NOT the original input (new allocation)"); - check(out_data != nullptr, "out_data is non-null"); - - if (out_data && out_data != fake_elf) { - // Verify we can write to it (proves it's a heap allocation, not const) - memset(out_data, 0xAB, out_size); - std::free(out_data); - check(true, "out_data is writable and freeable"); - } -} - -static void test_RetargetFailureLeavesOriginal() { - // Verifies that on failure, out_data points to the original input - // (not a dangling pointer or null). This catches use-after-free bugs - // where the fallback buffer was freed prematurely. - printf("TEST RetargetFailureLeavesOriginal...\n"); +static void test_RetargetNullSourceOrTarget() { + printf("TEST RetargetNullSourceOrTarget...\n"); const unsigned char fake_elf[] = {0x7f, 'E', 'L', 'F'}; void *out_data = nullptr; size_t out_size = 0; - - // Use an unsupported ISA pair to force failure - const int rc = rocr::hotswap::RetargetCodeObject( - fake_elf, sizeof(fake_elf), "amdgcn-amd-amdhsa--gfx_unsupported", - "amdgcn-amd-amdhsa--gfx_unsupported", &out_data, &out_size); - - check(rc != 0, "RetargetCodeObject fails for unsupported ISA"); - check(out_data == static_cast(fake_elf), - "out_data still points to original input after failure"); - check(out_size == sizeof(fake_elf), - "out_size still matches original after failure"); + const int rc_src = rocr::hotswap::RetargetCodeObject( + fake_elf, sizeof(fake_elf), nullptr, kGfx1250Isa, &out_data, &out_size); + check(rc_src != 0, "RetargetCodeObject rejects null source_isa"); + const int rc_tgt = rocr::hotswap::RetargetCodeObject( + fake_elf, sizeof(fake_elf), kGfx1250Isa, nullptr, &out_data, &out_size); + check(rc_tgt != 0, "RetargetCodeObject rejects null target_isa"); } int main() { - test_RetargetPassthrough(); + // Line-buffer stdout so the tool's stderr diagnostics interleave next to the + // test that triggered them (e.g. under ctest -V) instead of clumping. + setvbuf(stdout, nullptr, _IOLBF, 0); + printf("Note: the negative tests below intentionally trigger tool diagnostics\n" + "on stderr (\"COMGR rewrite failed ... rc=2\", \"invalid null ...\").\n" + "These are EXPECTED; a passing run is the PASS lines + the final count.\n\n"); + + test_GetIsaNameRealCodeObject(); + test_GetIsaNameInvalidCodeObject(); + test_RetargetRealCodeObject(); + test_RetargetInvalidCodeObjectFallsBack(); test_RetargetNullOutputPointers(); test_RetargetNullInputs(); - test_RetargetOutputOwnership(); - test_RetargetFailureLeavesOriginal(); + test_RetargetNullSourceOrTarget(); printf("\n%d passed, %d failed\n", tests_passed, tests_failed); return tests_failed ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index 9bae7f5f7db..0df56a48471 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -2811,12 +2811,12 @@ void Runtime::LoadTools() { getpid(), rocp_reg_status, rocprofiler_register_error_string(rocp_reg_status)); } - // rocprofiler-register (v3) provides tracing only; the comgr hotswap tool must + // rocprofiler-register (v3) provides tracing only; the hotswap tool must // intercept and modify HSA calls (it wraps hsa_code_object_reader_create_from_memory), // which requires the v1 HSA_TOOLS_LIB path. Allow v1 registration specifically for // that first-party tool so it loads via HSA_TOOLS_LIB alone, without re-enabling v1 // for other tools. General v1 behavior is otherwise unchanged. - static constexpr const char* kHotswapToolLib = "libamd_comgr_hotswap_tool.so"; + static constexpr const char* kHotswapToolLib = "libhsa-hotswap.so"; bool allow_v1_registration = flag().tools_lib_names().find(kHotswapToolLib) != std::string::npos; if (os::IsEnvVarSet("HSA_TOOLS_ROCPROFILER_V1_TOOLS")) {