Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion projects/clr/rocclr/device/hotswap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ namespace amd {
namespace hotswap {

// Enabled when this tool is loaded via HSA_TOOLS_LIB (the name must match ROCR's LoadTools).
inline constexpr const char* kHotswapToolLib = "libamd_comgr_hotswap_tool.so";
inline constexpr const char* kHotswapToolLib = "libhsa-hotswap.so";

inline bool Enabled() {
const char* tools_lib = std::getenv("HSA_TOOLS_LIB");
Expand Down
25 changes: 24 additions & 1 deletion projects/hotswap/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
cmake_minimum_required(VERSION 3.16)
project(hotswap LANGUAGES CXX)

include(GNUInstallDirs)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

Expand All @@ -10,6 +12,7 @@ set(HSA_RUNTIME_INC "${CMAKE_CURRENT_SOURCE_DIR}/../rocr-runtime/runtime/hsa-run

# COMGR is required — provides amd_comgr_hotswap_rewrite.
find_package(amd_comgr CONFIG REQUIRED)
find_package(hsa-runtime64 CONFIG REQUIRED)

find_path(HSA_INCLUDE_DIR hsa.h PATHS ${HSA_RUNTIME_INC} NO_DEFAULT_PATH REQUIRED)

Expand All @@ -36,15 +39,35 @@ target_include_directories(hsa-hotswap PRIVATE
${HSA_RUNTIME_INC}/..
)

target_link_libraries(hsa-hotswap PRIVATE amd_comgr)
target_link_libraries(hsa-hotswap PRIVATE
amd_comgr
hsa-runtime64::hsa-runtime64
Comment thread
nirmie marked this conversation as resolved.
)
set_target_properties(hsa-hotswap PROPERTIES
POSITION_INDEPENDENT_CODE ON)

install(TARGETS hsa-hotswap
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})

# Test executable
add_executable(hotswap_test tests/hotswap_test.cpp hotswap.cpp)
target_include_directories(hotswap_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(hotswap_test PRIVATE amd_comgr)

# Embed the gfx1250 fixture code object as a byte array so the test exercises the
# real parse + rewrite path with no GPU and no runtime file dependency (works in
# any CI). Regenerate the .hsaco per tests/fixtures/README.md.
#
# The header is written while CMake configures, not while the project builds.
# Registering the fixture as a configure dependency makes the build system
# re-run CMake (and therefore rewrite the header) whenever the .hsaco is
# regenerated; without it the test would keep compiling the stale byte array.
set(_hotswap_co "${CMAKE_CURRENT_SOURCE_DIR}/tests/fixtures/gfx1250_min.hsaco")
set(_hotswap_co_hdr "${CMAKE_CURRENT_BINARY_DIR}/gfx1250_min_hsaco.h")
set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${_hotswap_co}")
# Read the file as one long hex string, then turn every two hex digits into a
# "0xNN," array element.
file(READ "${_hotswap_co}" _hotswap_co_hex HEX)
string(REGEX REPLACE "(..)" "0x\\1," _hotswap_co_arr "${_hotswap_co_hex}")
file(WRITE "${_hotswap_co_hdr}"
"// Generated from tests/fixtures/gfx1250_min.hsaco. Do not edit.\n"
"static const unsigned char kGfx1250MinCo[] = {${_hotswap_co_arr}};\n")
# The generated header lives in the build tree, so expose that directory.
target_include_directories(hotswap_test PRIVATE ${CMAKE_CURRENT_BINARY_DIR})

# Unit tests for the gfx-target / ASIC-revision query logic. The test compiles
# the portable hotswap_gfx_query.cpp unit alongside the test translation unit
# and supplies its own stubs for the HSA entry points, so this target needs
Expand Down
40 changes: 38 additions & 2 deletions projects/hotswap/hotswap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,51 @@
//===----------------------------------------------------------------------===//

#include "hotswap.hpp"
#include <amd_comgr.h>
#include "amd_comgr/amd_comgr.h"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <string>

namespace rocr::hotswap {

std::string GetCodeObjectIsaName(const void *elf_data, size_t elf_size) {
if (!elf_data || elf_size == 0) {
return {};
}

amd_comgr_data_t data = {0};
if (amd_comgr_create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &data) !=
AMD_COMGR_STATUS_SUCCESS) {
return {};
}

std::string isa;
if (amd_comgr_set_data(data, elf_size,
static_cast<const char *>(elf_data)) ==
AMD_COMGR_STATUS_SUCCESS) {
size_t isa_len = 0;
if (amd_comgr_get_data_isa_name(data, &isa_len, nullptr) ==
AMD_COMGR_STATUS_SUCCESS &&
isa_len > 0) {
isa.resize(isa_len);
if (amd_comgr_get_data_isa_name(data, &isa_len, isa.data()) ==
AMD_COMGR_STATUS_SUCCESS) {
// Reported size includes the terminating NUL.
if (!isa.empty() && isa.back() == '\0') {
isa.pop_back();
}
} else {
isa.clear();
}
}
}

amd_comgr_release_data(data);
return isa;
}

int RetargetCodeObject(const void *elf_data, size_t elf_size,
const char *source_isa, const char *target_isa,
void **out_data, size_t *out_size) {
Comment thread
harsh-amd marked this conversation as resolved.
Expand Down Expand Up @@ -47,7 +84,6 @@ int RetargetCodeObject(const void *elf_data, size_t elf_size,
return static_cast<int>(status);
}

// Call the hotswap rewrite API.
amd_comgr_data_t output = {0};
status = amd_comgr_hotswap_rewrite(input, source_isa, target_isa, &output);
amd_comgr_release_data(input);
Expand Down
19 changes: 15 additions & 4 deletions projects/hotswap/hotswap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,25 @@
#define ROCR_HOTSWAP_HPP

#include <cstddef>
#include <string>

namespace rocr::hotswap {

/// Rewrite a code object from source_isa to target_isa via COMGR.
/// Read a code object's own ISA name via COMGR (amd_comgr_get_data_isa_name).
///
/// Called by the hotswap tools lib when the code object's ISA differs from
/// the agent's ISA, or when stepping patches are needed (e.g., B0-to-A0).
/// Delegates to COMGR's amd_comgr_hotswap_rewrite (linked directly).
/// Uses COMGR's LLVM-canonical parser, so it tracks triple normalization
/// without hand-rolled metadata parsing. Returns an empty string on failure.
std::string GetCodeObjectIsaName(const void *elf_data, size_t elf_size);

/// Retarget a code object from source_isa to target_isa via COMGR.
///
/// Both ISA names are supplied by the caller: source_isa typically comes from
/// the code object (see GetCodeObjectIsaName) and target_isa from the running
/// GPU (e.g. the HSA agent), but either may be overridden. COMGR's
/// amd_comgr_hotswap_rewrite (linked directly) applies whatever transformation
/// the source/target pair calls for -- same-ISA stepping patches (e.g. gfx1250
/// B0 to A0) or cross-family transpilation -- and returns the rewritten code
/// object. If no transformation is needed, the output is a copy of the input.
///
/// On success, *out_data and *out_size describe the rewritten code object.
/// If *out_data differs from elf_data, it was allocated by this function
Expand Down
167 changes: 3 additions & 164 deletions projects/hotswap/hotswap_tool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,11 @@
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <elf.h>
#include <hsa.h>
#include <hsa_api_trace.h>
#include <hsa_ext_amd.h>
#include <memory>
#include <mutex>
#include <limits>
#include <string>
#include <unordered_map>
#include <vector>
Expand Down Expand Up @@ -83,22 +80,6 @@ void stash_bytes(uint64_t handle, const uint8_t *data, size_t size) {
g_reader_map[handle] = ReaderEntry{std::move(vec), false, false};
}

// Overflow-checked size_t addition.
//
// Writes lhs + rhs to *out and returns true when the sum fits in size_t.
// Returns false and leaves *out untouched when the sum would wrap.
bool checked_add(size_t lhs, size_t rhs, size_t *out) {
  // Largest lhs that can still be added to rhs without wrapping.
  const size_t headroom = std::numeric_limits<size_t>::max() - rhs;
  const bool fits = lhs <= headroom;
  if (fits) {
    *out = lhs + rhs;
  }
  return fits;
}

// Overflow-checked size_t multiplication.
//
// Writes lhs * rhs to *out and returns true when the product fits in size_t.
// Returns false and leaves *out untouched when the product would wrap.
bool checked_mul(size_t lhs, size_t rhs, size_t *out) {
  // A zero lhs can never overflow; testing it first also keeps the division
  // below from dividing by zero.
  const bool overflows =
      (lhs == 0) ? false : (std::numeric_limits<size_t>::max() / lhs < rhs);
  if (overflows) {
    return false;
  }
  *out = lhs * rhs;
  return true;
}

bool try_get_reader_entry(uint64_t handle, ByteVec *bytes, bool *from_file) {
std::scoped_lock lock(g_reader_map_mutex);
const auto it = g_reader_map.find(handle);
Expand Down Expand Up @@ -129,145 +110,6 @@ void mark_reader_keepalive(uint64_t handle) {
}
}

// Check that a buffer starts with a 64-bit ELF header.
//
// Returns the buffer viewed as an Elf64_Ehdr when it holds at least
// sizeof(Elf64_Ehdr) bytes, begins with the ELF magic, and declares
// ELFCLASS64. Returns nullptr otherwise.
const Elf64_Ehdr *validate_elf64(const uint8_t *elf, size_t size) {
  if (size < sizeof(Elf64_Ehdr)) {
    return nullptr;
  }
  const auto *header = reinterpret_cast<const Elf64_Ehdr *>(elf);
  const bool magic_ok = memcmp(header->e_ident, ELFMAG, SELFMAG) == 0;
  // The class byte is only inspected once the magic has matched.
  const bool is_elf64 = magic_ok && header->e_ident[EI_CLASS] == ELFCLASS64;
  return is_elf64 ? header : nullptr;
}

// Sanity-check the program header table fields of an ELF64 header.
//
// Returns true only when the table offset is non-zero and not past `size`,
// there is at least one entry, and each entry is at least sizeof(Elf64_Phdr).
bool validate_program_header_table(const Elf64_Ehdr *ehdr, size_t size) {
  if (ehdr->e_phoff == 0 || ehdr->e_phoff > size) {
    return false;
  }
  if (ehdr->e_phnum == 0) {
    return false;
  }
  return ehdr->e_phentsize >= sizeof(Elf64_Phdr);
}

// Locate program header number `index` inside a buffer of `size` bytes.
//
// On success *hdr_offset is e_phoff + index * e_phentsize and a whole
// Elf64_Phdr fits between that offset and the end of the buffer. Returns
// false when the arithmetic overflows or the header would not fit.
bool compute_program_header_offset(const Elf64_Ehdr *ehdr, size_t size,
                                   uint16_t index, size_t *hdr_offset) {
  size_t table_relative = 0;
  if (!checked_mul(static_cast<size_t>(index),
                   static_cast<size_t>(ehdr->e_phentsize),
                   &table_relative)) {
    return false;
  }
  if (!checked_add(ehdr->e_phoff, table_relative, hdr_offset)) {
    return false;
  }
  if (*hdr_offset > size) {
    return false;
  }
  // Written as a subtraction so the comparison itself cannot overflow.
  return sizeof(Elf64_Phdr) <= size - *hdr_offset;
}

// Work out the [*note_offset, *note_end) byte range of a segment.
//
// *note_offset is always set to p_offset. Returns true, with *note_end set to
// p_offset + p_filesz, only when that whole range lies inside `size` bytes.
bool compute_note_segment_bounds(const Elf64_Phdr *phdr, size_t size,
                                 size_t *note_offset, size_t *note_end) {
  const size_t start = phdr->p_offset;
  *note_offset = start;
  if (start > size) {
    return false;
  }
  // Compare against the remaining bytes instead of summing first.
  if (phdr->p_filesz > size - start) {
    return false;
  }
  return checked_add(start, phdr->p_filesz, note_end);
}

// Compute where a note's descriptor starts and where the next note begins.
//
// The name and descriptor sizes are each rounded up to a multiple of 4.
// On success *desc_off is note_offset + sizeof(Elf64_Nhdr) + padded name size
// and *next_note is *desc_off + padded descriptor size. Returns false when any
// addition overflows or *next_note would lie past note_end.
bool compute_note_layout(size_t note_offset, size_t note_end,
                         const Elf64_Nhdr *nhdr, size_t *desc_off,
                         size_t *next_note) {
  size_t padded_name = 0;
  size_t padded_desc = 0;
  const bool sizes_ok =
      checked_add(static_cast<size_t>(nhdr->n_namesz), 3, &padded_name) &&
      checked_add(static_cast<size_t>(nhdr->n_descsz), 3, &padded_desc);
  if (!sizes_ok) {
    return false;
  }

  // Adding 3 and clearing the low two bits rounds up to a multiple of 4.
  constexpr size_t kAlignMask = ~size_t{3};
  padded_name &= kAlignMask;
  padded_desc &= kAlignMask;

  if (!checked_add(note_offset, sizeof(Elf64_Nhdr), desc_off)) {
    return false;
  }
  if (!checked_add(*desc_off, padded_name, desc_off)) {
    return false;
  }
  if (!checked_add(*desc_off, padded_desc, next_note)) {
    return false;
  }
  return *next_note <= note_end;
}

// Scan one note descriptor for an "amdgcn-amd-amdhsa--..." ISA triple.
//
// Returns the text from the first occurrence of the prefix up to (but not
// including) the next NUL, newline, single quote, double quote or space, or
// up to the end of the descriptor. Returns an empty string when the prefix
// does not occur.
std::string find_isa_in_metadata(const char *desc, size_t desc_size) {
  const char triple_prefix[] = "amdgcn-amd-amdhsa--";
  const size_t triple_prefix_len = sizeof(triple_prefix) - 1;

  // Characters that end the ISA name inside the metadata text.
  const auto ends_name = [](char c) {
    return c == '\0' || c == '\n' || c == '\'' || c == '"' || c == ' ';
  };

  size_t start = 0;
  while (start + triple_prefix_len <= desc_size) {
    if (memcmp(desc + start, triple_prefix, triple_prefix_len) != 0) {
      ++start;
      continue;
    }
    size_t stop = start;
    while (stop < desc_size && !ends_name(desc[stop])) {
      ++stop;
    }
    return std::string(desc + start, stop - start);
  }
  return {};
}

// Walk the notes in [note_offset, note_end) of `elf` looking for an ISA name.
//
// Each note of type 32 (NT_AMDGPU_METADATA) with a non-empty descriptor is
// searched with find_isa_in_metadata; the first non-empty hit is returned.
// Returns an empty string when no note yields a name or when a note's layout
// cannot be computed within the segment.
std::string read_elf_isa_from_note_segment(const uint8_t *elf, size_t note_offset,
                                           size_t note_end) {
  constexpr uint32_t kNoteTypeAmdgpuMetadata = 32;

  for (size_t cursor = note_offset;
       cursor <= note_end && note_end - cursor >= sizeof(Elf64_Nhdr);) {
    const auto *note = reinterpret_cast<const Elf64_Nhdr *>(elf + cursor);

    size_t payload_at = 0;
    size_t following = 0;
    if (!compute_note_layout(cursor, note_end, note, &payload_at,
                             &following)) {
      // A malformed note ends the walk; nothing after it can be trusted.
      return {};
    }

    const bool is_metadata = note->n_type == kNoteTypeAmdgpuMetadata &&
                             note->n_descsz > 0 &&
                             payload_at + note->n_descsz <= note_end;
    if (is_metadata) {
      std::string isa = find_isa_in_metadata(
          reinterpret_cast<const char *>(elf + payload_at), note->n_descsz);
      if (!isa.empty()) {
        return isa;
      }
    }

    cursor = following;
  }
  return {};
}

// Find the AMDGPU ISA name stored in an ELF64 image's PT_NOTE segments.
//
// Validates the ELF header and program header table, then hands every PT_NOTE
// segment whose bounds fit inside the image to read_elf_isa_from_note_segment.
// Returns the first non-empty name, or an empty string when validation fails
// or no segment yields one.
std::string read_elf_isa_note(const uint8_t *elf, size_t size) {
  const Elf64_Ehdr *header = validate_elf64(elf, size);
  if (header == nullptr) {
    return {};
  }
  if (!validate_program_header_table(header, size)) {
    return {};
  }

  for (uint16_t seg = 0; seg < header->e_phnum; ++seg) {
    size_t phdr_at = 0;
    if (!compute_program_header_offset(header, size, seg, &phdr_at)) {
      // Later entries sit at higher offsets, so stop at the first bad one.
      break;
    }
    const auto *segment = reinterpret_cast<const Elf64_Phdr *>(elf + phdr_at);

    size_t notes_begin = 0;
    size_t notes_end = 0;
    const bool usable =
        segment->p_type == PT_NOTE &&
        compute_note_segment_bounds(segment, size, &notes_begin, &notes_end);
    if (usable) {
      std::string isa =
          read_elf_isa_from_note_segment(elf, notes_begin, notes_end);
      if (!isa.empty()) {
        return isa;
      }
    }
  }
  return {};
}

hsa_status_t HSA_API hotswap_reader_create_from_memory(
const void *code_object, size_t size,
hsa_code_object_reader_t *code_object_reader) {
Expand Down Expand Up @@ -384,17 +226,14 @@ hsa_status_t try_retarget_and_load(hsa_executable_t executable, hsa_agent_t agen
const char *options,
hsa_loaded_code_object_t *loaded_code_object,
const ByteVec &local_bytes) {
const std::string source_isa =
read_elf_isa_note(local_bytes->data(), local_bytes->size());
// Source ISA from the code object, target ISA from the running GPU.
const std::string source_isa = rocr::hotswap::GetCodeObjectIsaName(
local_bytes->data(), local_bytes->size());
const std::string target_isa = get_agent_isa_name(agent);

if (source_isa.empty() || target_isa.empty()) {
return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
}

// Route through RetargetCodeObject for unified logging, validation,
// and COMGR interaction. Do NOT skip when source == target: B0-to-A0
// patching uses the same ISA name on both sides.
void *out_elf = nullptr;
size_t out_elf_size = 0;
const int rc = rocr::hotswap::RetargetCodeObject(
Expand Down
16 changes: 16 additions & 0 deletions projects/hotswap/tests/fixtures/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# HotSwap test fixtures

## gfx1250_min.hsaco

A minimal gfx1250 code object (empty kernel) used by `hotswap_test` to exercise
the real parse -> ISA-derivation -> rewrite path. It carries a valid
`NT_AMDGPU_METADATA` note (`amdhsa.target: amdgcn-amd-amdhsa--gfx1250`,
`.gfx1250_revision: B0`), which is what `amd_comgr_get_data_isa_name` reads.

Regenerate with the ROCm clang:

```bash
echo 'kernel void k(){}' > k.cl
clang -target amdgcn-amd-amdhsa -mcpu=gfx1250 -mcode-object-version=5 \
-nogpulib -x cl -cl-std=CL2.0 -O2 k.cl -o gfx1250_min.hsaco
```
Binary file added projects/hotswap/tests/fixtures/gfx1250_min.hsaco
Comment thread
nirmie marked this conversation as resolved.
Binary file not shown.
Loading
Loading