Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ const char* kGfx1250IsaWithFeatures =
"amdgcn-amd-amdhsa--gfx1250:sramecc+:xnack-";
const char* kGfx942Isa = "amdgcn-amd-amdhsa--gfx942";
const char* kGfx1251Isa = "amdgcn-amd-amdhsa--gfx1251";
const char* kGfx12_5GenericIsaWithFeatures =
"amdgcn-amd-amdhsa--gfx12-5-generic:sramecc+";

void ResetTestEnv() {
g_fake_hsa_env = FakeHsaEnv{};
Expand Down Expand Up @@ -96,7 +98,9 @@ hsa_status_t hsa_agent_get_info(hsa_agent_t /*agent*/,
namespace {

using rocr::hotswap::AgentGfxRevision;
using rocr::hotswap::ExtractGfxTarget;
using rocr::hotswap::GetAgentGfxRevision;
using rocr::hotswap::IsGfx12_5Target;
using rocr::hotswap::IsHotswapSupportedGfxRevision;

TEST(HotswapGfxQuery, Gfx1250A0Passes) {
Expand Down Expand Up @@ -145,6 +149,30 @@ TEST(HotswapGfxQuery, NearMissTargetBlocks) {
EXPECT_FALSE(IsHotswapSupportedGfxRevision(revision));
}

// An ISA name that carries a feature suffix (":sramecc+") must still yield the
// bare "gfx12-5-generic" target, and that target must not be reported as a
// supported hotswap revision.
TEST(HotswapGfxQuery, Gfx12_5GenericFeatureSuffixParsed) {
  ResetTestEnv();
  g_fake_hsa_env.asic_revision = 0;
  g_fake_hsa_env.isa_name = kGfx12_5GenericIsaWithFeatures;

  const auto parsed = GetAgentGfxRevision(MakeFreshAgent());

  EXPECT_EQ(parsed.gfx_target, "gfx12-5-generic");
  EXPECT_FALSE(IsHotswapSupportedGfxRevision(parsed));
}

// IsGfx12_5Target must accept exactly the listed gfx12.5 names and reject
// near misses (a truncated name, a name with trailing junk) as well as an
// unrelated target.
TEST(HotswapGfxQuery, Gfx12_5TargetPredicateIsStrict) {
  for (const char* accepted : {"gfx1250", "gfx1251", "gfx12-5-generic"}) {
    // Stream the input so a failure identifies which target misbehaved.
    EXPECT_TRUE(IsGfx12_5Target(accepted)) << accepted;
  }

  for (const char* rejected : {"gfx125", "gfx125foo", "gfx942"}) {
    EXPECT_FALSE(IsGfx12_5Target(rejected)) << rejected;
  }
}

// An underscore is not part of a gfx target name: extraction must stop at it
// and return only the leading "gfx125".
TEST(HotswapGfxQuery, ExtractGfxTargetStopsBeforeUnderscore) {
  const std::string isa_with_underscore = "amdgcn-amd-amdhsa--gfx125_foo";

  const std::string extracted = ExtractGfxTarget(isa_with_underscore);

  EXPECT_EQ(extracted, "gfx125");
}

TEST(HotswapGfxQuery, Gfx1250NonA0Blocks) {
ResetTestEnv();
g_fake_hsa_env.isa_name = kGfx1250Isa;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@
namespace {

constexpr const char* kGfx1250Isa = "amdgcn-amd-amdhsa--gfx1250";
constexpr const char* kGfx1251Isa = "amdgcn-amd-amdhsa--gfx1251";
constexpr const char* kGfx12_5GenericIsa =
"amdgcn-amd-amdhsa--gfx12-5-generic";
constexpr const char* kGfx942Isa = "amdgcn-amd-amdhsa--gfx942";
constexpr const char* kGfx1250B0Isa =
"amdgcn-amd-amdhsa--gfx1250:gfx1250-b0-specific+";
constexpr const char* kGfx1250A0Isa =
"amdgcn-amd-amdhsa--gfx1250:gfx1250-b0-specific-";

struct FakeHsaEnv {
std::string isa_name = kGfx1250Isa;
Expand Down Expand Up @@ -210,6 +218,68 @@ rocr::hotswap::CodeObjectView MakeRealCodeObjectView() {
return code_object;
}

// Test helper: builds an AgentGfxRevision from explicit field values.
//
// gfx_target         bare gfx target name, e.g. "gfx1250".
// asic_revision      value stored in AgentGfxRevision::asic_revision.
// has_asic_revision  value stored in AgentGfxRevision::has_asic_revision;
//                    defaults to true.
rocr::hotswap::AgentGfxRevision MakeRevision(const std::string& gfx_target,
                                             uint32_t asic_revision,
                                             bool has_asic_revision = true) {
  // Fields are assigned by name rather than by aggregate position so the
  // helper does not depend on the struct's member order.
  rocr::hotswap::AgentGfxRevision result;
  result.has_asic_revision = has_asic_revision;
  result.asic_revision = asic_revision;
  result.gfx_target = gfx_target;
  return result;
}

// With default options, a gfx1250 agent at ASIC revision 0 still gets a
// rewrite decision: the source ISA gains the "b0-specific+" feature and the
// target ISA gains the "b0-specific-" feature.
TEST(HotswapRewriteDecision, A0RetargetsWithoutEntryTrampolineFlag) {
  const auto a0_agent = MakeRevision("gfx1250", 0);
  const rocr::hotswap::RewriteOptions default_options{};

  const auto rewrite = rocr::hotswap::DecideHotswapRewriteForTesting(
      a0_agent, kGfx1250Isa, kGfx1250Isa, default_options);

  ASSERT_TRUE(rewrite.has_value());
  EXPECT_EQ(rewrite->source_isa, kGfx1250B0Isa);
  EXPECT_EQ(rewrite->target_isa, kGfx1250A0Isa);
}

// A gfx1250 agent at ASIC revision 1 gets no decision by default. With the
// opt-in flag set it gets one whose source and target both carry the
// "b0-specific+" feature.
TEST(HotswapRewriteDecision, NonA0Gfx1250NeedsEntryTrampolineFlag) {
  const auto non_a0_agent = MakeRevision("gfx1250", 1);
  const rocr::hotswap::RewriteOptions flag_off{};
  const rocr::hotswap::RewriteOptions flag_on{true};

  const auto without_opt_in = rocr::hotswap::DecideHotswapRewriteForTesting(
      non_a0_agent, kGfx1250Isa, kGfx1250Isa, flag_off);
  const auto with_opt_in = rocr::hotswap::DecideHotswapRewriteForTesting(
      non_a0_agent, kGfx1250Isa, kGfx1250Isa, flag_on);

  EXPECT_FALSE(without_opt_in.has_value());

  ASSERT_TRUE(with_opt_in.has_value());
  EXPECT_EQ(with_opt_in->source_isa, kGfx1250B0Isa);
  EXPECT_EQ(with_opt_in->target_isa, kGfx1250B0Isa);
}

// With the opt-in flag set, both a concrete gfx1251 agent and a
// gfx12-5-generic agent get a decision whose source and target ISA equal the
// unmodified input ISA (no stepping feature is appended for these targets).
TEST(HotswapRewriteDecision, EntryTrampolineFlagRoutesGfx12_5Family) {
  const rocr::hotswap::RewriteOptions opt_in{true};

  const auto gfx1251_result = rocr::hotswap::DecideHotswapRewriteForTesting(
      MakeRevision("gfx1251", 1), kGfx1251Isa, kGfx1251Isa, opt_in);
  const auto generic_result = rocr::hotswap::DecideHotswapRewriteForTesting(
      MakeRevision("gfx12-5-generic", 1), kGfx12_5GenericIsa,
      kGfx12_5GenericIsa, opt_in);

  // Concrete gfx1251 target.
  ASSERT_TRUE(gfx1251_result.has_value());
  EXPECT_EQ(gfx1251_result->source_isa, kGfx1251Isa);
  EXPECT_EQ(gfx1251_result->target_isa, kGfx1251Isa);

  // Generic gfx12.5 target.
  ASSERT_TRUE(generic_result.has_value());
  EXPECT_EQ(generic_result->source_isa, kGfx12_5GenericIsa);
  EXPECT_EQ(generic_result->target_isa, kGfx12_5GenericIsa);
}

// When a generic code object is loaded on a concrete gfx1251 agent with the
// opt-in flag set, the decision's target is the *source* ISA (generic), not
// the agent's own ISA.
TEST(HotswapRewriteDecision, EntryTrampolineFlagUsesGenericSourceAsTarget) {
  const auto gfx1251_agent = MakeRevision("gfx1251", 1);
  const rocr::hotswap::RewriteOptions opt_in{true};

  const auto rewrite = rocr::hotswap::DecideHotswapRewriteForTesting(
      gfx1251_agent, kGfx12_5GenericIsa, kGfx1251Isa, opt_in);

  ASSERT_TRUE(rewrite.has_value());
  EXPECT_EQ(rewrite->source_isa, kGfx12_5GenericIsa);
  EXPECT_EQ(rewrite->target_isa, kGfx12_5GenericIsa);
}

// The opt-in flag must not enable rewriting for a target outside the gfx12.5
// family: a gfx942 agent yields no decision.
TEST(HotswapRewriteDecision, EntryTrampolineFlagBlocksNonGfx12_5) {
  const auto gfx942_agent = MakeRevision("gfx942", 0);
  const rocr::hotswap::RewriteOptions opt_in{true};

  const auto rewrite = rocr::hotswap::DecideHotswapRewriteForTesting(
      gfx942_agent, kGfx942Isa, kGfx942Isa, opt_in);

  EXPECT_FALSE(rewrite.has_value());
}

TEST(HotswapRewrite, GetIsaNameRealCodeObject) {
const std::string isa =
rocr::hotswap::GetCodeObjectIsaName(kGfx1250MinCo, sizeof(kGfx1250MinCo));
Expand Down Expand Up @@ -321,7 +391,7 @@ TEST(HotswapRewrite, RuntimeLoadUsesRewrittenCodeObject) {
rocr::hotswap::RetainedRewrittenElfBufferCountForTesting(executable), 0u);
}

TEST(HotswapRewrite, RuntimeLoadNonA0FallsBackToOriginal) {
TEST(HotswapRewrite, RuntimeLoadNonA0FallsBackWithoutEntryTrampolineFlag) {
ResetRuntimeTestEnv();
g_fake_hsa_env.asic_revision = 1;
LoadRecorder load;
Expand Down
6 changes: 6 additions & 0 deletions projects/rocr-runtime/runtime/docs/data/env_variables.rst
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,12 @@
- | 0, ``false``, ``off``, ``no``, ``n``, or ``f``: Disable HotSwap diagnostic logging.
| Any other non-empty value: Enable HotSwap diagnostic logging.
* - | ``AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES``
| Enables opt-in COMGR entry-trampoline HotSwap rewriting for gfx12.5 targets.
- ``0``
- | 0, empty, ``false``, ``off``, ``no``, ``n``, or ``f``: Disable entry-trampoline rewrites.
| Any other non-empty value: Enable entry-trampoline rewrites for gfx125* and ``gfx12-5-generic`` targets.
* - | ``HSA_ENABLE_DXG_DETECTION``
| Controls detection of the DXG driver (/dev/dxg) on WSL2.
- ``1``
Expand Down
15 changes: 15 additions & 0 deletions projects/rocr-runtime/runtime/hsa-runtime/core/inc/hotswap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include <cstddef>
#include <cstdlib>
#include <memory>
#include <optional>
#include <string>

#include "core/inc/amd_hsa_loader.hpp"
Expand All @@ -54,6 +55,8 @@
namespace rocr {
namespace hotswap {

struct AgentGfxRevision;

using OwnedElfBuffer = std::unique_ptr<void, decltype(&std::free)>;

struct CodeObjectView {
Expand All @@ -62,6 +65,15 @@ struct CodeObjectView {
std::string uri;
};

// Caller-supplied switches that influence whether a hotswap rewrite is chosen.
struct RewriteOptions {
  // True when the opt-in rewrite for gfx12.5 targets has been requested.
  // Off by default.
  bool gfx12_5_rewrite_requested{false};
};

// Outcome of the rewrite decision: the pair of ISA names handed to the
// retargeting step.
struct RewriteDecision {
  // ISA name to treat the incoming code object as.
  std::string source_isa{};
  // ISA name the code object is rewritten for.
  std::string target_isa{};
};

using LoadOriginalCodeObjectFn = hsa_status_t (*)(
void* context, hsa_agent_t agent, hsa_code_object_t code_object,
const char* options, const std::string& uri,
Expand Down Expand Up @@ -103,6 +115,9 @@ void RetainRewrittenElfBuffer(hsa_executable_t executable,
void ReleaseRetainedRewrittenElfBuffers(hsa_executable_t executable);

#ifdef ROCR_HOTSWAP_TESTING
std::optional<RewriteDecision> DecideHotswapRewriteForTesting(
const AgentGfxRevision& gfx, const std::string& source_isa,
const std::string& target_isa, const RewriteOptions& options);
size_t RetainedRewrittenElfBufferCountForTesting(hsa_executable_t executable);
#endif

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ struct AgentGfxRevision {

std::string GetAgentIsaName(hsa_agent_t agent);
std::string ExtractGfxTarget(const std::string& isa_name);
bool IsGfx12_5Target(const std::string& gfx_target);
AgentGfxRevision GetAgentGfxRevision(hsa_agent_t agent);
void ResetAgentGfxRevisionCache();

Expand Down
108 changes: 95 additions & 13 deletions projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hotswap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,15 @@ namespace {
std::mutex g_retained_rewritten_elf_buffers_mutex;
std::unordered_map<uint64_t, std::vector<OwnedElfBuffer>> g_retained_rewritten_elf_buffers;

// Bare gfx target name that receives stepping-feature handling.
constexpr char kGfx1250[] = "gfx1250";
// ISA-name feature suffix with the "gfx1250-b0-specific" feature switched on.
constexpr char kGfx1250B0Feature[] = ":gfx1250-b0-specific+";
// ISA-name feature suffix with the "gfx1250-b0-specific" feature switched off.
constexpr char kGfx1250A0Feature[] = ":gfx1250-b0-specific-";

// Selects which of the two stepping feature suffixes above to apply.
enum class Gfx1250Stepping {
  kB0 = 0,  // maps to kGfx1250B0Feature
  kA0 = 1,  // maps to kGfx1250A0Feature
};

bool IsEnvFlagEnabled(const char* name) {
if (!os::IsEnvVarSet(name)) {
return false;
Expand All @@ -87,6 +96,10 @@ bool IsEnvFlagEnabled(const char* name) {

// Returns true when the HSA_HOTSWAP_DISABLE environment flag is enabled.
bool IsHotswapDisabledByEnv() {
  const bool disabled = IsEnvFlagEnabled("HSA_HOTSWAP_DISABLE");
  return disabled;
}

// Returns true when the AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES environment flag
// is enabled. The flag is queried on every call; the result is not cached
// here.
bool IsGfx12_5RewriteRequested() {
  const bool requested = IsEnvFlagEnabled("AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES");
  return requested;
}

bool IsVerboseLoggingEnabled() {
static const bool verbose = IsEnvFlagEnabled("HSA_HOTSWAP_VERBOSE");
return verbose;
Expand Down Expand Up @@ -227,6 +240,59 @@ ComgrApi* GetComgrApi() {
return ready ? &api : nullptr;
}

// Maps a stepping selector to its ISA-name feature suffix: kB0 yields
// kGfx1250B0Feature, any other value yields kGfx1250A0Feature.
const char* Gfx1250SteppingFeature(Gfx1250Stepping stepping) {
  if (stepping == Gfx1250Stepping::kB0) {
    return kGfx1250B0Feature;
  }
  return kGfx1250A0Feature;
}

std::string WithGfx1250SteppingFeature(const std::string& isa_name,
Gfx1250Stepping stepping) {
if (ExtractGfxTarget(isa_name) != kGfx1250 ||
isa_name.find(kGfx1250B0Feature) != std::string::npos ||
isa_name.find(kGfx1250A0Feature) != std::string::npos) {
return isa_name;
}
return isa_name + Gfx1250SteppingFeature(stepping);
}

// Cheap pre-check on the agent alone (no code object needed): returns true
// when the agent passes IsHotswapSupportedGfxRevision, or when the gfx12.5
// rewrite was requested in `options` and the agent's gfx target is a gfx12.5
// target.
bool HasCandidateHotswapRewrite(const AgentGfxRevision& gfx,
                                const RewriteOptions& options) {
  if (IsHotswapSupportedGfxRevision(gfx)) {
    return true;
  }
  if (!options.gfx12_5_rewrite_requested) {
    return false;
  }
  return IsGfx12_5Target(gfx.gfx_target);
}

std::optional<RewriteDecision> DecideHotswapRewrite(
const AgentGfxRevision& gfx, const std::string& source_isa,
const std::string& target_isa, const RewriteOptions& options) {
if (source_isa.empty() || target_isa.empty()) {
return std::nullopt;
}

const std::string source_gfx = ExtractGfxTarget(source_isa);
const std::string target_gfx = ExtractGfxTarget(target_isa);
if (IsHotswapSupportedGfxRevision(gfx) && source_gfx == kGfx1250 &&
target_gfx == kGfx1250) {
return RewriteDecision{
WithGfx1250SteppingFeature(source_isa, Gfx1250Stepping::kB0),
WithGfx1250SteppingFeature(target_isa, Gfx1250Stepping::kA0)};
}

if (!options.gfx12_5_rewrite_requested ||
!IsGfx12_5Target(gfx.gfx_target) || !IsGfx12_5Target(source_gfx)) {
return std::nullopt;
}

RewriteDecision decision{source_isa, source_isa};
if (source_gfx == kGfx1250) {
decision.source_isa =
WithGfx1250SteppingFeature(source_isa, Gfx1250Stepping::kB0);
decision.target_isa =
WithGfx1250SteppingFeature(source_isa, Gfx1250Stepping::kB0);
}
return decision;
}

} // namespace

std::string GetCodeObjectIsaName(const void* elf_data, size_t elf_size) {
Expand Down Expand Up @@ -261,12 +327,12 @@ std::string GetCodeObjectIsaName(const void* elf_data, size_t elf_size) {

namespace {

bool IsAgentEligibleForHotswap(hsa_agent_t agent) {
const AgentGfxRevision gfx = GetAgentGfxRevision(agent);
bool IsAgentEligibleForHotswap(const AgentGfxRevision& gfx,
const RewriteOptions& options) {
HOTSWAP_LOG("hotswap: agent gfx=%s asic_revision=%u (valid=%s)\n",
gfx.gfx_target.empty() ? "?" : gfx.gfx_target.c_str(), gfx.asic_revision,
gfx.has_asic_revision ? "yes" : "no");
return IsHotswapSupportedGfxRevision(gfx);
return HasCandidateHotswapRewrite(gfx, options);
}

void LogRewrittenCodeObjectLoadFailure(hsa_status_t status) {
Expand Down Expand Up @@ -333,24 +399,34 @@ bool RetargetCodeObject(const void* elf_data, size_t elf_size, const char* sourc

bool TryRetargetCodeObject(const CodeObjectView& code_object, hsa_agent_t agent,
OwnedElfBuffer* out_elf_buffer, size_t* out_elf_size) {
if (IsHotswapDisabledByEnv() || !code_object.data || code_object.size == 0 ||
!IsAgentEligibleForHotswap(agent)) {
if (IsHotswapDisabledByEnv() || !code_object.data || code_object.size == 0) {
return false;
}

const AgentGfxRevision gfx = GetAgentGfxRevision(agent);
const RewriteOptions options{IsGfx12_5RewriteRequested()};
if (!IsAgentEligibleForHotswap(gfx, options)) {
return false;
}

const std::string source_isa = GetCodeObjectIsaName(code_object.data, code_object.size);
const std::string target_isa = GetAgentIsaName(agent);
if (source_isa.empty() || target_isa.empty()) {
HOTSWAP_LOG("hotswap: rewrite skipped, empty isa (src='%s' tgt='%s')\n", source_isa.c_str(),
target_isa.c_str());
const std::optional<RewriteDecision> decision =
DecideHotswapRewrite(gfx, source_isa, target_isa, options);
if (!decision) {
HOTSWAP_LOG("hotswap: rewrite skipped, no decision (src='%s' tgt='%s')\n",
source_isa.c_str(), target_isa.c_str());
return false;
}

const bool rewritten = RetargetCodeObject(code_object.data, code_object.size, source_isa.c_str(),
target_isa.c_str(), out_elf_buffer, out_elf_size);
HOTSWAP_LOG("hotswap: rewrite src=%s tgt=%s in=%zu out=%zu changed=%d\n", source_isa.c_str(),
target_isa.c_str(), code_object.size, rewritten ? *out_elf_size : 0,
rewritten ? 1 : 0);
const bool rewritten =
RetargetCodeObject(code_object.data, code_object.size,
decision->source_isa.c_str(), decision->target_isa.c_str(),
out_elf_buffer, out_elf_size);
HOTSWAP_LOG("hotswap: rewrite src=%s tgt=%s gfx12_5_opt_in=%d in=%zu out=%zu changed=%d\n",
decision->source_isa.c_str(), decision->target_isa.c_str(),
options.gfx12_5_rewrite_requested, code_object.size,
rewritten ? *out_elf_size : 0, rewritten ? 1 : 0);
return rewritten;
}

Expand Down Expand Up @@ -413,6 +489,12 @@ void ReleaseRetainedRewrittenElfBuffers(hsa_executable_t executable) {
}

#ifdef ROCR_HOTSWAP_TESTING
// Test-only entry point that exposes the file-local DecideHotswapRewrite.
// Forwards all arguments unchanged and returns its result.
std::optional<RewriteDecision> DecideHotswapRewriteForTesting(
    const AgentGfxRevision& gfx, const std::string& source_isa,
    const std::string& target_isa, const RewriteOptions& options) {
  std::optional<RewriteDecision> decision =
      DecideHotswapRewrite(gfx, source_isa, target_isa, options);
  return decision;
}

size_t RetainedRewrittenElfBufferCountForTesting(hsa_executable_t executable) {
std::scoped_lock lock(g_retained_rewritten_elf_buffers_mutex);
const auto it = g_retained_rewritten_elf_buffers.find(executable.handle);
Expand Down
Loading
Loading