diff --git a/projects/rocr-runtime/rocrtst/suites/functional/hotswap/hotswap_gfx_query_test.cc b/projects/rocr-runtime/rocrtst/suites/functional/hotswap/hotswap_gfx_query_test.cc index 905fe0f5778..e67622248f0 100644 --- a/projects/rocr-runtime/rocrtst/suites/functional/hotswap/hotswap_gfx_query_test.cc +++ b/projects/rocr-runtime/rocrtst/suites/functional/hotswap/hotswap_gfx_query_test.cc @@ -33,6 +33,8 @@ const char* kGfx1250IsaWithFeatures = "amdgcn-amd-amdhsa--gfx1250:sramecc+:xnack-"; const char* kGfx942Isa = "amdgcn-amd-amdhsa--gfx942"; const char* kGfx1251Isa = "amdgcn-amd-amdhsa--gfx1251"; +const char* kGfx12_5GenericIsaWithFeatures = + "amdgcn-amd-amdhsa--gfx12-5-generic:sramecc+"; void ResetTestEnv() { g_fake_hsa_env = FakeHsaEnv{}; @@ -96,7 +98,9 @@ hsa_status_t hsa_agent_get_info(hsa_agent_t /*agent*/, namespace { using rocr::hotswap::AgentGfxRevision; +using rocr::hotswap::ExtractGfxTarget; using rocr::hotswap::GetAgentGfxRevision; +using rocr::hotswap::IsGfx12_5Target; using rocr::hotswap::IsHotswapSupportedGfxRevision; TEST(HotswapGfxQuery, Gfx1250A0Passes) { @@ -145,6 +149,30 @@ TEST(HotswapGfxQuery, NearMissTargetBlocks) { EXPECT_FALSE(IsHotswapSupportedGfxRevision(revision)); } +TEST(HotswapGfxQuery, Gfx12_5GenericFeatureSuffixParsed) { + ResetTestEnv(); + g_fake_hsa_env.isa_name = kGfx12_5GenericIsaWithFeatures; + g_fake_hsa_env.asic_revision = 0; + + const AgentGfxRevision revision = GetAgentGfxRevision(MakeFreshAgent()); + + EXPECT_EQ(revision.gfx_target, "gfx12-5-generic"); + EXPECT_FALSE(IsHotswapSupportedGfxRevision(revision)); +} + +TEST(HotswapGfxQuery, Gfx12_5TargetPredicateIsStrict) { + EXPECT_TRUE(IsGfx12_5Target("gfx1250")); + EXPECT_TRUE(IsGfx12_5Target("gfx1251")); + EXPECT_TRUE(IsGfx12_5Target("gfx12-5-generic")); + EXPECT_FALSE(IsGfx12_5Target("gfx125")); + EXPECT_FALSE(IsGfx12_5Target("gfx125foo")); + EXPECT_FALSE(IsGfx12_5Target("gfx942")); +} + +TEST(HotswapGfxQuery, ExtractGfxTargetStopsBeforeUnderscore) { + EXPECT_EQ(ExtractGfxTarget("amdgcn-amd-amdhsa--gfx125_foo"), "gfx125"); +} + TEST(HotswapGfxQuery, Gfx1250NonA0Blocks) { ResetTestEnv(); g_fake_hsa_env.isa_name = kGfx1250Isa; diff --git a/projects/rocr-runtime/rocrtst/suites/functional/hotswap/hotswap_rewrite_test.cc b/projects/rocr-runtime/rocrtst/suites/functional/hotswap/hotswap_rewrite_test.cc index 79769786b7e..3210c14cf4e 100644 --- a/projects/rocr-runtime/rocrtst/suites/functional/hotswap/hotswap_rewrite_test.cc +++ b/projects/rocr-runtime/rocrtst/suites/functional/hotswap/hotswap_rewrite_test.cc @@ -31,6 +31,14 @@ namespace { constexpr const char* kGfx1250Isa = "amdgcn-amd-amdhsa--gfx1250"; +constexpr const char* kGfx1251Isa = "amdgcn-amd-amdhsa--gfx1251"; +constexpr const char* kGfx12_5GenericIsa = + "amdgcn-amd-amdhsa--gfx12-5-generic"; +constexpr const char* kGfx942Isa = "amdgcn-amd-amdhsa--gfx942"; +constexpr const char* kGfx1250B0Isa = + "amdgcn-amd-amdhsa--gfx1250:gfx1250-b0-specific+"; +constexpr const char* kGfx1250A0Isa = + "amdgcn-amd-amdhsa--gfx1250:gfx1250-b0-specific-"; struct FakeHsaEnv { std::string isa_name = kGfx1250Isa; @@ -210,6 +218,68 @@ rocr::hotswap::CodeObjectView MakeRealCodeObjectView() { return code_object; } +rocr::hotswap::AgentGfxRevision MakeRevision(const std::string& gfx_target, + uint32_t asic_revision, + bool has_asic_revision = true) { + rocr::hotswap::AgentGfxRevision revision; + revision.gfx_target = gfx_target; + revision.asic_revision = asic_revision; + revision.has_asic_revision = has_asic_revision; + return revision; +} + +TEST(HotswapRewriteDecision, A0RetargetsWithoutEntryTrampolineFlag) { + const auto decision = rocr::hotswap::DecideHotswapRewriteForTesting( + MakeRevision("gfx1250", 0), kGfx1250Isa, kGfx1250Isa, {}); + + ASSERT_TRUE(decision.has_value()); + EXPECT_EQ(decision->source_isa, kGfx1250B0Isa); + EXPECT_EQ(decision->target_isa, kGfx1250A0Isa); +} + +TEST(HotswapRewriteDecision, NonA0Gfx1250NeedsEntryTrampolineFlag) { + const auto no_flag = rocr::hotswap::DecideHotswapRewriteForTesting( + MakeRevision("gfx1250", 1), kGfx1250Isa, kGfx1250Isa, {}); + const auto with_flag = rocr::hotswap::DecideHotswapRewriteForTesting( + MakeRevision("gfx1250", 1), kGfx1250Isa, kGfx1250Isa, {true}); + + EXPECT_FALSE(no_flag.has_value()); + ASSERT_TRUE(with_flag.has_value()); + EXPECT_EQ(with_flag->source_isa, kGfx1250B0Isa); + EXPECT_EQ(with_flag->target_isa, kGfx1250B0Isa); +} + +TEST(HotswapRewriteDecision, EntryTrampolineFlagRoutesGfx12_5Family) { + const auto concrete = rocr::hotswap::DecideHotswapRewriteForTesting( + MakeRevision("gfx1251", 1), kGfx1251Isa, kGfx1251Isa, {true}); + const auto generic = rocr::hotswap::DecideHotswapRewriteForTesting( + MakeRevision("gfx12-5-generic", 1), kGfx12_5GenericIsa, + kGfx12_5GenericIsa, {true}); + + ASSERT_TRUE(concrete.has_value()); + EXPECT_EQ(concrete->source_isa, kGfx1251Isa); + EXPECT_EQ(concrete->target_isa, kGfx1251Isa); + ASSERT_TRUE(generic.has_value()); + EXPECT_EQ(generic->source_isa, kGfx12_5GenericIsa); + EXPECT_EQ(generic->target_isa, kGfx12_5GenericIsa); +} + +TEST(HotswapRewriteDecision, EntryTrampolineFlagUsesGenericSourceAsTarget) { + const auto decision = rocr::hotswap::DecideHotswapRewriteForTesting( + MakeRevision("gfx1251", 1), kGfx12_5GenericIsa, kGfx1251Isa, {true}); + + ASSERT_TRUE(decision.has_value()); + EXPECT_EQ(decision->source_isa, kGfx12_5GenericIsa); + EXPECT_EQ(decision->target_isa, kGfx12_5GenericIsa); +} + +TEST(HotswapRewriteDecision, EntryTrampolineFlagBlocksNonGfx12_5) { + const auto decision = rocr::hotswap::DecideHotswapRewriteForTesting( + MakeRevision("gfx942", 0), kGfx942Isa, kGfx942Isa, {true}); + + EXPECT_FALSE(decision.has_value()); +} + TEST(HotswapRewrite, GetIsaNameRealCodeObject) { const std::string isa = rocr::hotswap::GetCodeObjectIsaName(kGfx1250MinCo, sizeof(kGfx1250MinCo)); @@ -321,7 +391,7 @@ TEST(HotswapRewrite, RuntimeLoadUsesRewrittenCodeObject) { rocr::hotswap::RetainedRewrittenElfBufferCountForTesting(executable), 0u); } -TEST(HotswapRewrite, RuntimeLoadNonA0FallsBackToOriginal) { +TEST(HotswapRewrite, RuntimeLoadNonA0FallsBackWithoutEntryTrampolineFlag) { ResetRuntimeTestEnv(); g_fake_hsa_env.asic_revision = 1; LoadRecorder load; diff --git a/projects/rocr-runtime/runtime/docs/data/env_variables.rst b/projects/rocr-runtime/runtime/docs/data/env_variables.rst index 4c28309d14b..a78f9ed435d 100644 --- a/projects/rocr-runtime/runtime/docs/data/env_variables.rst +++ b/projects/rocr-runtime/runtime/docs/data/env_variables.rst @@ -105,6 +105,12 @@ - | 0, ``false``, ``off``, ``no``, ``n``, or ``f``: Disable HotSwap diagnostic logging. | Any other non-empty value: Enable HotSwap diagnostic logging. + * - | ``AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES`` + | Enables opt-in COMGR entry-trampoline HotSwap rewriting for gfx12.5 targets. + - ``0`` + - | 0, empty, ``false``, ``off``, ``no``, ``n``, or ``f``: Disable entry-trampoline rewrites. + | Any other non-empty value: Enable entry-trampoline rewrites for gfx125* and ``gfx12-5-generic`` targets. + * - | ``HSA_ENABLE_DXG_DETECTION`` | Controls detection of the DXG driver (/dev/dxg) on WSL2. - ``1`` diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hotswap.hpp b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hotswap.hpp index e3303e991d1..a8e1d1b8991 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hotswap.hpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hotswap.hpp @@ -46,6 +46,7 @@ #include #include #include +#include #include #include "core/inc/amd_hsa_loader.hpp" @@ -54,6 +55,8 @@ namespace rocr { namespace hotswap { +struct AgentGfxRevision; + using OwnedElfBuffer = std::unique_ptr; struct CodeObjectView { @@ -62,6 +65,15 @@ struct CodeObjectView { std::string uri; }; +struct RewriteOptions { + bool gfx12_5_rewrite_requested = false; +}; + +struct RewriteDecision { + std::string source_isa; + std::string target_isa; +}; + using LoadOriginalCodeObjectFn = hsa_status_t (*)( void* context, hsa_agent_t agent, hsa_code_object_t code_object, const char* options, const std::string& uri, @@ -103,6 +115,9 @@ void RetainRewrittenElfBuffer(hsa_executable_t executable, void ReleaseRetainedRewrittenElfBuffers(hsa_executable_t executable); #ifdef ROCR_HOTSWAP_TESTING +std::optional DecideHotswapRewriteForTesting( + const AgentGfxRevision& gfx, const std::string& source_isa, + const std::string& target_isa, const RewriteOptions& options); size_t RetainedRewrittenElfBufferCountForTesting(hsa_executable_t executable); #endif diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hotswap_gfx_query.hpp b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hotswap_gfx_query.hpp index 139e774373d..020c31712f1 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hotswap_gfx_query.hpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hotswap_gfx_query.hpp @@ -62,6 +62,7 @@ struct AgentGfxRevision { std::string GetAgentIsaName(hsa_agent_t agent); std::string ExtractGfxTarget(const std::string& isa_name); +bool IsGfx12_5Target(const std::string& gfx_target); AgentGfxRevision GetAgentGfxRevision(hsa_agent_t agent); void ResetAgentGfxRevisionCache(); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hotswap.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hotswap.cpp index 7648ee924c6..4f7e8022a05 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hotswap.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hotswap.cpp @@ -69,6 +69,15 @@ namespace { std::mutex g_retained_rewritten_elf_buffers_mutex; std::unordered_map> g_retained_rewritten_elf_buffers; +constexpr char kGfx1250[] = "gfx1250"; +constexpr char kGfx1250B0Feature[] = ":gfx1250-b0-specific+"; +constexpr char kGfx1250A0Feature[] = ":gfx1250-b0-specific-"; + +enum class Gfx1250Stepping { + kB0, + kA0, +}; + bool IsEnvFlagEnabled(const char* name) { if (!os::IsEnvVarSet(name)) { return false; @@ -87,6 +96,10 @@ bool IsEnvFlagEnabled(const char* name) { bool IsHotswapDisabledByEnv() { return IsEnvFlagEnabled("HSA_HOTSWAP_DISABLE"); } +bool IsGfx12_5RewriteRequested() { + return IsEnvFlagEnabled("AMD_COMGR_HOTSWAP_ENTRY_TRAMPOLINES"); +} + bool IsVerboseLoggingEnabled() { static const bool verbose = IsEnvFlagEnabled("HSA_HOTSWAP_VERBOSE"); return verbose; @@ -227,6 +240,59 @@ ComgrApi* GetComgrApi() { return ready ? &api : nullptr; } +const char* Gfx1250SteppingFeature(Gfx1250Stepping stepping) { + return stepping == Gfx1250Stepping::kB0 ? kGfx1250B0Feature + : kGfx1250A0Feature; +} + +std::string WithGfx1250SteppingFeature(const std::string& isa_name, + Gfx1250Stepping stepping) { + if (ExtractGfxTarget(isa_name) != kGfx1250 || + isa_name.find(kGfx1250B0Feature) != std::string::npos || + isa_name.find(kGfx1250A0Feature) != std::string::npos) { + return isa_name; + } + return isa_name + Gfx1250SteppingFeature(stepping); +} + +bool HasCandidateHotswapRewrite(const AgentGfxRevision& gfx, + const RewriteOptions& options) { + return IsHotswapSupportedGfxRevision(gfx) || + (options.gfx12_5_rewrite_requested && + IsGfx12_5Target(gfx.gfx_target)); +} + +std::optional DecideHotswapRewrite( + const AgentGfxRevision& gfx, const std::string& source_isa, + const std::string& target_isa, const RewriteOptions& options) { + if (source_isa.empty() || target_isa.empty()) { + return std::nullopt; + } + + const std::string source_gfx = ExtractGfxTarget(source_isa); + const std::string target_gfx = ExtractGfxTarget(target_isa); + if (IsHotswapSupportedGfxRevision(gfx) && source_gfx == kGfx1250 && + target_gfx == kGfx1250) { + return RewriteDecision{ + WithGfx1250SteppingFeature(source_isa, Gfx1250Stepping::kB0), + WithGfx1250SteppingFeature(target_isa, Gfx1250Stepping::kA0)}; + } + + if (!options.gfx12_5_rewrite_requested || + !IsGfx12_5Target(gfx.gfx_target) || !IsGfx12_5Target(source_gfx)) { + return std::nullopt; + } + + RewriteDecision decision{source_isa, source_isa}; + if (source_gfx == kGfx1250) { + decision.source_isa = + WithGfx1250SteppingFeature(source_isa, Gfx1250Stepping::kB0); + decision.target_isa = + WithGfx1250SteppingFeature(source_isa, Gfx1250Stepping::kB0); + } + return decision; +} + } // namespace std::string GetCodeObjectIsaName(const void* elf_data, size_t elf_size) { @@ -261,12 +327,12 @@ std::string GetCodeObjectIsaName(const void* elf_data, size_t elf_size) { namespace { -bool IsAgentEligibleForHotswap(hsa_agent_t agent) { - const AgentGfxRevision gfx = GetAgentGfxRevision(agent); +bool IsAgentEligibleForHotswap(const AgentGfxRevision& gfx, + const RewriteOptions& options) { HOTSWAP_LOG("hotswap: agent gfx=%s asic_revision=%u (valid=%s)\n", gfx.gfx_target.empty() ? "?" : gfx.gfx_target.c_str(), gfx.asic_revision, gfx.has_asic_revision ? "yes" : "no"); - return IsHotswapSupportedGfxRevision(gfx); + return HasCandidateHotswapRewrite(gfx, options); } void LogRewrittenCodeObjectLoadFailure(hsa_status_t status) { @@ -333,24 +399,34 @@ bool RetargetCodeObject(const void* elf_data, size_t elf_size, const char* sourc bool TryRetargetCodeObject(const CodeObjectView& code_object, hsa_agent_t agent, OwnedElfBuffer* out_elf_buffer, size_t* out_elf_size) { - if (IsHotswapDisabledByEnv() || !code_object.data || code_object.size == 0 || - !IsAgentEligibleForHotswap(agent)) { + if (IsHotswapDisabledByEnv() || !code_object.data || code_object.size == 0) { + return false; + } + + const AgentGfxRevision gfx = GetAgentGfxRevision(agent); + const RewriteOptions options{IsGfx12_5RewriteRequested()}; + if (!IsAgentEligibleForHotswap(gfx, options)) { return false; } const std::string source_isa = GetCodeObjectIsaName(code_object.data, code_object.size); const std::string target_isa = GetAgentIsaName(agent); - if (source_isa.empty() || target_isa.empty()) { - HOTSWAP_LOG("hotswap: rewrite skipped, empty isa (src='%s' tgt='%s')\n", source_isa.c_str(), - target_isa.c_str()); + const std::optional decision = + DecideHotswapRewrite(gfx, source_isa, target_isa, options); + if (!decision) { + HOTSWAP_LOG("hotswap: rewrite skipped, no decision (src='%s' tgt='%s')\n", + source_isa.c_str(), target_isa.c_str()); return false; } - const bool rewritten = RetargetCodeObject(code_object.data, code_object.size, source_isa.c_str(), - target_isa.c_str(), out_elf_buffer, out_elf_size); - HOTSWAP_LOG("hotswap: rewrite src=%s tgt=%s in=%zu out=%zu changed=%d\n", source_isa.c_str(), - target_isa.c_str(), code_object.size, rewritten ? *out_elf_size : 0, - rewritten ? 1 : 0); + const bool rewritten = + RetargetCodeObject(code_object.data, code_object.size, + decision->source_isa.c_str(), decision->target_isa.c_str(), + out_elf_buffer, out_elf_size); + HOTSWAP_LOG("hotswap: rewrite src=%s tgt=%s gfx12_5_opt_in=%d in=%zu out=%zu changed=%d\n", + decision->source_isa.c_str(), decision->target_isa.c_str(), + options.gfx12_5_rewrite_requested, code_object.size, + rewritten ? *out_elf_size : 0, rewritten ? 1 : 0); return rewritten; } @@ -413,6 +489,12 @@ void ReleaseRetainedRewrittenElfBuffers(hsa_executable_t executable) { } #ifdef ROCR_HOTSWAP_TESTING +std::optional DecideHotswapRewriteForTesting( + const AgentGfxRevision& gfx, const std::string& source_isa, + const std::string& target_isa, const RewriteOptions& options) { + return DecideHotswapRewrite(gfx, source_isa, target_isa, options); +} + size_t RetainedRewrittenElfBufferCountForTesting(hsa_executable_t executable) { std::scoped_lock lock(g_retained_rewritten_elf_buffers_mutex); const auto it = g_retained_rewritten_elf_buffers.find(executable.handle); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hotswap_gfx_query.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hotswap_gfx_query.cpp index f8e1f79fea6..f2c770bc314 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hotswap_gfx_query.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hotswap_gfx_query.cpp @@ -44,6 +44,7 @@ #include #include +#include #include #include @@ -85,10 +86,28 @@ std::string ExtractGfxTarget(const std::string& isa_name) { } auto end = std::find_if_not(isa_name.begin() + pos, isa_name.end(), - [](unsigned char c) { return std::isalnum(c); }); + [](unsigned char c) { + return std::isalnum(c) || c == '-'; + }); return isa_name.substr(pos, end - isa_name.begin() - pos); } +bool IsGfx12_5Target(const std::string& gfx_target) { + constexpr char kGfx125Prefix[] = "gfx125"; + constexpr char kGfx12_5Generic[] = "gfx12-5-generic"; + constexpr size_t kGfx125PrefixLen = sizeof(kGfx125Prefix) - 1; + if (gfx_target == kGfx12_5Generic) { + return true; + } + if (gfx_target.size() <= kGfx125PrefixLen || + gfx_target.compare(0, kGfx125PrefixLen, kGfx125Prefix) != 0) { + return false; + } + return std::all_of(gfx_target.begin() + kGfx125PrefixLen, + gfx_target.end(), + [](unsigned char c) { return std::isdigit(c); }); +} + namespace { std::mutex g_agent_gfx_revision_cache_mutex;