diff --git a/.github/workflows/new-prs.yml b/.github/workflows/new-prs.yml index 7de86c0035806..a86500df8f320 100644 --- a/.github/workflows/new-prs.yml +++ b/.github/workflows/new-prs.yml @@ -19,6 +19,8 @@ jobs: runs-on: ubuntu-24.04 permissions: pull-requests: write + container: + image: "ghcr.io/llvm/amd64/ci-ubuntu-24.04-github-automation:latest@sha256:82b5304c5d99cf5d75a2334885aca57490cbb04f37d07fc49a10a2649824e526" # Only comment on PRs that have been opened for the first time, by someone # new to LLVM or to GitHub as a whole. Ideally we'd look for FIRST_TIMER # or FIRST_TIME_CONTRIBUTOR, but this does not appear to work. Instead check @@ -33,26 +35,13 @@ jobs: (github.event.pull_request.author_association != 'MEMBER') && (github.event.pull_request.author_association != 'OWNER') steps: - - name: Checkout Automation Script - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - sparse-checkout: llvm/utils/git/ - ref: main - - - name: Setup Automation Script - working-directory: ./llvm/utils/git/ - run: | - pip install --require-hashes -r requirements.txt - - name: Greet Author - working-directory: ./llvm/utils/git/ env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} ISSUE_NUMBER: ${{ github.event.pull_request.number }} PR_AUTHOR: ${{ github.event.pull_request.user.login }} run: | - python3 ./github-automation.py \ + github-automation.py \ --token "$GH_TOKEN" \ pr-greeter \ --issue-number "$ISSUE_NUMBER" \ diff --git a/.github/workflows/subscriber.yml b/.github/workflows/subscriber.yml index 125f2ce5ef81f..d822424612b14 100644 --- a/.github/workflows/subscriber.yml +++ b/.github/workflows/subscriber.yml @@ -25,7 +25,7 @@ jobs: if: github.repository == 'llvm/llvm-project' runs-on: ubuntu-24.04 container: - image: "ghcr.io/llvm/amd64/ci-ubuntu-24.04-github-automation:latest@sha256:06164c484402046b0d624e5df8b3435a91ea7d204e2416201a9bac8d809b9aa6" + image: 
"ghcr.io/llvm/amd64/ci-ubuntu-24.04-github-automation:latest@sha256:82b5304c5d99cf5d75a2334885aca57490cbb04f37d07fc49a10a2649824e526" steps: - id: app-token diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 3535061b98bda..3362b74e3f303 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -45,6 +45,7 @@ #include "llvm/Support/RWMutex.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" +#include #include #include #include @@ -810,6 +811,29 @@ class BinaryContext { /// final addresses functions will have. uint64_t LayoutStartAddress{0}; + /// Maximum alignment of objects emitted into the main (hot) and cold code + /// sections, populated by the parallel AlignerPass (updateMaxCodeAlignment). + std::atomic MaxMainCodeAlignment{1}; + std::atomic MaxColdCodeAlignment{1}; + + /// Fold \p Alignment into the running max for the main code section (when + /// \p InMainSection) and/or the cold code section (when \p InColdSection), + /// reflecting which output section(s) the object is emitted into. Safe to + /// call concurrently. + void updateMaxCodeAlignment(uint16_t Alignment, bool InMainSection, + bool InColdSection) { + auto AtomicMax = [](std::atomic &Max, uint16_t Value) { + uint16_t Cur = Max.load(std::memory_order_relaxed); + while (Value > Cur && + !Max.compare_exchange_weak(Cur, Value, std::memory_order_relaxed)) + ; + }; + if (InMainSection) + AtomicMax(MaxMainCodeAlignment, Alignment); + if (InColdSection) + AtomicMax(MaxColdCodeAlignment, Alignment); + } + /// Old .text info. 
uint64_t OldTextSectionAddress{0}; uint64_t OldTextSectionOffset{0}; diff --git a/bolt/lib/Passes/Aligner.cpp b/bolt/lib/Passes/Aligner.cpp index c631120e9de09..3157af1fc5530 100644 --- a/bolt/lib/Passes/Aligner.cpp +++ b/bolt/lib/Passes/Aligner.cpp @@ -165,6 +165,26 @@ Error AlignerPass::runOnFunctions(BinaryContext &BC) { else alignMaxBytes(BF); + // Record the function's effective code alignment so layout passes can align + // the tentative section base to the eventual section alignment without + // re-scanning all functions. AssignSections (run just before this pass) has + // assigned the output sections, so route the alignment to whichever of + // .text / .text.cold the function actually emits into: a whole cold + // function (and its constant island) lands entirely in .text.cold, while a + // split function contributes its (duplicated) island and code to both. + const uint16_t Align = std::max( + BF.getAlignment(), + BF.hasIslandsInfo() ? BF.getConstantIslandAlignment() : uint16_t(0)); + const SmallString<32> MainSectionName = BF.getCodeSectionName(); + const bool InMainSection = + StringRef(MainSectionName) == BC.getMainCodeSectionName(); + bool InColdSection = + StringRef(MainSectionName) == BC.getColdCodeSectionName(); + if (!InColdSection && BF.isSplit()) + InColdSection = StringRef(BF.getCodeSectionName(FragmentNum::cold())) == + BC.getColdCodeSectionName(); + BC.updateMaxCodeAlignment(Align, InMainSection, InColdSection); + if (opts::AlignBlocks && !opts::PreserveBlocksAlignment) alignBlocks(BF, Emitter.MCE.get()); }; diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp index 3e5eba24bea4d..63368443c46b8 100644 --- a/bolt/lib/Passes/LongJmp.cpp +++ b/bolt/lib/Passes/LongJmp.cpp @@ -317,7 +317,9 @@ void LongJmpPass::tentativeBBLayout(const BinaryFunction &Func) { uint64_t LongJmpPass::tentativeLayoutRelocColdPart( const BinaryContext &BC, BinaryFunctionListType &SortedFunctions, uint64_t DotAddress) { - DotAddress = alignTo(DotAddress, 
llvm::Align(opts::AlignFunctions)); + DotAddress = + alignTo(DotAddress, std::max(opts::AlignFunctions, + BC.MaxColdCodeAlignment.load())); for (BinaryFunction *Func : SortedFunctions) { if (!Func->isSplit()) continue; @@ -452,8 +454,11 @@ void LongJmpPass::tentativeLayout(const BinaryContext &BC, } } - if (!EstimatedTextSize || EstimatedTextSize > BC.OldTextSectionSize) - DotAddress = alignTo(BC.LayoutStartAddress, opts::AlignText); + if (!EstimatedTextSize || EstimatedTextSize > BC.OldTextSectionSize) { + uint64_t TextAlign = + std::max(opts::AlignText, BC.MaxMainCodeAlignment.load()); + DotAddress = alignTo(BC.LayoutStartAddress, TextAlign); + } tentativeLayoutRelocMode(BC, SortedFunctions, DotAddress); } diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 58d24e15cde01..6e3022c491a73 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -518,6 +518,11 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { Manager.registerPass(std::make_unique(PrintPeepholes)); + // Assign each function an output section before AlignerPass and LongJmpPass, + // so those passes can attribute per-section code alignment and tentative + // layout to the final .text / .text.cold sections. + Manager.registerPass(std::make_unique()); + Manager.registerPass(std::make_unique()); // Perform reordering on data contained in one or more sections using @@ -555,9 +560,6 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { Manager.registerPass( std::make_unique(PrintRetpolineInsertion)); - // Assign each function an output section. - Manager.registerPass(std::make_unique()); - // This pass turns tail calls into jumps which makes them invisible to // function reordering. It's unsafe to use any CFG or instruction analysis // after this point. 
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 6d8fd0b6e1099..9fcfadbfd752c 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -2284,15 +2284,16 @@ Error RewriteInstance::readSpecialSections() { BC->printSections(BC->outs()); } - if (opts::RelocationMode == cl::BOU_TRUE && !HasTextRelocations) { + if (opts::RelocationMode == cl::boolOrDefault::BOU_TRUE && + !HasTextRelocations) { BC->errs() << "BOLT-ERROR: relocations against code are missing from the input " "file. Cannot proceed in relocations mode (-relocs).\n"; exit(1); } - BC->HasRelocations = - HasTextRelocations && (opts::RelocationMode != cl::BOU_FALSE); + BC->HasRelocations = HasTextRelocations && + (opts::RelocationMode != cl::boolOrDefault::BOU_FALSE); if (BC->IsLinuxKernel && BC->HasRelocations) { BC->outs() << "BOLT-INFO: disabling relocation mode for Linux kernel\n"; @@ -3917,8 +3918,28 @@ void RewriteInstance::runBinaryAnalyses() { NamedRegionTimer T("runBinaryAnalyses", "run binary analysis passes", TimerGroupName, TimerGroupDesc, opts::TimeRewrite); BinaryFunctionPassManager Manager(*BC); - // FIXME: add a pass that warns about which functions do not have CFG, - // and therefore, analysis is most likely to be less accurate. + + // Warn about functions for which BOLT could not reconstruct the CFG: binary + // analyses are less precise on them and may report both false negatives and + // false positives. + unsigned NoCFGCount = 0; + for (const auto &BFI : BC->getBinaryFunctions()) { + const BinaryFunction &BF = BFI.second; + // Skip ignored functions: BOLT does not attempt to build a CFG for them + // (e.g. pseudo functions such as PLT stubs), so a missing CFG there is + // expected rather than a sign of degraded analysis. 
+ if (BF.isIgnored() || BF.hasCFG()) + continue; + ++NoCFGCount; + if (opts::Verbosity >= 1) + BC->errs() << "BOLT-WARNING: no CFG for " << BF + << "; binary analyses may be imprecise\n"; + } + if (NoCFGCount) + BC->errs() << "BOLT-WARNING: " << NoCFGCount + << " function(s) lack CFG; binary-analysis results may be" + " incomplete. Re-run with -v=1 to list these functions.\n"; + using PtrAuthScanner = PAuthGadgetScanner::Analysis; // Accumulate all enabled analyses. diff --git a/bolt/test/binary-analysis/AArch64/cfg-warning.s b/bolt/test/binary-analysis/AArch64/cfg-warning.s new file mode 100644 index 0000000000000..22c1363c7ba07 --- /dev/null +++ b/bolt/test/binary-analysis/AArch64/cfg-warning.s @@ -0,0 +1,38 @@ +## Verify that binary analyses warn about functions for which BOLT could not +## reconstruct the CFG, since analysis results are less reliable for them. + +// RUN: %clang %cflags %s %p/../../Inputs/asm_main.c -o %t.exe +// RUN: llvm-bolt-binary-analysis --scanners=ptrauth-pac-ret %t.exe 2>&1 \ +// RUN: | FileCheck --check-prefix=SUMMARY %s +// RUN: llvm-bolt-binary-analysis --scanners=ptrauth-pac-ret -v=1 %t.exe 2>&1 \ +// RUN: | FileCheck --check-prefix=VERBOSE %s + + .text + +## A function with a regular CFG must not be reported. + .globl f_good + .type f_good,@function +f_good: + ret + .size f_good, .-f_good +// SUMMARY-NOT: BOLT-WARNING:{{.*}}f_good +// VERBOSE-NOT: BOLT-WARNING:{{.*}}f_good + +## An unanalyzable indirect branch prevents BOLT from building the CFG. + .globl f_nocfg + .type f_nocfg,@function +f_nocfg: + adr x2, 1f + br x2 +1: + ret + .size f_nocfg, .-f_nocfg + +## Without -v, only the aggregate warning is emitted; functions are not listed +## individually. +// SUMMARY-NOT: BOLT-WARNING: no CFG for +// SUMMARY: BOLT-WARNING: {{[0-9]+}} function(s) lack CFG; binary-analysis results may be incomplete. Re-run with -v=1 to list these functions. + +## With -v=1, each function lacking a CFG is listed before the summary. 
+// VERBOSE: BOLT-WARNING: no CFG for {{.*}}f_nocfg{{.*}}; binary analyses may be imprecise +// VERBOSE: BOLT-WARNING: {{[0-9]+}} function(s) lack CFG; binary-analysis results may be incomplete. Re-run with -v=1 to list these functions. diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index f8bc9a9ca81fd..e8005435e1836 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -135,6 +135,28 @@ std::string detectStandardResourceDir() { return GetResourcesPath("clangd", (void *)&StaticForMainAddr); } +std::optional +detectResourceDirWithClangPath(std::optional ClangPath) { + std::string ResourceDir = detectStandardResourceDir(); + if (llvm::sys::fs::exists(ResourceDir)) + return ResourceDir; + vlog("Auto-detected standard resource directory '{0}' doesn't exist", + ResourceDir); + + if (ClangPath) { + ResourceDir = GetResourcesPath(*ClangPath); + if (llvm::sys::fs::exists(ResourceDir)) + return ResourceDir; + vlog("Auto-detected using clang path '{0}' " + "resource directory '{1}' doesn't exist", + *ClangPath, ResourceDir); + } + + elog("Failed to auto-detect resource directory, " + "specify it manually via --resource-dir command line argument"); + return std::nullopt; +} + // The path passed to argv[0] is important: // - its parent directory is Driver::Dir, used for library discovery // - its basename affects CLI parsing (clang-cl) and other settings @@ -188,7 +210,7 @@ static std::string resolveDriver(llvm::StringRef Driver, bool FollowSymlink, CommandMangler CommandMangler::detect() { CommandMangler Result; Result.ClangPath = detectClangPath(); - Result.ResourceDir = detectStandardResourceDir(); + Result.ResourceDir = detectResourceDirWithClangPath(Result.ClangPath); Result.Sysroot = detectSysroot(); return Result; } diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index d79d82a175c68..11cfd34d051d0 100644 --- 
a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -905,6 +905,10 @@ T __builtin_elementwise_fshr(T x, T y, T z) perform a funnel shift right. Co first argument is 0 and no second argument is provided. T __builtin_elementwise_clmul(T x, T y) perform a carry-less multiplication of x and y, returning the least integer types significant bits of the wide result. +T __builtin_elementwise_pext(T x, T m) extract bits from x selected by the mask m, pack them contiguously integer types + into the least significant bits of the result, and zero the rest. +T __builtin_elementwise_pdep(T x, T m) deposit the least significant bits of x at the positions integer types + where m has a 1-bit, and zero the rest. ============================================== ====================================================================== ========================================= diff --git a/clang/docs/LifetimeSafety.rst b/clang/docs/LifetimeSafety.rst index 9ae2e6ee54826..be9b32f8d4b2d 100644 --- a/clang/docs/LifetimeSafety.rst +++ b/clang/docs/LifetimeSafety.rst @@ -462,6 +462,12 @@ more accurate checks in calling code. To enable annotation suggestions, use ``-Wlifetime-safety-suggestions``. +Fix-it hints normally insert ``[[clang::lifetimebound]]``. If a visible +object-like macro expands to ``[[clang::lifetimebound]]`` or +``__attribute__((lifetimebound))``, Clang will use the last such macro +visible at the insertion point. To force a project-specific macro spelling, +use ``-lifetime-safety-lifetimebound-macro=``. + .. code-block:: c++ #include @@ -688,5 +694,5 @@ Performance Lifetime analysis relies on Clang's CFG (Control Flow Graph). For functions with very large or complex CFGs, analysis time can sometimes be significant. 
To mitigate this, the analysis allows to skip functions where the number of CFG blocks exceeds -a certain threshold, controlled by the ``-flifetime-safety-max-cfg-blocks=N`` language +a certain threshold, controlled by the ``-lifetime-safety-max-cfg-blocks=N`` language option. diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7f056abfbbe24..37428df0974f4 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -305,6 +305,10 @@ Non-comprehensive list of changes in this release integers including ``_BitInt`` types. This includes constexpr evaluation support. +- Added ``__builtin_elementwise_pext`` and ``__builtin_elementwise_pdep`` for + parallel bit extract and parallel bit deposit operations on integers including + ``_BitInt`` types. This includes constexpr evaluation support. + - Deprecated float types support from ``__builtin_elementwise_max`` and ``__builtin_elementwise_min``. @@ -842,6 +846,7 @@ Miscellaneous Clang Crashes Fixed - Fixed an assertion failure in ``isAtEndOfMacroExpansion`` on macro expansions crossing the boundary of two fileIDs. (#GH115007), (#GH21755) - Fixed an assertion failure when ``__builtin_dump_struct`` is used with an immediate-escalated callable. (#GH192846) +- Fixed a crash when diagnosing an invalid out-of-line definition of a member class template. 
(#GH201490) OpenACC Specific Changes ------------------------ diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 053a257ba6d4a..63cdb787bea16 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1835,6 +1835,18 @@ def ElementwiseClmul : Builtin { let Prototype = "void(...)"; } +def ElementwisePext : Builtin { + let Spellings = ["__builtin_elementwise_pext"]; + let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; + let Prototype = "void(...)"; +} + +def ElementwisePdep : Builtin { + let Spellings = ["__builtin_elementwise_pdep"]; + let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; + let Prototype = "void(...)"; +} + def ReduceMax : Builtin { let Spellings = ["__builtin_reduce_max"]; let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr]; diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index c8c371625b568..f0112a2db0f1d 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1075,51 +1075,33 @@ let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVecto def extractf32x4_mask : X86Builtin<"_Vector<4, float>(_Vector<16, float>, _Constant int, _Vector<4, float>, unsigned char)">; } -let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vpdpbusd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, char>)">; -} - -let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpdpbusd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, char>)">; -} - -let Features = "avx512vnni", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in { - def vpdpbusd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, char>)">; -} - -let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def vpdpbusds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<16, unsigned char>, _Vector<16, char>)">; } -let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def vpdpbusd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, char>)">; def vpdpbusds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<32, unsigned char>, _Vector<32, char>)">; } -let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def vpdpbusd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, char>)">; def vpdpbusds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<64, unsigned char>, _Vector<64, char>)">; } -let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vpdpwssd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, short>)">; -} - -let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpdpwssd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, short>)">; -} - -let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpdpwssd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, 
_Vector<32, short>)">; -} - -let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def vpdpwssds128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<8, short>, _Vector<8, short>)">; } -let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512vnni|avxvnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def vpdpwssd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, short>)">; def vpdpwssds256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<16, short>, _Vector<16, short>)">; } -let Features = "avx512vnni", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512vnni", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def vpdpwssd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, short>)">; def vpdpwssds512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<32, short>, _Vector<32, short>)">; } diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 9af036156b1ad..53c4c1084784a 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -549,6 +549,9 @@ class LangOptions : public LangOptionsBase { /// A prefix map for __FILE__, __BASE_FILE__ and __builtin_FILE(). std::map> MacroPrefixMap; + /// Macro name to use in lifetimebound fix-it suggestions. + std::string LifetimeSafetyLifetimeBoundMacro; + /// Triples of the OpenMP targets that the host code codegen should /// take into account in order to generate accurate offloading descriptors. 
std::vector OMPTargetTriples; diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index 3bf140ff953b9..24689a6d7a0cb 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -285,7 +285,7 @@ def SPLATQ : WInst<"splat_laneq", ".(!Q)I", "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPlmQm", [ImmCheck<1, ImmCheckLaneIndex, 0>]>; -let TargetGuard = "bf16,neon" in { +let TargetGuard = "neon" in { def SPLAT_BF : WInst<"splat_lane", ".(!q)I", "bQb", [ImmCheck<1, ImmCheckLaneIndex, 0>]>; def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb", @@ -2024,8 +2024,7 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "v def VCMLAQ_ROT270_FP64 : SInst<"vcmlaq_rot270", "QQQQ", "d">; } -// V8.2-A BFloat intrinsics -let TargetGuard = "bf16,neon" in { +let TargetGuard = "neon" in { def VCREATE_BF : NoTestOpInst<"vcreate", ".(IU>)", "b", OP_CAST> { let BigEndianSafe = 1; } @@ -2088,7 +2087,11 @@ let TargetGuard = "bf16,neon" in { def VLD2_DUP_BF : WInst<"vld2_dup", "2(c*!)", "bQb">; def VLD3_DUP_BF : WInst<"vld3_dup", "3(c*!)", "bQb">; def VLD4_DUP_BF : WInst<"vld4_dup", "4(c*!)", "bQb">; +} + +// V8.2-A BFloat intrinsics +let TargetGuard = "bf16,neon" in { def VCVT_F32_BF16 : SOpInst<"vcvt_f32_bf16", "(F>)(Bq!)", "Qb", OP_VCVT_F32_BF16>; def VCVT_LOW_F32_BF16 : SOpInst<"vcvt_low_f32", "(F>)(BQ!)", "Qb", OP_VCVT_F32_BF16_LO>; def VCVT_HIGH_F32_BF16 : SOpInst<"vcvt_high_f32", "(F>)(BQ!)", "Qb", OP_VCVT_F32_BF16_HI>; @@ -2108,21 +2111,23 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "b def VCVT_LOW_BF16_F32_A64 : SInst<"vcvt_low_bf16", "BQ", "Qf">; def VCVT_HIGH_BF16_F32_A64 : SInst<"vcvt_high_bf16", "BBQ", "Qf">; def VCVT_BF16_F32 : SInst<"vcvt_bf16", "BQ", "f">; +} +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "neon" in { def COPY_LANE_BF16 : IOpInst<"vcopy_lane", "..I.I", "b", OP_COPY_LN>; def 
COPYQ_LANE_BF16 : IOpInst<"vcopy_lane", "..IqI", "Qb", OP_COPY_LN>; def COPY_LANEQ_BF16 : IOpInst<"vcopy_laneq", "..IQI", "b", OP_COPY_LN>; def COPYQ_LANEQ_BF16 : IOpInst<"vcopy_laneq", "..I.I", "Qb", OP_COPY_LN>; } -let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)", TargetGuard = "bf16,neon" in { +let ArchGuard = "!defined(__aarch64__) && !defined(__arm64ec__)", TargetGuard = "neon" in { let BigEndianSafe = 1 in { defm VREINTERPRET_BF : REINTERPRET_CROSS_TYPES< "csilUcUsUiUlhfPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQPcQPsQPl", "bQb">; } } -let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "bf16,neon" in { +let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "neon" in { let BigEndianSafe = 1 in { defm VVREINTERPRET_BF : REINTERPRET_CROSS_TYPES< "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", "bQb">; @@ -2155,17 +2160,14 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "lut" in { [ImmCheck<3, ImmCheck0_1>]>; def VLUTI4_H_X2_Q : SInst<"vluti4_laneq_x2", ".2(]>; - - let TargetGuard = "lut,bf16" in { - def VLUTI2_BF : SInst<"vluti2_lane", "Q.(]>; - def VLUTI2_BF_Q : SInst<"vluti2_laneq", "Q.(]>; - def VLUTI4_BF_X2 : SInst<"vluti4_lane_x2", ".2(]>; - def VLUTI4_BF_X2_Q : SInst<"vluti4_laneq_x2", ".2(]>; - } + def VLUTI2_BF : SInst<"vluti2_lane", "Q.(]>; + def VLUTI2_BF_Q : SInst<"vluti2_laneq", "Q.(]>; + def VLUTI4_BF_X2 : SInst<"vluti4_lane_x2", ".2(]>; + def VLUTI4_BF_X2_Q : SInst<"vluti4_laneq_x2", ".2(]>; } let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8,neon" in { diff --git a/clang/include/clang/Basic/riscv_sifive_vector.td b/clang/include/clang/Basic/riscv_sifive_vector.td index f483dfdd34e39..1273ecd27ae87 100644 --- a/clang/include/clang/Basic/riscv_sifive_vector.td +++ b/clang/include/clang/Basic/riscv_sifive_vector.td @@ -300,7 +300,7 @@ multiclass RVVOp0Op1Op2BuiltinSet widens> { foreach widen = widens in - let OverloadedName = NAME, + let OverloadedName = "sf_mm_w" 
# !cast(widen), TWiden = widen, HasVL = false, Log2LMUL = [3], @@ -313,7 +313,7 @@ multiclass RVVSFMatMulFloatBuiltinSet type_range, int widen> { // Currently the XSfmm spec doesn't support w8. foreach type = type_range in - let OverloadedName = name # !strconcat("_w", !cast(widen)), + let OverloadedName = "sf_mm_w" # !cast(widen), TWiden = widen, HasVL = false, Log2LMUL = [3], @@ -390,10 +390,11 @@ let RequiredFeatures = ["xsfmm32a32f"] in let RequiredFeatures = ["xsfmm32a8f"] in foreach e1 = [5, 4] in foreach e2 = [5, 4] in - let OverloadedName = "sf_mm_e" # e1 # "m" # !sub(7, e1) # "_e" # e2 # "m" # !sub(7, e2) in - defm : RVVSFMatMulFloatBuiltinSet< - "sf_mm_e" # e1 # "m" # !sub(7, e1) # "_e" # e2 # "m" # !sub(7, e2), - "0KzUvUvzzz", "UvUv", ["c"], 4>; + defm : RVVSFMatMulFloatBuiltinSet< + "sf_mm_e" # e1 # "m" # !sub(7, e1) # "_e" # e2 # "m" # !sub(7, e2), + !if(!eq(e1, e2), "0Kzvvzzz", "0KzvAvzzz"), + !if(!eq(e1, e2), "vv", "vAv"), + [!if(!eq(e1, 5), "b", "a")], 4>; let RequiredFeatures = ["xsfmm64a64f"] in defm : RVVSFMatMulFloatBuiltinSet<"sf_mm_f_f", "0Kzvvzzz", "v", ["d"], 1>; diff --git a/clang/include/clang/Basic/riscv_vector_common.td b/clang/include/clang/Basic/riscv_vector_common.td index f683e7137e1f2..739bbdd6f66e6 100644 --- a/clang/include/clang/Basic/riscv_vector_common.td +++ b/clang/include/clang/Basic/riscv_vector_common.td @@ -89,6 +89,9 @@ // of the same width // S: given a vector type, computes its equivalent one for LMUL=1. This is a // no-op if the vector was already LMUL=1 +// A: given a vector type whose element type is one of the OFP8 formats +// (float8e4m3 or float8e5m2), computes the vector type with the +// alternative OFP8 element type (e4m3 <-> e5m2). 
// (Log2EEW:Value): Log2EEW value could be 3/4/5/6 (8/16/32/64), given a // vector type (SEW and LMUL) and EEW (8/16/32/64), computes its // equivalent integer vector type with EEW and corresponding ELMUL (elmul = diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index e80309c59f2e7..3e474ee300e6c 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -2032,6 +2032,15 @@ def lifetime_safety_max_cfg_blocks "count exceeding this threshold. Specify 0 for no limit.">, MarshallingInfoInt>; +def lifetime_safety_lifetimebound_macro + : Joined<["-"], "lifetime-safety-lifetimebound-macro=">, + Group, + Visibility<[ClangOption, CC1Option]>, + MetaVarName<"">, + HelpText<"Use the given macro name when suggesting lifetimebound " + "attributes">, + MarshallingInfoString>; + defm lifetime_safety_inference : BoolFOption<"lifetime-safety-inference", LangOpts<"EnableLifetimeSafetyInference">, DefaultFalse, @@ -2179,10 +2188,10 @@ def fconstexpr_steps_EQ : Joined<["-"], "fconstexpr-steps=">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Set the maximum number of steps in constexpr function evaluation (0 = no limit)">, MarshallingInfoInt, "1048576">; -def fexperimental_new_constant_interpreter : Flag<["-"], "fexperimental-new-constant-interpreter">, Group, - HelpText<"Enable the experimental new constant interpreter">, - Visibility<[ClangOption, CC1Option]>, - MarshallingInfoFlag>; +defm experimental_new_constant_interpreter : BoolFOption<"experimental-new-constant-interpreter", + LangOpts<"EnableNewConstInterp">, Default<"CLANG_USE_EXPERIMENTAL_CONST_INTERP">, + PosFlag, + NegFlag>; def fconstexpr_backtrace_limit_EQ : Joined<["-"], "fconstexpr-backtrace-limit=">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Set the maximum number of entries to print in a constexpr evaluation backtrace (0 = no limit)">, diff --git a/clang/include/clang/Support/RISCVVIntrinsicUtils.h 
b/clang/include/clang/Support/RISCVVIntrinsicUtils.h index 678fe22a66e16..3ea5725057f05 100644 --- a/clang/include/clang/Support/RISCVVIntrinsicUtils.h +++ b/clang/include/clang/Support/RISCVVIntrinsicUtils.h @@ -90,7 +90,7 @@ enum class BaseTypeModifier : uint8_t { }; // Modifier for type, used for both scalar and vector types. -enum class TypeModifier : uint8_t { +enum class TypeModifier : uint16_t { NoModifier = 0, Pointer = 1 << 0, Const = 1 << 1, @@ -105,8 +105,10 @@ enum class TypeModifier : uint8_t { // simple enum, so we decide keek LMUL1 in TypeModifier for code size // optimization of clang binary size. LMUL1 = 1 << 7, - MaxOffset = 7, - LLVM_MARK_AS_BITMASK_ENUM(LMUL1), + // Toggle between the two OFP8 element types (FloatE4M3 <-> FloatE5M2). + AltFP8 = 1 << 8, + MaxOffset = 8, + LLVM_MARK_AS_BITMASK_ENUM(AltFP8), }; class Policy { @@ -175,7 +177,7 @@ struct PrototypeDescriptor { VectorTypeModifier VTM = VectorTypeModifier::NoModifier, TypeModifier TM = TypeModifier::NoModifier) : PT(PT), VTM(VTM), TM(TM) {} - constexpr PrototypeDescriptor(uint8_t PT, uint8_t VTM, uint8_t TM) + constexpr PrototypeDescriptor(uint8_t PT, uint8_t VTM, uint16_t TM) : PT(static_cast(PT)), VTM(static_cast(VTM)), TM(static_cast(TM)) {} diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index e8572afe8f69c..9444eeb0c2ad3 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -6962,9 +6962,6 @@ bool Compiler::compileConstructor(const CXXConstructorDecl *Ctor) { return false; } - if (IsUnion && !this->emitActivate(InitExpr)) - return false; - if (!this->visitInitializerPop(InitExpr)) return false; } else if (const IndirectFieldDecl *IFD = Init->getIndirectMember()) { diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index e5bf9c0c590ac..b022d71ae1e49 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -97,9 +97,10 @@ static void 
noteValueLocation(InterpState &S, const Block *B) { } static void diagnoseNonConstVariable(InterpState &S, CodePtr OpPC, - const ValueDecl *VD); + const ValueDecl *VD, + AccessKinds AK = AK_Read); static bool diagnoseUnknownDecl(InterpState &S, CodePtr OpPC, - const ValueDecl *D) { + const ValueDecl *D, AccessKinds AK = AK_Read) { // This function tries pretty hard to produce a good diagnostic. Just skip // that if nobody will see it anyway. if (!S.diagnosing()) @@ -129,7 +130,7 @@ static bool diagnoseUnknownDecl(InterpState &S, CodePtr OpPC, } if (!D->getType().isConstQualified()) { - diagnoseNonConstVariable(S, OpPC, D); + diagnoseNonConstVariable(S, OpPC, D, AK); } else if (const auto *VD = dyn_cast(D)) { if (!VD->getAnyInitializer()) { diagnoseMissingInitializer(S, OpPC, VD); @@ -143,8 +144,13 @@ static bool diagnoseUnknownDecl(InterpState &S, CodePtr OpPC, return false; } +static bool isModification(AccessKinds AK) { + return AK == AK_Assign || AK == AK_Increment || AK == AK_Decrement || + AK == AK_Construct || AK == AK_Destroy; +} + static void diagnoseNonConstVariable(InterpState &S, CodePtr OpPC, - const ValueDecl *VD) { + const ValueDecl *VD, AccessKinds AK) { if (!S.diagnosing()) return; @@ -168,8 +174,12 @@ static void diagnoseNonConstVariable(InterpState &S, CodePtr OpPC, return; if (VD->getType()->isIntegralOrEnumerationType()) { - S.FFDiag(Loc, diag::note_constexpr_ltor_non_const_int, 1) << VD; - S.Note(VD->getLocation(), diag::note_declared_at); + if (isModification(AK)) { + S.FFDiag(Loc, diag::note_constexpr_modify_global); + } else { + S.FFDiag(Loc, diag::note_constexpr_ltor_non_const_int, 1) << VD; + S.Note(VD->getLocation(), diag::note_declared_at); + } return; } @@ -587,11 +597,44 @@ bool CheckConst(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { // The This pointer is writable in constructors and destructors, // even if isConst() returns true. 
- if (S.initializingBlock(Ptr.block())) - return true; + for (PtrView V : llvm::reverse(S.InitializingPtrs)) { + if (V.block() != Ptr.block()) + continue; + if (!V.getFieldDesc()->IsConst) { + // If the pointer being initialized is not declared as const, + // Ptr is const because of a parent of V, but that is irrelevant + // since V is being initialized and NOT const. + // This is fine, so return true. + return true; + } + + // We know that Ptr is const because of a parent field and we also + // know that V is explicitly marked const. + // But since V is in InitializingPtrs, the fact that it is const doesn't + // matter and it is writable. + // What we now need to check is whether there is a pointer between Ptr and V + // that is marked const but NOT in InitializingPtrs. If that is the case, + // Ptr is currently not writable. + bool FoundProblem = false; + for (PtrView P = Ptr.view(); P != V; P = P.getBase()) { + if (P.getFieldDesc()->IsConst) { + FoundProblem = true; + break; + } + } + + // We couldn't find any pointer that's explicitly marked const, so + // Ptr is writable right now. + if (!FoundProblem) + return true; + // We only need to find the right block once. 
+ break; + } if (!S.checkingPotentialConstantExpression()) { - const QualType Ty = Ptr.getType(); + QualType Ty = Ptr.getType(); + if (!Ptr.getFieldDesc()->IsConst) + Ty.addConst(); const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_modify_const_type) << Ty; } @@ -1233,7 +1276,7 @@ bool CheckDummy(InterpState &S, CodePtr OpPC, const Block *B, AccessKinds AK) { return false; if (AK == AK_Read || AK == AK_Increment || AK == AK_Decrement) - return diagnoseUnknownDecl(S, OpPC, D); + return diagnoseUnknownDecl(S, OpPC, D, AK); if (AK == AK_Destroy || S.getLangOpts().CPlusPlus14) { const SourceInfo &E = S.Current->getSource(OpPC); @@ -1803,6 +1846,7 @@ bool Call(InterpState &S, CodePtr OpPC, const Function *Func, return false; }; + bool InstancePtrTracked = false; if (Func->hasThisPointer()) { size_t ArgSize = Func->getArgSize() + VarArgSize; size_t ThisOffset = ArgSize - (Func->hasRVO() ? primSize(PT_Ptr) : 0); @@ -1845,7 +1889,8 @@ bool Call(InterpState &S, CodePtr OpPC, const Function *Func, if (Func->isDestructor() && !CheckDestructor(S, OpPC, ThisPtr)) return false; - if (Func->isConstructor() || Func->isDestructor()) + InstancePtrTracked = (Func->isConstructor() || Func->isDestructor()); + if (InstancePtrTracked) S.InitializingPtrs.push_back(ThisPtr.view()); } @@ -1872,7 +1917,7 @@ bool Call(InterpState &S, CodePtr OpPC, const Function *Func, InterpStateCCOverride CCOverride(S, Func->isImmediate()); bool Success = Interpret(S); // Remove initializing block again. 
- if (Func->isConstructor() || Func->isDestructor()) + if (InstancePtrTracked) S.InitializingPtrs.pop_back(); if (!Success) { diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b16a34543757b..ab94bb96829ac 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4441,6 +4441,66 @@ static bool interp__builtin_ia32_gfni_mul(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_ia32_vpdp(InterpState &S, CodePtr OpPC, + const CallExpr *Call, bool IsSaturating) { + assert(Call->getNumArgs() == 3); + + QualType SrcT = Call->getArg(0)->getType(); + QualType OpAT = Call->getArg(1)->getType(); + QualType OpBT = Call->getArg(2)->getType(); + QualType DstT = Call->getType(); + if (!SrcT->isVectorType() || !OpAT->isVectorType() || !OpBT->isVectorType() || + !DstT->isVectorType()) + return false; + + const auto *SrcVecT = SrcT->castAs(); + const auto *OpAVecT = OpAT->castAs(); + const auto *OpBVecT = OpBT->castAs(); + const auto *DstVecT = DstT->castAs(); + + assert(OpAVecT->getNumElements() == OpBVecT->getNumElements()); + + unsigned NumSrcElems = SrcVecT->getNumElements(); + unsigned NumOperandElems = OpAVecT->getNumElements(); + unsigned ElemsPerLane = NumOperandElems / NumSrcElems; + + PrimType SrcElemT = *S.getContext().classify(SrcVecT->getElementType()); + PrimType OpAElemT = *S.getContext().classify(OpAVecT->getElementType()); + PrimType OpBElemT = *S.getContext().classify(OpBVecT->getElementType()); + PrimType DstElemT = *S.getContext().classify(DstVecT->getElementType()); + + assert(SrcElemT == DstElemT); + + const Pointer &OpBPtr = S.Stk.pop(); + const Pointer &OpAPtr = S.Stk.pop(); + const Pointer &SrcPtr = S.Stk.pop(); + const Pointer &Dst = S.Stk.peek(); + + for (unsigned I = 0; I != NumSrcElems; ++I) { + APSInt Acc; + INT_TYPE_SWITCH_NO_BOOL(SrcElemT, { Acc = SrcPtr.elem(I).toAPSInt(); }); + Acc = Acc.sext(64); + for (unsigned J = 0; J != 
ElemsPerLane; ++J) { + APSInt OpA, OpB; + INT_TYPE_SWITCH_NO_BOOL( + OpAElemT, { OpA = OpAPtr.elem(ElemsPerLane * I + J).toAPSInt(); }); + INT_TYPE_SWITCH_NO_BOOL( + OpBElemT, { OpB = OpBPtr.elem(ElemsPerLane * I + J).toAPSInt(); }); + OpA = APSInt(OpA.extend(64), false); + OpB = APSInt(OpB.extend(64), false); + Acc += OpA * OpB; + } + if (IsSaturating) + Acc = APSInt(Acc.truncSSat(32), false); + else + Acc = APSInt(Acc.trunc(32), false); + INT_TYPE_SWITCH_NO_BOOL(DstElemT, + { Dst.elem(I) = static_cast(Acc); }); + } + Dst.initializeAllElements(); + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -5116,11 +5176,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_pdep_si: case clang::X86::BI__builtin_ia32_pdep_di: + case Builtin::BI__builtin_elementwise_pdep: return interp__builtin_elementwise_int_binop(S, OpPC, Call, llvm::APIntOps::expandBits); case clang::X86::BI__builtin_ia32_pext_si: case clang::X86::BI__builtin_ia32_pext_di: + case Builtin::BI__builtin_elementwise_pext: return interp__builtin_elementwise_int_binop(S, OpPC, Call, llvm::APIntOps::compressBits); @@ -6503,7 +6565,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return EvalScalarMinMaxFp(A, B, RoundingMode, /*IsMin=*/false); }, /*IsScalar=*/true); - + case X86::BI__builtin_ia32_vpdpwssd128: + case X86::BI__builtin_ia32_vpdpwssd256: + case X86::BI__builtin_ia32_vpdpwssd512: + case X86::BI__builtin_ia32_vpdpbusd128: + case X86::BI__builtin_ia32_vpdpbusd256: + case X86::BI__builtin_ia32_vpdpbusd512: + return interp__builtin_ia32_vpdp(S, OpPC, Call, false); + case X86::BI__builtin_ia32_vpdpwssds128: + case X86::BI__builtin_ia32_vpdpwssds256: + case X86::BI__builtin_ia32_vpdpwssds512: + case X86::BI__builtin_ia32_vpdpbusds128: + case X86::BI__builtin_ia32_vpdpbusds256: + case 
X86::BI__builtin_ia32_vpdpbusds512: + return interp__builtin_ia32_vpdp(S, OpPC, Call, true); default: S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 6ac16c2b831d2..220a7a22c6992 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12407,6 +12407,45 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(V, E); }; + auto EvalVectorDotProduct = [&](bool IsSaturating) -> bool { + APValue Source, OperandA, OperandB; + if (!EvaluateVector(E->getArg(0), Source, Info) || + !EvaluateVector(E->getArg(1), OperandA, Info) || + !EvaluateVector(E->getArg(2), OperandB, Info)) { + return false; + } + + unsigned NumSrcElems = Source.getVectorLength(); + unsigned NumOperandElems = OperandA.getVectorLength(); + unsigned ElemsPerLane = NumOperandElems / NumSrcElems; + + assert(OperandA.getVectorLength() == OperandB.getVectorLength()); + + SmallVector Result; + Result.reserve(NumSrcElems); + for (unsigned I = 0; I != NumSrcElems; ++I) { + APSInt DotProduct = Source.getVectorElt(I).getInt(); + DotProduct = DotProduct.extend(64); + for (unsigned J = 0; J != ElemsPerLane; ++J) { + APSInt OpA = APSInt( + OperandA.getVectorElt(ElemsPerLane * I + J).getInt().extend(64), + false); + APSInt OpB = APSInt( + OperandB.getVectorElt(ElemsPerLane * I + J).getInt().extend(64), + false); + DotProduct += OpA * OpB; + } + if (IsSaturating) { + DotProduct = APSInt(DotProduct.truncSSat(32), false); + } else { + DotProduct = APSInt(DotProduct.trunc(32), false); + } + Result.push_back(APValue(DotProduct)); + } + + return Success(APValue(Result.data(), Result.size()), E); + }; + switch (E->getBuiltinCallee()) { default: return false; @@ -14123,6 +14162,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { } case Builtin::BI__builtin_elementwise_clmul: return EvaluateBinOpExpr(llvm::APIntOps::clmul); + case 
Builtin::BI__builtin_elementwise_pext: + return EvaluateBinOpExpr(llvm::APIntOps::compressBits); + case Builtin::BI__builtin_elementwise_pdep: + return EvaluateBinOpExpr(llvm::APIntOps::expandBits); case Builtin::BI__builtin_elementwise_fshl: case Builtin::BI__builtin_elementwise_fshr: { APValue SourceHi, SourceLo, SourceShift; @@ -14771,6 +14814,20 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return false; return Success(R, E); } + case X86::BI__builtin_ia32_vpdpwssd128: + case X86::BI__builtin_ia32_vpdpwssd256: + case X86::BI__builtin_ia32_vpdpwssd512: + case X86::BI__builtin_ia32_vpdpbusd128: + case X86::BI__builtin_ia32_vpdpbusd256: + case X86::BI__builtin_ia32_vpdpbusd512: + return EvalVectorDotProduct(false); + case X86::BI__builtin_ia32_vpdpwssds128: + case X86::BI__builtin_ia32_vpdpwssds256: + case X86::BI__builtin_ia32_vpdpwssds512: + case X86::BI__builtin_ia32_vpdpbusds128: + case X86::BI__builtin_ia32_vpdpbusds256: + case X86::BI__builtin_ia32_vpdpbusds512: + return EvalVectorDotProduct(true); } } @@ -17923,7 +17980,8 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, } case clang::X86::BI__builtin_ia32_pdep_si: - case clang::X86::BI__builtin_ia32_pdep_di: { + case clang::X86::BI__builtin_ia32_pdep_di: + case Builtin::BI__builtin_elementwise_pdep: { APSInt Val, Msk; if (!EvaluateInteger(E->getArg(0), Val, Info) || !EvaluateInteger(E->getArg(1), Msk, Info)) @@ -17932,7 +17990,8 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, } case clang::X86::BI__builtin_ia32_pext_si: - case clang::X86::BI__builtin_ia32_pext_di: { + case clang::X86::BI__builtin_ia32_pext_di: + case Builtin::BI__builtin_elementwise_pext: { APSInt Val, Msk; if (!EvaluateInteger(E->getArg(0), Val, Info) || !EvaluateInteger(E->getArg(1), Msk, Info)) diff --git a/clang/lib/Analysis/FlowSensitive/FormulaSerialization.cpp b/clang/lib/Analysis/FlowSensitive/FormulaSerialization.cpp index 4fe2231b9ff65..5808d6708b44d 100644 --- 
a/clang/lib/Analysis/FlowSensitive/FormulaSerialization.cpp +++ b/clang/lib/Analysis/FlowSensitive/FormulaSerialization.cpp @@ -203,9 +203,8 @@ parseFormula(llvm::StringRef Str, Arena &A, return F.takeError(); if (!Str.empty()) return llvm::createStringError(llvm::inconvertibleErrorCode(), - ("unexpected suffix of length: " + - llvm::Twine(Str.size() - OriginalSize)) - .str()); + "unexpected suffix of length: " + + llvm::Twine(Str.size() - OriginalSize)); return F; } diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp index 545836cd76fb9..3861117005752 100644 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -342,6 +342,20 @@ void FactsGenerator::VisitCastExpr(const CastExpr *CE) { if (Src && Dest && Dest->getLength() == Src->getLength()) flow(Dest, Src, /*Kill=*/true); return; + case CK_LValueToRValueBitCast: + case CK_NonAtomicToAtomic: + case CK_AtomicToNonAtomic: { + // `__builtin_bit_cast`/`std::bit_cast` of a pointer, and + // wrapping/unwrapping `_Atomic(T*)`, preserve the pointer value, so + // propagate the borrow. The operand may be a glvalue, so strip its outer + // lvalue level first. A bit-cast that materializes a pointer from a + // non-pointer representation has no matching source origin and is + // untracked. + OriginList *RVSrc = getRValueOrigins(SubExpr, Src); + if (RVSrc && Dest->getLength() == RVSrc->getLength()) + flow(Dest, RVSrc, /*Kill=*/true); + return; + } default: return; } @@ -372,6 +386,21 @@ void FactsGenerator::VisitUnaryOperator(const UnaryOperator *UO) { killAndFlowOrigin(*UO, *SubExpr); return; } + case UO_PreInc: + case UO_PostInc: + case UO_PreDec: + case UO_PostDec: { + // Inc/dec keeps a pointer in the same allocation, so the result carries the + // operand's loans. 
Peel the operand's storage origin when the *result* is a + // prvalue (post-inc/dec, or any form in C) -- the inverse of + // getRValueOrigins, which peels when its own argument is a glvalue. + if (!UO->getType()->isPointerType()) + return; + OriginList *SubList = getOriginsList(*UO->getSubExpr()); + flow(getOriginsList(*UO), + UO->isGLValue() ? SubList : SubList->peelOuterOrigin(), /*Kill=*/true); + return; + } default: return; } @@ -472,8 +501,17 @@ void FactsGenerator::VisitBinaryOperator(const BinaryOperator *BO) { killAndFlowOrigin(*BO, *BO->getRHS()); return; } - if (BO->isCompoundAssignmentOp()) + if (BO->isCompoundAssignmentOp()) { + // A pointer compound additive assignment (`p += n`) carries the LHS's loans + // like inc/dec above; in C the result is a prvalue, so peel its outer + // (storage) origin. + if (BO->getType()->isPointerType()) { + OriginList *LHSList = getOriginsList(*BO->getLHS()); + flow(getOriginsList(*BO), IsCMode ? LHSList->peelOuterOrigin() : LHSList, + /*Kill=*/true); + } return; + } if (BO->getType()->isPointerType() && BO->isAdditiveOp()) handlePointerArithmetic(BO); handleUse(BO->getRHS()); diff --git a/clang/lib/Analysis/LifetimeSafety/Origins.cpp b/clang/lib/Analysis/LifetimeSafety/Origins.cpp index 3ff4823ca88a6..c837f246fa17b 100644 --- a/clang/lib/Analysis/LifetimeSafety/Origins.cpp +++ b/clang/lib/Analysis/LifetimeSafety/Origins.cpp @@ -106,6 +106,10 @@ bool OriginManager::hasOrigins(QualType QT, bool IntrinsicOnly) const { if (!IntrinsicOnly && LifetimeAnnotatedOriginTypes.contains(QT.getCanonicalType().getTypePtr())) return true; + // An `_Atomic(T)` wraps T transparently for lifetime purposes (the atomic + // holds the same value); see through it. 
+ if (const auto *AT = QT->getAs()) + return hasOrigins(AT->getValueType(), IntrinsicOnly); const auto *RD = QT->getAsCXXRecordDecl(); if (!RD) return false; @@ -194,6 +198,9 @@ OriginList *OriginManager::createSingleOriginList(OriginID OID) { template OriginList *OriginManager::buildListForType(QualType QT, const T *Node) { assert(hasOrigins(QT) && "buildListForType called for non-pointer type"); + // `_Atomic(T)` is transparent for lifetime purposes: build the node for T. + if (const auto *AT = QT->getAs()) + return buildListForType(AT->getValueType(), Node); OriginList *Head = createNode(Node, QT); if (QT->isPointerOrReferenceType()) { diff --git a/clang/lib/Basic/TargetID.cpp b/clang/lib/Basic/TargetID.cpp index 6d9ba55ccd9d7..67f429607ef27 100644 --- a/clang/lib/Basic/TargetID.cpp +++ b/clang/lib/Basic/TargetID.cpp @@ -32,7 +32,8 @@ getAllPossibleAMDGPUTargetIDFeatures(const llvm::Triple &T, : llvm::AMDGPU::getArchAttrR600(ProcKind); if (Features & llvm::AMDGPU::FEATURE_SRAMECC) Ret.push_back("sramecc"); - if (Features & llvm::AMDGPU::FEATURE_XNACK) + // Only allow xnack in target ID if the processor supports on/off modes. 
+ if (Features & llvm::AMDGPU::FEATURE_XNACK_ON_OFF_MODES) Ret.push_back("xnack"); return Ret; } diff --git a/clang/lib/Basic/Targets/Hexagon.cpp b/clang/lib/Basic/Targets/Hexagon.cpp index 9bf34e67a03fd..615114f0fd1ea 100644 --- a/clang/lib/Basic/Targets/Hexagon.cpp +++ b/clang/lib/Basic/Targets/Hexagon.cpp @@ -116,6 +116,9 @@ void HexagonTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); + + if (Opts.CPlusPlus && getTriple().getOS() == llvm::Triple::UnknownOS) + Builder.defineMacro("_GNU_SOURCE"); } bool HexagonTargetInfo::initFeatureMap( diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h index 943373c20af32..9461680df8bdb 100644 --- a/clang/lib/Basic/Targets/OSTargets.h +++ b/clang/lib/Basic/Targets/OSTargets.h @@ -1092,6 +1092,8 @@ class LLVM_LIBRARY_VISIBILITY QURTTargetInfo : public OSTargetInfo { void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { Builder.defineMacro("__qurt__"); + if (Opts.CPlusPlus) + Builder.defineMacro("_GNU_SOURCE"); } public: @@ -1105,6 +1107,8 @@ class LLVM_LIBRARY_VISIBILITY H2TargetInfo : public OSTargetInfo { void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { Builder.defineMacro("__h2__"); + if (Opts.CPlusPlus) + Builder.defineMacro("_GNU_SOURCE"); } public: diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index b52e978fa0f4d..8b077620d2bab 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -959,7 +959,17 @@ static mlir::Value emitCommonNeonBuiltinExpr( case NEON::BI__builtin_neon_vshlq_n_v: return emitCommonNeonShift(cgf.getBuilder(), loc, vTy, ops[0], ops[1], 
/*shiftLeft=*/true); - case NEON::BI__builtin_neon_vshll_n_v: + case NEON::BI__builtin_neon_vshll_n_v: { + CIRGenBuilderTy &builder = cgf.getBuilder(); + cir::VectorType narrowVecTy = + builder.getExtendedOrTruncatedElementVectorType(vTy, + /*isExtended=*/false, + /*isSigned=*/!usgn); + mlir::Value src = builder.createBitcast(ops[0], narrowVecTy); + mlir::Value extended = builder.createIntCast(src, vTy); + return emitCommonNeonShift(builder, loc, vTy, extended, ops[1], + /*shiftLeft=*/true); + } case NEON::BI__builtin_neon_vshrn_n_v: cgf.cgm.errorNYI(expr->getSourceRange(), std::string("unimplemented AArch64 builtin call: ") + @@ -1474,22 +1484,6 @@ CIRGenFunction::emitAArch64SMEBuiltinExpr(unsigned builtinID, // Some intrinsics are equivalent for codegen. static const std::pair neonEquivalentIntrinsicMap[] = { - { - NEON::BI__builtin_neon_splat_lane_bf16, - NEON::BI__builtin_neon_splat_lane_v, - }, - { - NEON::BI__builtin_neon_splat_laneq_bf16, - NEON::BI__builtin_neon_splat_laneq_v, - }, - { - NEON::BI__builtin_neon_splatq_lane_bf16, - NEON::BI__builtin_neon_splatq_lane_v, - }, - { - NEON::BI__builtin_neon_splatq_laneq_bf16, - NEON::BI__builtin_neon_splatq_laneq_v, - }, { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, @@ -1602,40 +1596,6 @@ static const std::pair neonEquivalentIntrinsicMap[] = { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, }, - {NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v}, - {NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v}, - {NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v}, - {NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v}, - {NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v}, - {NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v}, - {NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v}, - {NEON::BI__builtin_neon_vld1q_bf16_x3, 
NEON::BI__builtin_neon_vld1q_x3_v}, - {NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v}, - {NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v}, - {NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v}, - {NEON::BI__builtin_neon_vld1q_lane_bf16, - NEON::BI__builtin_neon_vld1q_lane_v}, - {NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v}, - {NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v}, - {NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v}, - {NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v}, - {NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v}, - {NEON::BI__builtin_neon_vld2q_lane_bf16, - NEON::BI__builtin_neon_vld2q_lane_v}, - {NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v}, - {NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v}, - {NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v}, - {NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v}, - {NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v}, - {NEON::BI__builtin_neon_vld3q_lane_bf16, - NEON::BI__builtin_neon_vld3q_lane_v}, - {NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v}, - {NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v}, - {NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v}, - {NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v}, - {NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v}, - {NEON::BI__builtin_neon_vld4q_lane_bf16, - NEON::BI__builtin_neon_vld4q_lane_v}, { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, @@ -1812,32 +1772,6 @@ static const std::pair neonEquivalentIntrinsicMap[] = { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, }, - {NEON::BI__builtin_neon_vst1_bf16_x2, 
NEON::BI__builtin_neon_vst1_x2_v}, - {NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v}, - {NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v}, - {NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v}, - {NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v}, - {NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v}, - {NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v}, - {NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v}, - {NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v}, - {NEON::BI__builtin_neon_vst1q_lane_bf16, - NEON::BI__builtin_neon_vst1q_lane_v}, - {NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v}, - {NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v}, - {NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v}, - {NEON::BI__builtin_neon_vst2q_lane_bf16, - NEON::BI__builtin_neon_vst2q_lane_v}, - {NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v}, - {NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v}, - {NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v}, - {NEON::BI__builtin_neon_vst3q_lane_bf16, - NEON::BI__builtin_neon_vst3q_lane_v}, - {NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v}, - {NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v}, - {NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v}, - {NEON::BI__builtin_neon_vst4q_lane_bf16, - NEON::BI__builtin_neon_vst4q_lane_v}, // The mangling rules cause us to have one ID for each type for // vldap1(q)_lane and vstl1(q)_lane, but codegen is equivalent for all of // them. Choose an arbitrary one to be handled as tha canonical variation. 
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp index 44f481508cd7e..c53453bd3e88b 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp @@ -258,6 +258,36 @@ class AggExprEmitter : public StmtVisitor { break; } + + case CK_NonAtomicToAtomic: + case CK_AtomicToNonAtomic: { + bool isToAtomic = (e->getCastKind() == CK_NonAtomicToAtomic); + if (!isToAtomic) { + cgf.cgm.errorNYI(e->getSourceRange(), + "AggExprEmitter: CK_AtomicToNonAtomic"); + return; + } + + // Determine the atomic and value types. + QualType atomicType = e->getSubExpr()->getType(); + QualType valueType = e->getType(); + if (isToAtomic) + std::swap(atomicType, valueType); + + assert(atomicType->isAtomicType()); + assert(cgf.getContext().hasSameUnqualifiedType( + valueType, atomicType->castAs()->getValueType())); + + // Just recurse normally if we're ignoring the result or the + // atomic type doesn't change representation. + if (dest.isIgnored() || !cgf.cgm.isPaddedAtomicType(atomicType)) + return Visit(e->getSubExpr()); + + cgf.cgm.errorNYI( + e->getSourceRange(), + "AggExprEmitter: AtomicCast not ignored and has padded atomic type"); + return; + } case CK_LValueToRValue: // If we're loading from a volatile type, force the destination // into existence. 
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index fa166c1f39b69..0883a6b891c61 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -823,6 +823,9 @@ class CIRGenModule : public CIRGenTypeCache { return *openMPRuntime; } + bool isPaddedAtomicType(QualType type); + bool isPaddedAtomicType(const AtomicType *type); + mlir::IntegerAttr getSize(CharUnits size) { return builder.getSizeFromCharUnits(size); } diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp index ea37d82c82d79..3170666304a06 100644 --- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp @@ -226,6 +226,15 @@ static bool isSafeToConvert(const RecordDecl *rd, CIRGenTypes &cgt) { return isSafeToConvert(rd, cgt, alreadyChecked); } +bool CIRGenModule::isPaddedAtomicType(QualType type) { + return isPaddedAtomicType(type->castAs()); +} + +bool CIRGenModule::isPaddedAtomicType(const AtomicType *type) { + return astContext.getTypeSize(type) != + astContext.getTypeSize(type->getValueType()); +} + /// Lay out a tagged decl type like struct or union. 
mlir::Type CIRGenTypes::convertRecordDeclType(const clang::RecordDecl *rd) { // TagDecl's are not necessarily unique, instead use the (clang) type diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 509ab4245d99a..475bfec6199fc 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4546,6 +4546,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_elementwise_clmul: return RValue::get( emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::clmul)); + case Builtin::BI__builtin_elementwise_pext: + return RValue::get( + emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::pext)); + case Builtin::BI__builtin_elementwise_pdep: + return RValue::get( + emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::pdep)); case Builtin::BI__builtin_elementwise_add_sat: case Builtin::BI__builtin_elementwise_sub_sat: { diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index 0dc2e0bb82114..ebbc0addfed2c 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -277,22 +277,27 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( } } - LValue This; - if (IsArrow) { - LValueBaseInfo BaseInfo; - TBAAAccessInfo TBAAInfo; - Address ThisValue = EmitPointerWithAlignment(Base, &BaseInfo, &TBAAInfo); - This = MakeAddrLValue(ThisValue, Base->getType()->getPointeeType(), - BaseInfo, TBAAInfo); - } else { - This = EmitLValue(Base); - } + auto getLValueForThis = [this, IsArrow, + Base](bool EmitCheckedForStore = false) { + // FIXME: Respect EmitCheckedForStore for the IsArrow case. 
+ if (IsArrow) { + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; + Address ThisValue = EmitPointerWithAlignment(Base, &BaseInfo, &TBAAInfo); + return MakeAddrLValue(ThisValue, Base->getType()->getPointeeType(), + BaseInfo, TBAAInfo); + } + if (EmitCheckedForStore) + return EmitCheckedLValue(Base, TCK_Store); + return EmitLValue(Base); + }; if (const CXXConstructorDecl *Ctor = dyn_cast(MD)) { // This is the MSVC p->Ctor::Ctor(...) extension. We assume that's // constructing a new complete object of type Ctor. assert(!RtlArgs); assert(ReturnValue.isNull() && "Constructor shouldn't have return value"); + LValue This = getLValueForThis(); CallArgList Args; commonEmitCXXMemberOrOperatorCall( *this, {Ctor, Ctor_Complete}, This.getPointer(*this), @@ -307,17 +312,22 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( } if (TrivialForCodegen) { - if (isa(MD)) + if (isa(MD)) { + (void)getLValueForThis(); // Emit LHS for side effects. return RValue::get(nullptr); + } if (TrivialAssignment) { // We don't like to generate the trivial copy/move assignment operator // when it isn't necessary; just produce the proper effect here. - // It's important that we use the result of EmitLValue here rather than - // emitting call arguments, in order to preserve TBAA information from - // the RHS. - LValue RHS = isa(CE) ? TrivialAssignmentRHS - : EmitLValue(*CE->arg_begin()); + LValue This = getLValueForThis(/*EmitCheckedForStore=*/true); + + // It's important that we use the result of EmitCheckedLValue here rather + // than emitting call arguments, in order to preserve TBAA information + // from the RHS. + LValue RHS = isa(CE) + ? 
TrivialAssignmentRHS + : EmitCheckedLValue(*CE->arg_begin(), TCK_Load); EmitAggregateAssign(This, RHS, CE->getType()); return RValue::get(This.getPointer(*this)); } @@ -356,6 +366,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( SkippedChecks.set(SanitizerKind::Null, true); } + LValue This = getLValueForThis(); if (sanitizePerformTypeCheck()) EmitTypeCheck(CodeGenFunction::TCK_MemberCall, CallLoc, This.emitRawPointer(*this), diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 4ecab6ba79553..ab55b2281fd06 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -1522,10 +1522,10 @@ static void initializeBuffer(CodeGenModule &CGM, llvm::GlobalVariable *GV, ArrayRef Args) { LLVMContext &Ctx = CGM.getLLVMContext(); - llvm::Function *InitResFunc = llvm::Function::Create( - llvm::FunctionType::get(CGM.VoidTy, false), - llvm::GlobalValue::InternalLinkage, - ("_init_buffer_" + GV->getName()).str(), CGM.getModule()); + llvm::Function *InitResFunc = + llvm::Function::Create(llvm::FunctionType::get(CGM.VoidTy, false), + llvm::GlobalValue::InternalLinkage, + "_init_buffer_" + GV->getName(), CGM.getModule()); InitResFunc->addFnAttr(llvm::Attribute::AlwaysInline); llvm::BasicBlock *EntryBB = diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index dd355821fe5ff..aa32bc2a1d5a7 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -858,10 +858,6 @@ static const ARMNeonVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = { // Some intrinsics are equivalent for codegen. 
static const std::pair NEONEquivalentIntrinsicMap[] = { - { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, }, - { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, }, - { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, }, - { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, }, { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, }, { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, }, { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, }, @@ -890,36 +886,6 @@ static const std::pair NEONEquivalentIntrinsicMap[] = { { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, }, { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, }, { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, }, - { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v }, - { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v }, - { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v }, - { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v }, - { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v }, - { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v }, - { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v }, - { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v }, - { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v }, - { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v }, - { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v }, - { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v }, - { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v }, - { NEON::BI__builtin_neon_vld2_dup_bf16, 
NEON::BI__builtin_neon_vld2_dup_v }, - { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v }, - { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v }, - { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v }, - { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v }, - { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v }, - { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v }, - { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v }, - { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v }, - { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v }, - { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v }, - { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v }, - { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v }, - { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v }, - { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v }, - { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v }, - { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v }, { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, }, { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, }, { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, }, @@ -964,28 +930,6 @@ static const std::pair NEONEquivalentIntrinsicMap[] = { { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, }, { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, }, { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, }, - { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v }, - { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v }, - { 
NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v }, - { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v }, - { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v }, - { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v }, - { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v }, - { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v }, - { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v }, - { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v }, - { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v }, - { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v }, - { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v }, - { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v }, - { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v }, - { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v }, - { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v }, - { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v }, - { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v }, - { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v }, - { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v }, - { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v }, // The mangling rules cause us to have one ID for each type for vldap1(q)_lane // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an // arbitrary one to be handled as tha canonical variation. 
diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index acfeb9967cd2f..50125a71fcd5f 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -976,6 +976,16 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); } + case X86::BI__builtin_ia32_pdep_si: + case X86::BI__builtin_ia32_pdep_di: { + Function *F = CGM.getIntrinsic(Intrinsic::pdep, Ops[0]->getType()); + return Builder.CreateCall(F, Ops); + } + case X86::BI__builtin_ia32_pext_si: + case X86::BI__builtin_ia32_pext_di: { + Function *F = CGM.getIntrinsic(Intrinsic::pext, Ops[0]->getType()); + return Builder.CreateCall(F, Ops); + } case X86::BI__builtin_ia32_undef128: case X86::BI__builtin_ia32_undef256: case X86::BI__builtin_ia32_undef512: diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp index ffe1cc6086215..bc3be06d176bc 100644 --- a/clang/lib/CodeGen/Targets/RISCV.cpp +++ b/clang/lib/CodeGen/Targets/RISCV.cpp @@ -67,6 +67,11 @@ class RISCVABIInfo : public DefaultABIInfo { CharUnits Field2Off) const; ABIArgInfo coerceVLSVector(QualType Ty, unsigned ABIVLen = 0) const; + // Some unsupported type e.g. bf16 without zvfbfmin or zvfbfa, should be + // passed as same size i8 type. This function check and return the appropriate + // fixed vector type. + llvm::FixedVectorType * + getVLSCCCompatibleType(llvm::FixedVectorType *FixedVecTy) const; using ABIInfo::appendAttributeMangling; void appendAttributeMangling(TargetClonesAttr *Attr, unsigned Index, @@ -495,10 +500,10 @@ llvm::Type *RISCVABIInfo::detectVLSCCEligibleStruct(QualType Ty, // Turn them into scalable vector type or vector tuple type if legal. if (NumElts == 1) { // Handle single fixed-length vector. 
+ llvm::FixedVectorType *VLSTy = getVLSCCCompatibleType(FixedVecTy); return llvm::ScalableVectorType::get( - FixedVecTy->getElementType(), - llvm::divideCeil(FixedVecTy->getNumElements() * - llvm::RISCV::RVVBitsPerBlock, + VLSTy->getElementType(), + llvm::divideCeil(VLSTy->getNumElements() * llvm::RISCV::RVVBitsPerBlock, ABIVLen)); } @@ -520,6 +525,23 @@ llvm::Type *RISCVABIInfo::detectVLSCCEligibleStruct(QualType Ty, NumElts); } +llvm::FixedVectorType * +RISCVABIInfo::getVLSCCCompatibleType(llvm::FixedVectorType *FixedVecTy) const { + llvm::Type *EltType = FixedVecTy->getElementType(); + const TargetInfo &TI = getContext().getTargetInfo(); + if ((EltType->isHalfTy() && !TI.hasFeature("zvfhmin")) || + (EltType->isBFloatTy() && + !(TI.hasFeature("zvfbfmin") || TI.hasFeature("experimental-zvfbfa"))) || + (EltType->isFloatTy() && !TI.hasFeature("zve32f")) || + (EltType->isDoubleTy() && !TI.hasFeature("zve64d")) || + (EltType->isIntegerTy(64) && !TI.hasFeature("zve64x")) || + EltType->isIntegerTy(128)) + return llvm::FixedVectorType::get(llvm::Type::getInt8Ty(getVMContext()), + FixedVecTy->getNumElements() * + EltType->getScalarSizeInBits() / 8); + return FixedVecTy; +} + // Fixed-length RVV vectors are represented as scalable vectors in function // args/return and must be coerced from fixed vectors. ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty, unsigned ABIVLen) const { @@ -569,27 +591,12 @@ ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty, unsigned ABIVLen) const { // Generic vector // The number of elements needs to be at least 1. + llvm::FixedVectorType *VLSTy = + getVLSCCCompatibleType(llvm::FixedVectorType::get(EltType, NumElts)); ResType = llvm::ScalableVectorType::get( - EltType, - llvm::divideCeil(NumElts * llvm::RISCV::RVVBitsPerBlock, ABIVLen)); - - // If the corresponding extension is not supported, just make it an i8 - // vector with same LMUL. 
- const TargetInfo &TI = getContext().getTargetInfo(); - if ((EltType->isHalfTy() && !TI.hasFeature("zvfhmin")) || - (EltType->isBFloatTy() && !(TI.hasFeature("zvfbfmin") || - TI.hasFeature("experimental-zvfbfa"))) || - (EltType->isFloatTy() && !TI.hasFeature("zve32f")) || - (EltType->isDoubleTy() && !TI.hasFeature("zve64d")) || - (EltType->isIntegerTy(64) && !TI.hasFeature("zve64x")) || - EltType->isIntegerTy(128)) { - // The number of elements needs to be at least 1. - ResType = llvm::ScalableVectorType::get( - llvm::Type::getInt8Ty(getVMContext()), - llvm::divideCeil(EltType->getScalarSizeInBits() * NumElts * - llvm::RISCV::RVVBitsPerBlock, - 8 * ABIVLen)); - } + VLSTy->getElementType(), + llvm::divideCeil(VLSTy->getNumElements() * llvm::RISCV::RVVBitsPerBlock, + ABIVLen)); } return ABIArgInfo::getDirect(ResType); @@ -826,11 +833,16 @@ llvm::Value *RISCVABIInfo::createCoercedLoad(Address Src, const ABIArgInfo &AI, for (unsigned i = 0; i < NumElts; ++i) { // Extract from struct llvm::Value *ExtractFromLoad = CGF.Builder.CreateExtractValue(Load, i); + auto *FixedVecTy = + cast(ExtractFromLoad->getType()); + llvm::FixedVectorType *VLSTy = getVLSCCCompatibleType(FixedVecTy); + if (VLSTy != FixedVecTy) + ExtractFromLoad = CGF.Builder.CreateBitCast(ExtractFromLoad, VLSTy); // Element in vector tuple type is always i8, so we need to cast back to // it's original element type. 
EltTy = cast(llvm::VectorType::getWithSizeAndScalar( - cast(EltTy), ExtractFromLoad->getType())); + cast(EltTy), VLSTy)); llvm::Value *VectorVal = llvm::PoisonValue::get(EltTy); // Insert to scalable vector VectorVal = CGF.Builder.CreateInsertVector( @@ -863,9 +875,11 @@ llvm::Value *RISCVABIInfo::createCoercedLoad(Address Src, const ABIArgInfo &AI, if (auto *ArrayTy = dyn_cast(SrcTy)) SrcTy = ArrayTy->getElementType(); Src = Src.withElementType(SrcTy); - [[maybe_unused]] auto *FixedSrcTy = cast(SrcTy); - assert(ScalableDstTy->getElementType() == FixedSrcTy->getElementType()); - auto *Load = CGF.Builder.CreateLoad(Src); + auto *FixedSrcTy = cast(SrcTy); + llvm::Value *Load = CGF.Builder.CreateLoad(Src); + llvm::FixedVectorType *VLSTy = getVLSCCCompatibleType(FixedSrcTy); + if (VLSTy != FixedSrcTy) + Load = CGF.Builder.CreateBitCast(Load, VLSTy); auto *VectorVal = llvm::PoisonValue::get(ScalableDstTy); llvm::Value *Result = CGF.Builder.CreateInsertVector( ScalableDstTy, VectorVal, Load, uint64_t(0), "cast.scalable"); @@ -906,21 +920,26 @@ void RISCVABIInfo::createCoercedStore(llvm::Value *Val, Address Dst, FixedVecTy = ArrayTy->getArrayElementType(); } + llvm::FixedVectorType *VLSTy = + getVLSCCCompatibleType(cast(FixedVecTy)); + // Perform extract element and store for (unsigned i = 0; i < NumElts; ++i) { // Element in vector tuple type is always i8, so we need to cast back // to it's original element type. 
EltTy = cast(llvm::VectorType::getWithSizeAndScalar( - cast(EltTy), FixedVecTy)); + cast(EltTy), VLSTy)); // Extract scalable vector from tuple llvm::Value *Idx = CGF.Builder.getInt32(i); auto *TupleElement = CGF.Builder.CreateIntrinsic( llvm::Intrinsic::riscv_tuple_extract, {EltTy, TupTy}, {Val, Idx}); // Extract fixed vector from scalable vector - auto *ExtractVec = CGF.Builder.CreateExtractVector( - FixedVecTy, TupleElement, uint64_t(0)); + llvm::Value *ExtractVec = + CGF.Builder.CreateExtractVector(VLSTy, TupleElement, uint64_t(0)); + if (VLSTy != FixedVecTy) + ExtractVec = CGF.Builder.CreateBitCast(ExtractVec, FixedVecTy); // Store fixed vector to corresponding address Address EltPtr = Address::invalid(); if (Dst.getElementType()->isStructTy()) @@ -952,8 +971,12 @@ void RISCVABIInfo::createCoercedStore(llvm::Value *Val, Address Dst, assert(ArrayTy->getNumElements() == 1); EltTy = ArrayTy->getElementType(); } - auto *Coerced = CGF.Builder.CreateExtractVector( - cast(EltTy), Val, uint64_t(0)); + auto *FixedVecTy = cast(EltTy); + llvm::FixedVectorType *VLSTy = getVLSCCCompatibleType(FixedVecTy); + llvm::Value *Coerced = + CGF.Builder.CreateExtractVector(VLSTy, Val, uint64_t(0)); + if (VLSTy != FixedVecTy) + Coerced = CGF.Builder.CreateBitCast(Coerced, FixedVecTy); auto *I = CGF.Builder.CreateStore(Coerced, Dst, DestIsVolatile); CGF.addInstToCurrentSourceAtom(I, Val); } diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 8f189ea189408..d153e99d37280 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -6503,7 +6503,7 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, StringRef FcValue = C.getArgs().getLastArgValue(options::OPT_dxc_Fc); // TODO: Should we use `MakeCLOutputFilename` here? If so, we can probably // handle this as part of the SLASH_Fa handling below. 
- return C.addResultFile(C.getArgs().MakeArgString(FcValue.str()), &JA); + return C.addResultFile(C.getArgs().MakeArgString(FcValue), &JA); } if ((JA.getType() == types::TY_Object && @@ -6519,7 +6519,7 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, // that is using Fc or just printing disassembly. if (TC.isLastOutputProducingJob(C.getArgs(), JA.getKind()) && !FoValue.empty()) - return C.addResultFile(C.getArgs().MakeArgString(FoValue.str()), &JA); + return C.addResultFile(C.getArgs().MakeArgString(FoValue), &JA); StringRef Name = llvm::sys::path::filename(BaseInput); std::pair Split = Name.split('.'); const char *Suffix = types::getTypeTempSuffix(JA.getType(), true); diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index ddc26604a8006..b57579f135b36 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -1130,9 +1130,10 @@ static bool isXnackAvailable(const llvm::Triple &TT, llvm::StringRef TargetID) { auto Features = TT.isAMDGCN() ? 
llvm::AMDGPU::getArchAttrAMDGCN(ProcKind) : llvm::AMDGPU::getArchAttrR600(ProcKind); - // If processor has xnack always on, Address sanitizer is supported - bool XnackAvailable = (Features & llvm::AMDGPU::FEATURE_XNACK_ALWAYS); - if (XnackAvailable) + // If processor has xnack but doesn't support on/off modes, xnack is always on + bool XnackAlwaysOn = (Features & llvm::AMDGPU::FEATURE_XNACK) && + !(Features & llvm::AMDGPU::FEATURE_XNACK_ON_OFF_MODES); + if (XnackAlwaysOn) return true; // Otherwise, check if xnack+ is explicitly enabled in the target ID diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index ee72d6744bcbf..759794703deb0 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1943,7 +1943,7 @@ void Clang::AddPPCTargetArgs(const ArgList &Args, if (Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) { CmdArgs.push_back("-tune-cpu"); StringRef CPU = llvm::PPC::getNormalizedPPCTuneCPU(T, A->getValue()); - CmdArgs.push_back(Args.MakeArgString(CPU.str())); + CmdArgs.push_back(Args.MakeArgString(CPU)); } // Select the ABI to use. 
@@ -6722,8 +6722,15 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fexperimental_library); - if (Args.hasArg(options::OPT_fexperimental_new_constant_interpreter)) - CmdArgs.push_back("-fexperimental-new-constant-interpreter"); + if (CLANG_USE_EXPERIMENTAL_CONST_INTERP) { + Args.ClaimAllArgs(options::OPT_fexperimental_new_constant_interpreter); + Args.AddLastArg(CmdArgs, + options::OPT_fno_experimental_new_constant_interpreter); + } else { + Args.ClaimAllArgs(options::OPT_fno_experimental_new_constant_interpreter); + Args.AddLastArg(CmdArgs, + options::OPT_fexperimental_new_constant_interpreter); + } if (Arg *A = Args.getLastArg(options::OPT_fbracket_depth_EQ)) { CmdArgs.push_back("-fbracket-depth"); @@ -9680,7 +9687,9 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, OPT_fsanitize_minimal_runtime, OPT_fno_sanitize_minimal_runtime, OPT_fsanitize_trap_EQ, - OPT_fno_sanitize_trap_EQ}; + OPT_fno_sanitize_trap_EQ, + OPT_fslp_vectorize, + OPT_fno_slp_vectorize}; const llvm::DenseSet LinkerOptions{OPT_mllvm, OPT_Zlinker_input}; auto ToolChainHasRT = [&](const ToolChain &TC, StringRef Name) { return TC.getVFS().exists( diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 48724746d9330..547405eaf7663 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1157,6 +1157,15 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, CmdArgs.push_back(Args.MakeArgString(Twine(PluginOptPrefix) + ParallelismOpt + Parallelism)); + // Forward the SLP vectorization preference to the LTO backend by toggling + // the existing -vectorize-slp cl::opt, which the pass honors directly. This + // avoids minting dedicated linker options for what is only pipeline tuning. 
+ if (Arg *A = Args.getLastArg(options::OPT_fslp_vectorize, + options::OPT_fno_slp_vectorize)) + CmdArgs.push_back(Args.MakeArgString( + Twine(PluginOptPrefix) + "-vectorize-slp=" + + (A->getOption().matches(options::OPT_fslp_vectorize) ? "1" : "0"))); + // Pass down GlobalISel options. if (Arg *A = Args.getLastArg(options::OPT_fglobal_isel, options::OPT_fno_global_isel)) { diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index a29d62c99bb95..51a976783b589 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -4630,7 +4630,7 @@ loadAndParseConfigFile(StringRef ConfigFile, llvm::vfs::FileSystem *FS, llvm::SourceMgr::DiagHandlerTy DiagHandler, bool IsDotHFile) { llvm::ErrorOr> Text = - FS->getBufferForFile(ConfigFile.str()); + FS->getBufferForFile(ConfigFile); if (auto EC = Text.getError()) return EC; if (auto EC = parseConfiguration(*Text.get(), Style, AllowUnknownOptions, diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 92571c012bdb2..0cf01875af833 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -33,8 +33,7 @@ FormatTokenLexer::FormatTokenLexer( LangOpts(getFormattingLangOpts(Style)), SourceMgr(SourceMgr), ID(ID), Style(Style), IdentTable(IdentTable), Keywords(IdentTable), Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0), - FormattingDisabled(false), FormatOffRegex(Style.OneLineFormatOffRegex), - MacroBlockBeginRegex(Style.MacroBlockBegin), + FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin), MacroBlockEndRegex(Style.MacroBlockEnd), VerilogProtectedBlock(false) { Lex = std::make_unique(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts); @@ -88,12 +87,14 @@ FormatTokenLexer::FormatTokenLexer( ArrayRef FormatTokenLexer::lex() { assert(Tokens.empty()); assert(FirstInLineIndex == 0); + enum { FO_None, FO_CurrentLine, FO_NextLine } FormatOff = FO_None; + llvm::Regex 
FormatOffRegex(Style.OneLineFormatOffRegex); do { Tokens.push_back(getNextToken()); + auto &Tok = *Tokens.back(); - const auto NewlinesBefore = Tok.NewlinesBefore; - switch (FormatOff) { + switch (const auto NewlinesBefore = Tok.NewlinesBefore; FormatOff) { case FO_NextLine: if (NewlinesBefore > 1) { FormatOff = FO_None; @@ -125,13 +126,16 @@ ArrayRef FormatTokenLexer::lex() { } } } + if (Style.isJavaScript()) { tryParseJSRegexLiteral(); handleTemplateStrings(); } else if (Style.isTextProto()) { tryParsePythonComment(); } + tryMergePreviousTokens(); + if (Style.isCSharp()) { // This needs to come after tokens have been merged so that C# // string literals are correctly identified. @@ -140,9 +144,11 @@ ArrayRef FormatTokenLexer::lex() { handleTableGenMultilineString(); handleTableGenNumericLikeIdentifier(); } + if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; } while (Tokens.back()->isNot(tok::eof)); + if (Style.InsertNewlineAtEOF) { auto &TokEOF = *Tokens.back(); if (TokEOF.NewlinesBefore == 0) { @@ -150,6 +156,7 @@ ArrayRef FormatTokenLexer::lex() { TokEOF.OriginalColumn = 0; } } + return Tokens; } diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h index 9f5b735efe1d0..3f8c6ba15173d 100644 --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -135,7 +135,6 @@ class FormatTokenLexer { TemplateNames, TypeNames, VariableTemplates; bool FormattingDisabled; - llvm::Regex FormatOffRegex; // For one line. 
llvm::Regex MacroBlockBeginRegex; llvm::Regex MacroBlockEndRegex; diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index c83e82674dee1..534b356175f82 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -108,7 +108,6 @@ class ScopedLineState { Parser.Line->PPLevel = PreBlockLine->PPLevel; Parser.Line->InPPDirective = PreBlockLine->InPPDirective; Parser.Line->InMacroBody = PreBlockLine->InMacroBody; - Parser.Line->IsModuleOrImportDecl = PreBlockLine->IsModuleOrImportDecl; Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel; } @@ -4718,6 +4717,7 @@ void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { Line->FirstStartColumn = 0; Line->IsContinuation = false; Line->SeenDecltypeAuto = false; + Line->IsModuleOrImportDecl = false; if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) --Line->Level; diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index 5014fad7e6ef1..22803a837cc63 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -48,8 +48,6 @@ struct UnwrappedLine { bool InPragmaDirective = false; /// Whether it is part of a macro body. bool InMacroBody = false; - /// Whether it is a C++20 module/import declaration. - bool IsModuleOrImportDecl = false; /// Nesting level of unbraced body of a control statement. unsigned UnbracedBodyLevel = 0; @@ -63,6 +61,9 @@ struct UnwrappedLine { /// addition to the normal indention level. bool IsContinuation = false; + /// Whether it is a C++20 module/import declaration. + bool IsModuleOrImportDecl = false; + /// If this \c UnwrappedLine closes a block in a sequence of lines, /// \c MatchingOpeningBlockLineIndex stores the index of the corresponding /// opening line. 
Otherwise, \c MatchingOpeningBlockLineIndex must be diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp index d56e17f14e9bc..00f4a54269cfa 100644 --- a/clang/lib/Frontend/DependencyFile.cpp +++ b/clang/lib/Frontend/DependencyFile.cpp @@ -345,7 +345,7 @@ static void PrintFilename(raw_ostream &OS, StringRef Filename, DependencyOutputFormat OutputFormat) { // Convert filename to platform native path llvm::SmallString<256> NativePath; - llvm::sys::path::native(Filename.str(), NativePath); + llvm::sys::path::native(Filename, NativePath); if (OutputFormat == DependencyOutputFormat::NMake) { // Add quotes if needed. These are the characters listed as "special" to diff --git a/clang/lib/Headers/avx512vlvnniintrin.h b/clang/lib/Headers/avx512vlvnniintrin.h index 4b8a199af32e5..8dfcab3f01606 100644 --- a/clang/lib/Headers/avx512vlvnniintrin.h +++ b/clang/lib/Headers/avx512vlvnniintrin.h @@ -15,6 +15,16 @@ #define __AVX512VLVNNIINTRIN_H /* Define the default attributes for the functions in this file. 
*/ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512vnni"), \ + __min_vector_width__(128))) constexpr +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512vnni"), \ + __min_vector_width__(256))) constexpr +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512vnni"), \ @@ -23,6 +33,7 @@ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512vnni"), \ __min_vector_width__(256))) +#endif /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with /// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed @@ -180,128 +191,112 @@ ((__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v8hi)(A), (__v8hi)(B))) static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) -{ +_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpbusd_epi32(__S, __A, __B), (__v8si)__S); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) -{ +_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpbusd_epi32(__S, __A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) -{ +_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpbusds_epi32(__S, __A, __B), (__v8si)__S); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, 
__m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbusds_epi32( + __mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpbusds_epi32(__S, __A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) -{ +_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpwssd_epi32(__S, __A, __B), (__v8si)__S); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) -{ +_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpwssd_epi32(__S, __A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) -{ +_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpwssds_epi32(__S, __A, __B), (__v8si)__S); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwssds_epi32( + __mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, (__v8si)_mm256_dpwssds_epi32(__S, __A, __B), (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ +_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpbusd_epi32(__S, __A, __B), (__v4si)__S); } static __inline__ 
__m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) -{ +_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpbusd_epi32(__S, __A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ +_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpbusds_epi32(__S, __A, __B), (__v4si)__S); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) -{ +_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpbusds_epi32(__S, __A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ +_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpwssd_epi32(__S, __A, __B), (__v4si)__S); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) -{ +_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpwssd_epi32(__S, __A, __B), (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) -{ +_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpwssds_epi32(__S, __A, __B), (__v4si)__S); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i 
__S, __m128i __A, __m128i __B) -{ +_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, (__v4si)_mm_dpwssds_epi32(__S, __A, __B), (__v4si)_mm_setzero_si128()); diff --git a/clang/lib/Headers/avx512vnniintrin.h b/clang/lib/Headers/avx512vnniintrin.h index 2ce88efe4a04f..1e245292ab869 100644 --- a/clang/lib/Headers/avx512vnniintrin.h +++ b/clang/lib/Headers/avx512vnniintrin.h @@ -15,102 +15,99 @@ #define __AVX512VNNIINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"), \ + __min_vector_width__(512))) constexpr +#else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"), \ __min_vector_width__(512))) +#endif -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbusd_epi32(__m512i __S, + __m512i __A, + __m512i __B) { return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v64qu)__A, (__v64qi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) -{ +_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_dpbusd_epi32(__S, __A, __B), (__v16si)__S); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbusd_epi32( + __mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_dpbusd_epi32(__S, __A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS 
-_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbusds_epi32(__m512i __S, + __m512i __A, + __m512i __B) { return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v64qu)__A, (__v64qi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbusds_epi32( + __m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_dpbusds_epi32(__S, __A, __B), (__v16si)__S); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbusds_epi32( + __mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_dpbusds_epi32(__S, __A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwssd_epi32(__m512i __S, + __m512i __A, + __m512i __B) { return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v32hi)__A, (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) -{ +_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_dpwssd_epi32(__S, __A, __B), (__v16si)__S); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwssd_epi32( + __mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, 
(__v16si)_mm512_dpwssd_epi32(__S, __A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwssds_epi32(__m512i __S, + __m512i __A, + __m512i __B) { return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v32hi)__A, (__v32hi)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwssds_epi32( + __m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_dpwssds_epi32(__S, __A, __B), (__v16si)__S); } -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwssds_epi32( + __mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, (__v16si)_mm512_dpwssds_epi32(__S, __A, __B), (__v16si)_mm512_setzero_si512()); } #undef __DEFAULT_FN_ATTRS - #endif diff --git a/clang/lib/Headers/avxvnniintrin.h b/clang/lib/Headers/avxvnniintrin.h index 1d2e8c906effc..c92790b69ffbe 100644 --- a/clang/lib/Headers/avxvnniintrin.h +++ b/clang/lib/Headers/avxvnniintrin.h @@ -40,8 +40,21 @@ /* Intrinsics with _avx_ prefix are for compatibility with msvc. */ /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(256))) -#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(128))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), \ + __min_vector_width__(256))) constexpr +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), \ + __min_vector_width__(128))) constexpr +#else +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), \ + __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), \ + __min_vector_width__(128))) +#endif /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed @@ -61,8 +74,7 @@ /// DST[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) -{ +_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v32qu)__A, (__v32qi)__B); } @@ -85,8 +97,7 @@ _mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) /// DST[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) -{ +_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v32qu)__A, (__v32qi)__B); } @@ -107,8 +118,7 @@ _mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) /// DST[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i 
__B) -{ +_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v16hi)__A, (__v16hi)__B); } @@ -129,8 +139,7 @@ _mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) /// DST[MAX:256] := 0 /// \endcode static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) -{ +_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v16hi)__A, (__v16hi)__B); } @@ -153,8 +162,7 @@ _mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) /// DST[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) -{ +_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v16qu)__A, (__v16qi)__B); } @@ -177,8 +185,7 @@ _mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) /// DST[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) -{ +_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v16qu)__A, (__v16qi)__B); } @@ -199,8 +206,7 @@ _mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) /// DST[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) -{ +_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v8hi)__A, (__v8hi)__B); } @@ -221,8 +227,7 @@ _mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) /// DST[MAX:128] := 0 /// \endcode static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) -{ +_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { return 
(__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v8hi)__A, (__v8hi)__B); } diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 58d6df302d1a7..af52295df2d8b 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -1283,8 +1283,7 @@ void Parser::parseOMPContextSelectorSet( Diag(Tok.getLocation(), diag::warn_omp_declare_variant_expected) << "=" << ("context set name \"" + getOpenMPContextTraitSetName(TISet.Kind) + - "\"") - .str(); + "\""); // Parse '{'. if (Tok.is(tok::l_brace)) { diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index b8a3f48a32f24..ec4a9037f5c23 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3701,6 +3701,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, case Builtin::BI__builtin_elementwise_add_sat: case Builtin::BI__builtin_elementwise_sub_sat: case Builtin::BI__builtin_elementwise_clmul: + case Builtin::BI__builtin_elementwise_pext: + case Builtin::BI__builtin_elementwise_pdep: if (BuiltinElementwiseMath(TheCall, EltwiseBuiltinArgTyRestriction::IntegerTy)) return ExprError(); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index cddcf3a010279..d45c3eb35094f 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -11418,7 +11418,7 @@ static bool CheckMultiVersionValue(Sema &S, const FunctionDecl *FD) { auto BareFeat = StringRef{Feat}.substr(1); if (Feat[0] == '-') { S.Diag(FD->getLocation(), diag::err_bad_multiversion_option) - << Feature << ("no-" + BareFeat).str(); + << Feature << ("no-" + BareFeat); return true; } diff --git a/clang/lib/Sema/SemaLifetimeSafety.h b/clang/lib/Sema/SemaLifetimeSafety.h index a8bde363e3397..4bde272fb40a1 100644 --- a/clang/lib/Sema/SemaLifetimeSafety.h +++ b/clang/lib/Sema/SemaLifetimeSafety.h @@ -19,6 +19,7 @@ #include "clang/Analysis/Analyses/LifetimeSafety/LifetimeSafety.h" #include 
"clang/Basic/DiagnosticSema.h" #include "clang/Lex/Lexer.h" +#include "clang/Lex/Preprocessor.h" #include "clang/Sema/Sema.h" #include @@ -441,27 +442,49 @@ class LifetimeSafetySemaHelperImpl : public LifetimeSafetySemaHelper { } private: - std::pair + std::string getLifetimeBoundFixItText(SourceLocation Loc, bool LeadingSpace, + bool AllowGNUAttrMacro = true) { + StringRef Spelling = S.getLangOpts().LifetimeSafetyLifetimeBoundMacro; + if (Spelling.empty() && Loc.isValid()) { + const Preprocessor &PP = S.getPreprocessor(); + Spelling = PP.getLastMacroWithSpelling( + Loc, {tok::l_square, tok::l_square, PP.getIdentifierInfo("clang"), + tok::coloncolon, PP.getIdentifierInfo("lifetimebound"), + tok::r_square, tok::r_square}); + + if (Spelling.empty() && AllowGNUAttrMacro) + Spelling = PP.getLastMacroWithSpelling( + Loc, {tok::kw___attribute, tok::l_paren, tok::l_paren, + PP.getIdentifierInfo("lifetimebound"), tok::r_paren, + tok::r_paren}); + } + const std::string Text = + Spelling.empty() ? "[[clang::lifetimebound]]" : Spelling.str(); + return LeadingSpace ? " " + Text : Text + " "; + } + + std::pair getLifetimeBoundFixIt(const ParmVarDecl *Decl) { SourceLocation InsertionPoint = Lexer::getLocForEndOfToken( Decl->getEndLoc(), 0, S.getSourceManager(), S.getLangOpts()); - StringRef FixItText = " [[clang::lifetimebound]]"; + bool LeadingSpace = true; if (!Decl->getIdentifier()) { // For unnamed parameters, placing attributes after the type would be // parsed as a type attribute, not a parameter attribute. InsertionPoint = Decl->getBeginLoc(); - FixItText = "[[clang::lifetimebound]] "; + LeadingSpace = false; } else if (Decl->hasDefaultArg()) { // If the parameter has a default argument, place the attribute after the // named argument. 
InsertionPoint = Lexer::getLocForEndOfToken( Decl->getLocation(), 0, S.getSourceManager(), S.getLangOpts()); } - return {InsertionPoint, FixItText}; + return {InsertionPoint, + getLifetimeBoundFixItText(InsertionPoint, LeadingSpace)}; } - std::pair + std::pair getLifetimeBoundFixIt(const CXXMethodDecl *MD) { const auto MDL = MD->getTypeSourceInfo()->getTypeLoc(); SourceLocation InsertionPoint = Lexer::getLocForEndOfToken( @@ -482,7 +505,9 @@ class LifetimeSafetySemaHelperImpl : public LifetimeSafetySemaHelper { ->getLocation(), 0, S.getSourceManager(), S.getLangOpts()); } - return {InsertionPoint, " [[clang::lifetimebound]]"}; + return {InsertionPoint, + getLifetimeBoundFixItText(InsertionPoint, /*LeadingSpace=*/true, + /*AllowGNUAttrMacro=*/false)}; } std::string getDiagSubjectDescription(const ValueDecl *VD) { diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 8c94a1ad39208..556fa716d61e7 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -2246,7 +2246,7 @@ DeclResult Sema::CheckClassTemplate( if (ModulePrivateLoc.isValid()) NewTemplate->setModulePrivate(); - if (IsMemberSpecialization) { + if (!Invalid && IsMemberSpecialization) { assert(PrevClassTemplate && "Member specialization without a primary template?"); NewTemplate->setMemberSpecialization(); diff --git a/clang/lib/StaticAnalyzer/Checkers/Yaml.h b/clang/lib/StaticAnalyzer/Checkers/Yaml.h index ec521cbfcdc8e..4bb54e8aa8c12 100644 --- a/clang/lib/StaticAnalyzer/Checkers/Yaml.h +++ b/clang/lib/StaticAnalyzer/Checkers/Yaml.h @@ -37,7 +37,7 @@ std::optional getConfiguration(CheckerManager &Mgr, Checker *Chk, .getFileManager() .getVirtualFileSystem(); llvm::ErrorOr> Buffer = - VFS.getBufferForFile(ConfigFile.str()); + VFS.getBufferForFile(ConfigFile); if (Buffer.getError()) { Mgr.reportInvalidCheckerOptionValue(Chk, Option, diff --git a/clang/lib/Support/RISCVVIntrinsicUtils.cpp b/clang/lib/Support/RISCVVIntrinsicUtils.cpp index 
1138b9fc12798..5031bf5a1a8ff 100644 --- a/clang/lib/Support/RISCVVIntrinsicUtils.cpp +++ b/clang/lib/Support/RISCVVIntrinsicUtils.cpp @@ -696,6 +696,9 @@ PrototypeDescriptor::parsePrototypeDescriptor( case 'S': TM |= TypeModifier::LMUL1; break; + case 'A': + TM |= TypeModifier::AltFP8; + break; default: llvm_unreachable("Illegal non-primitive type transformer!"); } @@ -913,6 +916,14 @@ void RVVType::applyModifier(const PrototypeDescriptor &Transformer) { // Update ElementBitwidth need to update Scale too. Scale = LMUL.getScale(ElementBitwidth); break; + case TypeModifier::AltFP8: + if (ScalarType == ScalarTypeKind::FloatE4M3) + ScalarType = ScalarTypeKind::FloatE5M2; + else if (ScalarType == ScalarTypeKind::FloatE5M2) + ScalarType = ScalarTypeKind::FloatE4M3; + else + llvm_unreachable("AltFP8 modifier requires an OFP8 base type"); + break; default: llvm_unreachable("Unknown type modifier mask!"); } diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp index 3074a84986520..d6990d1725072 100644 --- a/clang/test/AST/ByteCode/builtin-functions.cpp +++ b/clang/test/AST/ByteCode/builtin-functions.cpp @@ -1404,6 +1404,46 @@ namespace ElementwiseClmul { (vector4uint){0U, 1U, 3U, 7U})) == 27U); } +namespace ElementwisePext { + static_assert(__builtin_elementwise_pext(0U, 0U) == 0U); + static_assert(__builtin_elementwise_pext(0xFFU, 0xFFU) == 0xFFU); + static_assert(__builtin_elementwise_pext(0xFFU, 0x0FU) == 0x0FU); + static_assert(__builtin_elementwise_pext(0xFFU, 0xF0U) == 0x0FU); + static_assert(__builtin_elementwise_pext(0b1010'1010U, 0b1100'1100U) == + 0b0000'1010U); + static_assert(__builtin_elementwise_pext(0b1111'1111U, 0b1010'1010U) == + 0b0000'1111U); +#ifndef __AVR__ + static_assert(__builtin_elementwise_pext((unsigned _BitInt(31))0xFF, + (unsigned _BitInt(31))0x0F) == + (unsigned _BitInt(31))0x0F); +#endif + + static_assert(__builtin_reduce_add(__builtin_elementwise_pext( + (vector4uint){0xAAU, 0xFFU, 0x55U, 
0x00U}, + (vector4uint){0xCCU, 0xAAU, 0x0FU, 0x00U})) == 0x1EU); +} + +namespace ElementwisePdep { + static_assert(__builtin_elementwise_pdep(0U, 0U) == 0U); + static_assert(__builtin_elementwise_pdep(0xFFU, 0xFFU) == 0xFFU); + static_assert(__builtin_elementwise_pdep(0x0FU, 0xFFU) == 0x0FU); + static_assert(__builtin_elementwise_pdep(0x0FU, 0xF0U) == 0xF0U); + static_assert(__builtin_elementwise_pdep(0b0000'1010U, 0b1100'1100U) == + 0b1000'1000U); + static_assert(__builtin_elementwise_pdep(0b0000'1111U, 0b1010'1010U) == + 0b1010'1010U); +#ifndef __AVR__ + static_assert(__builtin_elementwise_pdep((unsigned _BitInt(31))0x0F, + (unsigned _BitInt(31))0xFF) == + (unsigned _BitInt(31))0x0F); +#endif + + static_assert(__builtin_reduce_add(__builtin_elementwise_pdep( + (vector4uint){0x0AU, 0x0FU, 0x05U, 0x00U}, + (vector4uint){0xCCU, 0xAAU, 0x0FU, 0x00U})) == 0x137U); +} + namespace BuiltinMemcpy { constexpr int simple() { int a = 12; diff --git a/clang/test/AST/ByteCode/command-line-options.cpp b/clang/test/AST/ByteCode/command-line-options.cpp new file mode 100644 index 0000000000000..e85b66f08d020 --- /dev/null +++ b/clang/test/AST/ByteCode/command-line-options.cpp @@ -0,0 +1,28 @@ +/// This tests that the bytecode interpreter is in use if -fexperimental-new-constant-interpreter is passed. +/// This should be the case regardless of whether CLANG_USE_EXPERIMENTAL_CONST_INTERP is enabled or not. +/// +/// Similarly, it should _not_ be used if -fno-experimental-new-constant-interpreter is passed. +/// +/// All this should be true if the driver is used or -cc1. 
+ + +// RUN: %clang -c -fexperimental-new-constant-interpreter %s -Xclang -verify=bc +// RUN: %clang -cc1 -fexperimental-new-constant-interpreter %s -verify=bc + +// RUN: %clang -c -fno-experimental-new-constant-interpreter %s -Xclang -verify=nobc +// RUN: %clang -cc1 -fno-experimental-new-constant-interpreter %s -verify=nobc + + +/// Note that we're not testing the behavior without those command line options since that +/// depends on the value of CLANG_USE_EXPERIMENTAL_CONST_INTERP, which we can't test for. + + +// bc-no-diagnostics + + +/// We test for the bytecode interpreter by trying to bitcast a bitfield. +struct S { + unsigned a : 10; +}; +constexpr S s = __builtin_bit_cast(S, 12); // nobc-error {{must be initialized by a constant expression}} \ + // nobc-note {{constexpr bit_cast involving bit-field is not yet supported}} diff --git a/clang/test/AST/ByteCode/cxx20.cpp b/clang/test/AST/ByteCode/cxx20.cpp index 625e65c769133..7ff70076ee6e4 100644 --- a/clang/test/AST/ByteCode/cxx20.cpp +++ b/clang/test/AST/ByteCode/cxx20.cpp @@ -1,6 +1,11 @@ // RUN: %clang_cc1 -fcxx-exceptions -std=c++20 -verify=both,expected -fcxx-exceptions %s -DNEW_INTERP -fexperimental-new-constant-interpreter // RUN: %clang_cc1 -fcxx-exceptions -std=c++20 -verify=both,ref -fcxx-exceptions %s + +int x; +static_assert(++x, "test"); // both-error {{not an integral constant expression}} \ + // both-note {{cannot modify an object that is visible outside that expression}} + void test_alignas_operand() { alignas(8) char dummy; static_assert(__alignof(dummy) == 8); @@ -1423,3 +1428,37 @@ namespace FuncPtrRef { } static_assert(bullet_five_tests()); } + +namespace ConstWrites { + struct basic_string { + unsigned char a; + constexpr basic_string() { + a = false; + } + }; + struct array { + basic_string str; + }; + + constexpr bool tests() { + const array right{}; + return true; + } + static_assert(tests()); + + struct A { + int n; + constexpr A() : n(1) { n = 2; } + }; + struct B { + const A a; +
constexpr B(bool mutate) { + if (mutate) + const_cast(a).n = 3; // both-note {{modification of object of const-qualified type 'const int'}} + } + }; + constexpr B b(false); + static_assert(b.a.n == 2, ""); + constexpr B bad(true); // both-error {{must be initialized by a constant expression}} \ + // both-note {{in call to 'B(true)'}} +} diff --git a/clang/test/CIR/CodeGen/agg-atomic-cast.cpp b/clang/test/CIR/CodeGen/agg-atomic-cast.cpp new file mode 100644 index 0000000000000..12ed04f14edf2 --- /dev/null +++ b/clang/test/CIR/CodeGen/agg-atomic-cast.cpp @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG + +struct S { + int data[4]; +}; + +void non_atomic_to_atomic_cast() { + S s; + _Atomic(S) as = s; +} + +// CIR: %[[S_ADDR:.*]] = cir.alloca "s" {{.*}} : !cir.ptr +// CIR: %[[SA_ADDR:.*]] = cir.alloca "as" {{.*}} init : !cir.ptr +// CIR: cir.copy %[[S_ADDR]] to %[[SA_ADDR]] : !cir.ptr + +// LLVM: %[[S_ADDR:.*]] = alloca %struct.S, i64 1, align 4 +// LLVM: %[[SA_ADDR:.*]] = alloca %struct.S, i64 1, align 16 +// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[SA_ADDR]], ptr %[[S_ADDR]], i64 16, i1 false) + +// OGCG: %[[S_ADDR:.*]] = alloca %struct.S, align 4 +// OGCG: %[[SA_ADDR:.*]] = alloca %struct.S, align 16 +// OGCG: call void @llvm.memcpy.p0.p0.i64(ptr align 16 %[[SA_ADDR]], ptr align 4 %[[S_ADDR]], i64 16, i1 false) diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt index 8dd0084c53224..15170bbc2d1ed 100644 --- a/clang/test/CMakeLists.txt +++ 
b/clang/test/CMakeLists.txt @@ -93,7 +93,7 @@ list(APPEND CLANG_TEST_DEPS clang clang-fuzzer-dictionary clang-resource-headers - clang-format + clang-format-check-format clang-tblgen clang-offload-bundler clang-import-test @@ -255,7 +255,3 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/debuginfo-tests) endif() add_subdirectory(Analysis/LifetimeSafety) - -if(TARGET check-clang-format) - add_dependencies(check-clang-format clang-format-check-format) -endif() diff --git a/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c b/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c index c93e3ca31896c..a0ea29cff0a08 100644 --- a/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c +++ b/clang/test/CodeGen/AArch64/bf16-getset-intrinsics.c @@ -1,5 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1_cg_arm64_neon -target-feature +bf16 -emit-llvm %s -disable-O0-optnone | opt -S -passes=mem2reg,sroa | FileCheck %s +// RUN: %clang_cc1_cg_arm64_neon -emit-llvm %s -disable-O0-optnone | opt -S -passes=mem2reg,sroa | FileCheck %s // REQUIRES: aarch64-registered-target || arm-registered-target diff --git a/clang/test/CodeGen/AArch64/bf16-lane-intrinsics.c b/clang/test/CodeGen/AArch64/bf16-lane-intrinsics.c index ccd6d17412a8b..c8212908315f5 100644 --- a/clang/test/CodeGen/AArch64/bf16-lane-intrinsics.c +++ b/clang/test/CodeGen/AArch64/bf16-lane-intrinsics.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +neon \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck --check-prefix=CHECK-LE %s -// RUN: %clang_cc1 -triple aarch64_be -target-feature +neon -target-feature +bf16 \ +// RUN: %clang_cc1 -triple aarch64_be -target-feature +neon \ // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck 
--check-prefix=CHECK-BE %s // REQUIRES: aarch64-registered-target || arm-registered-target diff --git a/clang/test/CodeGen/AArch64/bf16-ldst-intrinsics.c b/clang/test/CodeGen/AArch64/bf16-ldst-intrinsics.c index 5d778e3b51d0e..44ddd578d81f2 100644 --- a/clang/test/CodeGen/AArch64/bf16-ldst-intrinsics.c +++ b/clang/test/CodeGen/AArch64/bf16-ldst-intrinsics.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +neon \ // RUN: -O2 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK64 -// RUN: %clang_cc1 -triple armv8.6a-arm-none-eabi -target-feature +neon -target-feature +bf16 -mfloat-abi hard \ +// RUN: %clang_cc1 -triple armv8.6a-arm-none-eabi -target-feature +neon -mfloat-abi hard \ // RUN: -O2 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK32 // REQUIRES: arm-registered-target,aarch64-registered-target diff --git a/clang/test/CodeGen/AArch64/bf16-reinterpret-intrinsics.c b/clang/test/CodeGen/AArch64/bf16-reinterpret-intrinsics.c index 88f2305e2782c..007a0b1b32b9e 100644 --- a/clang/test/CodeGen/AArch64/bf16-reinterpret-intrinsics.c +++ b/clang/test/CodeGen/AArch64/bf16-reinterpret-intrinsics.c @@ -1,5 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 \ +// RUN: %clang_cc1 -triple aarch64 -target-feature +neon \ // RUN: -disable-O0-optnone -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,sroa \ // RUN: | FileCheck %s diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c index 9fde7f37fc192..b37ed5aa29f10 100644 --- a/clang/test/CodeGen/AArch64/neon-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c @@ -5144,159 +5144,6 @@ uint32x4_t 
test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { return vqshrn_high_n_u64(a, b, 19); } -// CHECK-LABEL: define dso_local <8 x i16> @test_vshll_n_s8( -// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A]] to <8 x i16> -// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) -// CHECK-NEXT: ret <8 x i16> [[VSHLL_N]] -// -int16x8_t test_vshll_n_s8(int8x8_t a) { - return vshll_n_s8(a, 3); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vshll_n_s16( -// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) -// CHECK-NEXT: ret <4 x i32> [[VSHLL_N]] -// -int32x4_t test_vshll_n_s16(int16x4_t a) { - return vshll_n_s16(a, 9); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vshll_n_s32( -// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) -// CHECK-NEXT: ret <2 x i64> [[VSHLL_N]] -// -int64x2_t test_vshll_n_s32(int32x2_t a) { - return vshll_n_s32(a, 19); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vshll_n_u8( -// CHECK-SAME: <8 x i8> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[A]] to <8 x i16> -// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) -// CHECK-NEXT: ret <8 x i16> [[VSHLL_N]] -// -uint16x8_t test_vshll_n_u8(uint8x8_t a) { - return vshll_n_u8(a, 3); 
-} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vshll_n_u16( -// CHECK-SAME: <4 x i16> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) -// CHECK-NEXT: ret <4 x i32> [[VSHLL_N]] -// -uint32x4_t test_vshll_n_u16(uint16x4_t a) { - return vshll_n_u16(a, 9); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vshll_n_u32( -// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) -// CHECK-NEXT: ret <2 x i64> [[VSHLL_N]] -// -uint64x2_t test_vshll_n_u32(uint32x2_t a) { - return vshll_n_u32(a, 19); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vshll_high_n_s8( -// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16> -// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) -// CHECK-NEXT: ret <8 x i16> [[VSHLL_N]] -// -int16x8_t test_vshll_high_n_s8(int8x16_t a) { - return vshll_high_n_s8(a, 3); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vshll_high_n_s16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast 
<8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) -// CHECK-NEXT: ret <4 x i32> [[VSHLL_N]] -// -int32x4_t test_vshll_high_n_s16(int16x8_t a) { - return vshll_high_n_s16(a, 9); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vshll_high_n_s32( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) -// CHECK-NEXT: ret <2 x i64> [[VSHLL_N]] -// -int64x2_t test_vshll_high_n_s32(int32x4_t a) { - return vshll_high_n_s32(a, 19); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vshll_high_n_u8( -// CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16> -// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) -// CHECK-NEXT: ret <8 x i16> [[VSHLL_N]] -// -uint16x8_t test_vshll_high_n_u8(uint8x16_t a) { - return vshll_high_n_u8(a, 3); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vshll_high_n_u16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[VSHLL_N:%.*]] = shl 
<4 x i32> [[TMP2]], splat (i32 9) -// CHECK-NEXT: ret <4 x i32> [[VSHLL_N]] -// -uint32x4_t test_vshll_high_n_u16(uint16x8_t a) { - return vshll_high_n_u16(a, 9); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vshll_high_n_u32( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) -// CHECK-NEXT: ret <2 x i64> [[VSHLL_N]] -// -uint64x2_t test_vshll_high_n_u32(uint32x4_t a) { - return vshll_high_n_u32(a, 19); -} // CHECK-LABEL: define dso_local <8 x i16> @test_vmovl_high_s8( // CHECK-SAME: <16 x i8> noundef [[A:%.*]]) #[[ATTR0]] { @@ -5372,348 +5219,6 @@ uint64x2_t test_vmovl_high_u32(uint32x4_t a) { return vmovl_high_u32(a); } -// CHECK-LABEL: define dso_local <8 x i16> @test_vaddl_s8( -// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = sext <8 x i8> [[A]] to <8 x i16> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <8 x i8> [[B]] to <8 x i16> -// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I5_I]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <8 x i16> [[ADD_I]] -// -int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) { - return vaddl_s8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vaddl_s16( -// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast 
<4 x i16> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> -// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I5_I]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <4 x i32> [[ADD_I]] -// -int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) { - return vaddl_s16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vaddl_s32( -// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64> -// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I5_I]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <2 x i64> [[ADD_I]] -// -int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) { - return vaddl_s32(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vaddl_u8( -// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = zext <8 x i8> [[A]] to <8 x i16> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[B]] to <8 x i16> -// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I5_I]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <8 x i16> [[ADD_I]] -// -uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) { - return vaddl_u8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vaddl_u16( -// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] 
to <4 x i16> -// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> -// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I5_I]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <4 x i32> [[ADD_I]] -// -uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) { - return vaddl_u16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vaddl_u32( -// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VMOVL_I5_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> -// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I5_I]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <2 x i64> [[ADD_I]] -// -uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) { - return vaddl_u32(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vaddl_high_s8( -// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I12_I]] to <8 x i16> -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> -// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]] -// CHECK-NEXT: ret <8 x i16> [[ADD_I]] -// -int16x8_t test_vaddl_high_s8(int8x16_t a, 
int8x16_t b) { - return vaddl_high_s8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vaddl_high_s16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I12_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> -// CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32> -// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]] -// CHECK-NEXT: ret <4 x i32> [[ADD_I]] -// -int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) { - return vaddl_high_s16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vaddl_high_s32( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I12_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> -// CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64> -// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]] -// CHECK-NEXT: ret <2 x i64> [[ADD_I]] -// 
-int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) { - return vaddl_high_s32(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vaddl_high_u8( -// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I12_I]] to <8 x i16> -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> -// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]] -// CHECK-NEXT: ret <8 x i16> [[ADD_I]] -// -uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) { - return vaddl_high_u8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vaddl_high_u16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I12_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> -// CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> -// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]] -// CHECK-NEXT: ret <4 x i32> [[ADD_I]] -// -uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) { - return vaddl_high_u16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vaddl_high_u32( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
#[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I12_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> -// CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64> -// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]] -// CHECK-NEXT: ret <2 x i64> [[ADD_I]] -// -uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) { - return vaddl_high_u32(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vaddw_s8( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <8 x i8> [[B]] to <8 x i16> -// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <8 x i16> [[ADD_I]] -// -int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) { - return vaddw_s8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vaddw_s16( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <4 x i32> [[ADD_I]] -// -int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) { - return vaddw_s16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vaddw_s32( -// CHECK-SAME: <2 x i64> 
noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <2 x i64> [[ADD_I]] -// -int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) { - return vaddw_s32(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vaddw_u8( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[B]] to <8 x i16> -// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <8 x i16> [[ADD_I]] -// -uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) { - return vaddw_u8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vaddw_u16( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMOVL_I_I]] -// CHECK-NEXT: ret <4 x i32> [[ADD_I]] -// -uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) { - return vaddw_u16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vaddw_u32( -// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMOVL_I_I]] -// CHECK-NEXT: ret 
<2 x i64> [[ADD_I]] -// -uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) { - return vaddw_u32(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vaddw_high_s8( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> -// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[TMP0]] -// CHECK-NEXT: ret <8 x i16> [[ADD_I]] -// -int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) { - return vaddw_high_s8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vaddw_high_s16( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[TMP2]] -// CHECK-NEXT: ret <4 x i32> [[ADD_I]] -// -int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) { - return vaddw_high_s16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vaddw_high_s32( -// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[TMP2]] -// CHECK-NEXT: ret <2 x i64> [[ADD_I]] -// -int64x2_t 
test_vaddw_high_s32(int64x2_t a, int32x4_t b) { - return vaddw_high_s32(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vaddw_high_u8( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> -// CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[TMP0]] -// CHECK-NEXT: ret <8 x i16> [[ADD_I]] -// -uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) { - return vaddw_high_u8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vaddw_high_u16( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[TMP2]] -// CHECK-NEXT: ret <4 x i32> [[ADD_I]] -// -uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) { - return vaddw_high_u16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vaddw_high_u32( -// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[TMP2]] -// CHECK-NEXT: ret <2 x i64> [[ADD_I]] -// -uint64x2_t test_vaddw_high_u32(uint64x2_t a, 
uint32x4_t b) { - return vaddw_high_u32(a, b); -} - // CHECK-LABEL: define dso_local <8 x i16> @test_vsubl_high_s8( // CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] diff --git a/clang/test/CodeGen/AArch64/neon-luti.c b/clang/test/CodeGen/AArch64/neon-luti.c index 4b485636d45b1..4017bfa315a66 100644 --- a/clang/test/CodeGen/AArch64/neon-luti.c +++ b/clang/test/CodeGen/AArch64/neon-luti.c @@ -1,8 +1,8 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // REQUIRES: aarch64-registered-target #include -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -O3 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -O3 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_u8( // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { diff --git a/clang/test/CodeGen/AArch64/neon/add.c b/clang/test/CodeGen/AArch64/neon/add.c index ff2cb23c772dd..b1dc5c253e7d6 100644 --- a/clang/test/CodeGen/AArch64/neon/add.c +++ b/clang/test/CodeGen/AArch64/neon/add.c @@ -349,3 +349,374 @@ poly128_t test_vaddq_p128(poly128_t a, poly128_t b) { // LLVM-NEXT: ret i128 [[TMP3]] return vaddq_p128(a, b); } + +//===----------------------------------------------------------------------===// +// 2.1.1.1.2. 
Widening addition +// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#widening-addition +//===----------------------------------------------------------------------===// + +// LLVM-LABEL: @test_vaddl_s8( +// CIR-LABEL: @vaddl_s8( +int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) { + // CIR: cir.add {{.*}} : !cir.vector<8 x !s16i> + + // LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], <8 x i8> {{.*}} [[B:%.*]]) + // LLVM: [[VMOVL_I5_I:%.*]] = sext <8 x i8> [[A]] to <8 x i16> + // LLVM-NEXT: [[VMOVL_I_I:%.*]] = sext <8 x i8> [[B]] to <8 x i16> + // LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I5_I]], [[VMOVL_I_I]] + // LLVM-NEXT: ret <8 x i16> [[ADD_I]] + return vaddl_s8(a, b); +} + +// LLVM-LABEL: @test_vaddl_s16( +// CIR-LABEL: @vaddl_s16( +int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) { + // CIR: cir.add {{.*}} : !cir.vector<4 x !s32i> + + // LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], <4 x i16> {{.*}} [[B:%.*]]) + // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> + // LLVM-NEXT: [[VMOVL_I5_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> + // LLVM-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> + // LLVM-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> + // LLVM-NEXT: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> + // LLVM-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I5_I]], [[VMOVL_I_I]] + // LLVM-NEXT: ret <4 x i32> [[ADD_I]] + return vaddl_s16(a, b); +} + +// LLVM-LABEL: @test_vaddl_s32( +// CIR-LABEL: @vaddl_s32( +int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) { + // CIR: cir.add {{.*}} : !cir.vector<2 x !s64i> + + // LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], <2 x i32> {{.*}} [[B:%.*]]) + // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> + // LLVM-NEXT: [[VMOVL_I5_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> + // LLVM-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] 
to <8 x i8> + // LLVM-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> + // LLVM-NEXT: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64> + // LLVM-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I5_I]], [[VMOVL_I_I]] + // LLVM-NEXT: ret <2 x i64> [[ADD_I]] + return vaddl_s32(a, b); +} + +// LLVM-LABEL: @test_vaddl_u8( +// CIR-LABEL: @vaddl_u8( +uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) { + // CIR: cir.add {{.*}} : !cir.vector<8 x !u16i> + + // LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], <8 x i8> {{.*}} [[B:%.*]]) + // LLVM: [[VMOVL_I5_I:%.*]] = zext <8 x i8> [[A]] to <8 x i16> + // LLVM-NEXT: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[B]] to <8 x i16> + // LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I5_I]], [[VMOVL_I_I]] + // LLVM-NEXT: ret <8 x i16> [[ADD_I]] + return vaddl_u8(a, b); +} + +// LLVM-LABEL: @test_vaddl_u16( +// CIR-LABEL: @vaddl_u16( +uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) { + // CIR: cir.add {{.*}} : !cir.vector<4 x !u32i> + + // LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], <4 x i16> {{.*}} [[B:%.*]]) + // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> + // LLVM-NEXT: [[VMOVL_I5_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> + // LLVM-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> + // LLVM-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> + // LLVM-NEXT: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> + // LLVM-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I5_I]], [[VMOVL_I_I]] + // LLVM-NEXT: ret <4 x i32> [[ADD_I]] + return vaddl_u16(a, b); +} + +// LLVM-LABEL: @test_vaddl_u32( +// CIR-LABEL: @vaddl_u32( +uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) { + // CIR: cir.add {{.*}} : !cir.vector<2 x !u64i> + + // LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], <2 x i32> {{.*}} [[B:%.*]]) + // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> + // 
LLVM-NEXT: [[VMOVL_I5_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> + // LLVM-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> + // LLVM-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> + // LLVM-NEXT: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> + // LLVM-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I5_I]], [[VMOVL_I_I]] + // LLVM-NEXT: ret <2 x i64> [[ADD_I]] + return vaddl_u32(a, b); +} + +// LLVM-LABEL: @test_vaddl_high_s8( +// CIR-LABEL: @vaddl_high_s8( +int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) { + // CIR: cir.add {{.*}} : !cir.vector<8 x !s16i> + + // LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) + // LLVM: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> + // LLVM-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I12_I]] to <8 x i16> + // LLVM-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> + // LLVM-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> + // LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]] + // LLVM-NEXT: ret <8 x i16> [[ADD_I]] + return vaddl_high_s8(a, b); +} + +// LLVM-LABEL: @test_vaddl_high_s16( +// CIR-LABEL: @vaddl_high_s16( +int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) { + // CIR: cir.add {{.*}} : !cir.vector<4 x !s32i> + + // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]]) + // LLVM: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> + // LLVM-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I12_I]] to <8 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> + // LLVM-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> + // LLVM-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> + // LLVM-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> + // LLVM-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> + // LLVM-NEXT: 
[[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32> + // LLVM-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]] + // LLVM-NEXT: ret <4 x i32> [[ADD_I]] + return vaddl_high_s16(a, b); +} + +// LLVM-LABEL: @test_vaddl_high_s32( +// CIR-LABEL: @vaddl_high_s32( +int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) { + // CIR: cir.add {{.*}} : !cir.vector<2 x !s64i> + + // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]]) + // LLVM: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> + // LLVM-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I12_I]] to <8 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> + // LLVM-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> + // LLVM-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> + // LLVM-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> + // LLVM-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> + // LLVM-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64> + // LLVM-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]] + // LLVM-NEXT: ret <2 x i64> [[ADD_I]] + return vaddl_high_s32(a, b); +} + +// LLVM-LABEL: @test_vaddl_high_u8( +// CIR-LABEL: @vaddl_high_u8( +uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) { + // CIR: cir.add {{.*}} : !cir.vector<8 x !u16i> + + // LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) + // LLVM: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> + // LLVM-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I12_I]] to <8 x i16> + // LLVM-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> + // LLVM-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> + // LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]] + // LLVM-NEXT: ret <8 x i16> [[ADD_I]] + return vaddl_high_u8(a, b); +} + +// LLVM-LABEL: @test_vaddl_high_u16( +// 
CIR-LABEL: @vaddl_high_u16( +uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) { + // CIR: cir.add {{.*}} : !cir.vector<4 x !u32i> + + // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]]) + // LLVM: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> + // LLVM-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I12_I]] to <8 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> + // LLVM-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> + // LLVM-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> + // LLVM-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> + // LLVM-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> + // LLVM-NEXT: [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> + // LLVM-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP5]] + // LLVM-NEXT: ret <4 x i32> [[ADD_I]] + return vaddl_high_u16(a, b); +} + +// LLVM-LABEL: @test_vaddl_high_u32( +// CIR-LABEL: @vaddl_high_u32( +uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) { + // CIR: cir.add {{.*}} : !cir.vector<2 x !u64i> + + // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]]) + // LLVM: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> + // LLVM-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I12_I]] to <8 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> + // LLVM-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> + // LLVM-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> + // LLVM-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> + // LLVM-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> + // LLVM-NEXT: [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64> + // LLVM-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP5]] + // LLVM-NEXT: ret <2 x i64> [[ADD_I]] + return 
vaddl_high_u32(a, b); +} + +// LLVM-LABEL: @test_vaddw_s8( +// CIR-LABEL: @vaddw_s8( +int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) { + // CIR: cir.add {{.*}} : !cir.vector<8 x !s16i> + + // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <8 x i8> {{.*}} [[B:%.*]]) + // LLVM: [[VMOVL_I_I:%.*]] = sext <8 x i8> [[B]] to <8 x i16> + // LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[VMOVL_I_I]] + // LLVM-NEXT: ret <8 x i16> [[ADD_I]] + return vaddw_s8(a, b); +} + +// LLVM-LABEL: @test_vaddw_s16( +// CIR-LABEL: @vaddw_s16( +int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) { + // CIR: cir.add {{.*}} : !cir.vector<4 x !s32i> + + // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <4 x i16> {{.*}} [[B:%.*]]) + // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> + // LLVM-NEXT: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> + // LLVM-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMOVL_I_I]] + // LLVM-NEXT: ret <4 x i32> [[ADD_I]] + return vaddw_s16(a, b); +} + +// LLVM-LABEL: @test_vaddw_s32( +// CIR-LABEL: @vaddw_s32( +int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) { + // CIR: cir.add {{.*}} : !cir.vector<2 x !s64i> + + // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]], <2 x i32> {{.*}} [[B:%.*]]) + // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> + // LLVM-NEXT: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> + // LLVM-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMOVL_I_I]] + // LLVM-NEXT: ret <2 x i64> [[ADD_I]] + return vaddw_s32(a, b); +} + +// LLVM-LABEL: @test_vaddw_u8( +// CIR-LABEL: @vaddw_u8( +uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) { + // CIR: cir.add {{.*}} : !cir.vector<8 x !u16i> + + // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <8 x i8> {{.*}} [[B:%.*]]) + // LLVM: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[B]] to <8 x i16> + // LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[VMOVL_I_I]] + // 
LLVM-NEXT: ret <8 x i16> [[ADD_I]] + return vaddw_u8(a, b); +} + +// LLVM-LABEL: @test_vaddw_u16( +// CIR-LABEL: @vaddw_u16( +uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) { + // CIR: cir.add {{.*}} : !cir.vector<4 x !u32i> + + // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <4 x i16> {{.*}} [[B:%.*]]) + // LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> + // LLVM-NEXT: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> + // LLVM-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[VMOVL_I_I]] + // LLVM-NEXT: ret <4 x i32> [[ADD_I]] + return vaddw_u16(a, b); +} + +// LLVM-LABEL: @test_vaddw_u32( +// CIR-LABEL: @vaddw_u32( +uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) { + // CIR: cir.add {{.*}} : !cir.vector<2 x !u64i> + + // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]], <2 x i32> {{.*}} [[B:%.*]]) + // LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> + // LLVM-NEXT: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> + // LLVM-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[VMOVL_I_I]] + // LLVM-NEXT: ret <2 x i64> [[ADD_I]] + return vaddw_u32(a, b); +} + +// LLVM-LABEL: @test_vaddw_high_s8( +// CIR-LABEL: @vaddw_high_s8( +int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) { + // CIR: cir.add {{.*}} : !cir.vector<8 x !s16i> + + // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) + // LLVM: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> + // LLVM-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> + // LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[TMP0]] + // LLVM-NEXT: ret <8 x i16> [[ADD_I]] + return vaddw_high_s8(a, b); +} + +// LLVM-LABEL: @test_vaddw_high_s16( +// CIR-LABEL: @vaddw_high_s16( +int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) { + // CIR: cir.add {{.*}} : !cir.vector<4 x !s32i> + + // LLVM-SAME: <4 x i32> 
{{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]]) + // LLVM: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> + // LLVM-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> + // LLVM-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> + // LLVM-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[TMP2]] + // LLVM-NEXT: ret <4 x i32> [[ADD_I]] + return vaddw_high_s16(a, b); +} + +// LLVM-LABEL: @test_vaddw_high_s32( +// CIR-LABEL: @vaddw_high_s32( +int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) { + // CIR: cir.add {{.*}} : !cir.vector<2 x !s64i> + + // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]]) + // LLVM: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> + // LLVM-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> + // LLVM-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> + // LLVM-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[TMP2]] + // LLVM-NEXT: ret <2 x i64> [[ADD_I]] + return vaddw_high_s32(a, b); +} + +// LLVM-LABEL: @test_vaddw_high_u8( +// CIR-LABEL: @vaddw_high_u8( +uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) { + // CIR: cir.add {{.*}} : !cir.vector<8 x !u16i> + + // LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) + // LLVM: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> + // LLVM-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> + // LLVM-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A]], [[TMP0]] + // LLVM-NEXT: ret <8 x i16> [[ADD_I]] + return vaddw_high_u8(a, b); +} + +// LLVM-LABEL: @test_vaddw_high_u16( +// CIR-LABEL: @vaddw_high_u16( +uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) { + // CIR: cir.add {{.*}} : !cir.vector<4 x !u32i> + + // LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <8 x i16> {{.*}} 
[[B:%.*]]) + // LLVM: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> + // LLVM-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> + // LLVM-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> + // LLVM-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A]], [[TMP2]] + // LLVM-NEXT: ret <4 x i32> [[ADD_I]] + return vaddw_high_u16(a, b); +} + +// LLVM-LABEL: @test_vaddw_high_u32( +// CIR-LABEL: @vaddw_high_u32( +uint64x2_t test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) { + // CIR: cir.add {{.*}} : !cir.vector<2 x !u64i> + + // LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]]) + // LLVM: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> + // LLVM-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> + // LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> + // LLVM-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> + // LLVM-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A]], [[TMP2]] + // LLVM-NEXT: ret <2 x i64> [[ADD_I]] + return vaddw_high_u32(a, b); +} diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c index ca3ae478d6546..3f309f7359084 100644 --- a/clang/test/CodeGen/AArch64/neon/intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c @@ -6881,3 +6881,190 @@ float64_t test_vpmaxnmqd_f64(float64x2_t a) { // LLVM-NEXT: ret double [[VPMAXNMQD_F64_I]] return vpmaxnmqd_f64(a); } + +//===----------------------------------------------------------------------===// +// 2.1.3.1.5. 
Vector shift left and widen +// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#vector-shift-left-and-widen +//===----------------------------------------------------------------------===// + +// ALL-LABEL: @test_vshll_n_s8( +int16x8_t test_vshll_n_s8(int8x8_t a) { +// CIR: cir.cast integral %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<8 x !s16i> +// CIR: cir.shift(left, %{{.*}} : !cir.vector<8 x !s16i>, %{{.*}} : !cir.vector<8 x !s16i>) -> !cir.vector<8 x !s16i> + +// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[TMP0:%.*]] = sext <8 x i8> [[A]] to <8 x i16> +// LLVM-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) +// LLVM: ret <8 x i16> [[VSHLL_N]] + return vshll_n_s8(a, 3); +} + +// ALL-LABEL: @test_vshll_n_s16( +int32x4_t test_vshll_n_s16(int16x4_t a) { +// CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i> +// CIR: cir.cast integral %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !s32i> +// CIR: cir.shift(left, %{{.*}} : !cir.vector<4 x !s32i>, %{{.*}} : !cir.vector<4 x !s32i>) -> !cir.vector<4 x !s32i> + +// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +// LLVM-NEXT: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) +// LLVM: ret <4 x i32> [[VSHLL_N]] + return vshll_n_s16(a, 9); +} + +// ALL-LABEL: @test_vshll_n_s32( +int64x2_t test_vshll_n_s32(int32x2_t a) { +// CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i> +// CIR: cir.cast integral %{{.*}} : !cir.vector<2 x !s32i> -> !cir.vector<2 x !s64i> +// CIR: cir.shift(left, %{{.*}} : !cir.vector<2 x !s64i>, %{{.*}} : !cir.vector<2 x !s64i>) -> !cir.vector<2 x !s64i> + +// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> +// LLVM-NEXT: 
[[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +// LLVM-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) +// LLVM: ret <2 x i64> [[VSHLL_N]] + return vshll_n_s32(a, 19); +} + +// ALL-LABEL: @test_vshll_n_u8( +uint16x8_t test_vshll_n_u8(uint8x8_t a) { +// CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<8 x !u8i> +// CIR: cir.cast integral %{{.*}} : !cir.vector<8 x !u8i> -> !cir.vector<8 x !u16i> +// CIR: cir.shift(left, %{{.*}} : !cir.vector<8 x !u16i>, %{{.*}} : !cir.vector<8 x !u16i>) -> !cir.vector<8 x !u16i> + +// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[TMP0:%.*]] = zext <8 x i8> [[A]] to <8 x i16> +// LLVM-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) +// LLVM: ret <8 x i16> [[VSHLL_N]] + return vshll_n_u8(a, 3); +} + +// ALL-LABEL: @test_vshll_n_u16( +uint32x4_t test_vshll_n_u16(uint16x4_t a) { +// CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !u16i> +// CIR: cir.cast integral %{{.*}} : !cir.vector<4 x !u16i> -> !cir.vector<4 x !u32i> +// CIR: cir.shift(left, %{{.*}} : !cir.vector<4 x !u32i>, %{{.*}} : !cir.vector<4 x !u32i>) -> !cir.vector<4 x !u32i> + +// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +// LLVM-NEXT: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) +// LLVM: ret <4 x i32> [[VSHLL_N]] + return vshll_n_u16(a, 9); +} + +// ALL-LABEL: @test_vshll_n_u32( +uint64x2_t test_vshll_n_u32(uint32x2_t a) { +// CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !u32i> +// CIR: cir.cast integral %{{.*}} : !cir.vector<2 x !u32i> -> !cir.vector<2 x !u64i> +// CIR: cir.shift(left, %{{.*}} : !cir.vector<2 x !u64i>, %{{.*}} : !cir.vector<2 x !u64i>) -> !cir.vector<2 x 
!u64i> + +// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +// LLVM-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) +// LLVM: ret <2 x i64> [[VSHLL_N]] + return vshll_n_u32(a, 19); +} + +// ALL-LABEL: @test_vshll_high_n_s8( +int16x8_t test_vshll_high_n_s8(int8x16_t a) { +// CIR: cir.call @vget_high_s8 +// CIR: cir.cast integral %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<8 x !s16i> +// CIR: cir.shift(left, %{{.*}} : !cir.vector<8 x !s16i>, %{{.*}} : !cir.vector<8 x !s16i>) -> !cir.vector<8 x !s16i> + +// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> +// LLVM-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16> +// LLVM-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) +// LLVM: ret <8 x i16> [[VSHLL_N]] + return vshll_high_n_s8(a, 3); +} + +// ALL-LABEL: @test_vshll_high_n_s16( +int32x4_t test_vshll_high_n_s16(int16x8_t a) { +// CIR: cir.call @vget_high_s16 +// CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i> +// CIR: cir.cast integral %{{.*}} : !cir.vector<4 x !s16i> -> !cir.vector<4 x !s32i> +// CIR: cir.shift(left, %{{.*}} : !cir.vector<4 x !s32i>, %{{.*}} : !cir.vector<4 x !s32i>) -> !cir.vector<4 x !s32i> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> +// LLVM-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +// LLVM-NEXT: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) +// LLVM: ret <4 x i32> [[VSHLL_N]] + return vshll_high_n_s16(a, 9); +} + +// ALL-LABEL: 
@test_vshll_high_n_s32( +int64x2_t test_vshll_high_n_s32(int32x4_t a) { +// CIR: cir.call @vget_high_s32 +// CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i> +// CIR: cir.cast integral %{{.*}} : !cir.vector<2 x !s32i> -> !cir.vector<2 x !s64i> +// CIR: cir.shift(left, %{{.*}} : !cir.vector<2 x !s64i>, %{{.*}} : !cir.vector<2 x !s64i>) -> !cir.vector<2 x !s64i> + +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> +// LLVM-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +// LLVM-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) +// LLVM: ret <2 x i64> [[VSHLL_N]] + return vshll_high_n_s32(a, 19); +} + +// ALL-LABEL: @test_vshll_high_n_u8( +uint16x8_t test_vshll_high_n_u8(uint8x16_t a) { +// CIR: cir.call @vget_high_u8 +// CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<8 x !u8i> +// CIR: cir.cast integral %{{.*}} : !cir.vector<8 x !u8i> -> !cir.vector<8 x !u16i> +// CIR: cir.shift(left, %{{.*}} : !cir.vector<8 x !u16i>, %{{.*}} : !cir.vector<8 x !u16i>) -> !cir.vector<8 x !u16i> + +// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> +// LLVM-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16> +// LLVM-NEXT: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) +// LLVM: ret <8 x i16> [[VSHLL_N]] + return vshll_high_n_u8(a, 3); +} + +// ALL-LABEL: @test_vshll_high_n_u16( +uint32x4_t test_vshll_high_n_u16(uint16x8_t a) { +// CIR: cir.call @vget_high_u16 +// CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !u16i> +// CIR: cir.cast integral %{{.*}} : !cir.vector<4 x !u16i> -> !cir.vector<4 x !u32i> +// CIR: cir.shift(left, %{{.*}} : !cir.vector<4 x 
!u32i>, %{{.*}} : !cir.vector<4 x !u32i>) -> !cir.vector<4 x !u32i> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> +// LLVM-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +// LLVM-NEXT: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) +// LLVM: ret <4 x i32> [[VSHLL_N]] + return vshll_high_n_u16(a, 9); +} + +// ALL-LABEL: @test_vshll_high_n_u32( +uint64x2_t test_vshll_high_n_u32(uint32x4_t a) { +// CIR: cir.call @vget_high_u32 +// CIR: cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !u32i> +// CIR: cir.cast integral %{{.*}} : !cir.vector<2 x !u32i> -> !cir.vector<2 x !u64i> +// CIR: cir.shift(left, %{{.*}} : !cir.vector<2 x !u64i>, %{{.*}} : !cir.vector<2 x !u64i>) -> !cir.vector<2 x !u64i> + +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]]) {{.*}} { +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> +// LLVM-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> +// LLVM-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +// LLVM-NEXT: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) +// LLVM: ret <2 x i64> [[VSHLL_N]] + return vshll_high_n_u32(a, 19); +} diff --git a/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c index 0e5b76e7d024d..695bba284597d 100644 --- a/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c +++ b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.c @@ -150,6 +150,15 @@ struct st_i32x4x9 { __attribute__((vector_size(16))) int i32_9; }; +struct st_bf16x8 { + __attribute__((vector_size(16))) __bf16 bf16; +}; + +struct st_bf16x8x2 { + 
__attribute__((vector_size(16))) __bf16 bf16_1; + __attribute__((vector_size(16))) __bf16 bf16_2; +}; + typedef int __attribute__((vector_size(256))) int32x64_t; // CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_too_large(ptr noundef dead_on_return %0) @@ -207,6 +216,20 @@ void __attribute__((riscv_vls_cc)) test_st_i32x4x9(struct st_i32x4x9 arg) {} // CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_i32x4x9_256(ptr noundef dead_on_return %arg) void __attribute__((riscv_vls_cc(256))) test_st_i32x4x9_256(struct st_i32x4x9 arg) {} +// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_bf16x8( %arg.target_coerce) +// CHECK-LLVM-ZVFBFA: define dso_local riscv_vls_cc(128) void @test_st_bf16x8( %arg.target_coerce) +void __attribute__((riscv_vls_cc)) test_st_bf16x8(struct st_bf16x8 arg) {} +// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_bf16x8_256( %arg.target_coerce) +// CHECK-LLVM-ZVFBFA: define dso_local riscv_vls_cc(256) void @test_st_bf16x8_256( %arg.target_coerce) +void __attribute__((riscv_vls_cc(256))) test_st_bf16x8_256(struct st_bf16x8 arg) {} + +// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @test_st_bf16x8x2(target("riscv.vector.tuple", , 2) %arg.target_coerce) +// CHECK-LLVM-ZVFBFA: define dso_local riscv_vls_cc(128) void @test_st_bf16x8x2(target("riscv.vector.tuple", , 2) %arg.target_coerce) +void __attribute__((riscv_vls_cc)) test_st_bf16x8x2(struct st_bf16x8x2 arg) {} +// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @test_st_bf16x8x2_256(target("riscv.vector.tuple", , 2) %arg.target_coerce) +// CHECK-LLVM-ZVFBFA: define dso_local riscv_vls_cc(256) void @test_st_bf16x8x2_256(target("riscv.vector.tuple", , 2) %arg.target_coerce) +void __attribute__((riscv_vls_cc(256))) test_st_bf16x8x2_256(struct st_bf16x8x2 arg) {} + // CHECK-LLVM-LABEL: define dso_local riscv_vls_cc(128) target("riscv.vector.tuple", , 4) @test_function_prolog_epilog(target("riscv.vector.tuple", , 4) %arg.target_coerce) #0 { // 
CHECK-LLVM-NEXT: entry: // CHECK-LLVM-NEXT: %retval = alloca %struct.st_i32x4_arr4, align 16 diff --git a/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.cpp b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.cpp index 96a4c9741f738..da94574827123 100644 --- a/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.cpp +++ b/clang/test/CodeGen/RISCV/riscv-vector-callingconv-llvm-ir.cpp @@ -124,6 +124,15 @@ struct st_i32x4x9 { __attribute__((vector_size(16))) int i32_9; }; +struct st_bf16x8 { + __attribute__((vector_size(16))) __bf16 bf16; +}; + +struct st_bf16x8x2 { + __attribute__((vector_size(16))) __bf16 bf16_1; + __attribute__((vector_size(16))) __bf16 bf16_2; +}; + typedef int __attribute__((vector_size(256))) int32x64_t; // CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z14test_too_largeDv64_i(ptr noundef dead_on_return %0) @@ -180,3 +189,17 @@ typedef int __attribute__((vector_size(256))) int32x64_t; [[riscv::vls_cc]] void test_st_i32x4x9(struct st_i32x4x9 arg) {} // CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z19test_st_i32x4x9_25610st_i32x4x9(ptr noundef dead_on_return %arg) [[riscv::vls_cc(256)]] void test_st_i32x4x9_256(struct st_i32x4x9 arg) {} + +// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z14test_st_bf16x89st_bf16x8( %arg.target_coerce) +// CHECK-LLVM-ZVFBFA: define dso_local riscv_vls_cc(128) void @_Z14test_st_bf16x89st_bf16x8( %arg.target_coerce) +[[riscv::vls_cc]] void test_st_bf16x8(struct st_bf16x8 arg) {} +// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z18test_st_bf16x8_2569st_bf16x8( %arg.target_coerce) +// CHECK-LLVM-ZVFBFA: define dso_local riscv_vls_cc(256) void @_Z18test_st_bf16x8_2569st_bf16x8( %arg.target_coerce) +[[riscv::vls_cc(256)]] void test_st_bf16x8_256(struct st_bf16x8 arg) {} + +// CHECK-LLVM: define dso_local riscv_vls_cc(128) void @_Z16test_st_bf16x8x211st_bf16x8x2(target("riscv.vector.tuple", , 2) %arg.target_coerce) +// CHECK-LLVM-ZVFBFA: define dso_local 
riscv_vls_cc(128) void @_Z16test_st_bf16x8x211st_bf16x8x2(target("riscv.vector.tuple", , 2) %arg.target_coerce) +[[riscv::vls_cc]] void test_st_bf16x8x2(struct st_bf16x8x2 arg) {} +// CHECK-LLVM: define dso_local riscv_vls_cc(256) void @_Z20test_st_bf16x8x2_25611st_bf16x8x2(target("riscv.vector.tuple", , 2) %arg.target_coerce) +// CHECK-LLVM-ZVFBFA: define dso_local riscv_vls_cc(256) void @_Z20test_st_bf16x8x2_25611st_bf16x8x2(target("riscv.vector.tuple", , 2) %arg.target_coerce) +[[riscv::vls_cc(256)]] void test_st_bf16x8x2_256(struct st_bf16x8x2 arg) {} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/non-overloaded/sf_mm_e4m3_e4m3.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/non-overloaded/sf_mm_e4m3_e4m3.c index d162f449d9239..540c086c9c1ea 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/non-overloaded/sf_mm_e4m3_e4m3.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/non-overloaded/sf_mm_e4m3_e4m3.c @@ -1,18 +1,19 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +xsfmm32a8f \ -// RUN: -disable-O0-optnone -emit-llvm %s -o - | \ -// RUN: opt -S -passes=mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: -target-feature +experimental-zvfofp8min -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s #include -// CHECK-RV64-LABEL: define dso_local void @test_sf_mm_e4m3_e4m3_w4_u8m8_u8m8( +// CHECK-RV64-LABEL: define dso_local void @test_sf_mm_e4m3_e4m3_w4_f8e4m3m8_f8e4m3m8( // CHECK-RV64-SAME: [[vs2:%.*]], [[vs1:%.*]], i64 noundef [[TM:%.*]], i64 noundef [[TN:%.*]], i64 noundef [[TK:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: call void @llvm.riscv.sf.mm.e4m3.e4m3.i64.nxv64i8(i64 0, [[vs2]], [[vs1]], i64 [[TM]], i64 [[TN]], i64 [[TK]], i64 4) // 
CHECK-RV64-NEXT: ret void // -void test_sf_mm_e4m3_e4m3_w4_u8m8_u8m8(vuint8m8_t vs2, vuint8m8_t vs1, size_t tm, size_t tn, size_t tk) { - return __riscv_sf_mm_e4m3_e4m3_w4_u8m8_u8m8(0, vs2, vs1, tm, tn, tk); +void test_sf_mm_e4m3_e4m3_w4_f8e4m3m8_f8e4m3m8(vfloat8e4m3m8_t vs2, vfloat8e4m3m8_t vs1, size_t tm, size_t tn, size_t tk) { + __riscv_sf_mm_e4m3_e4m3_w4_f8e4m3m8_f8e4m3m8(0, vs2, vs1, tm, tn, tk); } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/non-overloaded/sf_mm_e4m3_e5m2.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/non-overloaded/sf_mm_e4m3_e5m2.c index 342af1eca65bf..0c7a18ae5c21d 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/non-overloaded/sf_mm_e4m3_e5m2.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/non-overloaded/sf_mm_e4m3_e5m2.c @@ -1,18 +1,19 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +xsfmm32a8f \ -// RUN: -disable-O0-optnone -emit-llvm %s -o - | \ -// RUN: opt -S -passes=mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: -target-feature +experimental-zvfofp8min -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s #include -// CHECK-RV64-LABEL: define dso_local void @test_sf_mm_e4m3_e5m2_w4_u8m8_u8m8( +// CHECK-RV64-LABEL: define dso_local void @test_sf_mm_e4m3_e5m2_w4_f8e4m3m8_f8e5m2m8( // CHECK-RV64-SAME: [[vs2:%.*]], [[vs1:%.*]], i64 noundef [[TM:%.*]], i64 noundef [[TN:%.*]], i64 noundef [[TK:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: call void @llvm.riscv.sf.mm.e4m3.e5m2.i64.nxv64i8(i64 0, [[vs2]], [[vs1]], i64 [[TM]], i64 [[TN]], i64 [[TK]], i64 4) // CHECK-RV64-NEXT: ret void // -void test_sf_mm_e4m3_e5m2_w4_u8m8_u8m8(vuint8m8_t vs2, vuint8m8_t vs1, size_t tm, size_t tn, size_t tk) { - return 
__riscv_sf_mm_e4m3_e5m2_w4_u8m8_u8m8(0, vs2, vs1, tm, tn, tk); +void test_sf_mm_e4m3_e5m2_w4_f8e4m3m8_f8e5m2m8(vfloat8e4m3m8_t vs2, vfloat8e5m2m8_t vs1, size_t tm, size_t tn, size_t tk) { + __riscv_sf_mm_e4m3_e5m2_w4_f8e4m3m8_f8e5m2m8(0, vs2, vs1, tm, tn, tk); } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/non-overloaded/sf_mm_e5m2_e4m3.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/non-overloaded/sf_mm_e5m2_e4m3.c index b8f58fe230b76..56118bad71b1c 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/non-overloaded/sf_mm_e5m2_e4m3.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/non-overloaded/sf_mm_e5m2_e4m3.c @@ -1,18 +1,19 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +xsfmm32a8f \ -// RUN: -disable-O0-optnone -emit-llvm %s -o - | \ -// RUN: opt -S -passes=mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: -target-feature +experimental-zvfofp8min -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s #include -// CHECK-RV64-LABEL: define dso_local void @test_sf_mm_e5m2_e4m3_w4_u8m8_u8m8( +// CHECK-RV64-LABEL: define dso_local void @test_sf_mm_e5m2_e4m3_w4_f8e5m2m8_f8e4m3m8( // CHECK-RV64-SAME: [[vs2:%.*]], [[vs1:%.*]], i64 noundef [[TM:%.*]], i64 noundef [[TN:%.*]], i64 noundef [[TK:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: call void @llvm.riscv.sf.mm.e5m2.e4m3.i64.nxv64i8(i64 0, [[vs2]], [[vs1]], i64 [[TM]], i64 [[TN]], i64 [[TK]], i64 4) // CHECK-RV64-NEXT: ret void // -void test_sf_mm_e5m2_e4m3_w4_u8m8_u8m8(vuint8m8_t vs2, vuint8m8_t vs1, size_t tm, size_t tn, size_t tk) { - return __riscv_sf_mm_e5m2_e4m3_w4_u8m8_u8m8(0, vs2, vs1, tm, tn, tk); +void test_sf_mm_e5m2_e4m3_w4_f8e5m2m8_f8e4m3m8(vfloat8e5m2m8_t vs2, vfloat8e4m3m8_t vs1, 
size_t tm, size_t tn, size_t tk) { + __riscv_sf_mm_e5m2_e4m3_w4_f8e5m2m8_f8e4m3m8(0, vs2, vs1, tm, tn, tk); } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/non-overloaded/sf_mm_e5m2_e5m2.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/non-overloaded/sf_mm_e5m2_e5m2.c index 7c2eb3227f004..3785a53818439 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/non-overloaded/sf_mm_e5m2_e5m2.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/non-overloaded/sf_mm_e5m2_e5m2.c @@ -1,18 +1,19 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +xsfmm32a8f \ -// RUN: -disable-O0-optnone -emit-llvm %s -o - | \ -// RUN: opt -S -passes=mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: -target-feature +experimental-zvfofp8min -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s #include -// CHECK-RV64-LABEL: define dso_local void @test_sf_mm_e5m2_e5m2_w4_u8m8_u8m8( +// CHECK-RV64-LABEL: define dso_local void @test_sf_mm_e5m2_e5m2_w4_f8e5m2m8_f8e5m2m8( // CHECK-RV64-SAME: [[vs2:%.*]], [[vs1:%.*]], i64 noundef [[TM:%.*]], i64 noundef [[TN:%.*]], i64 noundef [[TK:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: call void @llvm.riscv.sf.mm.e5m2.e5m2.i64.nxv64i8(i64 0, [[vs2]], [[vs1]], i64 [[TM]], i64 [[TN]], i64 [[TK]], i64 4) // CHECK-RV64-NEXT: ret void // -void test_sf_mm_e5m2_e5m2_w4_u8m8_u8m8(vuint8m8_t vs2, vuint8m8_t vs1, size_t tm, size_t tn, size_t tk) { - return __riscv_sf_mm_e5m2_e5m2_w4_u8m8_u8m8(0, vs2, vs1, tm, tn, tk); +void test_sf_mm_e5m2_e5m2_w4_f8e5m2m8_f8e5m2m8(vfloat8e5m2m8_t vs2, vfloat8e5m2m8_t vs1, size_t tm, size_t tn, size_t tk) { + __riscv_sf_mm_e5m2_e5m2_w4_f8e5m2m8_f8e5m2m8(0, vs2, vs1, tm, tn, tk); } diff --git 
a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_e4m3_e4m3.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_e4m3_e4m3.c index 2f6c4dc324d60..cde4bfdad35ea 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_e4m3_e4m3.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_e4m3_e4m3.c @@ -1,18 +1,19 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +xsfmm32a8f \ -// RUN: -disable-O0-optnone -emit-llvm %s -o - | \ -// RUN: opt -S -passes=mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: -target-feature +experimental-zvfofp8min -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s #include -// CHECK-RV64-LABEL: define dso_local void @test_sf_mm_e4m3_e4m3_w4_u8m8_u8m8( +// CHECK-RV64-LABEL: define dso_local void @test_sf_mm_e4m3_e4m3_w4_f8e4m3m8_f8e4m3m8( // CHECK-RV64-SAME: [[vs2:%.*]], [[vs1:%.*]], i64 noundef [[TM:%.*]], i64 noundef [[TN:%.*]], i64 noundef [[TK:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: call void @llvm.riscv.sf.mm.e4m3.e4m3.i64.nxv64i8(i64 0, [[vs2]], [[vs1]], i64 [[TM]], i64 [[TN]], i64 [[TK]], i64 4) // CHECK-RV64-NEXT: ret void // -void test_sf_mm_e4m3_e4m3_w4_u8m8_u8m8(vuint8m8_t vs2, vuint8m8_t vs1, size_t tm, size_t tn, size_t tk) { - return __riscv_sf_mm_e4m3_e4m3(0, vs2, vs1, tm, tn, tk); +void test_sf_mm_e4m3_e4m3_w4_f8e4m3m8_f8e4m3m8(vfloat8e4m3m8_t vs2, vfloat8e4m3m8_t vs1, size_t tm, size_t tn, size_t tk) { + __riscv_sf_mm_w4(0, vs2, vs1, tm, tn, tk); } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_e4m3_e5m2.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_e4m3_e5m2.c index 
40ae780d6c461..0f822a58621d2 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_e4m3_e5m2.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_e4m3_e5m2.c @@ -1,18 +1,19 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +xsfmm32a8f \ -// RUN: -disable-O0-optnone -emit-llvm %s -o - | \ -// RUN: opt -S -passes=mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: -target-feature +experimental-zvfofp8min -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s #include -// CHECK-RV64-LABEL: define dso_local void @test_sf_mm_e4m3_e5m2_w4_u8m8_u8m8( +// CHECK-RV64-LABEL: define dso_local void @test_sf_mm_e4m3_e5m2_w4_f8e4m3m8_f8e5m2m8( // CHECK-RV64-SAME: [[vs2:%.*]], [[vs1:%.*]], i64 noundef [[TM:%.*]], i64 noundef [[TN:%.*]], i64 noundef [[TK:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: call void @llvm.riscv.sf.mm.e4m3.e5m2.i64.nxv64i8(i64 0, [[vs2]], [[vs1]], i64 [[TM]], i64 [[TN]], i64 [[TK]], i64 4) // CHECK-RV64-NEXT: ret void // -void test_sf_mm_e4m3_e5m2_w4_u8m8_u8m8(vuint8m8_t vs2, vuint8m8_t vs1, size_t tm, size_t tn, size_t tk) { - return __riscv_sf_mm_e4m3_e5m2(0, vs2, vs1, tm, tn, tk); +void test_sf_mm_e4m3_e5m2_w4_f8e4m3m8_f8e5m2m8(vfloat8e4m3m8_t vs2, vfloat8e5m2m8_t vs1, size_t tm, size_t tn, size_t tk) { + __riscv_sf_mm_w4(0, vs2, vs1, tm, tn, tk); } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_e5m2_e4m3.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_e5m2_e4m3.c index f4f024cbd0988..0698728b841e7 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_e5m2_e4m3.c +++ 
b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_e5m2_e4m3.c @@ -1,18 +1,19 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +xsfmm32a8f \ -// RUN: -disable-O0-optnone -emit-llvm %s -o - | \ -// RUN: opt -S -passes=mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: -target-feature +experimental-zvfofp8min -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s #include -// CHECK-RV64-LABEL: define dso_local void @test_sf_mm_e5m2_e4m3_w4_u8m8_u8m8( +// CHECK-RV64-LABEL: define dso_local void @test_sf_mm_e5m2_e4m3_w4_f8e5m2m8_f8e4m3m8( // CHECK-RV64-SAME: [[vs2:%.*]], [[vs1:%.*]], i64 noundef [[TM:%.*]], i64 noundef [[TN:%.*]], i64 noundef [[TK:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: call void @llvm.riscv.sf.mm.e5m2.e4m3.i64.nxv64i8(i64 0, [[vs2]], [[vs1]], i64 [[TM]], i64 [[TN]], i64 [[TK]], i64 4) // CHECK-RV64-NEXT: ret void // -void test_sf_mm_e5m2_e4m3_w4_u8m8_u8m8(vuint8m8_t vs2, vuint8m8_t vs1, size_t tm, size_t tn, size_t tk) { - return __riscv_sf_mm_e5m2_e4m3(0, vs2, vs1, tm, tn, tk); +void test_sf_mm_e5m2_e4m3_w4_f8e5m2m8_f8e4m3m8(vfloat8e5m2m8_t vs2, vfloat8e4m3m8_t vs1, size_t tm, size_t tn, size_t tk) { + __riscv_sf_mm_w4(0, vs2, vs1, tm, tn, tk); } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_e5m2_e5m2.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_e5m2_e5m2.c index 01399d5d81d39..ac685927cccf9 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_e5m2_e5m2.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_e5m2_e5m2.c @@ -1,18 +1,19 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 // 
REQUIRES: riscv-registered-target // RUN: %clang_cc1 -triple riscv64 -target-feature +xsfmm32a8f \ -// RUN: -disable-O0-optnone -emit-llvm %s -o - | \ -// RUN: opt -S -passes=mem2reg | FileCheck --check-prefix=CHECK-RV64 %s +// RUN: -target-feature +experimental-zvfofp8min -disable-O0-optnone \ +// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \ +// RUN: FileCheck --check-prefix=CHECK-RV64 %s #include -// CHECK-RV64-LABEL: define dso_local void @test_sf_mm_e5m2_e5m2_w4_u8m8_u8m8( +// CHECK-RV64-LABEL: define dso_local void @test_sf_mm_e5m2_e5m2_w4_f8e5m2m8_f8e5m2m8( // CHECK-RV64-SAME: [[vs2:%.*]], [[vs1:%.*]], i64 noundef [[TM:%.*]], i64 noundef [[TN:%.*]], i64 noundef [[TK:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-RV64-NEXT: entry: // CHECK-RV64-NEXT: call void @llvm.riscv.sf.mm.e5m2.e5m2.i64.nxv64i8(i64 0, [[vs2]], [[vs1]], i64 [[TM]], i64 [[TN]], i64 [[TK]], i64 4) // CHECK-RV64-NEXT: ret void // -void test_sf_mm_e5m2_e5m2_w4_u8m8_u8m8(vuint8m8_t vs2, vuint8m8_t vs1, size_t tm, size_t tn, size_t tk) { - return __riscv_sf_mm_e5m2_e5m2(0, vs2, vs1, tm, tn, tk); +void test_sf_mm_e5m2_e5m2_w4_f8e5m2m8_f8e5m2m8(vfloat8e5m2m8_t vs2, vfloat8e5m2m8_t vs1, size_t tm, size_t tn, size_t tk) { + __riscv_sf_mm_w4(0, vs2, vs1, tm, tn, tk); } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_f_f.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_f_f.c index 2371e4ec24fc9..0f3a01c95f886 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_f_f.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_f_f.c @@ -15,7 +15,7 @@ // CHECK-RV64-NEXT: ret void // void test_sf_mm_f_f_w2_f16m8(vfloat16m8_t vs2, vfloat16m8_t vs1, size_t tm, size_t tn, size_t tk) { - return __riscv_sf_mm_f_f_w2(0, vs2, vs1, tm, tn, tk); + return __riscv_sf_mm_w2(0, vs2, vs1, tm, tn, tk); } // CHECK-RV64-LABEL: define dso_local void @test_sf_mm_f_f_w1_f32m8( @@ -25,7 +25,7 @@ void 
test_sf_mm_f_f_w2_f16m8(vfloat16m8_t vs2, vfloat16m8_t vs1, size_t tm, size // CHECK-RV64-NEXT: ret void // void test_sf_mm_f_f_w1_f32m8(vfloat32m8_t vs2, vfloat32m8_t vs1, size_t tm, size_t tn, size_t tk) { - return __riscv_sf_mm_f_f_w1(0, vs2, vs1, tm, tn, tk); + return __riscv_sf_mm_w1(0, vs2, vs1, tm, tn, tk); } // CHECK-RV64-LABEL: define dso_local void @test_sf_mm_f_f_w1_f64m8( @@ -35,6 +35,6 @@ void test_sf_mm_f_f_w1_f32m8(vfloat32m8_t vs2, vfloat32m8_t vs1, size_t tm, size // CHECK-RV64-NEXT: ret void // void test_sf_mm_f_f_w1_f64m8(vfloat64m8_t vs2, vfloat64m8_t vs1, size_t tm, size_t tn, size_t tk) { - return __riscv_sf_mm_f_f_w1(0, vs2, vs1, tm, tn, tk); + return __riscv_sf_mm_w1(0, vs2, vs1, tm, tn, tk); } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_s_s.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_s_s.c index 2d34f7d05060a..68cb007a5f5b8 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_s_s.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_s_s.c @@ -13,6 +13,6 @@ // CHECK-RV64-NEXT: ret void // void test_sf_mm_s_s_w4_i8m8_i8m8(vint8m8_t vs2, vint8m8_t vs1, size_t tm, size_t tn, size_t tk) { - return __riscv_sf_mm_s_s(0, vs2, vs1, tm, tn, tk); + return __riscv_sf_mm_w4(0, vs2, vs1, tm, tn, tk); } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_s_u.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_s_u.c index 1f9bc33412871..4ed67810ec03f 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_s_u.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_s_u.c @@ -13,6 +13,6 @@ // CHECK-RV64-NEXT: ret void // void test_sf_mm_s_u_w4_i8m8_u8m8(vint8m8_t vs2, vuint8m8_t vs1, size_t tm, size_t tn, size_t tk) { - return __riscv_sf_mm_s_u(0, vs2, vs1, tm, tn, tk); + return __riscv_sf_mm_w4(0, 
vs2, vs1, tm, tn, tk); } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_u_s.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_u_s.c index 2c6d538821fb2..601d0ede47c28 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_u_s.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_u_s.c @@ -13,6 +13,6 @@ // CHECK-RV64-NEXT: ret void // void test_sf_mm_u_s_w4_u8m8_i8m8(vuint8m8_t vs2, vint8m8_t vs1, size_t tm, size_t tn, size_t tk) { - return __riscv_sf_mm_u_s(0, vs2, vs1, tm, tn, tk); + return __riscv_sf_mm_w4(0, vs2, vs1, tm, tn, tk); } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_u_u.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_u_u.c index bb1eaf19b04da..ad497e39f0a6f 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_u_u.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-sifive/non-policy/overloaded/sf_mm_u_u.c @@ -13,6 +13,6 @@ // CHECK-RV64-NEXT: ret void // void test_sf_mm_u_u_w4_u8m8_u8m8(vuint8m8_t vs2, vuint8m8_t vs1, size_t tm, size_t tn, size_t tk) { - return __riscv_sf_mm_u_u(0, vs2, vs1, tm, tn, tk); + return __riscv_sf_mm_w4(0, vs2, vs1, tm, tn, tk); } diff --git a/clang/test/CodeGen/X86/avx512vlvnni-builtins.c b/clang/test/CodeGen/X86/avx512vlvnni-builtins.c index 11dbd717a9f77..a5cd72abea94a 100644 --- a/clang/test/CodeGen/X86/avx512vlvnni-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlvnni-builtins.c @@ -3,7 +3,13 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s 
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -target-feature +avx512vl -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + #include +#include "builtin_test_helpers.h" __m256i test_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_dpbusd_epi32 @@ -11,6 +17,13 @@ __m256i test_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_dpbusd_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_mask_dpbusd_epi32( + (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}, + (__mmask8)0x55, + (__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32}, + (__m256i)(__v32qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32}), + 110, 200, 342, 400, 574, 600, 806, 800)); __m256i test_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_dpbusd_epi32 @@ -18,12 +31,25 @@ __m256i 
test_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_dpbusd_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_maskz_dpbusd_epi32( + (__mmask8)0x0F, + (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}, + (__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32}, + (__m256i)(__v32qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32}), + 110, 226, 342, 458, 0, 0, 0, 0)); __m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusd_epi32 // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusd_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_dpbusd_epi32( + ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m256i)(__v32qu){255,1,8,15, 255,22,6,13, 255,20,4,11, 255,18,2,9, 0,16,23,7, 0,14,21,5, 0,12,19,3, 0,10,17,1}), + ((__m256i)(__v32qi){127,-6,-1,4, 127,9,-5,0, -128,5,-9,-4, -128,1,6,-8, 127,-3,2,7, 127,-7,-2,3, -128,8,-6,-1, -128,4,9,-5})), + -2147451218, -2147451095, 2147451027, 2147450966, -2147483602, 2147483523, 2147483626, -2147483460)); __m256i test_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_dpbusds_epi32 @@ -31,6 +57,13 @@ __m256i test_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_dpbusds_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_mask_dpbusds_epi32( + (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}, + (__mmask8)0xAA, + (__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 
17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32}, + (__m256i)(__v32qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32}), + 100, 226, 300, 458, 500, 690, 700, 922)); __m256i test_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_dpbusds_epi32 @@ -38,12 +71,25 @@ __m256i test_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, _ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_dpbusds_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_maskz_dpbusds_epi32( + (__mmask8)0xFF, + (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}, + (__m256i)(__v32qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32}, + (__m256i)(__v32qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32}), + 110, 226, 342, 458, 574, 690, 806, 922)); __m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusds_epi32 // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusds_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_dpbusds_epi32( + ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m256i)(__v32qu){255,1,8,15, 255,22,6,13, 255,20,4,11, 255,18,2,9, 0,16,23,7, 0,14,21,5, 0,12,19,3, 0,10,17,1}), + ((__m256i)(__v32qi){127,-6,-1,4, 127,9,-5,0, -128,5,-9,-4, -128,1,6,-8, 127,-3,2,7, 127,-7,-2,3, -128,8,-6,-1, -128,4,9,-5})), + 2147483647, -2147451095, 2147451027, -2147483647-1, 2147483647, -2147483647-1, 2147483626, -2147483460)); __m256i test_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_dpwssd_epi32 @@ -51,6 +97,13 
@@ __m256i test_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_dpwssd_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_mask_dpwssd_epi32( + (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}, + (__mmask8)0xF0, + (__m256i)(__v16hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16}, + (__m256i)(__v16hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16}), + 100, 200, 300, 400, 519, 623, 727, 831)); __m256i test_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_dpwssd_epi32 @@ -58,12 +111,25 @@ __m256i test_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_dpwssd_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_maskz_dpwssd_epi32( + (__mmask8)0x0F, + (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}, + (__m256i)(__v16hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16}, + (__m256i)(__v16hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16}), + 103, 207, 311, 415, 0, 0, 0, 0)); __m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpwssd_epi32 // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_dpwssd_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_dpwssd_epi32( + ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m256i)(__v16hi){32767,1, 32767,8, 32767,15, 32767,22, -32768,6, -32768,13, -32768,20, -32768,4}), + ((__m256i)(__v16hi){32767,-6, 32767,-1, -32768,4, -32768,9, 32767,-5, 32767,0, -32768,5, -32768,-9})), + -1073807366, -1073807367, 1073774651, 1073774790, 1073774561, 1073774592, -1073741725, -1073741860)); __m256i 
test_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_dpwssds_epi32 @@ -71,6 +137,13 @@ __m256i test_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_dpwssds_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_mask_dpwssds_epi32( + (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}, + (__mmask8)0xAA, + (__m256i)(__v16hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16}, + (__m256i)(__v16hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16}), + 100, 207, 300, 415, 500, 623, 700, 831)); __m256i test_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_dpwssds_epi32 @@ -78,12 +151,25 @@ __m256i test_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, _ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_dpwssds_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_maskz_dpwssds_epi32( + (__mmask8)0xFF, + (__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}, + (__m256i)(__v16hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16}, + (__m256i)(__v16hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16}), + 103, 207, 311, 415, 519, 623, 727, 831)); __m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpwssds_epi32 // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_dpwssds_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_dpwssds_epi32( + ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m256i)(__v16hi){32767,1, 32767,8, 32767,15, 32767,22, -32768,6, -32768,13, -32768,20, -32768,4}), + ((__m256i)(__v16hi){32767,-6, 32767,-1, 
-32768,4, -32768,9, 32767,-5, 32767,0, -32768,5, -32768,-9})), + 2147483647, -1073807367, 1073774651, -2147483647-1, 1073774561, -2147483647-1, 2147483647, -1073741860)); __m128i test_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_dpbusd_epi32 @@ -91,6 +177,13 @@ __m128i test_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_dpbusd_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_mask_dpbusd_epi32( + (__m128i)(__v4si){1000, 2000, 3000, 4000}, + (__mmask8)0x05, + (__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16}, + (__m128i)(__v16qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16}), + 1010, 2000, 3042, 4000)); __m128i test_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_dpbusd_epi32 @@ -98,12 +191,31 @@ __m128i test_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m12 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_dpbusd_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_maskz_dpbusd_epi32( + (__mmask8)0x03, + (__m128i)(__v4si){1000, 2000, 3000, 4000}, + (__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16}, + (__m128i)(__v16qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16}), + 1010, 2026, 0, 0)); __m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusd_epi32 // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusd_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_dpbusd_epi32( + ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m128i)(__v16qu){255,1,8,15, 255,22,6,13, 0,20,4,11, 0,18,2,9}), + ((__m128i)(__v16qi){127,-6,-1,4, -128,9,-5,0, 127,5,-9,-4, -128,1,6,-8})), + -2147451218, 
2147451176, -2147483629, 2147483606)); +TEST_CONSTEXPR(match_v4si( + _mm_dpbusd_epi32( + ((__m128i)(__v4si){-2147483647-1, 2147483647, -2147483647-1, 2147483647}), + ((__m128i)(__v16qu){255,1,8,15, 255,22,6,13, 0,20,4,11, 0,18,2,9}), + ((__m128i)(__v16qi){127,-6,-1,4, -128,9,-5,0, 127,5,-9,-4, -128,1,6,-8})), + -2147451217, 2147451175, -2147483628, 2147483605)); __m128i test_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_dpbusds_epi32 @@ -111,6 +223,13 @@ __m128i test_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m12 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_dpbusds_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_mask_dpbusds_epi32( + (__m128i)(__v4si){1000, 2000, 3000, 4000}, + (__mmask8)0x0A, + (__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16}, + (__m128i)(__v16qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16}), + 1000, 2026, 3000, 4058)); __m128i test_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_dpbusds_epi32 @@ -118,12 +237,31 @@ __m128i test_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m1 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_dpbusds_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_maskz_dpbusds_epi32( + (__mmask8)0x0F, + (__m128i)(__v4si){1000, 2000, 3000, 4000}, + (__m128i)(__v16qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16}, + (__m128i)(__v16qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16}), + 1010, 2026, 3042, 4058)); __m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusds_epi32 // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusds_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_dpbusds_epi32( + 
((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m128i)(__v16qu){255,1,8,15, 255,22,6,13, 0,20,4,11, 0,18,2,9}), + ((__m128i)(__v16qi){127,-6,-1,4, -128,9,-5,0, 127,5,-9,-4, -128,1,6,-8})), + 2147483647, -2147483647-1, 2147483647, -2147483647-1)); +TEST_CONSTEXPR(match_v4si( + _mm_dpbusds_epi32( + ((__m128i)(__v4si){-2147483647-1, 2147483647, -2147483647-1, 2147483647}), + ((__m128i)(__v16qu){255,1,8,15, 255,22,6,13, 0,20,4,11, 0,18,2,9}), + ((__m128i)(__v16qi){127,-6,-1,4, -128,9,-5,0, 127,5,-9,-4, -128,1,6,-8})), + -2147451217, 2147451175, -2147483628, 2147483605)); __m128i test_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_dpwssd_epi32 @@ -131,6 +269,13 @@ __m128i test_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_dpwssd_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_mask_dpwssd_epi32( + (__m128i)(__v4si){1000, 2000, 3000, 4000}, + (__mmask8)0x05, + (__m128i)(__v8hi){1,2, 3,4, 5,6, 7,8}, + (__m128i)(__v8hi){-1,2, -3,4, -5,6, -7,8}), + 1003, 2000, 3011, 4000)); __m128i test_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_dpwssd_epi32 @@ -138,12 +283,31 @@ __m128i test_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m12 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_dpwssd_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_maskz_dpwssd_epi32( + (__mmask8)0x03, + (__m128i)(__v4si){1000, 2000, 3000, 4000}, + (__m128i)(__v8hi){1,2, 3,4, 5,6, 7,8}, + (__m128i)(__v8hi){-1,2, -3,4, -5,6, -7,8}), + 1003, 2007, 0, 0)); __m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpwssd_epi32 // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> 
%{{.*}}) return _mm_dpwssd_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_dpwssd_epi32( + ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m128i)(__v8hi){32767,1, 32767,8, -32768,15, -32768,22}), + ((__m128i)(__v8hi){32767,-6, -32768,-1, 32767,4, -32768,9})), + -1073807366, 1073774584, 1073774651, -1073741626)); +TEST_CONSTEXPR(match_v4si( + _mm_dpwssd_epi32( + ((__m128i)(__v4si){-2147483647-1, 2147483647, -2147483647-1, 2147483647}), + ((__m128i)(__v8hi){32767,1, 32767,8, -32768,15, -32768,22}), + ((__m128i)(__v8hi){32767,-6, -32768,-1, 32767,4, -32768,9})), + -1073807365, 1073774583, 1073774652, -1073741627)); __m128i test_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_dpwssds_epi32 @@ -151,6 +315,13 @@ __m128i test_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m12 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_dpwssds_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_mask_dpwssds_epi32( + (__m128i)(__v4si){1000, 2000, 3000, 4000}, + (__mmask8)0x0A, + (__m128i)(__v8hi){1,2, 3,4, 5,6, 7,8}, + (__m128i)(__v8hi){-1,2, -3,4, -5,6, -7,8}), + 1000, 2007, 3000, 4015)); __m128i test_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_dpwssds_epi32 @@ -158,10 +329,28 @@ __m128i test_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m1 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_dpwssds_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_maskz_dpwssds_epi32( + (__mmask8)0x0F, + (__m128i)(__v4si){1000, 2000, 3000, 4000}, + (__m128i)(__v8hi){1,2, 3,4, 5,6, 7,8}, + (__m128i)(__v8hi){-1,2, -3,4, -5,6, -7,8}), + 1003, 2007, 3011, 4015)); __m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpwssds_epi32 // CHECK: call <4 x i32> 
@llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_dpwssds_epi32(__S, __A, __B); } - +TEST_CONSTEXPR(match_v4si( + _mm_dpwssds_epi32( + ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m128i)(__v8hi){32767,1, 32767,8, -32768,15, -32768,22}), + ((__m128i)(__v8hi){32767,-6, -32768,-1, 32767,4, -32768,9})), + 2147483647, -2147483647-1, 1073774651, -1073741626)); +TEST_CONSTEXPR(match_v4si( + _mm_dpwssds_epi32( + ((__m128i)(__v4si){-2147483647-1, 2147483647, -2147483647-1, 2147483647}), + ((__m128i)(__v8hi){32767,1, 32767,8, -32768,15, -32768,22}), + ((__m128i)(__v8hi){32767,-6, -32768,-1, 32767,4, -32768,9})), + -1073807365, 1073774583, -2147483647-1, 2147483647)); diff --git a/clang/test/CodeGen/X86/avx512vnni-builtins.c b/clang/test/CodeGen/X86/avx512vnni-builtins.c index 6b8465206eedb..c1a8229e53669 100644 --- a/clang/test/CodeGen/X86/avx512vnni-builtins.c +++ b/clang/test/CodeGen/X86/avx512vnni-builtins.c @@ -3,7 +3,13 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | 
FileCheck %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx512vnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + #include +#include "builtin_test_helpers.h" __m512i test_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_dpbusd_epi32 @@ -11,6 +17,13 @@ __m512i test_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_dpbusd_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v16si( + _mm512_mask_dpbusd_epi32( + (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}, + (__mmask16)0x5555, + (__m512i)(__v64qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 41,42,43,44, 45,46,47,48, 49,50,51,52, 53,54,55,56, 57,58,59,60, 61,62,63,64}, + (__m512i)(__v64qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32, -33,34,-35,36, -37,38,-39,40, -41,42,-43,44, -45,46,-47,48, -49,50,-51,52, -53,54,-55,56, -57,58,-59,60, -61,62,-63,64}), + 110, 200, 342, 400, 574, 600, 806, 800, 1038, 1000, 1270, 1200, 1502, 1400, 1734, 1600)); __m512i test_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_dpbusd_epi32 @@ -18,12 +31,25 @@ __m512i test_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, _ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_dpbusd_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v16si( + _mm512_maskz_dpbusd_epi32( + (__mmask16)0x00FF, + (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}, + (__m512i)(__v64qu){1,2,3,4, 5,6,7,8, 
9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 41,42,43,44, 45,46,47,48, 49,50,51,52, 53,54,55,56, 57,58,59,60, 61,62,63,64}, + (__m512i)(__v64qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32, -33,34,-35,36, -37,38,-39,40, -41,42,-43,44, -45,46,-47,48, -49,50,-51,52, -53,54,-55,56, -57,58,-59,60, -61,62,-63,64}), + 110, 226, 342, 458, 574, 690, 806, 922, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_dpbusd_epi32 // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}) return _mm512_dpbusd_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v16si( + _mm512_dpbusd_epi32( + ((__m512i)(__v16si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 100, -50, 0, 1000, 7, -1000, 42, 2147483640}), + ((__m512i)(__v64qu){255,1,8,15, 255,22,6,13, 255,20,4,11, 255,18,2,9, 0,16,23,7, 0,14,21,5, 0,12,19,3, 0,10,17,1, 11,18,2,9, 16,23,7,14, 21,5,12,19, 3,10,17,1, 8,15,22,6, 13,20,4,11, 18,2,9,16, 23,7,14,21}), + ((__m512i)(__v64qi){127,-6,-1,4, 127,9,-5,0, -128,5,-9,-4, -128,1,6,-8, 127,-3,2,7, 127,-7,-2,3, -128,8,-6,-1, -128,4,9,-5, 0,5,-9,-4, 1,6,-8,-3, 2,7,-7,-2, 3,8,-6,-1, 4,9,-5,0, 5,-9,-4,1, 6,-8,-3,2, 7,-7,-2,3})), + -2147451218, -2147451095, 2147451027, 2147450966, -2147483602, 2147483523, 2147483626, -2147483460, 136, 6, -45, 986, 64, -1120, 139, -2147483509)); __m512i test_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_dpbusds_epi32 @@ -31,6 +57,13 @@ __m512i test_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, _ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_dpbusds_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v16si( + 
_mm512_mask_dpbusds_epi32( + (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}, + (__mmask16)0x5555, + (__m512i)(__v64qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 41,42,43,44, 45,46,47,48, 49,50,51,52, 53,54,55,56, 57,58,59,60, 61,62,63,64}, + (__m512i)(__v64qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32, -33,34,-35,36, -37,38,-39,40, -41,42,-43,44, -45,46,-47,48, -49,50,-51,52, -53,54,-55,56, -57,58,-59,60, -61,62,-63,64}), + 110, 200, 342, 400, 574, 600, 806, 800, 1038, 1000, 1270, 1200, 1502, 1400, 1734, 1600)); __m512i test_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_dpbusds_epi32 @@ -38,12 +71,25 @@ __m512i test_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_dpbusds_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v16si( + _mm512_maskz_dpbusds_epi32( + (__mmask16)0x00FF, + (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}, + (__m512i)(__v64qu){1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, 25,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 41,42,43,44, 45,46,47,48, 49,50,51,52, 53,54,55,56, 57,58,59,60, 61,62,63,64}, + (__m512i)(__v64qi){-1,2,-3,4, -5,6,-7,8, -9,10,-11,12, -13,14,-15,16, -17,18,-19,20, -21,22,-23,24, -25,26,-27,28, -29,30,-31,32, -33,34,-35,36, -37,38,-39,40, -41,42,-43,44, -45,46,-47,48, -49,50,-51,52, -53,54,-55,56, -57,58,-59,60, -61,62,-63,64}), + 110, 226, 342, 458, 574, 690, 806, 922, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_dpbusds_epi32 // CHECK: call <16 x i32> 
@llvm.x86.avx512.vpdpbusds.512(<16 x i32> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}) return _mm512_dpbusds_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v16si( + _mm512_dpbusds_epi32( + ((__m512i)(__v16si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 100, -50, 0, 1000, 7, -1000, 42, 2147483640}), + ((__m512i)(__v64qu){255,1,8,15, 255,22,6,13, 255,20,4,11, 255,18,2,9, 0,16,23,7, 0,14,21,5, 0,12,19,3, 0,10,17,1, 11,18,2,9, 16,23,7,14, 21,5,12,19, 3,10,17,1, 8,15,22,6, 13,20,4,11, 18,2,9,16, 23,7,14,21}), + ((__m512i)(__v64qi){127,-6,-1,4, 127,9,-5,0, -128,5,-9,-4, -128,1,6,-8, 127,-3,2,7, 127,-7,-2,3, -128,8,-6,-1, -128,4,9,-5, 0,5,-9,-4, 1,6,-8,-3, 2,7,-7,-2, 3,8,-6,-1, 4,9,-5,0, 5,-9,-4,1, 6,-8,-3,2, 7,-7,-2,3})), + 2147483647, -2147451095, 2147451027, -2147483647-1, 2147483647, -2147483647-1, 2147483626, -2147483460, 136, 6, -45, 986, 64, -1120, 139, 2147483647)); __m512i test_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_dpwssd_epi32 @@ -51,6 +97,13 @@ __m512i test_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_dpwssd_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v16si( + _mm512_mask_dpwssd_epi32( + (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}, + (__mmask16)0xFF00, + (__m512i)(__v32hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16, 17,18, 19,20, 21,22, 23,24, 25,26, 27,28, 29,30, 31,32}, + (__m512i)(__v32hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16, -17,18, -19,20, -21,22, -23,24, -25,26, -27,28, -29,30, -31,32}), + 100, 200, 300, 400, 500, 600, 700, 800, 935, 1039, 1143, 1247, 1351, 1455, 1559, 1663)); __m512i test_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_dpwssd_epi32 
@@ -58,12 +111,25 @@ __m512i test_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, _ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_dpwssd_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v16si( + _mm512_maskz_dpwssd_epi32( + (__mmask16)0x000F, + (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}, + (__m512i)(__v32hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16, 17,18, 19,20, 21,22, 23,24, 25,26, 27,28, 29,30, 31,32}, + (__m512i)(__v32hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16, -17,18, -19,20, -21,22, -23,24, -25,26, -27,28, -29,30, -31,32}), + 103, 207, 311, 415, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_dpwssd_epi32 // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}) return _mm512_dpwssd_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v16si( + _mm512_dpwssd_epi32( + ((__m512i)(__v16si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 100, -50, 0, 1000, 7, -1000, 42, 2147483640}), + ((__m512i)(__v32hi){32767,1, 32767,8, 32767,15, 32767,22, -32768,6, -32768,13, -32768,20, -32768,4, 11,18, 2,9, 16,23, 7,14, 21,5, 12,19, 3,10, 17,1}), + ((__m512i)(__v32hi){32767,-6, 32767,-1, -32768,4, -32768,9, 32767,-5, 32767,0, -32768,5, -32768,-9, 0,5, -9,-4, 1,6, -8,-3, 2,7, -7,-2, 3,8, -6,-1})), + -1073807366, -1073807367, 1073774651, 1073774790, 1073774561, 1073774592, -1073741725, -1073741860, 190, -104, 154, 902, 84, -1122, 131, 2147483537)); __m512i test_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_dpwssds_epi32 @@ -71,6 +137,13 @@ __m512i test_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, _ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> 
%{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_dpwssds_epi32(__S, __U, __A, __B); } +TEST_CONSTEXPR(match_v16si( + _mm512_mask_dpwssds_epi32( + (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}, + (__mmask16)0xAAAA, + (__m512i)(__v32hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16, 17,18, 19,20, 21,22, 23,24, 25,26, 27,28, 29,30, 31,32}, + (__m512i)(__v32hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16, -17,18, -19,20, -21,22, -23,24, -25,26, -27,28, -29,30, -31,32}), + 100, 207, 300, 415, 500, 623, 700, 831, 900, 1039, 1100, 1247, 1300, 1455, 1500, 1663)); __m512i test_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_dpwssds_epi32 @@ -78,10 +151,22 @@ __m512i test_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_dpwssds_epi32(__U, __S, __A, __B); } +TEST_CONSTEXPR(match_v16si( + _mm512_maskz_dpwssds_epi32( + (__mmask16)0xFFFF, + (__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}, + (__m512i)(__v32hi){1,2, 3,4, 5,6, 7,8, 9,10, 11,12, 13,14, 15,16, 17,18, 19,20, 21,22, 23,24, 25,26, 27,28, 29,30, 31,32}, + (__m512i)(__v32hi){-1,2, -3,4, -5,6, -7,8, -9,10, -11,12, -13,14, -15,16, -17,18, -19,20, -21,22, -23,24, -25,26, -27,28, -29,30, -31,32}), + 103, 207, 311, 415, 519, 623, 727, 831, 935, 1039, 1143, 1247, 1351, 1455, 1559, 1663)); __m512i test_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_dpwssds_epi32 // CHECK: call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}) return _mm512_dpwssds_epi32(__S, __A, __B); } - +TEST_CONSTEXPR(match_v16si( + _mm512_dpwssds_epi32( + ((__m512i)(__v16si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, 
-2147483647-1, 100, -50, 0, 1000, 7, -1000, 42, 2147483640}), + ((__m512i)(__v32hi){32767,1, 32767,8, 32767,15, 32767,22, -32768,6, -32768,13, -32768,20, -32768,4, 11,18, 2,9, 16,23, 7,14, 21,5, 12,19, 3,10, 17,1}), + ((__m512i)(__v32hi){32767,-6, 32767,-1, -32768,4, -32768,9, 32767,-5, 32767,0, -32768,5, -32768,-9, 0,5, -9,-4, 1,6, -8,-3, 2,7, -7,-2, 3,8, -6,-1})), + 2147483647, -1073807367, 1073774651, -2147483647-1, 1073774561, -2147483647-1, 2147483647, -1073741860, 190, -104, 154, 902, 84, -1122, 131, 2147483537)); diff --git a/clang/test/CodeGen/X86/avxvnni-builtins.c b/clang/test/CodeGen/X86/avxvnni-builtins.c index 6557a26807eb2..a0297fc729bda 100644 --- a/clang/test/CodeGen/X86/avxvnni-builtins.c +++ b/clang/test/CodeGen/X86/avxvnni-builtins.c @@ -3,100 +3,262 @@ // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror | FileCheck %s // RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s +// RUN: %clang_cc1 -x c++ -ffreestanding %s -triple=i386-apple-darwin -target-feature +avxvnni -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s + #include +#include "builtin_test_helpers.h" __m256i test_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusd_epi32 // CHECK: call <8 x i32> 
@llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusd_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_dpbusd_epi32( + ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m256i)(__v32qu){255,1,8,15, 255,22,6,13, 255,20,4,11, 255,18,2,9, 0,16,23,7, 0,14,21,5, 0,12,19,3, 0,10,17,1}), + ((__m256i)(__v32qi){127,-6,-1,4, 127,9,-5,0, -128,5,-9,-4, -128,1,6,-8, 127,-3,2,7, 127,-7,-2,3, -128,8,-6,-1, -128,4,9,-5})), + -2147451218, -2147451095, 2147451027, 2147450966, -2147483602, 2147483523, 2147483626, -2147483460)); __m256i test_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusds_epi32 // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusds_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_dpbusds_epi32( + ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m256i)(__v32qu){255,1,8,15, 255,22,6,13, 255,20,4,11, 255,18,2,9, 0,16,23,7, 0,14,21,5, 0,12,19,3, 0,10,17,1}), + ((__m256i)(__v32qi){127,-6,-1,4, 127,9,-5,0, -128,5,-9,-4, -128,1,6,-8, 127,-3,2,7, 127,-7,-2,3, -128,8,-6,-1, -128,4,9,-5})), + 2147483647, -2147451095, 2147451027, -2147483647-1, 2147483647, -2147483647-1, 2147483626, -2147483460)); __m256i test_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpwssd_epi32 // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_dpwssd_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_dpwssd_epi32( + ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m256i)(__v16hi){32767,1, 32767,8, 32767,15, 32767,22, 
-32768,6, -32768,13, -32768,20, -32768,4}), + ((__m256i)(__v16hi){32767,-6, 32767,-1, -32768,4, -32768,9, 32767,-5, 32767,0, -32768,5, -32768,-9})), + -1073807366, -1073807367, 1073774651, 1073774790, 1073774561, 1073774592, -1073741725, -1073741860)); __m256i test_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpwssds_epi32 // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_dpwssds_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_dpwssds_epi32( + ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m256i)(__v16hi){32767,1, 32767,8, 32767,15, 32767,22, -32768,6, -32768,13, -32768,20, -32768,4}), + ((__m256i)(__v16hi){32767,-6, 32767,-1, -32768,4, -32768,9, 32767,-5, 32767,0, -32768,5, -32768,-9})), + 2147483647, -1073807367, 1073774651, -2147483647-1, 1073774561, -2147483647-1, 2147483647, -1073741860)); +TEST_CONSTEXPR(match_v8si( + _mm256_dpwssds_epi32( + ((__m256i)(__v8si){-2147483647-1, -2147483647, -1000000000, -100, -1, 0, 1000, 2147483647}), + ((__m256i)(__v16hi){-32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768}), + ((__m256i)(__v16hi){-32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768})), + 0, 1, 1147483648, 2147483548, 2147483647, 2147483647, 2147483647, 2147483647)); __m128i test_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusd_epi32 // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusd_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_dpbusd_epi32( + ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m128i)(__v16qu){255,1,8,15, 255,22,6,13, 0,20,4,11, 
0,18,2,9}), + ((__m128i)(__v16qi){127,-6,-1,4, -128,9,-5,0, 127,5,-9,-4, -128,1,6,-8})), + -2147451218, 2147451176, -2147483629, 2147483606)); +TEST_CONSTEXPR(match_v4si( + _mm_dpbusd_epi32( + ((__m128i)(__v4si){-2147483647-1, 2147483647, -2147483647-1, 2147483647}), + ((__m128i)(__v16qu){255,1,8,15, 255,22,6,13, 0,20,4,11, 0,18,2,9}), + ((__m128i)(__v16qi){127,-6,-1,4, -128,9,-5,0, 127,5,-9,-4, -128,1,6,-8})), + -2147451217, 2147451175, -2147483628, 2147483605)); __m128i test_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusds_epi32 // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusds_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_dpbusds_epi32( + ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m128i)(__v16qu){255,1,8,15, 255,22,6,13, 0,20,4,11, 0,18,2,9}), + ((__m128i)(__v16qi){127,-6,-1,4, -128,9,-5,0, 127,5,-9,-4, -128,1,6,-8})), + 2147483647, -2147483647-1, 2147483647, -2147483647-1)); +TEST_CONSTEXPR(match_v4si( + _mm_dpbusds_epi32( + ((__m128i)(__v4si){-2147483647-1, 2147483647, -2147483647-1, 2147483647}), + ((__m128i)(__v16qu){255,1,8,15, 255,22,6,13, 0,20,4,11, 0,18,2,9}), + ((__m128i)(__v16qi){127,-6,-1,4, -128,9,-5,0, 127,5,-9,-4, -128,1,6,-8})), + -2147451217, 2147451175, -2147483628, 2147483605)); __m128i test_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpwssd_epi32 // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_dpwssd_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_dpwssd_epi32( + ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m128i)(__v8hi){32767,1, 32767,8, -32768,15, -32768,22}), + ((__m128i)(__v8hi){32767,-6, -32768,-1, 32767,4, -32768,9})), + -1073807366, 1073774584, 1073774651, -1073741626)); +TEST_CONSTEXPR(match_v4si( + 
_mm_dpwssd_epi32( + ((__m128i)(__v4si){-2147483647-1, 2147483647, -2147483647-1, 2147483647}), + ((__m128i)(__v8hi){32767,1, 32767,8, -32768,15, -32768,22}), + ((__m128i)(__v8hi){32767,-6, -32768,-1, 32767,4, -32768,9})), + -1073807365, 1073774583, 1073774652, -1073741627)); __m128i test_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpwssds_epi32 // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_dpwssds_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_dpwssds_epi32( + ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m128i)(__v8hi){32767,1, 32767,8, -32768,15, -32768,22}), + ((__m128i)(__v8hi){32767,-6, -32768,-1, 32767,4, -32768,9})), + 2147483647, -2147483647-1, 1073774651, -1073741626)); +TEST_CONSTEXPR(match_v4si( + _mm_dpwssds_epi32( + ((__m128i)(__v4si){-2147483647-1, 2147483647, -2147483647-1, 2147483647}), + ((__m128i)(__v8hi){32767,1, 32767,8, -32768,15, -32768,22}), + ((__m128i)(__v8hi){32767,-6, -32768,-1, 32767,4, -32768,9})), + -1073807365, 1073774583, -2147483647-1, 2147483647)); +TEST_CONSTEXPR(match_v4si( + _mm_dpwssds_epi32( + ((__m128i)(__v4si){-2147483647-1, -1000000000, -1, 1000}), + ((__m128i)(__v8hi){-32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768}), + ((__m128i)(__v8hi){-32768,-32768, -32768,-32768, -32768,-32768, -32768,-32768})), + 0, 1147483648, 2147483647, 2147483647)); __m256i test_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusd_avx_epi32 // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusd_avx_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_dpbusd_avx_epi32( + ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m256i)(__v32qu){255,1,8,15, 255,22,6,13, 
255,20,4,11, 255,18,2,9, 0,16,23,7, 0,14,21,5, 0,12,19,3, 0,10,17,1}), + ((__m256i)(__v32qi){127,-6,-1,4, 127,9,-5,0, -128,5,-9,-4, -128,1,6,-8, 127,-3,2,7, 127,-7,-2,3, -128,8,-6,-1, -128,4,9,-5})), + -2147451218, -2147451095, 2147451027, 2147450966, -2147483602, 2147483523, 2147483626, -2147483460)); __m256i test_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpbusds_avx_epi32 // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_dpbusds_avx_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_dpbusds_avx_epi32( + ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m256i)(__v32qu){255,1,8,15, 255,22,6,13, 255,20,4,11, 255,18,2,9, 0,16,23,7, 0,14,21,5, 0,12,19,3, 0,10,17,1}), + ((__m256i)(__v32qi){127,-6,-1,4, 127,9,-5,0, -128,5,-9,-4, -128,1,6,-8, 127,-3,2,7, 127,-7,-2,3, -128,8,-6,-1, -128,4,9,-5})), + 2147483647, -2147451095, 2147451027, -2147483647-1, 2147483647, -2147483647-1, 2147483626, -2147483460)); __m256i test_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_dpwssd_avx_epi32 // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_dpwssd_avx_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_dpwssd_avx_epi32( + ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m256i)(__v16hi){32767,1, 32767,8, 32767,15, 32767,22, -32768,6, -32768,13, -32768,20, -32768,4}), + ((__m256i)(__v16hi){32767,-6, 32767,-1, -32768,4, -32768,9, 32767,-5, 32767,0, -32768,5, -32768,-9})), + -1073807366, -1073807367, 1073774651, 1073774790, 1073774561, 1073774592, -1073741725, -1073741860)); __m256i test_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) { // 
CHECK-LABEL: test_mm256_dpwssds_avx_epi32 // CHECK: call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_dpwssds_avx_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v8si( + _mm256_dpwssds_avx_epi32( + ((__m256i)(__v8si){2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m256i)(__v16hi){32767,1, 32767,8, 32767,15, 32767,22, -32768,6, -32768,13, -32768,20, -32768,4}), + ((__m256i)(__v16hi){32767,-6, 32767,-1, -32768,4, -32768,9, 32767,-5, 32767,0, -32768,5, -32768,-9})), + 2147483647, -1073807367, 1073774651, -2147483647-1, 1073774561, -2147483647-1, 2147483647, -1073741860)); __m128i test_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusd_avx_epi32 // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusd_avx_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_dpbusd_avx_epi32( + ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m128i)(__v16qu){255,1,8,15, 255,22,6,13, 0,20,4,11, 0,18,2,9}), + ((__m128i)(__v16qi){127,-6,-1,4, -128,9,-5,0, 127,5,-9,-4, -128,1,6,-8})), + -2147451218, 2147451176, -2147483629, 2147483606)); +TEST_CONSTEXPR(match_v4si( + _mm_dpbusd_avx_epi32( + ((__m128i)(__v4si){-2147483647-1, 2147483647, -2147483647-1, 2147483647}), + ((__m128i)(__v16qu){255,1,8,15, 255,22,6,13, 0,20,4,11, 0,18,2,9}), + ((__m128i)(__v16qi){127,-6,-1,4, -128,9,-5,0, 127,5,-9,-4, -128,1,6,-8})), + -2147451217, 2147451175, -2147483628, 2147483605)); __m128i test_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpbusds_avx_epi32 // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_dpbusds_avx_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_dpbusds_avx_epi32( + 
((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m128i)(__v16qu){255,1,8,15, 255,22,6,13, 0,20,4,11, 0,18,2,9}), + ((__m128i)(__v16qi){127,-6,-1,4, -128,9,-5,0, 127,5,-9,-4, -128,1,6,-8})), + 2147483647, -2147483647-1, 2147483647, -2147483647-1)); +TEST_CONSTEXPR(match_v4si( + _mm_dpbusds_avx_epi32( + ((__m128i)(__v4si){-2147483647-1, 2147483647, -2147483647-1, 2147483647}), + ((__m128i)(__v16qu){255,1,8,15, 255,22,6,13, 0,20,4,11, 0,18,2,9}), + ((__m128i)(__v16qi){127,-6,-1,4, -128,9,-5,0, 127,5,-9,-4, -128,1,6,-8})), + -2147451217, 2147451175, -2147483628, 2147483605)); __m128i test_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpwssd_avx_epi32 // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_dpwssd_avx_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_dpwssd_avx_epi32( + ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m128i)(__v8hi){32767,1, 32767,8, -32768,15, -32768,22}), + ((__m128i)(__v8hi){32767,-6, -32768,-1, 32767,4, -32768,9})), + -1073807366, 1073774584, 1073774651, -1073741626)); +TEST_CONSTEXPR(match_v4si( + _mm_dpwssd_avx_epi32( + ((__m128i)(__v4si){-2147483647-1, 2147483647, -2147483647-1, 2147483647}), + ((__m128i)(__v8hi){32767,1, 32767,8, -32768,15, -32768,22}), + ((__m128i)(__v8hi){32767,-6, -32768,-1, 32767,4, -32768,9})), + -1073807365, 1073774583, 1073774652, -1073741627)); __m128i test_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_dpwssds_avx_epi32 // CHECK: call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_dpwssds_avx_epi32(__S, __A, __B); } +TEST_CONSTEXPR(match_v4si( + _mm_dpwssds_avx_epi32( + ((__m128i)(__v4si){2147483647, -2147483647-1, 2147483647, -2147483647-1}), + ((__m128i)(__v8hi){32767,1, 32767,8, -32768,15, -32768,22}), + 
((__m128i)(__v8hi){32767,-6, -32768,-1, 32767,4, -32768,9})), + 2147483647, -2147483647-1, 1073774651, -1073741626)); +TEST_CONSTEXPR(match_v4si( + _mm_dpwssds_avx_epi32( + ((__m128i)(__v4si){-2147483647-1, 2147483647, -2147483647-1, 2147483647}), + ((__m128i)(__v8hi){32767,1, 32767,8, -32768,15, -32768,22}), + ((__m128i)(__v8hi){32767,-6, -32768,-1, 32767,4, -32768,9})), + -1073807365, 1073774583, -2147483647-1, 2147483647)); diff --git a/clang/test/CodeGen/X86/bmi2-builtins.c b/clang/test/CodeGen/X86/bmi2-builtins.c index 1b2cb9048adb2..c83cc43d9fc3f 100644 --- a/clang/test/CodeGen/X86/bmi2-builtins.c +++ b/clang/test/CodeGen/X86/bmi2-builtins.c @@ -17,12 +17,12 @@ unsigned int test_bzhi_u32(unsigned int __X, unsigned int __Y) { } unsigned int test_pdep_u32(unsigned int __X, unsigned int __Y) { - // CHECK: @llvm.x86.bmi.pdep.32 + // CHECK: @llvm.pdep.i32 return _pdep_u32(__X, __Y); } unsigned int test_pext_u32(unsigned int __X, unsigned int __Y) { - // CHECK: @llvm.x86.bmi.pext.32 + // CHECK: @llvm.pext.i32 return _pext_u32(__X, __Y); } @@ -41,12 +41,12 @@ unsigned long long test_bzhi_u64(unsigned long long __X, unsigned long long __Y) } unsigned long long test_pdep_u64(unsigned long long __X, unsigned long long __Y) { - // CHECK: @llvm.x86.bmi.pdep.64 + // CHECK: @llvm.pdep.i64 return _pdep_u64(__X, __Y); } unsigned long long test_pext_u64(unsigned long long __X, unsigned long long __Y) { - // CHECK: @llvm.x86.bmi.pext.64 + // CHECK: @llvm.pext.i64 return _pext_u64(__X, __Y); } diff --git a/clang/test/CodeGen/arm-bf16-reinterpret-intrinsics.c b/clang/test/CodeGen/arm-bf16-reinterpret-intrinsics.c index f8c3a94133131..de04466b3bce0 100644 --- a/clang/test/CodeGen/arm-bf16-reinterpret-intrinsics.c +++ b/clang/test/CodeGen/arm-bf16-reinterpret-intrinsics.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple armv8.2a-arm-none-eabi -target-feature +neon -target-feature +bf16 -mfloat-abi hard \ +// RUN: %clang_cc1 -triple armv8.2a-arm-none-eabi -target-feature +neon 
-mfloat-abi hard \ // RUN: -disable-O0-optnone -emit-llvm -o - %s \ // RUN: | opt -S -passes=instcombine \ // RUN: | FileCheck %s diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c index d0e4a6fa10cfc..63371ea729228 100644 --- a/clang/test/CodeGen/builtins-elementwise-math.c +++ b/clang/test/CodeGen/builtins-elementwise-math.c @@ -1330,6 +1330,72 @@ void test_builtin_elementwise_clmul(unsigned int ui1, unsigned int ui2, bi1 = __builtin_elementwise_clmul(bi1, bi2); } +void test_builtin_elementwise_pext(unsigned int ui1, unsigned int ui2, + unsigned short us1, unsigned short us2, + u4 vu1, u4 vu2, + unsigned _BitInt(31) bi1, + unsigned _BitInt(31) bi2) { + // CHECK: [[UI1:%.+]] = load i32, ptr %ui1.addr, align 4 + // CHECK-NEXT: [[UI2:%.+]] = load i32, ptr %ui2.addr, align 4 + // CHECK-NEXT: [[UI3:%.+]] = call i32 @llvm.pext.i32(i32 [[UI1]], i32 [[UI2]]) + // CHECK-NEXT: store i32 [[UI3]], ptr %ui1.addr, align 4 + ui1 = __builtin_elementwise_pext(ui1, ui2); + + // CHECK: [[US1:%.+]] = load i16, ptr %us1.addr, align 2 + // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr, align 2 + // CHECK-NEXT: [[US3:%.+]] = call i16 @llvm.pext.i16(i16 [[US1]], i16 [[US2]]) + // CHECK-NEXT: store i16 [[US3]], ptr %us1.addr, align 2 + us1 = __builtin_elementwise_pext(us1, us2); + + // CHECK: [[VU1:%.+]] = load <4 x i32>, ptr %vu1.addr, align 16 + // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, ptr %vu2.addr, align 16 + // CHECK-NEXT: [[VU3:%.+]] = call <4 x i32> @llvm.pext.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]]) + // CHECK-NEXT: store <4 x i32> [[VU3]], ptr %vu1.addr, align 16 + vu1 = __builtin_elementwise_pext(vu1, vu2); + + // CHECK: [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4 + // CHECK-NEXT: [[BI1TRUNC:%.+]] = trunc i32 [[BI1]] to i31 + // CHECK-NEXT: [[BI2:%.+]] = load i32, ptr %bi2.addr, align 4 + // CHECK-NEXT: [[BI2TRUNC:%.+]] = trunc i32 [[BI2]] to i31 + // CHECK-NEXT: [[BIRES:%.+]] = call i31 @llvm.pext.i31(i31 
[[BI1TRUNC]], i31 [[BI2TRUNC]]) + // CHECK-NEXT: [[BIRESZEXT:%.+]] = zext i31 [[BIRES]] to i32 + // CHECK-NEXT: store i32 [[BIRESZEXT]], ptr %bi1.addr, align 4 + bi1 = __builtin_elementwise_pext(bi1, bi2); +} + +void test_builtin_elementwise_pdep(unsigned int ui1, unsigned int ui2, + unsigned short us1, unsigned short us2, + u4 vu1, u4 vu2, + unsigned _BitInt(31) bi1, + unsigned _BitInt(31) bi2) { + // CHECK: [[UI1:%.+]] = load i32, ptr %ui1.addr, align 4 + // CHECK-NEXT: [[UI2:%.+]] = load i32, ptr %ui2.addr, align 4 + // CHECK-NEXT: [[UI3:%.+]] = call i32 @llvm.pdep.i32(i32 [[UI1]], i32 [[UI2]]) + // CHECK-NEXT: store i32 [[UI3]], ptr %ui1.addr, align 4 + ui1 = __builtin_elementwise_pdep(ui1, ui2); + + // CHECK: [[US1:%.+]] = load i16, ptr %us1.addr, align 2 + // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr, align 2 + // CHECK-NEXT: [[US3:%.+]] = call i16 @llvm.pdep.i16(i16 [[US1]], i16 [[US2]]) + // CHECK-NEXT: store i16 [[US3]], ptr %us1.addr, align 2 + us1 = __builtin_elementwise_pdep(us1, us2); + + // CHECK: [[VU1:%.+]] = load <4 x i32>, ptr %vu1.addr, align 16 + // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, ptr %vu2.addr, align 16 + // CHECK-NEXT: [[VU3:%.+]] = call <4 x i32> @llvm.pdep.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]]) + // CHECK-NEXT: store <4 x i32> [[VU3]], ptr %vu1.addr, align 16 + vu1 = __builtin_elementwise_pdep(vu1, vu2); + + // CHECK: [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4 + // CHECK-NEXT: [[BI1TRUNC:%.+]] = trunc i32 [[BI1]] to i31 + // CHECK-NEXT: [[BI2:%.+]] = load i32, ptr %bi2.addr, align 4 + // CHECK-NEXT: [[BI2TRUNC:%.+]] = trunc i32 [[BI2]] to i31 + // CHECK-NEXT: [[BIRES:%.+]] = call i31 @llvm.pdep.i31(i31 [[BI1TRUNC]], i31 [[BI2TRUNC]]) + // CHECK-NEXT: [[BIRESZEXT:%.+]] = zext i31 [[BIRES]] to i32 + // CHECK-NEXT: store i32 [[BIRESZEXT]], ptr %bi1.addr, align 4 + bi1 = __builtin_elementwise_pdep(bi1, bi2); +} + void test_builtin_elementwise_clzg(si8 vs1, si8 vs2, u4 vu1, long long int lli, short si, _BitInt(31) bi, 
int i, diff --git a/clang/test/CodeGen/ubsan-aggregate-null-align-bounds.c b/clang/test/CodeGen/ubsan-aggregate-null-align-bounds.c index 9fc3fd6e64584..7fd2e6c2d0300 100644 --- a/clang/test/CodeGen/ubsan-aggregate-null-align-bounds.c +++ b/clang/test/CodeGen/ubsan-aggregate-null-align-bounds.c @@ -15,29 +15,27 @@ struct Agg { int x; }; extern "C" { #endif -// LHS checks - C only -// Note: In C++, aggregate assignment goes through operator= -// which is a different code path (CGExprCXX.cpp). -// FIXME: LHS checks for C++ will be addressed in a follow-up PR - -// C-LABEL: define {{.*}}@test_lhs_ptrcheck_deref( -// C: [[DEST:%.*]] = load ptr, ptr %dest.addr -// C-NEXT: [[CMP:%.*]] = icmp ne ptr [[DEST]], null, !nosanitize -// C-NEXT: [[INT:%.*]] = ptrtoint ptr [[DEST]] to i64, !nosanitize -// C-NEXT: [[AND:%.*]] = and i64 [[INT]], 3, !nosanitize -// C-NEXT: [[ALIGN:%.*]] = icmp eq i64 [[AND]], 0, !nosanitize -// C-NEXT: [[OK:%.*]] = and i1 [[CMP]], [[ALIGN]], !nosanitize -// C-NEXT: br i1 [[OK]], label %cont, label %handler.type_mismatch -// C: handler.type_mismatch: -// C-NEXT: call void @__ubsan_handle_type_mismatch_v1_abort -// C: call void @llvm.memcpy +// LHS checks - both C and C++ + +// CHECK-LABEL: define {{.*}}@test_lhs_ptrcheck_deref( +// CHECK: [[DEST:%.*]] = load ptr, ptr %dest.addr +// CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[DEST]], null, !nosanitize +// CHECK-NEXT: [[INT:%.*]] = ptrtoint ptr [[DEST]] to i64, !nosanitize +// CHECK-NEXT: [[AND:%.*]] = and i64 [[INT]], 3, !nosanitize +// CHECK-NEXT: [[ALIGN:%.*]] = icmp eq i64 [[AND]], 0, !nosanitize +// CHECK-NEXT: [[OK:%.*]] = and i1 [[CMP]], [[ALIGN]], !nosanitize +// CHECK-NEXT: br i1 [[OK]], label %cont, label %handler.type_mismatch +// CHECK: handler.type_mismatch: +// CHECK-NEXT: call void @__ubsan_handle_type_mismatch_v1_abort +// CHECK: call void @llvm.memcpy void test_lhs_ptrcheck_deref(AGG *dest) { AGG local = {0}; *dest = local; } -// C-LABEL: define {{.*}}@test_lhs_ptrcheck_subscript( -// C: 
call void @__ubsan_handle_type_mismatch_v1_abort +// CHECK-LABEL: define {{.*}}@test_lhs_ptrcheck_subscript( +// CHECK: call void @__ubsan_handle_type_mismatch_v1_abort +// CHECK-NOT: call void @__ubsan_handle_type_mismatch_v1_abort void test_lhs_ptrcheck_subscript(AGG arr[4]) { AGG local = {0}; arr[0] = local; @@ -65,6 +63,7 @@ void test_rhs_ptrcheck_deref(AGG *src) { // CHECK-LABEL: define {{.*}}@test_rhs_ptrcheck_subscript( // CHECK: call void @__ubsan_handle_type_mismatch_v1_abort +// CHECK-NOT: call void @__ubsan_handle_type_mismatch_v1_abort void test_rhs_ptrcheck_subscript(AGG arr[4]) { AGG local; local = arr[0]; @@ -75,6 +74,7 @@ void test_rhs_ptrcheck_subscript(AGG arr[4]) { // CHECK-LABEL: define {{.*}}@test_init_from_deref( // CHECK: call void @__ubsan_handle_type_mismatch_v1_abort +// CHECK-NOT: call void @__ubsan_handle_type_mismatch_v1_abort void test_init_from_deref(AGG *src) { AGG local = *src; (void)local; @@ -82,12 +82,15 @@ void test_init_from_deref(AGG *src) { // CHECK-LABEL: define {{.*}}@test_init_from_subscript( // CHECK: call void @__ubsan_handle_type_mismatch_v1_abort +// CHECK-NOT: call void @__ubsan_handle_type_mismatch_v1_abort void test_init_from_subscript(AGG arr[4]) { AGG local = arr[0]; (void)local; } // Array bounds - out-of-bounds access (RHS) +// Note: GCC also does not detect the out-of-bounds access here when compiled as +// C++. // CHECK-LABEL: define {{.*}}@test_oob_rhs( // C: br i1 false, label %cont, label %handler.out_of_bounds @@ -105,15 +108,13 @@ void test_oob_rhs(void) { } // Array bounds - out-of-bounds access (LHS) -// FIXME: LHS checks for C++ will be addressed in a follow-up PR. 
// CHECK-LABEL: define {{.*}}@test_oob_lhs( -// C: br i1 false, label %cont, label %handler.out_of_bounds -// CXX: br i1 true, label %cont, label %handler.out_of_bounds +// CHECK: br i1 false, label %cont, label %handler.out_of_bounds // CHECK: handler.out_of_bounds: // CHECK-NEXT: call void @__ubsan_handle_out_of_bounds_abort -// C: handler.type_mismatch: -// C-NEXT: call void @__ubsan_handle_type_mismatch_v1_abort +// CHECK: handler.type_mismatch: +// CHECK-NEXT: call void @__ubsan_handle_type_mismatch_v1_abort // CHECK: call void @llvm.memcpy void test_oob_lhs(void) { AGG arr[4]; @@ -126,14 +127,35 @@ void test_oob_lhs(void) { } #endif -// C++ RHS cases - handler call only +// C++ cases - handler call only #ifdef __cplusplus extern "C" { +// C++ LHS cases + +// CXX-LABEL: define {{.*}}@test_cxx_lhs_dot_operator_function_call( +// CXX: call void @__ubsan_handle_type_mismatch_v1_abort +// CXX-NOT: call void @__ubsan_handle_type_mismatch_v1_abort +void test_cxx_lhs_dot_operator_function_call(AGG *src) { + AGG aggValue(void); + (*src).operator=(aggValue()); +} + +// C++ RHS cases + +// CXX-LABEL: define {{.*}}@test_cxx_rhs_operator_function_call( +// CXX: call void @__ubsan_handle_type_mismatch_v1_abort +// CXX-NOT: call void @__ubsan_handle_type_mismatch_v1_abort +void test_cxx_rhs_operator_function_call(AGG *src) { + AGG local = {0}; + local.operator=(*src); +} + // CXX-LABEL: define {{.*}}@test_cxx_direct_init( // CXX: call void @__ubsan_handle_type_mismatch_v1_abort +// CXX-NOT: call void @__ubsan_handle_type_mismatch_v1_abort void test_cxx_direct_init(AGG *src) { AGG local(*src); (void)local; @@ -141,6 +163,7 @@ void test_cxx_direct_init(AGG *src) { // CXX-LABEL: define {{.*}}@test_cxx_brace_init( // CXX: call void @__ubsan_handle_type_mismatch_v1_abort +// CXX-NOT: call void @__ubsan_handle_type_mismatch_v1_abort void test_cxx_brace_init(AGG *src) { AGG local{*src}; (void)local; diff --git a/clang/test/Driver/invalid-target-id.cl 
b/clang/test/Driver/invalid-target-id.cl index 4f6f140437885..f93e618e460be 100644 --- a/clang/test/Driver/invalid-target-id.cl +++ b/clang/test/Driver/invalid-target-id.cl @@ -39,3 +39,24 @@ // RUN: %s 2>&1 | FileCheck -check-prefix=NOCOLON %s // NOCOLON: error: invalid target ID 'gfx900+xnack' + +// gfx1250 and gfx12-5-generic do not support xnack on/off modes +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx1250:xnack+ -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=XNACK-MODE-GFX1250 %s + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx1250:xnack- -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=XNACK-MODE-GFX1250 %s + +// XNACK-MODE-GFX1250: error: invalid target ID 'gfx1250:xnack{{[+-]}}' + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx12-5-generic:xnack+ -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=XNACK-MODE-GFX125 %s + +// RUN: not %clang -target amdgcn-amd-amdhsa \ +// RUN: -mcpu=gfx12-5-generic:xnack- -nostdlib \ +// RUN: %s 2>&1 | FileCheck -check-prefix=XNACK-MODE-GFX125 %s + +// XNACK-MODE-GFX125: error: invalid target ID 'gfx12-5-generic:xnack{{[+-]}}' diff --git a/clang/test/Driver/lto.c b/clang/test/Driver/lto.c index 81165d3b9e8a3..c9ee2f9c26223 100644 --- a/clang/test/Driver/lto.c +++ b/clang/test/Driver/lto.c @@ -117,6 +117,14 @@ // CHECK-GISEL: "-plugin-opt=-global-isel=1" // CHECK-DISABLE-GISEL: "-plugin-opt=-global-isel=0" +// RUN: %clang --target=x86_64-unknown-linux-gnu -### %s -flto -fno-slp-vectorize 2> %t +// RUN: FileCheck --check-prefix=CHECK-NO-SLP < %t %s +// RUN: %clang --target=x86_64-unknown-linux-gnu -### %s -flto -fslp-vectorize 2> %t +// RUN: FileCheck --check-prefix=CHECK-SLP < %t %s + +// CHECK-NO-SLP: "-plugin-opt=-vectorize-slp=0" +// CHECK-SLP: "-plugin-opt=-vectorize-slp=1" + // -flto passes -time-passes when -ftime-report is passed // RUN: %clang --target=x86_64-unknown-linux-gnu -### %s -flto -ftime-report 2> %t // RUN: FileCheck 
--check-prefix=CHECK-TIME-REPORT < %t %s diff --git a/clang/test/Preprocessor/hexagon-predefines.c b/clang/test/Preprocessor/hexagon-predefines.c index cb3e9492ea07e..f115e6e0a9926 100644 --- a/clang/test/Preprocessor/hexagon-predefines.c +++ b/clang/test/Preprocessor/hexagon-predefines.c @@ -261,3 +261,19 @@ // CHECK-H2: #define __h2__ 1 // CHECK-H2: #define __hexagon__ 1 // CHECK-H2-NOT: #define __linux__ + +// RUN: %clang_cc1 -E -dM -triple hexagon-unknown-elf -x c++ %s | FileCheck \ +// RUN: %s -check-prefix CHECK-CXX-GNU +// CHECK-CXX-GNU: #define _GNU_SOURCE 1 + +// RUN: %clang_cc1 -E -dM -triple hexagon-unknown-h2 -x c++ %s | FileCheck \ +// RUN: %s -check-prefix CHECK-H2-CXX-GNU +// CHECK-H2-CXX-GNU: #define _GNU_SOURCE 1 + +// RUN: %clang_cc1 -E -dM -triple hexagon-unknown-qurt -x c++ %s | FileCheck \ +// RUN: %s -check-prefix CHECK-QURT-CXX-GNU +// CHECK-QURT-CXX-GNU: #define _GNU_SOURCE 1 + +// RUN: %clang_cc1 -E -dM -triple hexagon-unknown-elf %s | FileCheck \ +// RUN: %s -check-prefix CHECK-C-GNU +// CHECK-C-GNU-NOT: #define _GNU_SOURCE diff --git a/clang/test/Sema/LifetimeSafety/annotation-suggestions-fixits.cpp b/clang/test/Sema/LifetimeSafety/annotation-suggestions-fixits.cpp index 18be627211975..99f0d16cd8e68 100644 --- a/clang/test/Sema/LifetimeSafety/annotation-suggestions-fixits.cpp +++ b/clang/test/Sema/LifetimeSafety/annotation-suggestions-fixits.cpp @@ -2,6 +2,12 @@ // RUN: -fexperimental-lifetime-safety-tu-analysis \ // RUN: -Wlifetime-safety-suggestions -Wlifetime-safety-annotation-placement -Wno-dangling \ // RUN: -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s +// RUN: %clang_cc1 -fsyntax-only -std=c++17 -flifetime-safety-inference \ +// RUN: -fexperimental-lifetime-safety-tu-analysis \ +// RUN: -Wlifetime-safety-suggestions -Wlifetime-safety-annotation-placement -Wno-dangling \ +// RUN: -DLIFETIMEBOUND_MACRO=[[clang::lifetimebound]] \ +// RUN: -lifetime-safety-lifetimebound-macro=LIFETIMEBOUND_MACRO \ +// RUN: 
-fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s --check-prefix=CHECK-MACRO // RUN: cp %s %t.cpp // RUN: %clang_cc1 -std=c++17 -flifetime-safety-inference \ // RUN: -fexperimental-lifetime-safety-tu-analysis \ @@ -9,6 +15,14 @@ // RUN: %clang_cc1 -fsyntax-only -std=c++17 -flifetime-safety-inference \ // RUN: -fexperimental-lifetime-safety-tu-analysis \ // RUN: -Werror=lifetime-safety-suggestions -Wno-dangling %t.cpp +// RUN: cp %s %t.bad-macro.cpp +// RUN: %clang_cc1 -std=c++17 -flifetime-safety-inference \ +// RUN: -fexperimental-lifetime-safety-tu-analysis \ +// RUN: -Wlifetime-safety-suggestions -Wno-dangling \ +// RUN: -lifetime-safety-lifetimebound-macro=BAD_LIFETIMEBOUND_MACRO \ +// RUN: -fixit %t.bad-macro.cpp +// RUN: not %clang_cc1 -fsyntax-only -std=c++17 %t.bad-macro.cpp 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CHECK-BAD-MACRO struct View; @@ -30,6 +44,10 @@ struct [[gsl::Pointer()]] View { View return_view(View a) { // CHECK: :[[@LINE-1]]:18: warning: parameter in intra-TU function should be marked {{\[\[}}clang::lifetimebound]] [-Wlifetime-safety-intra-tu-suggestions] // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:24-[[@LINE-2]]:24}:" {{\[\[}}clang::lifetimebound]]" + // CHECK-MACRO: :[[@LINE-3]]:18: warning: parameter in intra-TU function should be marked + // CHECK-MACRO: fix-it:"{{.*}}":{[[@LINE-4]]:24-[[@LINE-4]]:24}:" LIFETIMEBOUND_MACRO" + // CHECK-BAD-MACRO: :[[@LINE-5]]:25: error: expected ')' + // CHECK-BAD-MACRO: BAD_LIFETIMEBOUND_MACRO return a; } @@ -97,6 +115,7 @@ struct ViewMember { View get_view() { // CHECK: :[[@LINE-1]]:18: warning: implicit this in intra-TU function should be marked // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:18-[[@LINE-2]]:18}:" {{\[\[}}clang::lifetimebound]]" + // CHECK-BAD-MACRO: :[[@LINE-3]]:18: error: expected ';' at end of declaration list return data; } @@ -174,3 +193,60 @@ struct TrailingReturn { return data; } }; + +#define GNU_LIFETIMEBOUND_MACRO __attribute__((lifetimebound)) + +View 
return_view_with_gnu_macro(View a) { + // CHECK: :[[@LINE-1]]:33: warning: parameter in intra-TU function should be marked + // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:39-[[@LINE-2]]:39}:" GNU_LIFETIMEBOUND_MACRO" + return a; +} + +struct OnlyGNUMember { + MyObj data; + + View get_view() { + // CHECK: :[[@LINE-1]]:18: warning: implicit this in intra-TU function should be marked + // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:18-[[@LINE-2]]:18}:" {{\[\[}}clang::lifetimebound]]" + return data; + } +}; + +#define LIFETIMEBOUND_MACRO [[clang::lifetimebound]] +#define MY_LIFETIMEBOUND_MACRO [[clang::lifetimebound]] + +View unnamed_macro(View); +// CHECK: :[[@LINE-1]]:20: warning: parameter in intra-TU function should be marked +// CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:20-[[@LINE-2]]:20}:"MY_LIFETIMEBOUND_MACRO " +View unnamed_macro(View a) { + return a; +} + +View return_view_with_macro(View a) { + // CHECK: :[[@LINE-1]]:29: warning: parameter in intra-TU function should be marked + // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:35-[[@LINE-2]]:35}:" MY_LIFETIMEBOUND_MACRO" + return a; +} + +#define FIRST_LIFETIMEBOUND_MACRO [[clang::lifetimebound]] +#define SECOND_LIFETIMEBOUND_MACRO [[clang::lifetimebound]] + +View return_view_with_latest_macro(View a) { + // CHECK: :[[@LINE-1]]:36: warning: parameter in intra-TU function should be marked + // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:42-[[@LINE-2]]:42}:" SECOND_LIFETIMEBOUND_MACRO" + // CHECK-MACRO: :[[@LINE-3]]:36: warning: parameter in intra-TU function should be marked + // CHECK-MACRO: fix-it:"{{.*}}":{[[@LINE-4]]:42-[[@LINE-4]]:42}:" LIFETIMEBOUND_MACRO" + return a; +} + +struct MacroMember { + MyObj data; + + View get_view() { + // CHECK: :[[@LINE-1]]:18: warning: implicit this in intra-TU function should be marked + // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:18-[[@LINE-2]]:18}:" SECOND_LIFETIMEBOUND_MACRO" + // CHECK-MACRO: :[[@LINE-3]]:18: warning: implicit this in intra-TU function should be marked + // CHECK-MACRO: 
fix-it:"{{.*}}":{[[@LINE-4]]:18-[[@LINE-4]]:18}:" LIFETIMEBOUND_MACRO" + return data; + } +}; diff --git a/clang/test/Sema/LifetimeSafety/dangling-global.cpp b/clang/test/Sema/LifetimeSafety/dangling-global.cpp index 8a96cbced43b4..8d464b0dbe554 100644 --- a/clang/test/Sema/LifetimeSafety/dangling-global.cpp +++ b/clang/test/Sema/LifetimeSafety/dangling-global.cpp @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -fsyntax-only -Wlifetime-safety -Wno-dangling -verify %s -int *global; // expected-note 4 {{this global dangles}} +int *global; // expected-note 10 {{this global dangles}} int *global_backup; // expected-note {{this global dangles}} struct ObjWithStaticField { @@ -70,3 +70,50 @@ void conditional_no_escape(int c) { global = nullptr; // no-warning (void)local; } + +// Pointer compound assignment and increment/decrement keep the pointer in the +// same allocation, so the result carries the borrow. +void via_compound_add() { + int local[10]; + int *p = local; // expected-warning {{stack memory associated with local variable 'local' escapes to the global variable 'global' which will dangle}} + global = (p += 1); +} + +void via_compound_sub() { + int local[10]; + int *p = local + 5; // expected-warning {{stack memory associated with local variable 'local' escapes to the global variable 'global' which will dangle}} + global = (p -= 1); +} + +void via_preinc() { + int local[10]; + int *p = local; // expected-warning {{stack memory associated with local variable 'local' escapes to the global variable 'global' which will dangle}} + global = ++p; +} + +void via_postinc() { + int local[10]; + int *p = local; // expected-warning {{stack memory associated with local variable 'local' escapes to the global variable 'global' which will dangle}} + global = p++; +} + +void via_predec() { + int local[10]; + int *p = local + 5; // expected-warning {{stack memory associated with local variable 'local' escapes to the global variable 'global' which will dangle}} + global = --p; +} + +void 
via_postdec() { + int local[10]; + int *p = local + 5; // expected-warning {{stack memory associated with local variable 'local' escapes to the global variable 'global' which will dangle}} + global = p--; +} + +// Negative: arithmetic on a pointer into long-lived storage stays silent. +void ok_global_storage() { + static int s[10]; + int *p = s; + p += 1; + ++p; + global = (p -= 1); // no-warning +} diff --git a/clang/test/Sema/LifetimeSafety/misplaced-lifetimebound-intra-tu.cpp b/clang/test/Sema/LifetimeSafety/misplaced-lifetimebound-intra-tu.cpp index 7fa4cae100509..25f5b6e94c28c 100644 --- a/clang/test/Sema/LifetimeSafety/misplaced-lifetimebound-intra-tu.cpp +++ b/clang/test/Sema/LifetimeSafety/misplaced-lifetimebound-intra-tu.cpp @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -fsyntax-only -Wlifetime-safety-intra-tu-misplaced-lifetimebound -Wlifetime-safety-annotation-placement -Wno-dangling -verify %s // RUN: %clang_cc1 -fsyntax-only -Wlifetime-safety-intra-tu-misplaced-lifetimebound -Wno-dangling -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s +// RUN: %clang_cc1 -fsyntax-only -Wlifetime-safety-intra-tu-misplaced-lifetimebound -Wno-dangling -lifetime-safety-lifetimebound-macro=CONFIGURED_LIFETIMEBOUND_MACRO \ +// RUN: -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s --check-prefix=CHECK-CONFIG // RUN: cp %s %t.intra.cpp // RUN: %clang_cc1 -Wlifetime-safety-intra-tu-misplaced-lifetimebound -Wno-dangling -fixit %t.intra.cpp // RUN: %clang_cc1 -fsyntax-only -Wlifetime-safety-intra-tu-misplaced-lifetimebound -Wno-dangling -Werror %t.intra.cpp @@ -29,11 +31,11 @@ struct S { const MyObj &implicit_this_only( ); // expected-warning {{'lifetimebound' attribute on this definition is not visible to callers before the definition; add it to the declaration instead}} // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:{{[0-9]+}}-[[@LINE-1]]:{{[0-9]+}}}:" {{\[\[clang::lifetimebound\]\]}}" - + const MyObj &param_only(const MyObj & // expected-warning {{'lifetimebound' attribute on this
definition is not visible to callers before the definition; add it to the declaration instead}} obj // CHECK: fix-it:"{{.*}}":{[[@LINE]]:{{[0-9]+}}-[[@LINE]]:{{[0-9]+}}}:" {{\[\[clang::lifetimebound\]\]}}" ); - + const MyObj &both(const MyObj & // expected-warning {{'lifetimebound' attribute on this definition is not visible to callers before the definition; add it to the declaration instead}} obj, // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE]]:{{[0-9]+}}-[[@LINE]]:{{[0-9]+}}}:" {{\[\[clang::lifetimebound\]\]}}" bool @@ -167,3 +169,48 @@ struct Derived : Base { auto Derived::virtual_get(const MyObj& obj [[clang::lifetimebound]]) const -> const MyObj& { // expected-note {{'lifetimebound' attribute appears here on the definition}} return obj; } + +#define GNU_LIFETIMEBOUND_MACRO __attribute__((lifetimebound)) + +MyObj &gnu_macro_param(MyObj& // expected-warning {{'lifetimebound' attribute on this definition is not visible to callers before the definition; add it to the declaration instead}} + obj // CHECK: fix-it:"{{.*}}":{[[@LINE]]:{{[0-9]+}}-[[@LINE]]:{{[0-9]+}}}:" GNU_LIFETIMEBOUND_MACRO" + ); + +MyObj &gnu_macro_param(MyObj &obj [[clang::lifetimebound]]) { // expected-note {{'lifetimebound' attribute appears here on the definition}} + return obj; +} + +struct OnlyGNUMember { + MyObj data; + const MyObj &only_gnu_this( + ); // expected-warning {{'lifetimebound' attribute on this definition is not visible to callers before the definition; add it to the declaration instead}} + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:{{[0-9]+}}-[[@LINE-1]]:{{[0-9]+}}}:" {{\[\[clang::lifetimebound\]\]}}" +}; + +const MyObj &OnlyGNUMember::only_gnu_this() [[clang::lifetimebound]] { // expected-note {{'lifetimebound' attribute appears here on the definition}} + return data; +} + +#define CONFIGURED_LIFETIMEBOUND_MACRO [[clang::lifetimebound]] +#define LATEST_VISIBLE_LIFETIMEBOUND_MACRO [[clang::lifetimebound]] + +MyObj &configured_macro_param(MyObj& // expected-warning {{'lifetimebound' 
attribute on this definition is not visible to callers before the definition; add it to the declaration instead}} + obj // CHECK: fix-it:"{{.*}}":{[[@LINE]]:{{[0-9]+}}-[[@LINE]]:{{[0-9]+}}}:" LATEST_VISIBLE_LIFETIMEBOUND_MACRO" + // CHECK-CONFIG: fix-it:"{{.*}}":{[[@LINE-1]]:{{[0-9]+}}-[[@LINE-1]]:{{[0-9]+}}}:" CONFIGURED_LIFETIMEBOUND_MACRO" + ); + +MyObj &configured_macro_param(MyObj &obj [[clang::lifetimebound]]) { // expected-note {{'lifetimebound' attribute appears here on the definition}} + return obj; +} + +struct ConfiguredMacroMember { + MyObj data; + const MyObj &configured_this( + ); // expected-warning {{'lifetimebound' attribute on this definition is not visible to callers before the definition; add it to the declaration instead}} + // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:{{[0-9]+}}-[[@LINE-1]]:{{[0-9]+}}}:" LATEST_VISIBLE_LIFETIMEBOUND_MACRO" + // CHECK-CONFIG: fix-it:"{{.*}}":{[[@LINE-2]]:{{[0-9]+}}-[[@LINE-2]]:{{[0-9]+}}}:" CONFIGURED_LIFETIMEBOUND_MACRO" +}; + +const MyObj &ConfiguredMacroMember::configured_this() [[clang::lifetimebound]] { // expected-note {{'lifetimebound' attribute appears here on the definition}} + return data; +} diff --git a/clang/test/Sema/LifetimeSafety/safety-c.c b/clang/test/Sema/LifetimeSafety/safety-c.c index 95c8cf7bb00c7..9ab2a57cb08a9 100644 --- a/clang/test/Sema/LifetimeSafety/safety-c.c +++ b/clang/test/Sema/LifetimeSafety/safety-c.c @@ -173,9 +173,44 @@ void *void_pointer_dereference(void) { return &*bytes; } -// FIXME: Atomics are not modeled yet. +// `_Atomic(T)` is transparent for lifetime purposes; a stack address laundered +// through an atomic is caught. 
int *atomic_pointer_declref(void) { int value; + _Atomic(int *) p = &value; // expected-warning {{stack memory associated with local variable 'value' is returned}} + return p; // expected-note {{returned here}} +} + +int *atomic_pointer_static(void) { + static int value; _Atomic(int *) p = &value; - return p; + return p; // no-warning +} + +int **atomic_pointer_multilevel(void) { + int *inner; + _Atomic(int **) p = &inner; // expected-warning {{stack memory associated with local variable 'inner' is returned}} + return p; // expected-note {{returned here}} +} + +// In C, a pointer compound assignment is a prvalue; its result still carries +// the LHS pointer's loans. +void compound_assign_prvalue(void) { + int *p; + { + int local[10]; + int *q = local; // expected-warning {{local variable 'local' does not live long enough}} + p = (q += 1); + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} +} + +void preincrement_prvalue(void) { + int *p; + { + int local[10]; + int *q = local; // expected-warning {{local variable 'local' does not live long enough}} + p = ++q; + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} } diff --git a/clang/test/Sema/LifetimeSafety/safety.cpp b/clang/test/Sema/LifetimeSafety/safety.cpp index 7a2644e46a6e1..65bfe69e854ac 100644 --- a/clang/test/Sema/LifetimeSafety/safety.cpp +++ b/clang/test/Sema/LifetimeSafety/safety.cpp @@ -1435,6 +1435,34 @@ void use_trivial_temporary_after_destruction() { use(a); // expected-note {{later used here}} } +namespace cast_modeling { +// A pointer bit-cast (`__builtin_bit_cast`/`std::bit_cast`) preserves the +// value, so a borrow flowed through it is tracked (matching reinterpret_cast). 
+int *bit_cast_stack() { + int x = 0; + return __builtin_bit_cast(int *, &x); // expected-warning {{stack memory associated with local variable 'x' is returned}} expected-note {{returned here}} +} + +int *bit_cast_static() { + static int s = 0; + return __builtin_bit_cast(int *, &s); // no-warning +} + +void bit_cast_use_after_scope() { + int *p; + { + int local = 0; + p = __builtin_bit_cast(int *, &local); // expected-warning {{local variable 'local' does not live long enough}} + } // expected-note {{destroyed here}} + (void)*p; // expected-note {{later used here}} +} + +int **bit_cast_multilevel() { + int *p = nullptr; + return __builtin_bit_cast(int **, &p); // expected-warning {{stack memory associated with local variable 'p' is returned}} expected-note {{returned here}} +} +} // namespace cast_modeling + namespace FullExprCleanupLoc { void var_initializer() { View v = non_trivially_destructed_temporary() // expected-warning {{temporary object does not live long enough}} \ diff --git a/clang/test/Sema/aarch64-neon-immediate-ranges/luti.c b/clang/test/Sema/aarch64-neon-immediate-ranges/luti.c index bed8cbc1481dd..9daf3018273de 100644 --- a/clang/test/Sema/aarch64-neon-immediate-ranges/luti.c +++ b/clang/test/Sema/aarch64-neon-immediate-ranges/luti.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -ffreestanding -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +lut -ffreestanding -fsyntax-only -verify %s #include <arm_neon.h> // REQUIRES: aarch64-registered-target diff --git a/clang/test/Sema/aarch64-neon-target.c b/clang/test/Sema/aarch64-neon-target.c index ff1928832862d..6174a7d0a0694 100644 --- a/clang/test/Sema/aarch64-neon-target.c +++ b/clang/test/Sema/aarch64-neon-target.c @@ -93,9 +93,6 @@ void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t // bf16 vbfdot_f32(v2f32, v4bf16, v4bf16); // expected-error
{{always_inline function 'vbfdot_f32' requires target feature 'bf16'}} vcreate_bf16(10); - vdup_lane_bf16(v4bf16, 2); // expected-error {{'__builtin_neon_splat_lane_bf16' needs target feature bf16}} - vdup_n_bf16(bf16); // expected-error {{always_inline function 'vdup_n_bf16' requires target feature 'bf16'}} - vld1_bf16(0); // expected-error {{'__builtin_neon_vld1_bf16' needs target feature bf16}} vcvt_f32_bf16(v4bf16); // expected-error {{always_inline function 'vcvt_f32_bf16' requires target feature 'bf16'}} vcvt_bf16_f32(v4f32); // expected-error {{always_inline function 'vcvt_bf16_f32' requires target feature 'bf16'}} // f16mm / f16f32mm diff --git a/clang/test/Sema/aarch64-neon-without-target-feature.cpp b/clang/test/Sema/aarch64-neon-without-target-feature.cpp index 86dbb343198c5..97c01e0f51f5e 100644 --- a/clang/test/Sema/aarch64-neon-without-target-feature.cpp +++ b/clang/test/Sema/aarch64-neon-without-target-feature.cpp @@ -23,9 +23,9 @@ void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t // bf16 vbfdot_f32(v2f32, v4bf16, v4bf16); // expected-error {{always_inline function 'vbfdot_f32' requires target feature 'neon'}} vcreate_bf16(10); - vdup_lane_bf16(v4bf16, 2); // expected-error {{'__builtin_neon_splat_lane_bf16' needs target feature bf16,neon}} + vdup_lane_bf16(v4bf16, 2); // expected-error {{'__builtin_neon_splat_lane_v' needs target feature neon}} vdup_n_bf16(bf16); // expected-error {{always_inline function 'vdup_n_bf16' requires target feature 'neon'}} - vld1_bf16(0); // expected-error {{'__builtin_neon_vld1_bf16' needs target feature bf16,neon}} + vld1_bf16(0); // expected-error {{'__builtin_neon_vld1_v' needs target feature neon}} vcvt_f32_bf16(v4bf16); // expected-error {{always_inline function 'vcvt_f32_bf16' requires target feature 'neon'}} vcvt_bf16_f32(v4f32); // expected-error {{always_inline function 'vcvt_bf16_f32' requires target feature 'neon'}} vmmlaq_f16_f16(v8f16, v8f16, v8f16); // expected-error 
{{always_inline function 'vmmlaq_f16_f16' requires target feature 'neon'}} diff --git a/clang/test/Sema/arm-neon-target.c b/clang/test/Sema/arm-neon-target.c index 1dc2b00925d61..f8d2da4aecc45 100644 --- a/clang/test/Sema/arm-neon-target.c +++ b/clang/test/Sema/arm-neon-target.c @@ -56,9 +56,6 @@ void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t // bf16 vbfdot_f32(v2f32, v4bf16, v4bf16); // expected-error {{always_inline function 'vbfdot_f32' requires target feature 'bf16'}} vcreate_bf16(10); - vdup_lane_bf16(v4bf16, 2); // expected-error {{'__builtin_neon_splat_lane_bf16' needs target feature bf16}} - vdup_n_bf16(bf16); // expected-error {{always_inline function 'vdup_n_bf16' requires target feature 'bf16'}} - vld1_bf16(0); // expected-error {{'__builtin_neon_vld1_bf16' needs target feature bf16}} vcvt_f32_bf16(v4bf16); // expected-error {{always_inline function 'vcvt_f32_bf16' requires target feature 'bf16'}} vcvt_bf16_f32(v4f32); // expected-error {{always_inline function 'vcvt_bf16_f32' requires target feature 'bf16'}} // v8.1 - qrdmla diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c index 4ffdcee3ca9c7..511d1d8b43329 100644 --- a/clang/test/Sema/builtins-elementwise-math.c +++ b/clang/test/Sema/builtins-elementwise-math.c @@ -214,6 +214,74 @@ void test_builtin_elementwise_clmul(int i, short s, double d, float4 v, vu = __builtin_elementwise_clmul(vu, vu); } +void test_builtin_elementwise_pext(int i, short s, double d, float4 v, + int3 iv, unsigned3 uv, unsigned u, + unsigned4 vu, int *p) { + i = __builtin_elementwise_pext(p, d); + // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'int *')}} + + struct Foo foo = __builtin_elementwise_pext(i, i); + // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}} + + i = __builtin_elementwise_pext(i); + // expected-error@-1 {{too few arguments to function call, 
expected 2, have 1}} + + i = __builtin_elementwise_pext(); + // expected-error@-1 {{too few arguments to function call, expected 2, have 0}} + + i = __builtin_elementwise_pext(i, i, i); + // expected-error@-1 {{too many arguments to function call, expected 2, have 3}} + + i = __builtin_elementwise_pext(v, v); + // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'float4' (vector of 4 'float' values))}} + + i = __builtin_elementwise_pext(i, s); + // expected-error@-1 {{arguments are of different types ('int' vs 'short')}} + + i = __builtin_elementwise_pext(uv, iv); + // expected-error@-1 {{arguments are of different types ('unsigned3' (vector of 3 'unsigned int' values) vs 'int3' (vector of 3 'int' values))}} + + unsigned _BitInt(31) ext; // expected-warning {{'_BitInt' in C17 and earlier is a Clang extension}} + ext = __builtin_elementwise_pext(ext, ext); + + u = __builtin_elementwise_pext(u, u); + vu = __builtin_elementwise_pext(vu, vu); +} + +void test_builtin_elementwise_pdep(int i, short s, double d, float4 v, + int3 iv, unsigned3 uv, unsigned u, + unsigned4 vu, int *p) { + i = __builtin_elementwise_pdep(p, d); + // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'int *')}} + + struct Foo foo = __builtin_elementwise_pdep(i, i); + // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}} + + i = __builtin_elementwise_pdep(i); + // expected-error@-1 {{too few arguments to function call, expected 2, have 1}} + + i = __builtin_elementwise_pdep(); + // expected-error@-1 {{too few arguments to function call, expected 2, have 0}} + + i = __builtin_elementwise_pdep(i, i, i); + // expected-error@-1 {{too many arguments to function call, expected 2, have 3}} + + i = __builtin_elementwise_pdep(v, v); + // expected-error@-1 {{1st argument must be a scalar or vector of integer types (was 'float4' (vector of 4 'float' values))}} + + i = __builtin_elementwise_pdep(i, s); 
+ // expected-error@-1 {{arguments are of different types ('int' vs 'short')}} + + i = __builtin_elementwise_pdep(uv, iv); + // expected-error@-1 {{arguments are of different types ('unsigned3' (vector of 3 'unsigned int' values) vs 'int3' (vector of 3 'int' values))}} + + unsigned _BitInt(31) ext; // expected-warning {{'_BitInt' in C17 and earlier is a Clang extension}} + ext = __builtin_elementwise_pdep(ext, ext); + + u = __builtin_elementwise_pdep(u, u); + vu = __builtin_elementwise_pdep(vu, vu); +} + void test_builtin_elementwise_max(int i, short s, double d, float4 v, int3 iv, unsigned3 uv, int *p) { i = __builtin_elementwise_max(p, d); // expected-error@-1 {{1st argument must be a vector, integer or floating-point type (was 'int *')}} diff --git a/clang/test/Sema/enable_if.c b/clang/test/Sema/enable_if.c index 3ef8310a2fef7..80f8cce5918ed 100644 --- a/clang/test/Sema/enable_if.c +++ b/clang/test/Sema/enable_if.c @@ -112,29 +112,50 @@ void f(int n) __attribute__((enable_if())); // expected-error{{'enable_if' attr void f(int n) __attribute__((enable_if(unresolvedid, "chosen when 'unresolvedid' is non-zero"))); // expected-error{{use of undeclared identifier 'unresolvedid'}} int global; -void f(int n) __attribute__((enable_if(global == 0, "chosen when 'global' is zero"))); // expected-error{{'enable_if' attribute expression never produces a constant expression}} // expected-note{{subexpression not valid in a constant expression}} +void f(int n) __attribute__((enable_if(global == 0, "chosen when 'global' is zero"))); // expected-error{{'enable_if' attribute expression never produces a constant expression}} \ + // expected-note{{subexpression not valid in a constant expression}} enum { cst = 7 }; void return_cst(void) __attribute__((overloadable)) __attribute__((enable_if(cst == 7, "chosen when 'cst' is 7"))); void test_return_cst(void) { return_cst(); } -void f2(void) __attribute__((overloadable)) __attribute__((enable_if(1, "always chosen"))); -void f2(void) 
__attribute__((overloadable)) __attribute__((enable_if(0, "never chosen"))); -void f2(void) __attribute__((overloadable)) __attribute__((enable_if(TRUE, "always chosen #2"))); +void f2(void) __attribute__((overloadable)) __attribute__((enable_if(1, "always chosen"))); // #f2_1 +void f2(void) __attribute__((overloadable)) __attribute__((enable_if(0, "never chosen"))); // #f2_2 +void f2(void) __attribute__((overloadable)) __attribute__((enable_if(TRUE, "always chosen #2"))); // #f2_3 void test6(void) { - void (*p1)(void) = &f2; // expected-error{{initializing 'void (*)(void)' with an expression of incompatible type '<overloaded function type>'}} expected-note@121{{candidate function}} expected-note@122{{candidate function made ineligible by enable_if}} expected-note@123{{candidate function}} - void (*p2)(void) = f2; // expected-error{{initializing 'void (*)(void)' with an expression of incompatible type '<overloaded function type>'}} expected-note@121{{candidate function}} expected-note@122{{candidate function made ineligible by enable_if}} expected-note@123{{candidate function}} - void *p3 = (void*)&f2; // expected-error{{address of overloaded function 'f2' is ambiguous}} expected-note@121{{candidate function}} expected-note@122{{candidate function made ineligible by enable_if}} expected-note@123{{candidate function}} - void *p4 = (void*)f2; // expected-error{{address of overloaded function 'f2' is ambiguous}} expected-note@121{{candidate function}} expected-note@122{{candidate function made ineligible by enable_if}} expected-note@123{{candidate function}} + void (*p1)(void) = &f2; // expected-error {{initializing 'void (*)(void)' with an expression of incompatible type '<overloaded function type>'}} \ + // expected-note@#f2_1 {{candidate function}} \ + // expected-note@#f2_2 {{candidate function made ineligible by enable_if}} \ + // expected-note@#f2_3 {{candidate function}} + void (*p2)(void) = f2; // expected-error {{initializing 'void (*)(void)' with an expression of incompatible type '<overloaded function type>'}} \ + // expected-note@#f2_1 {{candidate function}}
\ + // expected-note@#f2_2 {{candidate function made ineligible by enable_if}} \ + // expected-note@#f2_3 {{candidate function}} + void *p3 = (void*)&f2; // expected-error {{address of overloaded function 'f2' is ambiguous}} \ + // expected-note@#f2_1 {{candidate function}} \ + // expected-note@#f2_2 {{candidate function made ineligible by enable_if}} \ + // expected-note@#f2_3 {{candidate function}} + void *p4 = (void*)f2; // expected-error {{address of overloaded function 'f2' is ambiguous}} \ + // expected-note@#f2_1 {{candidate function}} \ + // expected-note@#f2_2 {{candidate function made ineligible by enable_if}} \ + // expected-note@#f2_3 {{candidate function}} } -void f3(int m) __attribute__((overloadable)) __attribute__((enable_if(m >= 0, "positive"))); -void f3(int m) __attribute__((overloadable)) __attribute__((enable_if(m < 0, "negative"))); +void f3(int m) __attribute__((overloadable)) __attribute__((enable_if(m >= 0, "positive"))); // #f3_1 +void f3(int m) __attribute__((overloadable)) __attribute__((enable_if(m < 0, "negative"))); // #f3_2 void test7(void) { - void (*p1)(int) = &f3; // expected-error{{initializing 'void (*)(int)' with an expression of incompatible type '<overloaded function type>'}} expected-note@131{{candidate function made ineligible by enable_if}} expected-note@132{{candidate function made ineligible by enable_if}} - void (*p2)(int) = f3; // expected-error{{initializing 'void (*)(int)' with an expression of incompatible type '<overloaded function type>'}} expected-note@131{{candidate function made ineligible by enable_if}} expected-note@132{{candidate function made ineligible by enable_if}} - void *p3 = (void*)&f3; // expected-error{{address of overloaded function 'f3' does not match required type 'void'}} expected-note@131{{candidate function made ineligible by enable_if}} expected-note@132{{candidate function made ineligible by enable_if}} - void *p4 = (void*)f3; // expected-error{{address of overloaded function 'f3' does not match required type 'void'}}
expected-note@131{{candidate function made ineligible by enable_if}} expected-note@132{{candidate function made ineligible by enable_if}} + void (*p1)(int) = &f3; // expected-error {{initializing 'void (*)(int)' with an expression of incompatible type ''}} \ + // expected-note@#f3_1 {{candidate function made ineligible by enable_if}} \ + // expected-note@#f3_2 {{candidate function made ineligible by enable_if}} + void (*p2)(int) = f3; // expected-error {{initializing 'void (*)(int)' with an expression of incompatible type ''}} \ + // expected-note@#f3_1 {{candidate function made ineligible by enable_if}} \ + // expected-note@#f3_2 {{candidate function made ineligible by enable_if}} + void *p3 = (void*)&f3; // expected-error {{address of overloaded function 'f3' does not match required type 'void'}} \ + // expected-note@#f3_1 {{candidate function made ineligible by enable_if}} \ + // expected-note@#f3_2 {{candidate function made ineligible by enable_if}} + void *p4 = (void*)f3; // expected-error {{address of overloaded function 'f3' does not match required type 'void'}} \ + // expected-note@#f3_1 {{candidate function made ineligible by enable_if}} \ + // expected-note@#f3_2 {{candidate function made ineligible by enable_if}} } void f4(int m) __attribute__((enable_if(0, ""))); diff --git a/clang/test/Sema/sifive-xsfmm.c b/clang/test/Sema/sifive-xsfmm.c index 7e055dd70c4b1..a53938fdf77aa 100644 --- a/clang/test/Sema/sifive-xsfmm.c +++ b/clang/test/Sema/sifive-xsfmm.c @@ -1,23 +1,25 @@ // RUN: %clang_cc1 -triple riscv64 -target-feature +v \ // RUN: -target-feature +xsfmmbase -target-feature +xsfmm32a -target-feature +xsfmm32a8f \ // RUN: -target-feature +xsfmm32a16f -target-feature +xsfmm32a32f -target-feature +xsfmm64a64f \ -// RUN: -target-feature +xsfmm32a4f -target-feature +xsfmm32a8i -disable-O0-optnone \ +// RUN: -target-feature +xsfmm32a4f -target-feature +xsfmm32a8i \ +// RUN: -target-feature +experimental-zvfofp8min -disable-O0-optnone \ // RUN: -fsyntax-only 
%s -verify // REQUIRES: riscv-registered-target #include -void test(vfloat32m8_t arg0, vuint8m8_t arg1) { - __riscv_sf_mm_f_f_w1(4, arg0, arg0, 1, 2, 3); - __riscv_sf_mm_e5m2_e4m3(8, arg1, arg1, 1, 2, 3); - __riscv_sf_mm_u_u(12, arg1, arg1, 1, 2, 3); +void test(vfloat32m8_t arg0, vuint8m8_t arg1, vfloat8e5m2m8_t arg2, + vfloat8e4m3m8_t arg3) { + __riscv_sf_mm_w1(4, arg0, arg0, 1, 2, 3); + __riscv_sf_mm_w4(8, arg2, arg3, 1, 2, 3); + __riscv_sf_mm_w4(12, arg1, arg1, 1, 2, 3); __riscv_sf_vtzero_t_e8w1(0, 0, 0); - __riscv_sf_mm_f_f_w1(5, arg0, arg0, 1, 2, 3); /* expected-error {{argument should be a multiple of 4}} */ - __riscv_sf_mm_e5m2_e4m3(7, arg1, arg1, 1, 2, 3); /* expected-error {{argument should be a multiple of 4}} */ - __riscv_sf_mm_u_u(15, arg1, arg1, 1, 2, 3); /* expected-error {{argument should be a multiple of 4}} */ - __riscv_sf_mm_f_f_w1(16, arg0, arg0, 1, 2, 3); /* expected-error {{argument value 16 is outside the valid range [0, 15]}} */ - __riscv_sf_mm_e5m2_e4m3(20, arg1, arg1, 1, 2, 3); /* expected-error {{argument value 20 is outside the valid range [0, 15]}} */ - __riscv_sf_mm_u_u(24, arg1, arg1, 1, 2, 3); /* expected-error {{argument value 24 is outside the valid range [0, 15]}} */ + __riscv_sf_mm_w1(5, arg0, arg0, 1, 2, 3); /* expected-error {{argument should be a multiple of 4}} */ + __riscv_sf_mm_w4(7, arg2, arg3, 1, 2, 3); /* expected-error {{argument should be a multiple of 4}} */ + __riscv_sf_mm_w4(15, arg1, arg1, 1, 2, 3); /* expected-error {{argument should be a multiple of 4}} */ + __riscv_sf_mm_w1(16, arg0, arg0, 1, 2, 3); /* expected-error {{argument value 16 is outside the valid range [0, 15]}} */ + __riscv_sf_mm_w4(20, arg2, arg3, 1, 2, 3); /* expected-error {{argument value 20 is outside the valid range [0, 15]}} */ + __riscv_sf_mm_w4(24, arg1, arg1, 1, 2, 3); /* expected-error {{argument value 24 is outside the valid range [0, 15]}} */ __riscv_sf_vtzero_t_e8w1(18, 0, 0); /* expected-error {{argument value 18 is outside the valid 
range [0, 15]}} */ __riscv_sf_vtzero_t_e16w1(3, 0, 0); /* expected-error {{argument should be a multiple of 2}} */ __riscv_sf_vtzero_t_e16w2(3, 0, 0); /* expected-error {{argument should be a multiple of 4}} */ diff --git a/clang/test/SemaCXX/enable_if.cpp b/clang/test/SemaCXX/enable_if.cpp index 4b0a253d89fed..9b35bf2ac0c8d 100644 --- a/clang/test/SemaCXX/enable_if.cpp +++ b/clang/test/SemaCXX/enable_if.cpp @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -std=c++11 -verify %s // RUN: %clang_cc1 -std=c++2a -verify %s + typedef int (*fp)(int); int surrogate(int); struct Incomplete; // expected-note{{forward declaration of 'Incomplete'}} \ @@ -118,7 +119,8 @@ template class C { int fn3(bool b) __attribute__((enable_if(b, ""))); // FIXME: This test should net 0 error messages. template void test3() { - fn3(sizeof(T) == 1); // expected-error{{no matching function for call to 'fn3'}} expected-note@-2{{candidate disabled}} + fn3(sizeof(T) == 1); // expected-error{{no matching function for call to 'fn3'}} \ + // expected-note@-2{{candidate disabled}} } template @@ -138,7 +140,8 @@ void test4() { void h(int); template void outer() { void local_function() __attribute__((enable_if(::h(T()), ""))); - local_function(); // expected-error{{no matching function for call to 'local_function'}} expected-note@-1{{candidate disabled}} + local_function(); // expected-error{{no matching function for call to 'local_function'}} \ + // expected-note@-1{{candidate disabled}} }; namespace PR20988 { @@ -160,7 +163,8 @@ namespace PR20988 { int fn3(bool b) __attribute__((enable_if(b, ""))); // FIXME: This test should net 0 error messages. 
template void test3() { - fn3(sizeof(T) == 1); // expected-error{{no matching function for call to 'fn3'}} expected-note@-2{{candidate disabled}} + fn3(sizeof(T) == 1); // expected-error {{no matching function for call to 'fn3'}} \ + // expected-note@-2 {{candidate disabled}} } } @@ -188,14 +192,22 @@ namespace FnPtrs { a = &ovlBar; } - int ovlConflict(int m) __attribute__((enable_if(true, ""))); - int ovlConflict(int m) __attribute__((enable_if(1, ""))); + int ovlConflict(int m) __attribute__((enable_if(true, ""))); // #ovl_1 + int ovlConflict(int m) __attribute__((enable_if(1, ""))); // #ovl_2 void test3() { - int (*p)(int) = ovlConflict; // expected-error{{address of overloaded function 'ovlConflict' is ambiguous}} expected-note@191{{candidate function}} expected-note@192{{candidate function}} - int (*p2)(int) = &ovlConflict; // expected-error{{address of overloaded function 'ovlConflict' is ambiguous}} expected-note@191{{candidate function}} expected-note@192{{candidate function}} + int (*p)(int) = ovlConflict; // expected-error {{address of overloaded function 'ovlConflict' is ambiguous}} \ + // expected-note@#ovl_1 {{candidate function}} \ + // expected-note@#ovl_2 {{candidate function}} + int (*p2)(int) = &ovlConflict; // expected-error {{address of overloaded function 'ovlConflict' is ambiguous}} \ + // expected-note@#ovl_1 {{candidate function}} \ + // expected-note@#ovl_2 {{candidate function}} int (*a)(int); - a = ovlConflict; // expected-error{{assigning to 'int (*)(int)' from incompatible type ''}} expected-note@191{{candidate function}} expected-note@192{{candidate function}} - a = &ovlConflict; // expected-error{{assigning to 'int (*)(int)' from incompatible type ''}} expected-note@191{{candidate function}} expected-note@192{{candidate function}} + a = ovlConflict; // expected-error {{assigning to 'int (*)(int)' from incompatible type ''}} \ + // expected-note@#ovl_1 {{candidate function}} \ + // expected-note@#ovl_2 {{candidate function}} + a = 
&ovlConflict; // expected-error {{assigning to 'int (*)(int)' from incompatible type ''}} \ + // expected-note@#ovl_1 {{candidate function}} \ + // expected-note@#ovl_2 {{candidate function}} } template @@ -211,37 +223,59 @@ namespace FnPtrs { } template - T templatedBar(T m) __attribute__((enable_if(m > 0, ""))) { return T(); } + T templatedBar(T m) __attribute__((enable_if(m > 0, ""))) { return T(); } // #tbar void test5() { - int (*p)(int) = templatedBar; // expected-error{{address of overloaded function 'templatedBar' does not match required type 'int (int)'}} expected-note@214{{candidate function made ineligible by enable_if}} - int (*p2)(int) = &templatedBar; // expected-error{{address of overloaded function 'templatedBar' does not match required type 'int (int)'}} expected-note@214{{candidate function made ineligible by enable_if}} + int (*p)(int) = templatedBar; // expected-error {{address of overloaded function 'templatedBar' does not match required type 'int (int)'}} \ + // expected-note@#tbar {{candidate function made ineligible by enable_if}} + int (*p2)(int) = &templatedBar; // expected-error {{address of overloaded function 'templatedBar' does not match required type 'int (int)'}} \ + // expected-note@#tbar {{candidate function made ineligible by enable_if}} int (*a)(int); - a = templatedBar; // expected-error{{assigning to 'int (*)(int)' from incompatible type ''}} expected-note@214{{candidate function made ineligible by enable_if}} - a = &templatedBar; // expected-error{{assigning to 'int (*)(int)' from incompatible type ''}} expected-note@214{{candidate function made ineligible by enable_if}} + a = templatedBar; // expected-error {{assigning to 'int (*)(int)' from incompatible type ''}} \ + // expected-note@#tbar {{candidate function made ineligible by enable_if}} + a = &templatedBar; // expected-error {{assigning to 'int (*)(int)' from incompatible type ''}} \ + // expected-note@#tbar {{candidate function made ineligible by enable_if}} } template 
- T templatedConflict(T m) __attribute__((enable_if(false, ""))) { return T(); } + T templatedConflict(T m) __attribute__((enable_if(false, ""))) { return T(); } // #conflict1 template - T templatedConflict(T m) __attribute__((enable_if(true, ""))) { return T(); } + T templatedConflict(T m) __attribute__((enable_if(true, ""))) { return T(); } // #conflict2 template - T templatedConflict(T m) __attribute__((enable_if(1, ""))) { return T(); } + T templatedConflict(T m) __attribute__((enable_if(1, ""))) { return T(); } // #conflict3 void test6() { - int (*p)(int) = templatedConflict; // expected-error{{address of overloaded function 'templatedConflict' is ambiguous}} expected-note@224{{candidate function made ineligible by enable_if}} expected-note@226{{candidate function}} expected-note@228{{candidate function}} - int (*p0)(int) = &templatedConflict; // expected-error{{address of overloaded function 'templatedConflict' is ambiguous}} expected-note@224{{candidate function made ineligible by enable_if}} expected-note@226{{candidate function}} expected-note@228{{candidate function}} + int (*p)(int) = templatedConflict; // expected-error {{address of overloaded function 'templatedConflict' is ambiguous}} \ + // expected-note@#conflict1 {{candidate function made ineligible by enable_if}} \ + // expected-note@#conflict2 {{candidate function}} \ + // expected-note@#conflict3 {{candidate function}} + int (*p0)(int) = &templatedConflict; // expected-error {{address of overloaded function 'templatedConflict' is ambiguous}} \ + // expected-note@#conflict1 {{candidate function made ineligible by enable_if}} \ + // expected-note@#conflict2 {{candidate function}} \ + // expected-note@#conflict3 {{candidate function}} int (*a)(int); - a = templatedConflict; // expected-error{{assigning to 'int (*)(int)' from incompatible type ''}} expected-note@226{{candidate function}} expected-note@228{{candidate function}} - a = &templatedConflict; // expected-error{{assigning to 'int (*)(int)' 
from incompatible type ''}} expected-note@226{{candidate function}} expected-note@228{{candidate function}} + a = templatedConflict; // expected-error {{assigning to 'int (*)(int)' from incompatible type ''}} \ + // expected-note@#conflict2 {{candidate function}} \ + // expected-note@#conflict3 {{candidate function}} + a = &templatedConflict; // expected-error {{assigning to 'int (*)(int)' from incompatible type ''}} \ + // expected-note@#conflict2 {{candidate function}} \ + // expected-note@#conflict3 {{candidate function}} } - int ovlNoCandidate(int m) __attribute__((enable_if(false, ""))); - int ovlNoCandidate(int m) __attribute__((enable_if(0, ""))); + int ovlNoCandidate(int m) __attribute__((enable_if(false, ""))); // #ovlno1 + int ovlNoCandidate(int m) __attribute__((enable_if(0, ""))); // #ovlno2 void test7() { - int (*p)(int) = ovlNoCandidate; // expected-error{{address of overloaded function 'ovlNoCandidate' does not match required type}} expected-note@237{{made ineligible by enable_if}} expected-note@238{{made ineligible by enable_if}} - int (*p2)(int) = &ovlNoCandidate; // expected-error{{address of overloaded function 'ovlNoCandidate' does not match required type}} expected-note@237{{made ineligible by enable_if}} expected-note@238{{made ineligible by enable_if}} + int (*p)(int) = ovlNoCandidate; // expected-error {{address of overloaded function 'ovlNoCandidate' does not match required type}} \ + // expected-note@#ovlno1 {{made ineligible by enable_if}} \ + // expected-note@#ovlno2 {{made ineligible by enable_if}} + int (*p2)(int) = &ovlNoCandidate; // expected-error {{address of overloaded function 'ovlNoCandidate' does not match required type}} \ + // expected-note@#ovlno1 {{made ineligible by enable_if}} \ + // expected-note@#ovlno2 {{made ineligible by enable_if}} int (*a)(int); - a = ovlNoCandidate; // expected-error{{assigning to 'int (*)(int)' from incompatible type ''}} expected-note@237{{made ineligible by enable_if}} expected-note@238{{made 
ineligible by enable_if}} - a = &ovlNoCandidate; // expected-error{{assigning to 'int (*)(int)' from incompatible type ''}} expected-note@237{{made ineligible by enable_if}} expected-note@238{{made ineligible by enable_if}} + a = ovlNoCandidate; // expected-error {{assigning to 'int (*)(int)' from incompatible type ''}} \ + // expected-note@#ovlno1 {{made ineligible by enable_if}} \ + // expected-note@#ovlno2 {{made ineligible by enable_if}} + a = &ovlNoCandidate; // expected-error {{assigning to 'int (*)(int)' from incompatible type ''}} \ + // expected-note@#ovlno1 {{made ineligible by enable_if}} \ + // expected-note@#ovlno2 {{made ineligible by enable_if}} } int noOvlNoCandidate(int m) __attribute__((enable_if(false, ""))); diff --git a/clang/test/SemaTemplate/instantiate-member-template.cpp b/clang/test/SemaTemplate/instantiate-member-template.cpp index 4c74f5fb938b6..3e1b9d16202b4 100644 --- a/clang/test/SemaTemplate/instantiate-member-template.cpp +++ b/clang/test/SemaTemplate/instantiate-member-template.cpp @@ -259,3 +259,9 @@ namespace rdar8986308 { } } + +namespace GH201490 { + template struct A {}; + template struct B : A {}; + template<> template class A::B {}; // expected-error{{out-of-line definition of 'B' does not match any declaration in 'GH201490::A'}} +} diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py index dc0d87f0a29a1..f7b3a77266cb8 100644 --- a/clang/test/lit.cfg.py +++ b/clang/test/lit.cfg.py @@ -336,9 +336,7 @@ def have_host_clang_repl_cuda(): "default-cxx-stdlib={}".format(config.clang_default_cxx_stdlib) ) -# As of 2011.08, crash-recovery tests still do not pass on FreeBSD. 
-if platform.system() not in ["FreeBSD"]: - config.available_features.add("crash-recovery") +config.available_features.add("crash-recovery") # ANSI escape sequences in non-dumb terminal if platform.system() not in ["Windows"]: diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index f85422dbf4f42..c42cc147cf21e 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -24909,6 +24909,15 @@ TEST_F(FormatTest, Cpp20ModulesSupport) { verifyFormat("import /* comment */;", Style); verifyFormat("import ; // Trailing comment", Style); + Style.BreakStringLiterals = true; + Style.ColumnLimit = 20; + verifyFormat("export module foobar;\n" + "char *s = \"s1\"\n" + " \"s2\";", + "export module foobar;\n" + "char *s = \"s1\" \"s2\";", + Style); + // Somewhat gracefully handle import in pre-C++20 code. verifyFormat("import /* not keyword */ = val ? 2 : 1;"); verifyFormat("_world->import();"); diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform.h b/compiler-rt/lib/tsan/rtl/tsan_platform.h index b1cde8962fc58..5edec59d5af52 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform.h +++ b/compiler-rt/lib/tsan/rtl/tsan_platform.h @@ -738,13 +738,16 @@ struct MappingGoRiscv64_48 { Go on linux/s390x 0000 0000 1000 - 1000 0000 0000: executable and heap - 16 TiB 1000 0000 0000 - 4000 0000 0000: - -4000 0000 0000 - 6000 0000 0000: shadow - 64TiB (4 * app) -6000 0000 0000 - 9000 0000 0000: - -9000 0000 0000 - 9800 0000 0000: metainfo - 8TiB (0.5 * app) +4000 0000 0000 - 6000 0000 0000: shadow - 32 TiB (2 * app) +6000 0000 0000 - 7000 0000 0000: - +7000 0000 0000 - 7800 0000 0000: metainfo - 8 TiB (0.5 * app) +7800 0000 0000 - 8000 0000 0000: - */ struct MappingGoS390x { - static const uptr kMetaShadowBeg = 0x900000000000ull; - static const uptr kMetaShadowEnd = 0x980000000000ull; + // Keep the mapping below 2^47 for QEMU linux-user on x86-64 hosts with + // four-level page tables. 
+ static const uptr kMetaShadowBeg = 0x700000000000ull; + static const uptr kMetaShadowEnd = 0x780000000000ull; static const uptr kShadowBeg = 0x400000000000ull; static const uptr kShadowEnd = 0x600000000000ull; static const uptr kLoAppMemBeg = 0x000000001000ull; diff --git a/compiler-rt/test/builtins/Unit/lit.cfg.py b/compiler-rt/test/builtins/Unit/lit.cfg.py index 8d602d1c417fa..2bb72630a41e9 100644 --- a/compiler-rt/test/builtins/Unit/lit.cfg.py +++ b/compiler-rt/test/builtins/Unit/lit.cfg.py @@ -107,7 +107,9 @@ def get_libgcc_file_name(): if config.target_os == "Haiku": config.substitutions.append(("%librt ", base_lib + " -lroot ")) else: - config.substitutions.append(("%librt ", base_lib + " -lc -lm ")) + config.substitutions.append( + ("%librt ", "-lm -Wl,--start-group " + base_lib + " -lc -Wl,--end-group ") + ) builtins_test_crt = get_required_attr(config, "builtins_test_crt") if builtins_test_crt: diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 138e0fbe0fde8..b19890a5367b7 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -6618,7 +6618,9 @@ class FirConverter : public Fortran::lower::AbstractConverter { !Fortran::semantics::IsAllocatable(sym) && Fortran::semantics::IsSaved(sym)) { mlir::Location loc = toLocation(); - TODO(loc, "non-ALLOCATABLE SAVE Coarray outside the main program."); + TODO( + loc, + "coarray: non-ALLOCATABLE SAVE coarray outside the main program"); } } Fortran::lower::defineModuleVariable(*this, var); diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 9e309858deb4c..2834531dccd96 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -703,8 +703,8 @@ static void instantiateGlobal(Fortran::lower::AbstractConverter &converter, if (Fortran::evaluate::IsCoarray(sym)) if (hasFinalization(sym) || hasAllocatableDirectComponent(sym)) - TODO(loc, "Coarray with an allocatable direct component and/or requiring " - 
"finalization."); + TODO(loc, "coarray: coarray with an allocatable direct component and/or " + "requiring finalization"); if (var.isModuleOrSubmoduleVariable()) { // A non-intrinsic module global is defined when lowering the module. @@ -2337,7 +2337,7 @@ void Fortran::lower::mapSymbolAttributes( if (Fortran::evaluate::IsCoarray(sym)) // Operation in MIF dialect to create an alias of the coarray not // yet supported (by using the procedure provided by PRIF). - TODO(loc, "coarray dummy argument not yet supported."); + TODO(loc, "coarray: dummy argument not yet supported"); mlir::Value dummyArg = symMap.lookupSymbol(sym).getAddr(); if (lowerToBoxValue(sym, dummyArg, converter)) { @@ -2638,7 +2638,8 @@ void Fortran::lower::mapSymbolAttributes( "must be a non-ALLOCATABLE coarray"); if (Fortran::semantics::IsSaved(sym) && sym.owner().kind() != Fortran::semantics::Scope::Kind::MainProgram) - TODO(loc, "non-ALLOCATABLE SAVE Coarray outside the main program."); + TODO(loc, + "coarray: non-ALLOCATABLE SAVE coarray outside the main program"); ; Fortran::lower::genAllocateCoarray(converter, loc, sym, addr); ::genDeclareSymbol(converter, symMap, sym, addr, len, extents, lbounds, diff --git a/flang/lib/Lower/MultiImageFortran.cpp b/flang/lib/Lower/MultiImageFortran.cpp index cab220c019546..dc84a00c29d10 100644 --- a/flang/lib/Lower/MultiImageFortran.cpp +++ b/flang/lib/Lower/MultiImageFortran.cpp @@ -158,7 +158,7 @@ Fortran::lower::genChangeTeamStmt(Fortran::lower::AbstractConverter &converter, const std::list &coarrayAssocList = std::get>(stmt.t); if (coarrayAssocList.size()) - TODO(loc, "Coarrays provided in the association list."); + TODO(loc, "coarray: coarrays provided in the association list"); // Handle TEAM-VALUE const auto *teamExpr = @@ -458,10 +458,10 @@ mlir::Value Fortran::lower::genAllocateCoarray( fir::ExtendedValue Fortran::lower::CoarrayExprHelper::genAddr( const Fortran::evaluate::CoarrayRef &expr) { (void)symMap; - TODO(converter.getCurrentLocation(), 
"co-array address"); + TODO(converter.getCurrentLocation(), "coarray: coarray address"); } fir::ExtendedValue Fortran::lower::CoarrayExprHelper::genValue( const Fortran::evaluate::CoarrayRef &expr) { - TODO(converter.getCurrentLocation(), "co-array value"); + TODO(converter.getCurrentLocation(), "coarray: coarray value"); } diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index facca9867e4bb..99ce48206c33b 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -4520,7 +4520,12 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, defOp.u); }, [&](const clause::ProcedureDesignator &pd) -> std::string { - return pd.v.sym()->name().ToString(); + // Qualify the name with the scope in which the user-defined + // reduction is declared so that reductions with the same name + // in different scopes produce distinct omp.declare_reduction ops. + const semantics::Symbol *sym = pd.v.sym(); + std::string name = sym->name().ToString(); + return converter.mangleName(name, sym->GetUltimate().owner()); }, }, redOp.u); diff --git a/flang/lib/Lower/Support/ReductionProcessor.cpp b/flang/lib/Lower/Support/ReductionProcessor.cpp index b3a27736d1616..7db48601d5aba 100644 --- a/flang/lib/Lower/Support/ReductionProcessor.cpp +++ b/flang/lib/Lower/Support/ReductionProcessor.cpp @@ -692,10 +692,14 @@ bool ReductionProcessor::processReductionArguments( if (!ReductionProcessor::supportedIntrinsicProcReduction( *reductionIntrinsic)) { // If not an intrinsic is has to be a custom reduction op, and should - // be available in the module. + // be available in the module. The op is named using the scope in + // which the user-defined reduction was declared, so qualify the + // lookup name the same way the declaration and use sides do. 
semantics::Symbol *sym = reductionIntrinsic->v.sym(); mlir::ModuleOp module = builder.getModule(); - auto decl = module.lookupSymbol(getRealName(sym).ToString()); + std::string declName = getRealName(sym).ToString(); + declName = converter.mangleName(declName, sym->GetUltimate().owner()); + auto decl = module.lookupSymbol(declName); if (!decl) return false; } @@ -843,8 +847,14 @@ bool ReductionProcessor::processReductionArguments( // Custom reductions we can just add to the symbols without // generating the declare reduction op. semantics::Symbol *sym = reductionIntrinsic->v.sym(); - reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get( - builder.getContext(), sym->name().ToString())); + // Qualify the name with the scope in which the user-defined + // reduction was declared so that reductions with the same name in + // different scopes refer to distinct omp.declare_reduction ops. + std::string reductionName = getRealName(sym).ToString(); + reductionName = + converter.mangleName(reductionName, sym->GetUltimate().owner()); + reductionDeclSymbols.push_back( + mlir::SymbolRefAttr::get(builder.getContext(), reductionName)); ++idx; continue; } diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 3d6e1c71fe8d9..04f7741adf943 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -8300,7 +8300,7 @@ IntrinsicLibrary::genThisImage(mlir::Type resultType, mlir::Value team = fir::getBase(args[args.size() - 1]); if (!coarrayIsAbsent) - TODO(loc, "this_image with coarray argument."); + TODO(loc, "coarray: this_image with coarray argument"); mlir::Value res = mif::ThisImageOp::create(builder, loc, team); return builder.createConvert(loc, resultType, res); } diff --git a/flang/lib/Optimizer/Transforms/MIFOpConversion.cpp b/flang/lib/Optimizer/Transforms/MIFOpConversion.cpp index 91860c21659a5..5121455817bd6 100644 --- 
a/flang/lib/Optimizer/Transforms/MIFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/MIFOpConversion.cpp @@ -430,7 +430,7 @@ struct MIFThisImageOpConversion mlir::Location loc = op.getLoc(); if (op.getCoarray()) - TODO(loc, "mif.this_image op with coarray argument."); + TODO(loc, "coarray: mif.this_image op with coarray argument"); else { mlir::Type i32Ty = builder.getI32Type(); mlir::Type boxTy = fir::BoxType::get(rewriter.getNoneType()); diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 7c531ae0046ae..e2220156d13cd 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -5694,6 +5694,12 @@ void OmpStructureChecker::Enter(const parser::OmpClause::DynamicAllocators &x) { void OmpStructureChecker::Enter(const parser::OmpClause::ReverseOffload &x) { CheckAllowedRequiresClause(llvm::omp::Clause::OMPC_reverse_offload); + if (IsAllowedClause(llvm::omp::Clause::OMPC_reverse_offload)) { + unsigned version{context_.langOptions().OpenMPVersion}; + context_.Say(GetContext().clauseSource, + "%s clause is not supported and will be ignored"_warn_en_US, + GetUpperName(llvm::omp::Clause::OMPC_reverse_offload, version)); + } } void OmpStructureChecker::Enter(const parser::OmpClause::UnifiedAddress &x) { diff --git a/flang/test/Integration/split-lto-unit-2.f90 b/flang/test/Integration/split-lto-unit-2.f90 index 3ccfa973d4cf9..843c6b5e717f5 100644 --- a/flang/test/Integration/split-lto-unit-2.f90 +++ b/flang/test/Integration/split-lto-unit-2.f90 @@ -1,19 +1,19 @@ ! Check that -flto=thin without -fsplit-lto-unit has EnableSplitLTOUnit = 0 ! RUN: %flang -flto=thin -S -o - %s | FileCheck %s --check-prefix=SPLIT0 -! RUN: %flang -flto=thin --target=x86_64-linux-gnu -S -o - %s | FileCheck %s --check-prefix=SPLIT0 -! RUN: %flang -flto=thin --target=x86_64-apple-macosx -S -o - %s | FileCheck %s --check-prefix=SPLIT0 +! 
RUN: %if x86-registered-target %{ %flang -flto=thin --target=x86_64-linux-gnu -S -o - %s | FileCheck %s --check-prefix=SPLIT0 %} +! RUN: %if x86-registered-target %{ %flang -flto=thin --target=x86_64-apple-macosx -S -o - %s | FileCheck %s --check-prefix=SPLIT0 %} ! Check that -flto=thin with -fsplit-lto-unit has EnableSplitLTOUnit = 1 ! RUN: %flang -flto=thin -fsplit-lto-unit -S -o - %s | FileCheck %s --check-prefix=SPLIT1 -! RUN: %flang -flto=thin --target=x86_64-linux-gnu -fsplit-lto-unit -S -o - %s | FileCheck %s --check-prefix=SPLIT1 -! RUN: %flang -flto=thin --target=x86_64-apple-macosx -fsplit-lto-unit -S -o - %s | FileCheck %s --check-prefix=SPLIT1 +! RUN: %if x86-registered-target %{ %flang -flto=thin --target=x86_64-linux-gnu -fsplit-lto-unit -S -o - %s | FileCheck %s --check-prefix=SPLIT1 %} +! RUN: %if x86-registered-target %{ %flang -flto=thin --target=x86_64-apple-macosx -fsplit-lto-unit -S -o - %s | FileCheck %s --check-prefix=SPLIT1 %} ! Check that regular LTO has EnableSplitLTOUnit = 1 ! RUN: %flang -flto -S -o - %s | FileCheck %s --implicit-check-not="EnableSplitLTOUnit" --check-prefix=SPLIT1 -! RUN: %flang -flto --target=x86_64-linux-gnu -S -o - %s | FileCheck %s --check-prefix=SPLIT1 +! RUN: %if x86-registered-target %{ %flang -flto --target=x86_64-linux-gnu -S -o - %s | FileCheck %s --check-prefix=SPLIT1 %} ! Check that regular LTO has no EnableSplitLTOUnit for apple targets -! RUN: %flang -flto --target=x86_64-apple-macosx -S -o - %s | FileCheck %s --check-prefix=NOSPLIT +! RUN: %if x86-registered-target %{ %flang -flto --target=x86_64-apple-macosx -S -o - %s | FileCheck %s --check-prefix=NOSPLIT %} ! SPLIT0: !{i32 1, !"EnableSplitLTOUnit", i32 0} ! 
SPLIT1: !{i32 1, !"EnableSplitLTOUnit", i32 1} diff --git a/flang/test/Lower/MIF/coarray_allocation3.f90 b/flang/test/Lower/MIF/coarray_allocation3.f90 index 328fc446c132a..b4fc02baee064 100644 --- a/flang/test/Lower/MIF/coarray_allocation3.f90 +++ b/flang/test/Lower/MIF/coarray_allocation3.f90 @@ -1,6 +1,6 @@ ! RUN: not %flang_fc1 -emit-hlfir -fcoarray %s -o - 2>&1 | FileCheck %s -!CHECK: not yet implemented: Coarray with an allocatable direct component and/or requiring finalization. +!CHECK: not yet implemented: coarray: coarray with an allocatable direct component and/or requiring finalization module m_test implicit none diff --git a/flang/test/Lower/MIF/coarray_allocation4.f90 b/flang/test/Lower/MIF/coarray_allocation4.f90 index fe4741e099744..f0278ea2cd534 100644 --- a/flang/test/Lower/MIF/coarray_allocation4.f90 +++ b/flang/test/Lower/MIF/coarray_allocation4.f90 @@ -1,6 +1,6 @@ ! RUN: not %flang_fc1 -emit-hlfir -fcoarray %s -o - 2>&1 | FileCheck %s -!CHECK: not yet implemented: non-ALLOCATABLE SAVE Coarray outside the main program. +!CHECK: not yet implemented: coarray: non-ALLOCATABLE SAVE coarray outside the main program subroutine test_coarray_save() implicit none diff --git a/flang/test/Lower/MIF/coarray_allocation5.f90 b/flang/test/Lower/MIF/coarray_allocation5.f90 index 19fb71aa4edbd..42058363bf9fb 100644 --- a/flang/test/Lower/MIF/coarray_allocation5.f90 +++ b/flang/test/Lower/MIF/coarray_allocation5.f90 @@ -1,6 +1,6 @@ ! RUN: not %flang_fc1 -emit-hlfir -fcoarray %s -o - 2>&1 | FileCheck %s -!CHECK: not yet implemented: non-ALLOCATABLE SAVE Coarray outside the main program. 
+!CHECK: not yet implemented: coarray: non-ALLOCATABLE SAVE coarray outside the main program module m_coarray_test implicit none diff --git a/flang/test/Lower/OpenMP/Todo/multiple-types-declare_reduction.f90 b/flang/test/Lower/OpenMP/Todo/multiple-types-declare_reduction.f90 index e4931018b07ec..aa83d7f832c9b 100644 --- a/flang/test/Lower/OpenMP/Todo/multiple-types-declare_reduction.f90 +++ b/flang/test/Lower/OpenMP/Todo/multiple-types-declare_reduction.f90 @@ -29,7 +29,7 @@ program main end program main ! Verify declare reduction is created for integer -! CHECK-LABEL: omp.declare_reduction @myred : i32 +! CHECK-LABEL: omp.declare_reduction @_QQFmyred : i32 ! CHECK: init { ! CHECK: arith.constant 0 : i32 ! CHECK: omp.yield @@ -42,10 +42,10 @@ end program main ! Verify reduction is used in first parallel loop (integer) ! CHECK: omp.parallel ! CHECK: omp.wsloop -! CHECK-SAME: reduction(@myred +! CHECK-SAME: reduction(@_QQFmyred ! Verify reduction is used in second parallel loop (real) ! CHECK: omp.parallel ! CHECK: omp.wsloop -! CHECK-SAME: reduction(@myred +! CHECK-SAME: reduction(@_QQFmyred ! CHECK: arith.addf diff --git a/flang/test/Lower/OpenMP/declare-reduction-character-allocatable.f90 b/flang/test/Lower/OpenMP/declare-reduction-character-allocatable.f90 index daa0d41063858..e4af5818ecb71 100644 --- a/flang/test/Lower/OpenMP/declare-reduction-character-allocatable.f90 +++ b/flang/test/Lower/OpenMP/declare-reduction-character-allocatable.f90 @@ -19,7 +19,7 @@ program test_character_reduction end program test_character_reduction ! Verify the declare_reduction is generated with reference type for character -! CHECK-LABEL: omp.declare_reduction @char_max : !fir.ref> +! CHECK-LABEL: omp.declare_reduction @_QQFchar_max : !fir.ref> ! CHECK: init { ! CHECK: omp.yield @@ -30,4 +30,4 @@ end program test_character_reduction ! Verify the reduction is used in the parallel sections ! CHECK: omp.parallel -! CHECK: omp.sections reduction(byref @char_max +! 
CHECK: omp.sections reduction(byref @_QQFchar_max diff --git a/flang/test/Lower/OpenMP/declare-reduction-finalizer.f90 b/flang/test/Lower/OpenMP/declare-reduction-finalizer.f90 index 22a653179ce2d..d0f8cbc2edbc3 100644 --- a/flang/test/Lower/OpenMP/declare-reduction-finalizer.f90 +++ b/flang/test/Lower/OpenMP/declare-reduction-finalizer.f90 @@ -25,7 +25,7 @@ end subroutine cleanup end module m1 -! CHECK-LABEL: omp.declare_reduction @plus_t{{.*}} : !fir.ref<{{.*}}> +! CHECK-LABEL: omp.declare_reduction @_QQFplus_t{{.*}} : !fir.ref<{{.*}}> ! ! -- alloc region ! CHECK: alloc { diff --git a/flang/test/Lower/OpenMP/declare-reduction-initializer-component.f90 b/flang/test/Lower/OpenMP/declare-reduction-initializer-component.f90 index b42fa610d17e0..39f1eb6e71f44 100644 --- a/flang/test/Lower/OpenMP/declare-reduction-initializer-component.f90 +++ b/flang/test/Lower/OpenMP/declare-reduction-initializer-component.f90 @@ -25,7 +25,7 @@ subroutine test_component_init() !$omp end parallel do end subroutine -!CHECK: omp.declare_reduction @add_member : !fir.ref> +!CHECK: omp.declare_reduction @_QQFtest_component_initadd_member : !fir.ref> !CHECK-SAME: alloc { !CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.type<_QFtest_component_initTt{member:i32}> !CHECK: omp.yield(%[[ALLOCA]] : !fir.ref>) diff --git a/flang/test/Lower/OpenMP/declare-reduction-initializer-defined-assign.f90 b/flang/test/Lower/OpenMP/declare-reduction-initializer-defined-assign.f90 index bdf48626fd2b3..366d28c47b706 100644 --- a/flang/test/Lower/OpenMP/declare-reduction-initializer-defined-assign.f90 +++ b/flang/test/Lower/OpenMP/declare-reduction-initializer-defined-assign.f90 @@ -41,7 +41,7 @@ subroutine test_defined_assign_init() !$omp end parallel do end subroutine -!CHECK: omp.declare_reduction @add_t : +!CHECK: omp.declare_reduction @_QQFtest_defined_assign_initadd_t : !CHECK-SAME: alloc { !CHECK: %[[ALLOCA:.*]] = fir.alloca !CHECK: omp.yield(%[[ALLOCA]] : diff --git 
a/flang/test/Lower/OpenMP/declare-reduction-initializer-rhs-call.f90 b/flang/test/Lower/OpenMP/declare-reduction-initializer-rhs-call.f90 index 7d409b27464e2..988219d65b1a6 100644 --- a/flang/test/Lower/OpenMP/declare-reduction-initializer-rhs-call.f90 +++ b/flang/test/Lower/OpenMP/declare-reduction-initializer-rhs-call.f90 @@ -36,7 +36,7 @@ subroutine test_rhs_call() !$omp end parallel do end subroutine -!CHECK: omp.declare_reduction @add_t : +!CHECK: omp.declare_reduction @_QQFtest_rhs_calladd_t : !CHECK-SAME: alloc { !CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.type<_QMmTt{member:i32}> !CHECK: omp.yield(%[[ALLOCA]] : diff --git a/flang/test/Lower/OpenMP/declare-reduction-no-initializer-intrinsic.f90 b/flang/test/Lower/OpenMP/declare-reduction-no-initializer-intrinsic.f90 index 225aa8ccd3aec..cb768d3b92744 100644 --- a/flang/test/Lower/OpenMP/declare-reduction-no-initializer-intrinsic.f90 +++ b/flang/test/Lower/OpenMP/declare-reduction-no-initializer-intrinsic.f90 @@ -3,7 +3,7 @@ ! Test declare reduction without initializer clause for intrinsic types. ! Without an initializer, the private variable should be zero-initialized. -! CHECK-DAG: omp.declare_reduction @char_max : !fir.ref> +! CHECK-DAG: omp.declare_reduction @_QQFchar_max : !fir.ref> ! CHECK: init { ! CHECK: %[[CHZERO:.*]] = fir.zero_bits !fir.char<1,10> ! CHECK: fir.store %[[CHZERO]] @@ -72,7 +72,7 @@ program test_no_init_intrinsic !$omp end parallel do ! Test fixed-length character reduction without initializer - ! CHECK: omp.wsloop {{.*}} reduction(byref @char_max + ! 
CHECK: omp.wsloop {{.*}} reduction(byref @_QQFchar_max !$omp parallel do reduction(char_max: s) do i = 1, 10 continue diff --git a/flang/test/Lower/OpenMP/declare-reduction-no-initializer-target-derived.f90 b/flang/test/Lower/OpenMP/declare-reduction-no-initializer-target-derived.f90 index b3931d6d26238..007a704a6962c 100644 --- a/flang/test/Lower/OpenMP/declare-reduction-no-initializer-target-derived.f90 +++ b/flang/test/Lower/OpenMP/declare-reduction-no-initializer-target-derived.f90 @@ -4,7 +4,7 @@ ! default component values, used in a target offload region. ! The init region must initialize components using the type's default values. -! CHECK-LABEL: omp.declare_reduction @add_pts +! CHECK-LABEL: omp.declare_reduction @_QQFadd_pts ! CHECK-SAME: : !fir.ref> ! CHECK: init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>, diff --git a/flang/test/Lower/OpenMP/declare-reduction-same-name-different-scope.f90 b/flang/test/Lower/OpenMP/declare-reduction-same-name-different-scope.f90 new file mode 100644 index 0000000000000..066758ae746e9 --- /dev/null +++ b/flang/test/Lower/OpenMP/declare-reduction-same-name-different-scope.f90 @@ -0,0 +1,35 @@ +! Test that two user-defined reductions sharing the same name but declared in +! different scopes lower to distinct omp.declare_reduction operations, and that +! a reduction clause refers to the declaration visible in its own scope rather +! than one leaking in from another scope (issue #181270). + +!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s + +module m +contains + subroutine dummy +!$omp declare reduction (a:integer:omp_out=omp_out+omp_in) initializer(omp_priv=10000) + end subroutine dummy + + subroutine test +!$omp declare reduction (a:integer:omp_out=omp_out+omp_in) initializer(omp_priv=0) + integer::x1,i + x1=0 +!$omp parallel do reduction(a:x1) + do i=1,10 + x1=x1+1 + end do +!$omp end parallel do + end subroutine test +end module m + +! CHECK: omp.declare_reduction @[[TEST_RED:_QQMmFtesta]] : i32 init { +! 
CHECK: %[[C0:.*]] = arith.constant 0 : i32 +! CHECK: omp.yield(%[[C0]] : i32) + +! CHECK: omp.declare_reduction @[[DUMMY_RED:_QQMmFdummya]] : i32 init { +! CHECK: %[[C10000:.*]] = arith.constant 10000 : i32 +! CHECK: omp.yield(%[[C10000]] : i32) + +! CHECK-LABEL: func.func @_QMmPtest() +! CHECK: omp.wsloop {{.*}}reduction(@[[TEST_RED]] %{{.*}} -> %{{.*}} : !fir.ref) diff --git a/flang/test/Lower/OpenMP/declare-reduction-target-intrinsic.f90 b/flang/test/Lower/OpenMP/declare-reduction-target-intrinsic.f90 index b74133209bf01..43caea5e2dfd4 100644 --- a/flang/test/Lower/OpenMP/declare-reduction-target-intrinsic.f90 +++ b/flang/test/Lower/OpenMP/declare-reduction-target-intrinsic.f90 @@ -4,21 +4,21 @@ ! These should generate inline constant initialization (no runtime calls), ! so they work on GPU targets without requiring the device Fortran runtime. -! CHECK-LABEL: omp.declare_reduction @addc : complex init { +! CHECK-LABEL: omp.declare_reduction @_QQFaddc : complex init { ! CHECK: %[[CZERO:.*]] = fir.zero_bits complex ! CHECK: omp.yield(%[[CZERO]] : complex) ! CHECK: } combiner { ! CHECK: fir.addc ! CHECK: } -! CHECK-LABEL: omp.declare_reduction @addr : f32 init { +! CHECK-LABEL: omp.declare_reduction @_QQFaddr : f32 init { ! CHECK: %[[FZERO:.*]] = fir.zero_bits f32 ! CHECK: omp.yield(%[[FZERO]] : f32) ! CHECK: } combiner { ! CHECK: arith.addf ! CHECK: } -! CHECK-LABEL: omp.declare_reduction @addi : i32 init { +! CHECK-LABEL: omp.declare_reduction @_QQFaddi : i32 init { ! CHECK: %[[IZERO:.*]] = fir.zero_bits i32 ! CHECK: omp.yield(%[[IZERO]] : i32) ! CHECK: } combiner { @@ -26,16 +26,16 @@ ! CHECK: } ! CHECK: omp.target -! CHECK: omp.teams reduction(@addi -! CHECK: omp.wsloop reduction(@addi +! CHECK: omp.teams reduction(@_QQFaddi +! CHECK: omp.wsloop reduction(@_QQFaddi ! CHECK: omp.target -! CHECK: omp.teams reduction(@addr -! CHECK: omp.wsloop reduction(@addr +! CHECK: omp.teams reduction(@_QQFaddr +! CHECK: omp.wsloop reduction(@_QQFaddr ! CHECK: omp.target -! 
CHECK: omp.teams reduction(@addc -! CHECK: omp.wsloop reduction(@addc +! CHECK: omp.teams reduction(@_QQFaddc +! CHECK: omp.wsloop reduction(@_QQFaddc program test_target_named_reduction implicit none diff --git a/flang/test/Lower/OpenMP/omp-declare-reduction-combsub.f90 b/flang/test/Lower/OpenMP/omp-declare-reduction-combsub.f90 index 098b3f84aa2f3..ae1eb9747bd40 100644 --- a/flang/test/Lower/OpenMP/omp-declare-reduction-combsub.f90 +++ b/flang/test/Lower/OpenMP/omp-declare-reduction-combsub.f90 @@ -17,7 +17,7 @@ subroutine combine_me(out, in) integer out, in end subroutine combine_me end interface -!CHECK: omp.declare_reduction @red_add : i32 init { +!CHECK: omp.declare_reduction @_QQFfuncred_add : i32 init { !CHECK: ^bb0(%[[OMP_ORIG_ARG_I:.*]]: i32): !CHECK: %[[OMP_PRIV:.*]] = fir.alloca i32 !CHECK: %[[OMP_ORIG:.*]] = fir.alloca i32 diff --git a/flang/test/Lower/OpenMP/omp-declare-reduction-derivedtype.f90 b/flang/test/Lower/OpenMP/omp-declare-reduction-derivedtype.f90 index 1fea2aee64f69..b92f42e2e25de 100644 --- a/flang/test/Lower/OpenMP/omp-declare-reduction-derivedtype.f90 +++ b/flang/test/Lower/OpenMP/omp-declare-reduction-derivedtype.f90 @@ -41,7 +41,7 @@ function func(x, n, init) end function func end module maxtype_mod -!CHECK: omp.declare_reduction @red_add_max : !fir.ref<[[MAXTYPE:.*]]> {{.*}} alloc { +!CHECK: omp.declare_reduction @_QQMmaxtype_modFfuncred_add_max : !fir.ref<[[MAXTYPE:.*]]> {{.*}} alloc { !CHECK: %[[ALLOCA:.*]] = fir.alloca [[MAXTYPE:.*]] !CHECK: omp.yield(%[[ALLOCA]] : !fir.ref<[[MAXTYPE]]>) !CHECK: } init { diff --git a/flang/test/Lower/OpenMP/omp-declare-reduction-initsub.f90 b/flang/test/Lower/OpenMP/omp-declare-reduction-initsub.f90 index 4aacc7cb2efba..2f6e432a72b13 100644 --- a/flang/test/Lower/OpenMP/omp-declare-reduction-initsub.f90 +++ b/flang/test/Lower/OpenMP/omp-declare-reduction-initsub.f90 @@ -17,7 +17,7 @@ subroutine initme(x,n) integer x,n end subroutine initme end interface -!CHECK: omp.declare_reduction @red_add : 
i32 init { +!CHECK: omp.declare_reduction @_QQFfuncred_add : i32 init { !CHECK: ^bb0(%[[OMP_ORIG_ARG_I:.*]]: i32): !CHECK: %[[OMP_PRIV:.*]] = fir.alloca i32 !CHECK: %[[OMP_ORIG:.*]] = fir.alloca i32 diff --git a/flang/test/Lower/OpenMP/omp-declare-reduction.f90 b/flang/test/Lower/OpenMP/omp-declare-reduction.f90 index a41f6b214b9d8..73e3c28622a58 100644 --- a/flang/test/Lower/OpenMP/omp-declare-reduction.f90 +++ b/flang/test/Lower/OpenMP/omp-declare-reduction.f90 @@ -4,7 +4,7 @@ subroutine declare_red() integer :: my_var -!CHECK: omp.declare_reduction @my_red : i32 init { +!CHECK: omp.declare_reduction @_QQFdeclare_redmy_red : i32 init { !CHECK: ^bb0(%[[OMP_ORIG_ARG_I:.*]]: i32): !CHECK: %[[OMP_PRIV:.*]] = fir.alloca i32 !CHECK: %[[OMP_ORIG:.*]] = fir.alloca i32 diff --git a/flang/test/Semantics/OpenMP/declarative-directive01.f90 b/flang/test/Semantics/OpenMP/declarative-directive01.f90 index c213d0ae7a6f2..70c5618cc8760 100644 --- a/flang/test/Semantics/OpenMP/declarative-directive01.f90 +++ b/flang/test/Semantics/OpenMP/declarative-directive01.f90 @@ -6,6 +6,7 @@ subroutine requires_1(a) real(8), intent(inout) :: a + !WARNING: REVERSE_OFFLOAD clause is not supported and will be ignored !$omp requires reverse_offload, unified_shared_memory, atomic_default_mem_order(relaxed) a = a + 0.01 end subroutine requires_1 diff --git a/flang/test/Semantics/OpenMP/requires01.f90 b/flang/test/Semantics/OpenMP/requires01.f90 index 007135749cc82..cd39f06841f25 100644 --- a/flang/test/Semantics/OpenMP/requires01.f90 +++ b/flang/test/Semantics/OpenMP/requires01.f90 @@ -1,5 +1,6 @@ ! 
RUN: %python %S/../test_errors.py %s %flang -fopenmp +!WARNING: REVERSE_OFFLOAD clause is not supported and will be ignored !$omp requires reverse_offload unified_shared_memory !ERROR: NOWAIT clause is not allowed on the REQUIRES directive diff --git a/flang/test/Semantics/OpenMP/requires03.f90 b/flang/test/Semantics/OpenMP/requires03.f90 index 4a23a6a4105fe..699967d108040 100644 --- a/flang/test/Semantics/OpenMP/requires03.f90 +++ b/flang/test/Semantics/OpenMP/requires03.f90 @@ -12,6 +12,7 @@ end subroutine f subroutine g !ERROR: REQUIRES directive with 'DYNAMIC_ALLOCATORS' clause found lexically after device construct !$omp requires dynamic_allocators + !WARNING: REVERSE_OFFLOAD clause is not supported and will be ignored !ERROR: REQUIRES directive with 'REVERSE_OFFLOAD' clause found lexically after device construct !$omp requires reverse_offload !ERROR: REQUIRES directive with 'UNIFIED_ADDRESS' clause found lexically after device construct diff --git a/flang/test/Semantics/OpenMP/requires04.f90 b/flang/test/Semantics/OpenMP/requires04.f90 index a1647bc5db7a7..18790054e733d 100644 --- a/flang/test/Semantics/OpenMP/requires04.f90 +++ b/flang/test/Semantics/OpenMP/requires04.f90 @@ -14,6 +14,7 @@ end subroutine f subroutine g !ERROR: REQUIRES directive with 'DYNAMIC_ALLOCATORS' clause found lexically after device construct !$omp requires dynamic_allocators + !WARNING: REVERSE_OFFLOAD clause is not supported and will be ignored !ERROR: REQUIRES directive with 'REVERSE_OFFLOAD' clause found lexically after device construct !$omp requires reverse_offload !ERROR: REQUIRES directive with 'UNIFIED_ADDRESS' clause found lexically after device construct diff --git a/flang/test/Semantics/OpenMP/requires05.f90 b/flang/test/Semantics/OpenMP/requires05.f90 index ce9138ae94f7f..871d0c433685b 100644 --- a/flang/test/Semantics/OpenMP/requires05.f90 +++ b/flang/test/Semantics/OpenMP/requires05.f90 @@ -13,6 +13,7 @@ end subroutine f subroutine g !ERROR: REQUIRES directive with 
'DYNAMIC_ALLOCATORS' clause found lexically after device construct !$omp requires dynamic_allocators + !WARNING: REVERSE_OFFLOAD clause is not supported and will be ignored !ERROR: REQUIRES directive with 'REVERSE_OFFLOAD' clause found lexically after device construct !$omp requires reverse_offload !ERROR: REQUIRES directive with 'UNIFIED_ADDRESS' clause found lexically after device construct diff --git a/flang/test/Semantics/OpenMP/requires06.f90 b/flang/test/Semantics/OpenMP/requires06.f90 index ba9bbf31b6e07..c41de68fc0f6a 100644 --- a/flang/test/Semantics/OpenMP/requires06.f90 +++ b/flang/test/Semantics/OpenMP/requires06.f90 @@ -11,6 +11,7 @@ end subroutine f subroutine g !ERROR: REQUIRES directive with 'DYNAMIC_ALLOCATORS' clause found lexically after device construct !$omp requires dynamic_allocators + !WARNING: REVERSE_OFFLOAD clause is not supported and will be ignored !ERROR: REQUIRES directive with 'REVERSE_OFFLOAD' clause found lexically after device construct !$omp requires reverse_offload !ERROR: REQUIRES directive with 'UNIFIED_ADDRESS' clause found lexically after device construct diff --git a/flang/test/Semantics/OpenMP/requires07.f90 b/flang/test/Semantics/OpenMP/requires07.f90 index 2a36b4def9199..a47def1518a99 100644 --- a/flang/test/Semantics/OpenMP/requires07.f90 +++ b/flang/test/Semantics/OpenMP/requires07.f90 @@ -12,6 +12,7 @@ end subroutine f subroutine g !ERROR: REQUIRES directive with 'DYNAMIC_ALLOCATORS' clause found lexically after device construct !$omp requires dynamic_allocators + !WARNING: REVERSE_OFFLOAD clause is not supported and will be ignored !ERROR: REQUIRES directive with 'REVERSE_OFFLOAD' clause found lexically after device construct !$omp requires reverse_offload !ERROR: REQUIRES directive with 'UNIFIED_ADDRESS' clause found lexically after device construct diff --git a/flang/test/Semantics/OpenMP/requires08.f90 b/flang/test/Semantics/OpenMP/requires08.f90 index 5f3b084078ccf..593e5e31c76ce 100644 --- 
a/flang/test/Semantics/OpenMP/requires08.f90 +++ b/flang/test/Semantics/OpenMP/requires08.f90 @@ -14,6 +14,7 @@ end subroutine f subroutine g !ERROR: REQUIRES directive with 'DYNAMIC_ALLOCATORS' clause found lexically after device construct !$omp requires dynamic_allocators + !WARNING: REVERSE_OFFLOAD clause is not supported and will be ignored !ERROR: REQUIRES directive with 'REVERSE_OFFLOAD' clause found lexically after device construct !$omp requires reverse_offload !ERROR: REQUIRES directive with 'UNIFIED_ADDRESS' clause found lexically after device construct diff --git a/flang/test/Semantics/OpenMP/requires10.f90 b/flang/test/Semantics/OpenMP/requires10.f90 index 9f9832da3726e..7c2bc10e60284 100644 --- a/flang/test/Semantics/OpenMP/requires10.f90 +++ b/flang/test/Semantics/OpenMP/requires10.f90 @@ -2,12 +2,14 @@ subroutine f00(x) logical :: x + !WARNING: REVERSE_OFFLOAD clause is not supported and will be ignored !ERROR: An argument to REVERSE_OFFLOAD is an OpenMP v6.0 feature, try -fopenmp-version=60 !ERROR: Must be a constant value !$omp requires reverse_offload(x) end subroutine f01 + !WARNING: REVERSE_OFFLOAD clause is not supported and will be ignored !WARNING: An argument to REVERSE_OFFLOAD is an OpenMP v6.0 feature, try -fopenmp-version=60 !$omp requires reverse_offload(.true.) 
end diff --git a/libcxx/include/__cstddef/byte.h b/libcxx/include/__cstddef/byte.h index 3d97db1bea293..295150fd1ead5 100644 --- a/libcxx/include/__cstddef/byte.h +++ b/libcxx/include/__cstddef/byte.h @@ -23,7 +23,7 @@ _LIBCPP_BEGIN_UNVERSIONED_NAMESPACE_STD enum class byte : unsigned char {}; -_LIBCPP_HIDE_FROM_ABI inline constexpr byte operator|(byte __lhs, byte __rhs) noexcept { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr byte operator|(byte __lhs, byte __rhs) noexcept { return static_cast( static_cast(static_cast(__lhs) | static_cast(__rhs))); } @@ -32,7 +32,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr byte& operator|=(byte& __lhs, byte __rhs) return __lhs = __lhs | __rhs; } -_LIBCPP_HIDE_FROM_ABI inline constexpr byte operator&(byte __lhs, byte __rhs) noexcept { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr byte operator&(byte __lhs, byte __rhs) noexcept { return static_cast( static_cast(static_cast(__lhs) & static_cast(__rhs))); } @@ -41,7 +41,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr byte& operator&=(byte& __lhs, byte __rhs) return __lhs = __lhs & __rhs; } -_LIBCPP_HIDE_FROM_ABI inline constexpr byte operator^(byte __lhs, byte __rhs) noexcept { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr byte operator^(byte __lhs, byte __rhs) noexcept { return static_cast( static_cast(static_cast(__lhs) ^ static_cast(__rhs))); } @@ -50,7 +50,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr byte& operator^=(byte& __lhs, byte __rhs) return __lhs = __lhs ^ __rhs; } -_LIBCPP_HIDE_FROM_ABI inline constexpr byte operator~(byte __b) noexcept { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline constexpr byte operator~(byte __b) noexcept { return static_cast(static_cast(~static_cast(__b))); } @@ -60,7 +60,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr byte& operator<<=(byte& __lhs, _Integer __shift) } template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI constexpr byte operator<<(byte __lhs, _Integer __shift) noexcept { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr byte 
operator<<(byte __lhs, _Integer __shift) noexcept { return static_cast(static_cast(static_cast(__lhs) << __shift)); } @@ -70,7 +70,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr byte& operator>>=(byte& __lhs, _Integer __shift) } template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI constexpr byte operator>>(byte __lhs, _Integer __shift) noexcept { +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr byte operator>>(byte __lhs, _Integer __shift) noexcept { return static_cast(static_cast(static_cast(__lhs) >> __shift)); } diff --git a/libcxx/include/map b/libcxx/include/map index c983a3ed07cd4..de0a475ee572f 100644 --- a/libcxx/include/map +++ b/libcxx/include/map @@ -1370,13 +1370,15 @@ public: __tree_.__node_handle_merge_unique(__source.__tree_); } template - _LIBCPP_HIDE_FROM_ABI void merge(multimap& __source) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + merge(multimap& __source) { _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR( __source.get_allocator() == get_allocator(), "merging container with incompatible allocator"); __tree_.__node_handle_merge_unique(__source.__tree_); } template - _LIBCPP_HIDE_FROM_ABI void merge(multimap&& __source) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + merge(multimap&& __source) { _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR( __source.get_allocator() == get_allocator(), "merging container with incompatible allocator"); __tree_.__node_handle_merge_unique(__source.__tree_); @@ -1739,10 +1741,11 @@ public: protected: key_compare comp; - _LIBCPP_HIDE_FROM_ABI value_compare(key_compare __c) : comp(__c) {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare(key_compare __c) : comp(__c) {} public: - _LIBCPP_HIDE_FROM_ABI bool operator()(const value_type& __x, const value_type& __y) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool + operator()(const value_type& __x, const value_type& __y) const { return comp(__x.first, __y.first); } }; @@ -1775,26 +1778,28 @@ public: template friend class multimap; - 
_LIBCPP_HIDE_FROM_ABI multimap() _NOEXCEPT_( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 multimap() _NOEXCEPT_( is_nothrow_default_constructible::value&& is_nothrow_default_constructible::value&& is_nothrow_copy_constructible::value) : __tree_(__vc(key_compare())) {} - _LIBCPP_HIDE_FROM_ABI explicit multimap(const key_compare& __comp) _NOEXCEPT_( + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit multimap(const key_compare& __comp) _NOEXCEPT_( is_nothrow_default_constructible::value&& is_nothrow_copy_constructible::value) : __tree_(__vc(__comp)) {} - _LIBCPP_HIDE_FROM_ABI explicit multimap(const key_compare& __comp, const allocator_type& __a) + _LIBCPP_HIDE_FROM_ABI + _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit multimap(const key_compare& __comp, const allocator_type& __a) : __tree_(__vc(__comp), typename __base::allocator_type(__a)) {} template - _LIBCPP_HIDE_FROM_ABI multimap(_InputIterator __f, _InputIterator __l, const key_compare& __comp = key_compare()) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + multimap(_InputIterator __f, _InputIterator __l, const key_compare& __comp = key_compare()) : __tree_(__vc(__comp)) { insert(__f, __l); } template - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 multimap(_InputIterator __f, _InputIterator __l, const key_compare& __comp, const allocator_type& __a) : __tree_(__vc(__comp), typename __base::allocator_type(__a)) { insert(__f, __l); @@ -1802,7 +1807,7 @@ public: # if _LIBCPP_STD_VER >= 23 template <_ContainerCompatibleRange _Range> - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 multimap(from_range_t, _Range&& __range, const key_compare& __comp = key_compare(), @@ -1814,45 +1819,50 @@ public: # if _LIBCPP_STD_VER >= 14 template - _LIBCPP_HIDE_FROM_ABI multimap(_InputIterator __f, _InputIterator __l, const allocator_type& __a) + _LIBCPP_HIDE_FROM_ABI + _LIBCPP_CONSTEXPR_SINCE_CXX26 multimap(_InputIterator __f, _InputIterator __l, const 
allocator_type& __a) : multimap(__f, __l, key_compare(), __a) {} # endif # if _LIBCPP_STD_VER >= 23 template <_ContainerCompatibleRange _Range> - _LIBCPP_HIDE_FROM_ABI multimap(from_range_t, _Range&& __range, const allocator_type& __a) + _LIBCPP_HIDE_FROM_ABI + _LIBCPP_CONSTEXPR_SINCE_CXX26 multimap(from_range_t, _Range&& __range, const allocator_type& __a) : multimap(from_range, std::forward<_Range>(__range), key_compare(), __a) {} # endif - _LIBCPP_HIDE_FROM_ABI multimap(const multimap& __m) = default; + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 multimap(const multimap& __m) = default; - _LIBCPP_HIDE_FROM_ABI multimap& operator=(const multimap& __m) = default; + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 multimap& operator=(const multimap& __m) = default; # ifndef _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI multimap(multimap&& __m) = default; + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 multimap(multimap&& __m) = default; - _LIBCPP_HIDE_FROM_ABI multimap(multimap&& __m, const allocator_type& __a) : __tree_(std::move(__m.__tree_), __a) {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 multimap(multimap&& __m, const allocator_type& __a) + : __tree_(std::move(__m.__tree_), __a) {} - _LIBCPP_HIDE_FROM_ABI multimap& operator=(multimap&& __m) = default; + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 multimap& operator=(multimap&& __m) = default; - _LIBCPP_HIDE_FROM_ABI multimap(initializer_list __il, const key_compare& __comp = key_compare()) + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 + multimap(initializer_list __il, const key_compare& __comp = key_compare()) : __tree_(__vc(__comp)) { insert(__il.begin(), __il.end()); } - _LIBCPP_HIDE_FROM_ABI + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 multimap(initializer_list __il, const key_compare& __comp, const allocator_type& __a) : __tree_(__vc(__comp), typename __base::allocator_type(__a)) { insert(__il.begin(), __il.end()); } # if _LIBCPP_STD_VER >= 14 - 
_LIBCPP_HIDE_FROM_ABI multimap(initializer_list __il, const allocator_type& __a) + _LIBCPP_HIDE_FROM_ABI + _LIBCPP_CONSTEXPR_SINCE_CXX26 multimap(initializer_list __il, const allocator_type& __a) : multimap(__il, key_compare(), __a) {} # endif - _LIBCPP_HIDE_FROM_ABI multimap& operator=(initializer_list __il) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 multimap& operator=(initializer_list __il) { clear(); insert(__il.begin(), __il.end()); return *this; @@ -1860,193 +1870,247 @@ public: # endif // _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI explicit multimap(const allocator_type& __a) : __tree_(typename __base::allocator_type(__a)) {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit multimap(const allocator_type& __a) + : __tree_(typename __base::allocator_type(__a)) {} - _LIBCPP_HIDE_FROM_ABI multimap(const multimap& __m, const allocator_type& __a) : __tree_(__m.__tree_, __a) {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 multimap(const multimap& __m, const allocator_type& __a) + : __tree_(__m.__tree_, __a) {} - _LIBCPP_HIDE_FROM_ABI ~multimap() { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 ~multimap() { static_assert(sizeof(std::__diagnose_non_const_comparator<_Key, _Compare>()), ""); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator begin() _NOEXCEPT { return __tree_.begin(); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator begin() const _NOEXCEPT { return __tree_.begin(); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator end() _NOEXCEPT { return __tree_.end(); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator end() const _NOEXCEPT { return __tree_.end(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator begin() _NOEXCEPT { + return __tree_.begin(); + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator begin() const _NOEXCEPT { + return __tree_.begin(); + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX26 iterator end() _NOEXCEPT { + return __tree_.end(); + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator end() const _NOEXCEPT { + return __tree_.end(); + } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() _NOEXCEPT { return reverse_iterator(end()); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rbegin() _NOEXCEPT { + return reverse_iterator(end()); + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator + rbegin() const _NOEXCEPT { return const_reverse_iterator(end()); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() _NOEXCEPT { return reverse_iterator(begin()); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 reverse_iterator rend() _NOEXCEPT { + return reverse_iterator(begin()); + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator rend() const _NOEXCEPT { return const_reverse_iterator(begin()); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const _NOEXCEPT { return begin(); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator cend() const _NOEXCEPT { return end(); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const _NOEXCEPT { return rbegin(); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const _NOEXCEPT { return rend(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cbegin() const _NOEXCEPT { + return begin(); + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator cend() const _NOEXCEPT { + return end(); + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator + crbegin() const _NOEXCEPT { + return rbegin(); + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_reverse_iterator crend() const _NOEXCEPT { + return rend(); + } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const _NOEXCEPT { return __tree_.size() == 0; } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __tree_.size(); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { return __tree_.max_size(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool empty() const _NOEXCEPT { + return __tree_.size() == 0; + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type size() const _NOEXCEPT { + return __tree_.size(); + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type max_size() const _NOEXCEPT { + return __tree_.max_size(); + } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI allocator_type get_allocator() const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 allocator_type get_allocator() const _NOEXCEPT { return allocator_type(__tree_.__alloc()); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __tree_.value_comp().key_comp(); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 key_compare key_comp() const { + return __tree_.value_comp().key_comp(); + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 value_compare value_comp() const { return value_compare(__tree_.value_comp().key_comp()); } # ifndef _LIBCPP_CXX03_LANG template - _LIBCPP_HIDE_FROM_ABI iterator emplace(_Args&&... __args) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator emplace(_Args&&... 
__args) { return __tree_.__emplace_multi(std::forward<_Args>(__args)...); } template - _LIBCPP_HIDE_FROM_ABI iterator emplace_hint(const_iterator __p, _Args&&... __args) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator emplace_hint(const_iterator __p, _Args&&... __args) { return __tree_.__emplace_hint_multi(__p.__i_, std::forward<_Args>(__args)...); } template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI iterator insert(_Pp&& __p) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(_Pp&& __p) { return __tree_.__emplace_multi(std::forward<_Pp>(__p)); } template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __pos, _Pp&& __p) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __pos, _Pp&& __p) { return __tree_.__emplace_hint_multi(__pos.__i_, std::forward<_Pp>(__p)); } - _LIBCPP_HIDE_FROM_ABI iterator insert(value_type&& __v) { return __tree_.__emplace_multi(std::move(__v)); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(value_type&& __v) { + return __tree_.__emplace_multi(std::move(__v)); + } - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, value_type&& __v) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __p, value_type&& __v) { return __tree_.__emplace_hint_multi(__p.__i_, std::move(__v)); } - _LIBCPP_HIDE_FROM_ABI void insert(initializer_list __il) { insert(__il.begin(), __il.end()); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(initializer_list __il) { + insert(__il.begin(), __il.end()); + } # endif // _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI iterator insert(const value_type& __v) { return __tree_.__emplace_multi(__v); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const value_type& __v) { + return __tree_.__emplace_multi(__v); + } - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __p, const value_type& __v) { + _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __p, const value_type& __v) { return __tree_.__emplace_hint_multi(__p.__i_, __v); } template - _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __f, _InputIterator __l) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert(_InputIterator __f, _InputIterator __l) { __tree_.__insert_range_multi(__f, __l); } # if _LIBCPP_STD_VER >= 23 template <_ContainerCompatibleRange _Range> - _LIBCPP_HIDE_FROM_ABI void insert_range(_Range&& __range) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void insert_range(_Range&& __range) { __tree_.__insert_range_multi(ranges::begin(__range), ranges::end(__range)); } # endif - _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __p) { return __tree_.erase(__p.__i_); } - _LIBCPP_HIDE_FROM_ABI iterator erase(iterator __p) { return __tree_.erase(__p.__i_); } - _LIBCPP_HIDE_FROM_ABI size_type erase(const key_type& __k) { return __tree_.__erase_multi(__k); } - _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __f, const_iterator __l) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(const_iterator __p) { + return __tree_.erase(__p.__i_); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(iterator __p) { return __tree_.erase(__p.__i_); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type erase(const key_type& __k) { + return __tree_.__erase_multi(__k); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator erase(const_iterator __f, const_iterator __l) { return __tree_.erase(__f.__i_, __l.__i_); } # if _LIBCPP_STD_VER >= 17 - _LIBCPP_HIDE_FROM_ABI iterator insert(node_type&& __nh) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(node_type&& __nh) { _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(__nh.empty() || __nh.get_allocator() == get_allocator(), "node_type with incompatible allocator passed to multimap::insert()"); return __tree_.template 
__node_handle_insert_multi(std::move(__nh)); } - _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, node_type&& __nh) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator insert(const_iterator __hint, node_type&& __nh) { _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(__nh.empty() || __nh.get_allocator() == get_allocator(), "node_type with incompatible allocator passed to multimap::insert()"); return __tree_.template __node_handle_insert_multi(__hint.__i_, std::move(__nh)); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI node_type extract(key_type const& __key) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 node_type extract(key_type const& __key) { return __tree_.template __node_handle_extract(__key); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI node_type extract(const_iterator __it) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 node_type extract(const_iterator __it) { return __tree_.template __node_handle_extract(__it.__i_); } template - _LIBCPP_HIDE_FROM_ABI void merge(multimap& __source) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + merge(multimap& __source) { _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR( __source.get_allocator() == get_allocator(), "merging container with incompatible allocator"); return __tree_.__node_handle_merge_multi(__source.__tree_); } template - _LIBCPP_HIDE_FROM_ABI void merge(multimap&& __source) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + merge(multimap&& __source) { _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR( __source.get_allocator() == get_allocator(), "merging container with incompatible allocator"); return __tree_.__node_handle_merge_multi(__source.__tree_); } template - _LIBCPP_HIDE_FROM_ABI void merge(map& __source) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + merge(map& __source) { _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR( __source.get_allocator() == get_allocator(), "merging container with incompatible allocator"); return 
__tree_.__node_handle_merge_multi(__source.__tree_); } template - _LIBCPP_HIDE_FROM_ABI void merge(map&& __source) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void + merge(map&& __source) { _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR( __source.get_allocator() == get_allocator(), "merging container with incompatible allocator"); return __tree_.__node_handle_merge_multi(__source.__tree_); } # endif - _LIBCPP_HIDE_FROM_ABI void clear() _NOEXCEPT { __tree_.clear(); } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void clear() _NOEXCEPT { __tree_.clear(); } - _LIBCPP_HIDE_FROM_ABI void swap(multimap& __m) _NOEXCEPT_(__is_nothrow_swappable_v<__base>) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void swap(multimap& __m) + _NOEXCEPT_(__is_nothrow_swappable_v<__base>) { __tree_.swap(__m.__tree_); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __k) { return __tree_.find(__k); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __k) const { return __tree_.find(__k); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const key_type& __k) { + return __tree_.find(__k); + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const key_type& __k) const { + return __tree_.find(__k); + } # if _LIBCPP_STD_VER >= 14 template || __is_transparently_comparable_v<_Comp, key_type, _K2>, int> = 0> - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator find(const _K2& __k) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const _K2& __k) { return __tree_.find(__k); } template || __is_transparently_comparable_v<_Comp, key_type, _K2>, int> = 0> - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator find(const _K2& __k) const { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const _K2& __k) const { return __tree_.find(__k); } # endif - [[__nodiscard__]] 
_LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __k) const { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const key_type& __k) const { return __tree_.__count_multi(__k); } # if _LIBCPP_STD_VER >= 14 template , int> = 0> - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_type count(const _K2& __k) const { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 size_type count(const _K2& __k) const { return __tree_.__count_multi(__k); } # endif # if _LIBCPP_STD_VER >= 20 - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __k) const { return find(__k) != end(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const key_type& __k) const { + return find(__k) != end(); + } template || __is_transparently_comparable_v<_Comp, key_type, _K2>, int> = 0> - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool contains(const _K2& __k) const { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool contains(const _K2& __k) const { return find(__k) != end(); } # endif // _LIBCPP_STD_VER >= 20 - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __k) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const key_type& __k) { return __tree_.__lower_bound_multi(__k); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __k) const { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator + lower_bound(const key_type& __k) const { return __tree_.__lower_bound_multi(__k); } @@ -2054,23 +2118,25 @@ public: template || __is_transparently_comparable_v<_Comp, key_type, _K2>, int> = 0> - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _K2& __k) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator lower_bound(const _K2& __k) { return __tree_.__lower_bound_multi(__k); } template || 
__is_transparently_comparable_v<_Comp, key_type, _K2>, int> = 0> - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _K2& __k) const { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator + lower_bound(const _K2& __k) const { return __tree_.__lower_bound_multi(__k); } # endif - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __k) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const key_type& __k) { return __tree_.__upper_bound_multi(__k); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __k) const { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator + upper_bound(const key_type& __k) const { return __tree_.__upper_bound_multi(__k); } @@ -2078,30 +2144,35 @@ public: template || __is_transparently_comparable_v<_Comp, key_type, _K2>, int> = 0> - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _K2& __k) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator upper_bound(const _K2& __k) { return __tree_.__upper_bound_multi(__k); } template || __is_transparently_comparable_v<_Comp, key_type, _K2>, int> = 0> - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _K2& __k) const { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator + upper_bound(const _K2& __k) const { return __tree_.__upper_bound_multi(__k); } # endif - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair equal_range(const key_type& __k) { + [[__nodiscard__]] + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair equal_range(const key_type& __k) { return __tree_.__equal_range_multi(__k); } - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair equal_range(const key_type& __k) const { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + equal_range(const key_type& __k) const { return 
__tree_.__equal_range_multi(__k); } # if _LIBCPP_STD_VER >= 14 template , int> = 0> - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair equal_range(const _K2& __k) { + [[__nodiscard__]] + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair equal_range(const _K2& __k) { return __tree_.__equal_range_multi(__k); } template , int> = 0> - [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI pair equal_range(const _K2& __k) const { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 pair + equal_range(const _K2& __k) const { return __tree_.__equal_range_multi(__k); } # endif @@ -2175,7 +2246,7 @@ struct __specialized_algorithm<_Algorithm::__for_each, __single_range - _LIBCPP_HIDE_FROM_ABI static auto operator()(_Map&& __map, _Func __func, _Proj __proj) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto operator()(_Map&& __map, _Func __func, _Proj __proj) { auto [_, __func2] = __specialized_algorithm<_Algorithm::__for_each, __single_range>()( __map.__tree_, std::move(__func), std::move(__proj)); return std::make_pair(__map.end(), std::move(__func2)); @@ -2184,7 +2255,7 @@ struct __specialized_algorithm<_Algorithm::__for_each, __single_range -inline _LIBCPP_HIDE_FROM_ABI bool +inline _LIBCPP_HIDE_FROM_ABI bool _LIBCPP_CONSTEXPR_SINCE_CXX26 operator==(const multimap<_Key, _Tp, _Compare, _Allocator>& __x, const multimap<_Key, _Tp, _Compare, _Allocator>& __y) { return __x.size() == __y.size() && std::equal(__x.begin(), __x.end(), __y.begin()); } @@ -2192,31 +2263,31 @@ operator==(const multimap<_Key, _Tp, _Compare, _Allocator>& __x, const multimap< # if _LIBCPP_STD_VER <= 17 template -inline _LIBCPP_HIDE_FROM_ABI bool +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool operator<(const multimap<_Key, _Tp, _Compare, _Allocator>& __x, const multimap<_Key, _Tp, _Compare, _Allocator>& __y) { return std::lexicographical_compare(__x.begin(), __x.end(), __y.begin(), __y.end()); } template -inline _LIBCPP_HIDE_FROM_ABI bool +inline 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool operator!=(const multimap<_Key, _Tp, _Compare, _Allocator>& __x, const multimap<_Key, _Tp, _Compare, _Allocator>& __y) { return !(__x == __y); } template -inline _LIBCPP_HIDE_FROM_ABI bool +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool operator>(const multimap<_Key, _Tp, _Compare, _Allocator>& __x, const multimap<_Key, _Tp, _Compare, _Allocator>& __y) { return __y < __x; } template -inline _LIBCPP_HIDE_FROM_ABI bool +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool operator>=(const multimap<_Key, _Tp, _Compare, _Allocator>& __x, const multimap<_Key, _Tp, _Compare, _Allocator>& __y) { return !(__x < __y); } template -inline _LIBCPP_HIDE_FROM_ABI bool +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool operator<=(const multimap<_Key, _Tp, _Compare, _Allocator>& __x, const multimap<_Key, _Tp, _Compare, _Allocator>& __y) { return !(__y < __x); } @@ -2224,7 +2295,7 @@ operator<=(const multimap<_Key, _Tp, _Compare, _Allocator>& __x, const multimap< # else // #if _LIBCPP_STD_VER <= 17 template -_LIBCPP_HIDE_FROM_ABI __synth_three_way_result> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __synth_three_way_result> operator<=>(const multimap<_Key, _Tp, _Compare, _Allocator>& __x, const multimap<_Key, _Tp, _Compare, _Allocator>& __y) { return std::lexicographical_compare_three_way(__x.begin(), __x.end(), __y.begin(), __y.end(), __synth_three_way); @@ -2233,7 +2304,7 @@ operator<=>(const multimap<_Key, _Tp, _Compare, _Allocator>& __x, # endif // #if _LIBCPP_STD_VER <= 17 template -inline _LIBCPP_HIDE_FROM_ABI void +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void swap(multimap<_Key, _Tp, _Compare, _Allocator>& __x, multimap<_Key, _Tp, _Compare, _Allocator>& __y) _NOEXCEPT_(_NOEXCEPT_(__x.swap(__y))) { __x.swap(__y); @@ -2241,7 +2312,7 @@ swap(multimap<_Key, _Tp, _Compare, _Allocator>& __x, multimap<_Key, _Tp, _Compar # if _LIBCPP_STD_VER >= 20 template 
-inline _LIBCPP_HIDE_FROM_ABI typename multimap<_Key, _Tp, _Compare, _Allocator>::size_type +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 typename multimap<_Key, _Tp, _Compare, _Allocator>::size_type erase_if(multimap<_Key, _Tp, _Compare, _Allocator>& __c, _Predicate __pred) { return std::__libcpp_erase_if_container(__c, __pred); } diff --git a/libcxx/include/string b/libcxx/include/string index a201d2ca44354..e5bc854cd34f3 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -1003,15 +1003,9 @@ public: } # endif // _LIBCPP_CXX03_LANG - template <__enable_if_t<__is_allocator_v<_Allocator>, int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s) { - _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "basic_string(const char*) detected nullptr"); - __init(__s, traits_type::length(__s)); - } - template <__enable_if_t<__is_allocator_v<_Allocator>, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, const _Allocator& __a) + basic_string(const _CharT* _LIBCPP_DIAGNOSE_NULLPTR __s, const _Allocator& __a = _Allocator()) : __alloc_(__a) { _LIBCPP_ASSERT_NON_NULL(__s != nullptr, "basic_string(const char*, allocator) detected nullptr"); __init(__s, traits_type::length(__s)); @@ -1021,22 +1015,14 @@ public: basic_string(nullptr_t) = delete; # endif - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string(const _CharT* __s, size_type __n) - _LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") { - _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "basic_string(const char*, n) detected nullptr"); - __init(__s, __n); - } - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string(const _CharT* __s, size_type __n, const _Allocator& __a) + _LIBCPP_HIDE_FROM_ABI + _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string(const _CharT* __s, size_type __n, const _Allocator& __a = _Allocator()) 
_LIBCPP_DIAGNOSE_NULLPTR_IF(__n != 0 && __s == nullptr, " if n is not zero") : __alloc_(__a) { _LIBCPP_ASSERT_NON_NULL(__n == 0 || __s != nullptr, "basic_string(const char*, n, allocator) detected nullptr"); __init(__s, __n); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string(size_type __n, _CharT __c) { __init(__n, __c); } - # if _LIBCPP_STD_VER >= 23 _LIBCPP_HIDE_FROM_ABI constexpr basic_string( basic_string&& __str, size_type __pos, const _Allocator& __alloc = _Allocator()) @@ -1059,7 +1045,8 @@ public: # endif template <__enable_if_t<__is_allocator_v<_Allocator>, int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string(size_type __n, _CharT __c, const _Allocator& __a) + _LIBCPP_HIDE_FROM_ABI + _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string(size_type __n, _CharT __c, const _Allocator& __a = _Allocator()) : __alloc_(__a) { __init(__n, __c); } @@ -1098,29 +1085,16 @@ public: __enable_if_t<__can_be_converted_to_string_view_v<_CharT, _Traits, _Tp> && !is_same<__remove_cvref_t<_Tp>, basic_string>::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit basic_string(const _Tp& __t) { - __self_view __sv = __t; - __init(__sv.data(), __sv.size()); - } - - template && - !is_same<__remove_cvref_t<_Tp>, basic_string>::value, - int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit basic_string(const _Tp& __t, const allocator_type& __a) + _LIBCPP_HIDE_FROM_ABI + _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit basic_string(const _Tp& __t, const allocator_type& __a = allocator_type()) : __alloc_(__a) { __self_view __sv = __t; __init(__sv.data(), __sv.size()); } - template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string(_InputIterator __first, _InputIterator __last) { - __init(__first, __last); - } - template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string(_InputIterator __first, _InputIterator __last, const allocator_type& __a) + 
basic_string(_InputIterator __first, _InputIterator __last, const allocator_type& __a = allocator_type()) : __alloc_(__a) { __init(__first, __last); } @@ -1139,11 +1113,8 @@ public: # endif # ifndef _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string(initializer_list<_CharT> __il) { - __init(__il.begin(), __il.end()); - } - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string(initializer_list<_CharT> __il, const _Allocator& __a) + _LIBCPP_HIDE_FROM_ABI + _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string(initializer_list<_CharT> __il, const _Allocator& __a = _Allocator()) : __alloc_(__a) { __init(__il.begin(), __il.end()); } diff --git a/libcxx/include/string.h b/libcxx/include/string.h index 6bdcd6a6eecbd..2ffcb913010b6 100644 --- a/libcxx/include/string.h +++ b/libcxx/include/string.h @@ -64,6 +64,8 @@ size_t strlen(const char* s); # include_next # endif +# include + // MSVCRT, GNU libc and its derivates may already have the correct prototype in // . This macro can be defined by users if their C library provides // the right signature. diff --git a/libcxx/test/libcxx/concepts/concepts.arithmetic/__libcpp_signed_integer.compile.pass.cpp b/libcxx/test/libcxx/concepts/concepts.arithmetic/__libcpp_signed_integer.compile.pass.cpp index 1f2d9685bbe5a..524b22cc4bef3 100644 --- a/libcxx/test/libcxx/concepts/concepts.arithmetic/__libcpp_signed_integer.compile.pass.cpp +++ b/libcxx/test/libcxx/concepts/concepts.arithmetic/__libcpp_signed_integer.compile.pass.cpp @@ -79,7 +79,7 @@ static_assert(!std::__signed_integer); static_assert(!std::__signed_integer); // Extended signed integer types per [basic.fundamental]/p3 Note 1. 
-#if TEST_HAS_EXTENSION(bit_int) +#if TEST_HAS_BITINT static_assert(std::__signed_integer); static_assert(std::__signed_integer); static_assert(std::__signed_integer); diff --git a/libcxx/test/libcxx/concepts/concepts.arithmetic/__libcpp_unsigned_integer.compile.pass.cpp b/libcxx/test/libcxx/concepts/concepts.arithmetic/__libcpp_unsigned_integer.compile.pass.cpp index 3f78f170b7038..234cc56f1697d 100644 --- a/libcxx/test/libcxx/concepts/concepts.arithmetic/__libcpp_unsigned_integer.compile.pass.cpp +++ b/libcxx/test/libcxx/concepts/concepts.arithmetic/__libcpp_unsigned_integer.compile.pass.cpp @@ -79,7 +79,7 @@ static_assert(!std::__unsigned_integer); static_assert(!std::__unsigned_integer); // Extended unsigned integer types per [basic.fundamental]/p3 Note 1. -#if TEST_HAS_EXTENSION(bit_int) +#if TEST_HAS_BITINT static_assert(std::__unsigned_integer); static_assert(std::__unsigned_integer); static_assert(std::__unsigned_integer); diff --git a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp index 1fa7e5a5ceb7f..b720ef5218d1a 100644 --- a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp +++ b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp @@ -41,6 +41,7 @@ #include #include #include +#include #include "test_macros.h" @@ -295,6 +296,17 @@ void tuple_test() { "empty std::tuple"); } +void pair_test() { + std::pair ints(1, 2); + ComparePrettyPrintToChars(ints, "{first = 1, second = 2}"); + + std::pair mixed("hello", 42); + ComparePrettyPrintToChars(mixed, "{first = \"hello\", second = 42}"); + + std::pair> nested(1, {2, 3}); + ComparePrettyPrintToChars(nested, "{first = 1, second = {first = 2, second = 3}}"); +} + void unique_ptr_test() { std::unique_ptr matilda(new std::string("Matilda")); ComparePrettyPrintToRegex( @@ -747,6 +759,7 @@ int main(int, char**) { //u16string_test(); u32string_test(); tuple_test(); + pair_test(); unique_ptr_test(); shared_ptr_test(); bitset_test(); diff --git 
a/libcxx/test/libcxx/language.support/nodiscard.verify.cpp b/libcxx/test/libcxx/language.support/nodiscard.verify.cpp index a5ac8b6cfc8e9..b247033516621 100644 --- a/libcxx/test/libcxx/language.support/nodiscard.verify.cpp +++ b/libcxx/test/libcxx/language.support/nodiscard.verify.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -86,6 +87,29 @@ void test() { } #endif +#if TEST_STD_VER >= 17 + { // + std::byte b{0}; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + b | b; + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + b & b; + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + b ^ b; + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + ~b; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + b << 1; + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + b >> 1; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::to_integer(b); + } +#endif + { // { std::bad_exception bex; diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each.associative.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each.associative.pass.cpp index 0fcd3ab27635a..645e1e4af792e 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each.associative.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each.associative.pass.cpp @@ -78,9 +78,7 @@ TEST_CONSTEXPR_CXX26 bool test() { if (!TEST_IS_CONSTANT_EVALUATED) test_node_container >([](int i) { return i; }); test_node_container >([](int i) { return std::make_pair(i, i); }); - // FIXME: remove when multimap is made constexpr - if (!TEST_IS_CONSTANT_EVALUATED) - test_node_container >([](int 
i) { return std::make_pair(i, i); }); + test_node_container >([](int i) { return std::make_pair(i, i); }); return true; } diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.associative.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.associative.pass.cpp index 0a1bbe024cffa..e0186654e4bbc 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.associative.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.associative.pass.cpp @@ -258,11 +258,9 @@ TEST_CONSTEXPR_CXX26 bool test() { // FIXME: remove when multiset is made constexpr test_node_container >([](int i) { return i; }); - - // FIXME: remove when multimap is made constexpr - test_node_container >([](int i) { return std::make_pair(i, i); }); } test_node_container >([](int i) { return std::make_pair(i, i); }); + test_node_container >([](int i) { return std::make_pair(i, i); }); if (!TEST_IS_CONSTANT_EVALUATED) { // FIXME: remove when set is made constexpr @@ -270,12 +268,10 @@ TEST_CONSTEXPR_CXX26 bool test() { // FIXME: remove when multiset is made constexpr test_invoke_set_like(); - - // FIXME: remove when multimap is made constexpr - test_invoke_map_like(); } test_invoke_map_like(); + test_invoke_map_like(); return true; } diff --git a/libcxx/test/std/containers/associative/map/map.cons/move_alloc.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/move_alloc.pass.cpp index 169639f5afa78..a7bdbea189f8f 100644 --- a/libcxx/test/std/containers/associative/map/map.cons/move_alloc.pass.cpp +++ b/libcxx/test/std/containers/associative/map/map.cons/move_alloc.pass.cpp @@ -161,6 +161,7 @@ int main(int, char**) { #if TEST_STD_VER >= 26 // FIXME: It is not yet possible to replace a `const MoveOnly` key subobject during constant evaluation. + // See https://github.com/llvm/llvm-project/issues/204617. 
// static_assert(test_move_alloc()); static_assert(test_move_alloc()); #endif diff --git a/libcxx/test/std/containers/associative/map/map.cons/move_assign.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/move_assign.pass.cpp index 4fffb2fe04300..eeea47fc94d18 100644 --- a/libcxx/test/std/containers/associative/map/map.cons/move_assign.pass.cpp +++ b/libcxx/test/std/containers/associative/map/map.cons/move_assign.pass.cpp @@ -107,6 +107,7 @@ int main(int, char**) { #if TEST_STD_VER >= 26 // FIXME: It is not yet possible to replace a `const MoveOnly` key subobject during constant evaluation. + // See https://github.com/llvm/llvm-project/issues/204617. // static_assert(test_move_assign()); static_assert(test_move_assign()); #endif diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/merge.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/merge.pass.cpp index c46234a8ff7fc..c3f8f266a929b 100644 --- a/libcxx/test/std/containers/associative/map/map.modifiers/merge.pass.cpp +++ b/libcxx/test/std/containers/associative/map/map.modifiers/merge.pass.cpp @@ -132,7 +132,7 @@ bool test() { first.merge(second); first.merge(std::move(second)); } - if (!TEST_IS_CONSTANT_EVALUATED) { + { std::multimap second; first.merge(second); first.merge(std::move(second)); diff --git a/libcxx/test/std/containers/associative/map/map.ops/contains.pass.cpp b/libcxx/test/std/containers/associative/map/map.ops/contains.pass.cpp index 64ccb401e134c..e93b12bd71370 100644 --- a/libcxx/test/std/containers/associative/map/map.ops/contains.pass.cpp +++ b/libcxx/test/std/containers/associative/map/map.ops/contains.pass.cpp @@ -49,8 +49,7 @@ TEST_CONSTEXPR_CXX26 bool test() { test, std::pair >( -1, std::make_pair(1, E{}), std::make_pair(2, E{}), std::make_pair(3, E{}), std::make_pair(4, E{})); } - // FIXME: remove when multimap is made constexpr - if (!TEST_IS_CONSTANT_EVALUATED) { + { test, std::pair >( 'e', std::make_pair('a', 10), std::make_pair('b', 11), 
std::make_pair('c', 12), std::make_pair('d', 13)); diff --git a/libcxx/test/std/containers/associative/map/map.ops/contains_transparent.pass.cpp b/libcxx/test/std/containers/associative/map/map.ops/contains_transparent.pass.cpp index 778bd312469a9..4b9038f7121d6 100644 --- a/libcxx/test/std/containers/associative/map/map.ops/contains_transparent.pass.cpp +++ b/libcxx/test/std/containers/associative/map/map.ops/contains_transparent.pass.cpp @@ -41,11 +41,7 @@ TEST_CONSTEXPR_CXX26 bool test() { TEST_CONSTEXPR_CXX26 bool test() { test, int, Comp> >(); - - // FIXME: remove when multimap is made constexpr - if (!TEST_IS_CONSTANT_EVALUATED) { - test, int, Comp> >(); - } + test, int, Comp> >(); return true; } diff --git a/libcxx/test/std/containers/associative/multimap/empty.pass.cpp b/libcxx/test/std/containers/associative/multimap/empty.pass.cpp index c183cc385a916..bb7b6624b2991 100644 --- a/libcxx/test/std/containers/associative/multimap/empty.pass.cpp +++ b/libcxx/test/std/containers/associative/multimap/empty.pass.cpp @@ -10,7 +10,7 @@ // class multimap -// bool empty() const; +// bool empty() const; // constexpr since C++26 #include #include @@ -18,7 +18,8 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 +bool test() { { typedef std::multimap M; M m; @@ -40,5 +41,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + test(); + +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/associative/multimap/get_allocator.pass.cpp b/libcxx/test/std/containers/associative/multimap/get_allocator.pass.cpp index 102dd0b5a36cc..92c128a9e5010 100644 --- a/libcxx/test/std/containers/associative/multimap/get_allocator.pass.cpp +++ b/libcxx/test/std/containers/associative/multimap/get_allocator.pass.cpp @@ -10,7 +10,7 @@ // class multimap -// allocator_type get_allocator() const +// allocator_type get_allocator() const // constexpr since C++26 
#include #include @@ -19,7 +19,8 @@ #include "test_allocator.h" #include "test_macros.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 +bool test() { typedef std::pair ValueType; { std::allocator alloc; @@ -32,5 +33,14 @@ int main(int, char**) { assert(m.get_allocator() == alloc); } + return true; +} + +int main(int, char**) { + test(); + +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/associative/multimap/incomplete_type.pass.cpp b/libcxx/test/std/containers/associative/multimap/incomplete_type.pass.cpp index 470275aea064b..3bcd40cad9838 100644 --- a/libcxx/test/std/containers/associative/multimap/incomplete_type.pass.cpp +++ b/libcxx/test/std/containers/associative/multimap/incomplete_type.pass.cpp @@ -26,11 +26,22 @@ struct A { inline bool operator==(A const& L, A const& R) { return &L == &R; } inline bool operator<(A const& L, A const& R) { return L.data < R.data; } -int main(int, char**) { + +TEST_CONSTEXPR_CXX26 +bool test() { A a; // Make sure that the allocator isn't rebound to and incomplete type std::multimap, complete_type_allocator > > m; + return true; +} + +int main(int, char**) { + test(); + +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/associative/multimap/iterator.pass.cpp b/libcxx/test/std/containers/associative/multimap/iterator.pass.cpp index ffdc39ff35563..dfac6418fcde1 100644 --- a/libcxx/test/std/containers/associative/multimap/iterator.pass.cpp +++ b/libcxx/test/std/containers/associative/multimap/iterator.pass.cpp @@ -10,20 +10,20 @@ // class multimap -// iterator begin(); -// const_iterator begin() const; -// iterator end(); -// const_iterator end() const; +// iterator begin(); // constexpr since C++26 +// const_iterator begin() const; // constexpr since C++26 +// iterator end(); // constexpr since C++26 +// const_iterator end() const; // constexpr since C++26 // -// reverse_iterator rbegin(); -// 
const_reverse_iterator rbegin() const; -// reverse_iterator rend(); -// const_reverse_iterator rend() const; +// reverse_iterator rbegin(); // constexpr since C++26 +// const_reverse_iterator rbegin() const; // constexpr since C++26 +// reverse_iterator rend(); // constexpr since C++26 +// const_reverse_iterator rend() const; // constexpr since C++26 // -// const_iterator cbegin() const; -// const_iterator cend() const; -// const_reverse_iterator crbegin() const; -// const_reverse_iterator crend() const; +// const_iterator cbegin() const; // constexpr since C++26 +// const_iterator cend() const; // constexpr since C++26 +// const_reverse_iterator crbegin() const; // constexpr since C++26 +// const_reverse_iterator crend() const; // constexpr since C++26 #include #include @@ -32,7 +32,8 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 +bool test() { { typedef std::pair V; V ar[] = {V(1, 1), V(1, 1.5), V(1, 2), V(2, 1), V(2, 1.5), V(2, 2), V(3, 1), V(3, 1.5), @@ -165,5 +166,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + test(); + +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/associative/multimap/max_size.pass.cpp b/libcxx/test/std/containers/associative/multimap/max_size.pass.cpp index c6208d27336b5..4d78bdaf99625 100644 --- a/libcxx/test/std/containers/associative/multimap/max_size.pass.cpp +++ b/libcxx/test/std/containers/associative/multimap/max_size.pass.cpp @@ -10,7 +10,7 @@ // class multimap -// size_type max_size() const; +// size_type max_size() const; // constexpr since C++26 #include #include @@ -20,7 +20,8 @@ #include "test_allocator.h" #include "test_macros.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 +bool test() { typedef std::pair KV; { typedef limited_allocator A; @@ -45,5 +46,14 @@ int main(int, char**) { assert(c.max_size() <= alloc_max_size(c.get_allocator())); } + return true; +} + +int 
main(int, char**) { + test(); + +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/alloc.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/alloc.pass.cpp index 10184633a82de..1d7af0ab1ffff 100644 --- a/libcxx/test/std/containers/associative/multimap/multimap.cons/alloc.pass.cpp +++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/alloc.pass.cpp @@ -10,7 +10,7 @@ // class multimap -// explicit multimap(const allocator_type& a); +// explicit multimap(const allocator_type& a); // constexpr since C++26 #include #include @@ -19,7 +19,8 @@ #include "test_allocator.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 +bool test() { { typedef std::less C; typedef test_allocator > A; @@ -47,5 +48,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + test(); + +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/assign_initializer_list.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/assign_initializer_list.pass.cpp index d1de8fab172cf..1c806c31f335c 100644 --- a/libcxx/test/std/containers/associative/multimap/multimap.cons/assign_initializer_list.pass.cpp +++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/assign_initializer_list.pass.cpp @@ -12,7 +12,7 @@ // class multimap -// multimap& operator=(initializer_list il); +// multimap& operator=(initializer_list il); // constexpr since C++26 #include #include @@ -20,7 +20,8 @@ #include "test_macros.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 +bool test() { { typedef std::multimap C; typedef C::value_type V; @@ -58,5 +59,14 @@ int main(int, char**) { assert(*++i == V(3, 2)); } + return true; +} + +int main(int, char**) { + test(); + +#if TEST_STD_VER >= 26 + static_assert(test()); 
+#endif return 0; } diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/compare.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/compare.pass.cpp index 84584a427ead0..d9a2257dbacfc 100644 --- a/libcxx/test/std/containers/associative/multimap/multimap.cons/compare.pass.cpp +++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/compare.pass.cpp @@ -10,7 +10,7 @@ // class multimap -// explicit multimap(const key_compare& comp); +// explicit multimap(const key_compare& comp); // constexpr since C++26 #include #include @@ -19,7 +19,8 @@ #include "../../../test_compare.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 +bool test() { { typedef test_less C; const std::multimap m(C(3)); @@ -37,5 +38,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + test(); + +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/compare_alloc.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/compare_alloc.pass.cpp index 207e7e271234f..a72bbabd376d3 100644 --- a/libcxx/test/std/containers/associative/multimap/multimap.cons/compare_alloc.pass.cpp +++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/compare_alloc.pass.cpp @@ -10,7 +10,7 @@ // class multimap -// multimap(const key_compare& comp, const allocator_type& a); +// multimap(const key_compare& comp, const allocator_type& a); // constexpr since C++26 #include #include @@ -20,7 +20,8 @@ #include "test_allocator.h" #include "min_allocator.h" -int main(int, char**) { +TEST_CONSTEXPR_CXX26 +bool test() { { typedef test_less C; typedef test_allocator > A; @@ -51,5 +52,14 @@ int main(int, char**) { } #endif + return true; +} + +int main(int, char**) { + test(); + +#if TEST_STD_VER >= 26 + static_assert(test()); +#endif return 0; } diff --git 
a/libcxx/test/std/containers/associative/multimap/multimap.cons/copy.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/copy.pass.cpp index 724755d1ef655..999d6be70b3c8 100644 --- a/libcxx/test/std/containers/associative/multimap/multimap.cons/copy.pass.cpp +++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/copy.pass.cpp @@ -10,7 +10,7 @@ // class multimap -// multimap(const multimap& m); +// multimap(const multimap& m); // constexpr since C++26 #include #include @@ -21,7 +21,7 @@ #include "min_allocator.h" template