From 6d187f0eff662a50057dfb379aeb5e9111239bf0 Mon Sep 17 00:00:00 2001 From: Alex Lorenz Date: Fri, 12 Jul 2019 22:06:08 +0000 Subject: [PATCH 001/451] [macCatalyst] Use macCatalyst pretty name in .build_version darwin assembly command 'macCatalyst' is more readable than 'maccatalyst'. I renamed the objdump output, but the assembly should match it as well. llvm-svn: 365964 --- llvm/lib/MC/MCAsmStreamer.cpp | 2 +- llvm/lib/MC/MCParser/DarwinAsmParser.cpp | 2 +- llvm/test/CodeGen/X86/macCatalyst.ll | 2 +- llvm/test/MC/MachO/build-version-maccatalyst.s | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index a78092ec45824..7e8f02e3a1aa5 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -541,7 +541,7 @@ static const char *getPlatformName(MachO::PlatformType Type) { case MachO::PLATFORM_TVOS: return "tvos"; case MachO::PLATFORM_WATCHOS: return "watchos"; case MachO::PLATFORM_BRIDGEOS: return "bridgeos"; - case MachO::PLATFORM_MACCATALYST: return "maccatalyst"; + case MachO::PLATFORM_MACCATALYST: return "macCatalyst"; case MachO::PLATFORM_IOSSIMULATOR: return "iossimulator"; case MachO::PLATFORM_TVOSSIMULATOR: return "tvossimulator"; case MachO::PLATFORM_WATCHOSSIMULATOR: return "watchossimulator"; diff --git a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp index 1b5b01267343a..1160934dc62c4 100644 --- a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp @@ -1169,7 +1169,7 @@ bool DarwinAsmParser::parseBuildVersion(StringRef Directive, SMLoc Loc) { .Case("ios", MachO::PLATFORM_IOS) .Case("tvos", MachO::PLATFORM_TVOS) .Case("watchos", MachO::PLATFORM_WATCHOS) - .Case("maccatalyst", MachO::PLATFORM_MACCATALYST) + .Case("macCatalyst", MachO::PLATFORM_MACCATALYST) .Default(0); if (Platform == 0) return Error(PlatformLoc, "unknown platform name"); diff --git 
a/llvm/test/CodeGen/X86/macCatalyst.ll b/llvm/test/CodeGen/X86/macCatalyst.ll index 7a126f1665d21..fbf3af54771b0 100644 --- a/llvm/test/CodeGen/X86/macCatalyst.ll +++ b/llvm/test/CodeGen/X86/macCatalyst.ll @@ -1,3 +1,3 @@ ; RUN: llc %s -o - | FileCheck %s target triple="x86_64-apple-ios13.0-macabi" -; CHECK: .build_version maccatalyst, 13, 0 +; CHECK: .build_version macCatalyst, 13, 0 diff --git a/llvm/test/MC/MachO/build-version-maccatalyst.s b/llvm/test/MC/MachO/build-version-maccatalyst.s index 9056780810d10..aff5589264e37 100644 --- a/llvm/test/MC/MachO/build-version-maccatalyst.s +++ b/llvm/test/MC/MachO/build-version-maccatalyst.s @@ -1,4 +1,4 @@ // RUN: llvm-mc -triple x86_64-apple-ios %s | FileCheck %s -.build_version maccatalyst,13,0 -// CHECK: .build_version maccatalyst, 13, 0 +.build_version macCatalyst,13,0 +// CHECK: .build_version macCatalyst, 13, 0 From d8ddf839505a1aeb8a7b1b3cdeea8a5cad3b1db0 Mon Sep 17 00:00:00 2001 From: Wouter van Oortmerssen Date: Fri, 12 Jul 2019 22:08:25 +0000 Subject: [PATCH 002/451] [WebAssembly] refactored utilities to not depend on MachineInstr Summary: Most of these functions can work for MachineInstr and MCInst equally now. 
Reviewers: dschuff Subscribers: MatzeB, sbc100, jgravelle-google, aheejin, sunfish, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64643 llvm-svn: 365965 --- .../MCTargetDesc/WebAssemblyMCTargetDesc.h | 255 ++++++++++++++++-- .../WebAssembly/WebAssemblyArgumentMove.cpp | 4 +- .../WebAssembly/WebAssemblyCFGStackify.cpp | 2 +- .../WebAssembly/WebAssemblyExplicitLocals.cpp | 8 +- .../WebAssembly/WebAssemblyFrameLowering.cpp | 3 +- .../WebAssembly/WebAssemblyMCInstLower.cpp | 4 +- .../WebAssemblyPrepareForLiveIntervals.cpp | 4 +- .../WebAssembly/WebAssemblyRegNumbering.cpp | 2 +- .../WebAssembly/WebAssemblyRegStackify.cpp | 4 +- .../WebAssembly/WebAssemblyUtilities.cpp | 219 +-------------- .../Target/WebAssembly/WebAssemblyUtilities.h | 10 - 11 files changed, 254 insertions(+), 261 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 67532013afd8a..a0d526b8a2e03 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -122,9 +122,27 @@ enum TOF { namespace llvm { namespace WebAssembly { +/// This is used to indicate block signatures. +enum class ExprType : unsigned { + Void = 0x40, + I32 = 0x7F, + I64 = 0x7E, + F32 = 0x7D, + F64 = 0x7C, + V128 = 0x7B, + ExceptRef = 0x68, + Invalid = 0x00 +}; + +/// Instruction opcodes emitted via means other than CodeGen. +static const unsigned Nop = 0x01; +static const unsigned End = 0x0b; + +wasm::ValType toValType(const MVT &Ty); + /// Return the default p2align value for a load or store with the given opcode. 
-inline unsigned GetDefaultP2AlignAny(unsigned Opcode) { - switch (Opcode) { +inline unsigned GetDefaultP2AlignAny(unsigned Opc) { + switch (Opc) { case WebAssembly::LOAD8_S_I32: case WebAssembly::LOAD8_S_I32_S: case WebAssembly::LOAD8_U_I32: @@ -337,31 +355,230 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opcode) { } } -inline unsigned GetDefaultP2Align(unsigned Opcode) { - auto Align = GetDefaultP2AlignAny(Opcode); +inline unsigned GetDefaultP2Align(unsigned Opc) { + auto Align = GetDefaultP2AlignAny(Opc); if (Align == -1U) { llvm_unreachable("Only loads and stores have p2align values"); } return Align; } -/// This is used to indicate block signatures. -enum class ExprType : unsigned { - Void = 0x40, - I32 = 0x7F, - I64 = 0x7E, - F32 = 0x7D, - F64 = 0x7C, - V128 = 0x7B, - ExceptRef = 0x68, - Invalid = 0x00 -}; +inline bool isArgument(unsigned Opc) { + switch (Opc) { + case WebAssembly::ARGUMENT_i32: + case WebAssembly::ARGUMENT_i32_S: + case WebAssembly::ARGUMENT_i64: + case WebAssembly::ARGUMENT_i64_S: + case WebAssembly::ARGUMENT_f32: + case WebAssembly::ARGUMENT_f32_S: + case WebAssembly::ARGUMENT_f64: + case WebAssembly::ARGUMENT_f64_S: + case WebAssembly::ARGUMENT_v16i8: + case WebAssembly::ARGUMENT_v16i8_S: + case WebAssembly::ARGUMENT_v8i16: + case WebAssembly::ARGUMENT_v8i16_S: + case WebAssembly::ARGUMENT_v4i32: + case WebAssembly::ARGUMENT_v4i32_S: + case WebAssembly::ARGUMENT_v2i64: + case WebAssembly::ARGUMENT_v2i64_S: + case WebAssembly::ARGUMENT_v4f32: + case WebAssembly::ARGUMENT_v4f32_S: + case WebAssembly::ARGUMENT_v2f64: + case WebAssembly::ARGUMENT_v2f64_S: + return true; + default: + return false; + } +} -/// Instruction opcodes emitted via means other than CodeGen. 
-static const unsigned Nop = 0x01; -static const unsigned End = 0x0b; +inline bool isCopy(unsigned Opc) { + switch (Opc) { + case WebAssembly::COPY_I32: + case WebAssembly::COPY_I32_S: + case WebAssembly::COPY_I64: + case WebAssembly::COPY_I64_S: + case WebAssembly::COPY_F32: + case WebAssembly::COPY_F32_S: + case WebAssembly::COPY_F64: + case WebAssembly::COPY_F64_S: + case WebAssembly::COPY_V128: + case WebAssembly::COPY_V128_S: + case WebAssembly::COPY_EXCEPT_REF: + case WebAssembly::COPY_EXCEPT_REF_S: + return true; + default: + return false; + } +} -wasm::ValType toValType(const MVT &Ty); +inline bool isTee(unsigned Opc) { + switch (Opc) { + case WebAssembly::TEE_I32: + case WebAssembly::TEE_I32_S: + case WebAssembly::TEE_I64: + case WebAssembly::TEE_I64_S: + case WebAssembly::TEE_F32: + case WebAssembly::TEE_F32_S: + case WebAssembly::TEE_F64: + case WebAssembly::TEE_F64_S: + case WebAssembly::TEE_V128: + case WebAssembly::TEE_V128_S: + return true; + default: + return false; + } +} + +inline bool isCallDirect(unsigned Opc) { + switch (Opc) { + case WebAssembly::CALL_VOID: + case WebAssembly::CALL_VOID_S: + case WebAssembly::CALL_i32: + case WebAssembly::CALL_i32_S: + case WebAssembly::CALL_i64: + case WebAssembly::CALL_i64_S: + case WebAssembly::CALL_f32: + case WebAssembly::CALL_f32_S: + case WebAssembly::CALL_f64: + case WebAssembly::CALL_f64_S: + case WebAssembly::CALL_v16i8: + case WebAssembly::CALL_v16i8_S: + case WebAssembly::CALL_v8i16: + case WebAssembly::CALL_v8i16_S: + case WebAssembly::CALL_v4i32: + case WebAssembly::CALL_v4i32_S: + case WebAssembly::CALL_v2i64: + case WebAssembly::CALL_v2i64_S: + case WebAssembly::CALL_v4f32: + case WebAssembly::CALL_v4f32_S: + case WebAssembly::CALL_v2f64: + case WebAssembly::CALL_v2f64_S: + case WebAssembly::CALL_ExceptRef: + case WebAssembly::CALL_ExceptRef_S: + case WebAssembly::RET_CALL: + case WebAssembly::RET_CALL_S: + return true; + default: + return false; + } +} + +inline bool isCallIndirect(unsigned 
Opc) { + switch (Opc) { + case WebAssembly::CALL_INDIRECT_VOID: + case WebAssembly::CALL_INDIRECT_VOID_S: + case WebAssembly::CALL_INDIRECT_i32: + case WebAssembly::CALL_INDIRECT_i32_S: + case WebAssembly::CALL_INDIRECT_i64: + case WebAssembly::CALL_INDIRECT_i64_S: + case WebAssembly::CALL_INDIRECT_f32: + case WebAssembly::CALL_INDIRECT_f32_S: + case WebAssembly::CALL_INDIRECT_f64: + case WebAssembly::CALL_INDIRECT_f64_S: + case WebAssembly::CALL_INDIRECT_v16i8: + case WebAssembly::CALL_INDIRECT_v16i8_S: + case WebAssembly::CALL_INDIRECT_v8i16: + case WebAssembly::CALL_INDIRECT_v8i16_S: + case WebAssembly::CALL_INDIRECT_v4i32: + case WebAssembly::CALL_INDIRECT_v4i32_S: + case WebAssembly::CALL_INDIRECT_v2i64: + case WebAssembly::CALL_INDIRECT_v2i64_S: + case WebAssembly::CALL_INDIRECT_v4f32: + case WebAssembly::CALL_INDIRECT_v4f32_S: + case WebAssembly::CALL_INDIRECT_v2f64: + case WebAssembly::CALL_INDIRECT_v2f64_S: + case WebAssembly::CALL_INDIRECT_ExceptRef: + case WebAssembly::CALL_INDIRECT_ExceptRef_S: + case WebAssembly::RET_CALL_INDIRECT: + case WebAssembly::RET_CALL_INDIRECT_S: + return true; + default: + return false; + } +} + +/// Returns the operand number of a callee, assuming the argument is a call +/// instruction. 
+inline unsigned getCalleeOpNo(unsigned Opc) { + switch (Opc) { + case WebAssembly::CALL_VOID: + case WebAssembly::CALL_VOID_S: + case WebAssembly::CALL_INDIRECT_VOID: + case WebAssembly::CALL_INDIRECT_VOID_S: + case WebAssembly::RET_CALL: + case WebAssembly::RET_CALL_S: + case WebAssembly::RET_CALL_INDIRECT: + case WebAssembly::RET_CALL_INDIRECT_S: + return 0; + case WebAssembly::CALL_i32: + case WebAssembly::CALL_i32_S: + case WebAssembly::CALL_i64: + case WebAssembly::CALL_i64_S: + case WebAssembly::CALL_f32: + case WebAssembly::CALL_f32_S: + case WebAssembly::CALL_f64: + case WebAssembly::CALL_f64_S: + case WebAssembly::CALL_v16i8: + case WebAssembly::CALL_v16i8_S: + case WebAssembly::CALL_v8i16: + case WebAssembly::CALL_v8i16_S: + case WebAssembly::CALL_v4i32: + case WebAssembly::CALL_v4i32_S: + case WebAssembly::CALL_v2i64: + case WebAssembly::CALL_v2i64_S: + case WebAssembly::CALL_v4f32: + case WebAssembly::CALL_v4f32_S: + case WebAssembly::CALL_v2f64: + case WebAssembly::CALL_v2f64_S: + case WebAssembly::CALL_ExceptRef: + case WebAssembly::CALL_ExceptRef_S: + case WebAssembly::CALL_INDIRECT_i32: + case WebAssembly::CALL_INDIRECT_i32_S: + case WebAssembly::CALL_INDIRECT_i64: + case WebAssembly::CALL_INDIRECT_i64_S: + case WebAssembly::CALL_INDIRECT_f32: + case WebAssembly::CALL_INDIRECT_f32_S: + case WebAssembly::CALL_INDIRECT_f64: + case WebAssembly::CALL_INDIRECT_f64_S: + case WebAssembly::CALL_INDIRECT_v16i8: + case WebAssembly::CALL_INDIRECT_v16i8_S: + case WebAssembly::CALL_INDIRECT_v8i16: + case WebAssembly::CALL_INDIRECT_v8i16_S: + case WebAssembly::CALL_INDIRECT_v4i32: + case WebAssembly::CALL_INDIRECT_v4i32_S: + case WebAssembly::CALL_INDIRECT_v2i64: + case WebAssembly::CALL_INDIRECT_v2i64_S: + case WebAssembly::CALL_INDIRECT_v4f32: + case WebAssembly::CALL_INDIRECT_v4f32_S: + case WebAssembly::CALL_INDIRECT_v2f64: + case WebAssembly::CALL_INDIRECT_v2f64_S: + case WebAssembly::CALL_INDIRECT_ExceptRef: + case WebAssembly::CALL_INDIRECT_ExceptRef_S: + 
return 1; + default: + llvm_unreachable("Not a call instruction"); + } +} + +inline bool isMarker(unsigned Opc) { + switch (Opc) { + case WebAssembly::BLOCK: + case WebAssembly::BLOCK_S: + case WebAssembly::END_BLOCK: + case WebAssembly::END_BLOCK_S: + case WebAssembly::LOOP: + case WebAssembly::LOOP_S: + case WebAssembly::END_LOOP: + case WebAssembly::END_LOOP_S: + case WebAssembly::TRY: + case WebAssembly::TRY_S: + case WebAssembly::END_TRY: + case WebAssembly::END_TRY_S: + return true; + default: + return false; + } +} } // end namespace WebAssembly } // end namespace llvm diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp index 3dcf34aeb280f..02f5cc6da77ca 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp @@ -78,7 +78,7 @@ bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) { // Look for the first NonArg instruction. for (MachineInstr &MI : EntryMBB) { - if (!WebAssembly::isArgument(MI)) { + if (!WebAssembly::isArgument(MI.getOpcode())) { InsertPt = MI; break; } @@ -87,7 +87,7 @@ bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) { // Now move any argument instructions later in the block // to before our first NonArg instruction. 
for (MachineInstr &MI : llvm::make_range(InsertPt, EntryMBB.end())) { - if (WebAssembly::isArgument(MI)) { + if (WebAssembly::isArgument(MI.getOpcode())) { EntryMBB.insert(InsertPt, MI.removeFromParent()); Changed = true; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index a429bee466400..a23a47d2e89ad 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -894,7 +894,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { // We wrap up the current range when we see a marker even if we haven't // finished a BB. - if (RangeEnd && WebAssembly::isMarker(MI)) { + if (RangeEnd && WebAssembly::isMarker(MI.getOpcode())) { NeedAppendixBlock = true; // Record the range. nullptr here means the unwind destination is the // caller. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp index 5343697527445..de7e912129fb6 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp @@ -205,7 +205,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { E = MF.begin()->end(); I != E;) { MachineInstr &MI = *I++; - if (!WebAssembly::isArgument(MI)) + if (!WebAssembly::isArgument(MI.getOpcode())) break; unsigned Reg = MI.getOperand(0).getReg(); assert(!MFI.isVRegStackified(Reg)); @@ -227,7 +227,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock &MBB : MF) { for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) { MachineInstr &MI = *I++; - assert(!WebAssembly::isArgument(MI)); + assert(!WebAssembly::isArgument(MI.getOpcode())); if (MI.isDebugInstr() || MI.isLabel()) continue; @@ -235,7 +235,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction 
&MF) { // Replace tee instructions with local.tee. The difference is that tee // instructions have two defs, while local.tee instructions have one def // and an index of a local to write to. - if (WebAssembly::isTee(MI)) { + if (WebAssembly::isTee(MI.getOpcode())) { assert(MFI.isVRegStackified(MI.getOperand(0).getReg())); assert(!MFI.isVRegStackified(MI.getOperand(1).getReg())); unsigned OldReg = MI.getOperand(2).getReg(); @@ -356,7 +356,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { } // Coalesce and eliminate COPY instructions. - if (WebAssembly::isCopy(MI)) { + if (WebAssembly::isCopy(MI.getOpcode())) { MRI.replaceRegWith(MI.getOperand(1).getReg(), MI.getOperand(0).getReg()); MI.eraseFromParent(); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index a1c567fa3c073..5299068efdd44 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -164,7 +164,8 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, auto &MRI = MF.getRegInfo(); auto InsertPt = MBB.begin(); - while (InsertPt != MBB.end() && WebAssembly::isArgument(*InsertPt)) + while (InsertPt != MBB.end() && + WebAssembly::isArgument(InsertPt->getOpcode())) ++InsertPt; DebugLoc DL; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index 27f13d9639a06..611f05f949691 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -16,7 +16,7 @@ #include "WebAssemblyAsmPrinter.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblyRuntimeLibcallSignatures.h" -#include "WebAssemblyUtilities.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Constants.h" @@ 
-221,7 +221,7 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI, // call_indirect instructions have a callee operand at the end which // doesn't count as a param. - if (WebAssembly::isCallIndirect(*MI)) + if (WebAssembly::isCallIndirect(MI->getOpcode())) Params.pop_back(); auto *WasmSym = cast(Sym); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp index 12b70f7ce4f13..3bfbf607344db 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp @@ -64,7 +64,7 @@ FunctionPass *llvm::createWebAssemblyPrepareForLiveIntervals() { // Test whether the given register has an ARGUMENT def. static bool hasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) { for (const auto &Def : MRI.def_instructions(Reg)) - if (WebAssembly::isArgument(Def)) + if (WebAssembly::isArgument(Def.getOpcode())) return true; return false; } @@ -114,7 +114,7 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction( // liveness reflects the fact that these really are live-in values. for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE;) { MachineInstr &MI = *MII++; - if (WebAssembly::isArgument(MI)) { + if (WebAssembly::isArgument(MI.getOpcode())) { MI.removeFromParent(); Entry.insert(Entry.begin(), &MI); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp index 424b2ca2f841c..cdca23f55b29f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -72,7 +72,7 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) { // variables. Assign the numbers for them first. 
MachineBasicBlock &EntryMBB = MF.front(); for (MachineInstr &MI : EntryMBB) { - if (!WebAssembly::isArgument(MI)) + if (!WebAssembly::isArgument(MI.getOpcode())) break; int64_t Imm = MI.getOperand(1).getImm(); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 6463e268c9059..31ba6f0e4c237 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -252,7 +252,7 @@ static void query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read, // Analyze calls. if (MI.isCall()) { - unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI); + unsigned CalleeOpNo = WebAssembly::getCalleeOpNo(MI.getOpcode()); queryCallee(MI, CalleeOpNo, Read, Write, Effects, StackPointer); } } @@ -826,7 +826,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { // Argument instructions represent live-in registers and not real // instructions. - if (WebAssembly::isArgument(*Def)) + if (WebAssembly::isArgument(Def->getOpcode())) continue; // Currently catch's return value register cannot be stackified, because diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp index cca27d9c8d4e4..e9d88d4818a58 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp @@ -24,72 +24,6 @@ const char *const WebAssembly::StdTerminateFn = "_ZSt9terminatev"; const char *const WebAssembly::PersonalityWrapperFn = "_Unwind_Wasm_CallPersonality"; -bool WebAssembly::isArgument(const MachineInstr &MI) { - switch (MI.getOpcode()) { - case WebAssembly::ARGUMENT_i32: - case WebAssembly::ARGUMENT_i32_S: - case WebAssembly::ARGUMENT_i64: - case WebAssembly::ARGUMENT_i64_S: - case WebAssembly::ARGUMENT_f32: - case WebAssembly::ARGUMENT_f32_S: - case WebAssembly::ARGUMENT_f64: - case WebAssembly::ARGUMENT_f64_S: - case 
WebAssembly::ARGUMENT_v16i8: - case WebAssembly::ARGUMENT_v16i8_S: - case WebAssembly::ARGUMENT_v8i16: - case WebAssembly::ARGUMENT_v8i16_S: - case WebAssembly::ARGUMENT_v4i32: - case WebAssembly::ARGUMENT_v4i32_S: - case WebAssembly::ARGUMENT_v2i64: - case WebAssembly::ARGUMENT_v2i64_S: - case WebAssembly::ARGUMENT_v4f32: - case WebAssembly::ARGUMENT_v4f32_S: - case WebAssembly::ARGUMENT_v2f64: - case WebAssembly::ARGUMENT_v2f64_S: - return true; - default: - return false; - } -} - -bool WebAssembly::isCopy(const MachineInstr &MI) { - switch (MI.getOpcode()) { - case WebAssembly::COPY_I32: - case WebAssembly::COPY_I32_S: - case WebAssembly::COPY_I64: - case WebAssembly::COPY_I64_S: - case WebAssembly::COPY_F32: - case WebAssembly::COPY_F32_S: - case WebAssembly::COPY_F64: - case WebAssembly::COPY_F64_S: - case WebAssembly::COPY_V128: - case WebAssembly::COPY_V128_S: - case WebAssembly::COPY_EXCEPT_REF: - case WebAssembly::COPY_EXCEPT_REF_S: - return true; - default: - return false; - } -} - -bool WebAssembly::isTee(const MachineInstr &MI) { - switch (MI.getOpcode()) { - case WebAssembly::TEE_I32: - case WebAssembly::TEE_I32_S: - case WebAssembly::TEE_I64: - case WebAssembly::TEE_I64_S: - case WebAssembly::TEE_F32: - case WebAssembly::TEE_F32_S: - case WebAssembly::TEE_F64: - case WebAssembly::TEE_F64_S: - case WebAssembly::TEE_V128: - case WebAssembly::TEE_V128_S: - return true; - default: - return false; - } -} - /// Test whether MI is a child of some other node in an expression tree. 
bool WebAssembly::isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI) { @@ -103,155 +37,6 @@ bool WebAssembly::isChild(const MachineInstr &MI, MFI.isVRegStackified(Reg); } -bool WebAssembly::isCallDirect(const MachineInstr &MI) { - switch (MI.getOpcode()) { - case WebAssembly::CALL_VOID: - case WebAssembly::CALL_VOID_S: - case WebAssembly::CALL_i32: - case WebAssembly::CALL_i32_S: - case WebAssembly::CALL_i64: - case WebAssembly::CALL_i64_S: - case WebAssembly::CALL_f32: - case WebAssembly::CALL_f32_S: - case WebAssembly::CALL_f64: - case WebAssembly::CALL_f64_S: - case WebAssembly::CALL_v16i8: - case WebAssembly::CALL_v16i8_S: - case WebAssembly::CALL_v8i16: - case WebAssembly::CALL_v8i16_S: - case WebAssembly::CALL_v4i32: - case WebAssembly::CALL_v4i32_S: - case WebAssembly::CALL_v2i64: - case WebAssembly::CALL_v2i64_S: - case WebAssembly::CALL_v4f32: - case WebAssembly::CALL_v4f32_S: - case WebAssembly::CALL_v2f64: - case WebAssembly::CALL_v2f64_S: - case WebAssembly::CALL_ExceptRef: - case WebAssembly::CALL_ExceptRef_S: - case WebAssembly::RET_CALL: - case WebAssembly::RET_CALL_S: - return true; - default: - return false; - } -} - -bool WebAssembly::isCallIndirect(const MachineInstr &MI) { - switch (MI.getOpcode()) { - case WebAssembly::CALL_INDIRECT_VOID: - case WebAssembly::CALL_INDIRECT_VOID_S: - case WebAssembly::CALL_INDIRECT_i32: - case WebAssembly::CALL_INDIRECT_i32_S: - case WebAssembly::CALL_INDIRECT_i64: - case WebAssembly::CALL_INDIRECT_i64_S: - case WebAssembly::CALL_INDIRECT_f32: - case WebAssembly::CALL_INDIRECT_f32_S: - case WebAssembly::CALL_INDIRECT_f64: - case WebAssembly::CALL_INDIRECT_f64_S: - case WebAssembly::CALL_INDIRECT_v16i8: - case WebAssembly::CALL_INDIRECT_v16i8_S: - case WebAssembly::CALL_INDIRECT_v8i16: - case WebAssembly::CALL_INDIRECT_v8i16_S: - case WebAssembly::CALL_INDIRECT_v4i32: - case WebAssembly::CALL_INDIRECT_v4i32_S: - case WebAssembly::CALL_INDIRECT_v2i64: - case WebAssembly::CALL_INDIRECT_v2i64_S: - 
case WebAssembly::CALL_INDIRECT_v4f32: - case WebAssembly::CALL_INDIRECT_v4f32_S: - case WebAssembly::CALL_INDIRECT_v2f64: - case WebAssembly::CALL_INDIRECT_v2f64_S: - case WebAssembly::CALL_INDIRECT_ExceptRef: - case WebAssembly::CALL_INDIRECT_ExceptRef_S: - case WebAssembly::RET_CALL_INDIRECT: - case WebAssembly::RET_CALL_INDIRECT_S: - return true; - default: - return false; - } -} - -unsigned WebAssembly::getCalleeOpNo(const MachineInstr &MI) { - switch (MI.getOpcode()) { - case WebAssembly::CALL_VOID: - case WebAssembly::CALL_VOID_S: - case WebAssembly::CALL_INDIRECT_VOID: - case WebAssembly::CALL_INDIRECT_VOID_S: - case WebAssembly::RET_CALL: - case WebAssembly::RET_CALL_S: - case WebAssembly::RET_CALL_INDIRECT: - case WebAssembly::RET_CALL_INDIRECT_S: - return 0; - case WebAssembly::CALL_i32: - case WebAssembly::CALL_i32_S: - case WebAssembly::CALL_i64: - case WebAssembly::CALL_i64_S: - case WebAssembly::CALL_f32: - case WebAssembly::CALL_f32_S: - case WebAssembly::CALL_f64: - case WebAssembly::CALL_f64_S: - case WebAssembly::CALL_v16i8: - case WebAssembly::CALL_v16i8_S: - case WebAssembly::CALL_v8i16: - case WebAssembly::CALL_v8i16_S: - case WebAssembly::CALL_v4i32: - case WebAssembly::CALL_v4i32_S: - case WebAssembly::CALL_v2i64: - case WebAssembly::CALL_v2i64_S: - case WebAssembly::CALL_v4f32: - case WebAssembly::CALL_v4f32_S: - case WebAssembly::CALL_v2f64: - case WebAssembly::CALL_v2f64_S: - case WebAssembly::CALL_ExceptRef: - case WebAssembly::CALL_ExceptRef_S: - case WebAssembly::CALL_INDIRECT_i32: - case WebAssembly::CALL_INDIRECT_i32_S: - case WebAssembly::CALL_INDIRECT_i64: - case WebAssembly::CALL_INDIRECT_i64_S: - case WebAssembly::CALL_INDIRECT_f32: - case WebAssembly::CALL_INDIRECT_f32_S: - case WebAssembly::CALL_INDIRECT_f64: - case WebAssembly::CALL_INDIRECT_f64_S: - case WebAssembly::CALL_INDIRECT_v16i8: - case WebAssembly::CALL_INDIRECT_v16i8_S: - case WebAssembly::CALL_INDIRECT_v8i16: - case WebAssembly::CALL_INDIRECT_v8i16_S: - case 
WebAssembly::CALL_INDIRECT_v4i32: - case WebAssembly::CALL_INDIRECT_v4i32_S: - case WebAssembly::CALL_INDIRECT_v2i64: - case WebAssembly::CALL_INDIRECT_v2i64_S: - case WebAssembly::CALL_INDIRECT_v4f32: - case WebAssembly::CALL_INDIRECT_v4f32_S: - case WebAssembly::CALL_INDIRECT_v2f64: - case WebAssembly::CALL_INDIRECT_v2f64_S: - case WebAssembly::CALL_INDIRECT_ExceptRef: - case WebAssembly::CALL_INDIRECT_ExceptRef_S: - return 1; - default: - llvm_unreachable("Not a call instruction"); - } -} - -bool WebAssembly::isMarker(const MachineInstr &MI) { - switch (MI.getOpcode()) { - case WebAssembly::BLOCK: - case WebAssembly::BLOCK_S: - case WebAssembly::END_BLOCK: - case WebAssembly::END_BLOCK_S: - case WebAssembly::LOOP: - case WebAssembly::LOOP_S: - case WebAssembly::END_LOOP: - case WebAssembly::END_LOOP_S: - case WebAssembly::TRY: - case WebAssembly::TRY_S: - case WebAssembly::END_TRY: - case WebAssembly::END_TRY_S: - return true; - default: - return false; - } -} - bool WebAssembly::mayThrow(const MachineInstr &MI) { switch (MI.getOpcode()) { case WebAssembly::THROW: @@ -260,12 +45,12 @@ bool WebAssembly::mayThrow(const MachineInstr &MI) { case WebAssembly::RETHROW_S: return true; } - if (isCallIndirect(MI)) + if (isCallIndirect(MI.getOpcode())) return true; if (!MI.isCall()) return false; - const MachineOperand &MO = MI.getOperand(getCalleeOpNo(MI)); + const MachineOperand &MO = MI.getOperand(getCalleeOpNo(MI.getOpcode())); assert(MO.isGlobal()); const auto *F = dyn_cast(MO.getGlobal()); if (!F) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h index f80b49662ea64..26cf84de89b92 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h @@ -23,19 +23,9 @@ class WebAssemblyFunctionInfo; namespace WebAssembly { -bool isArgument(const MachineInstr &MI); -bool isCopy(const MachineInstr &MI); -bool isTee(const MachineInstr &MI); bool 
isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI); -bool isCallDirect(const MachineInstr &MI); -bool isCallIndirect(const MachineInstr &MI); -bool isMarker(const MachineInstr &MI); bool mayThrow(const MachineInstr &MI); -/// Returns the operand number of a callee, assuming the argument is a call -/// instruction. -unsigned getCalleeOpNo(const MachineInstr &MI); - // Exception-related function names extern const char *const ClangCallTerminateFn; extern const char *const CxaBeginCatchFn; From ec2abbafda627963e600c890ddf06ec3b7a1b399 Mon Sep 17 00:00:00 2001 From: Jan Korous Date: Fri, 12 Jul 2019 22:11:43 +0000 Subject: [PATCH 003/451] [DirectoryWatcher][linux] Fix use of uninitialized value llvm-svn: 365966 --- clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp b/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp index 986ebc5d95fab..87d133f46d8e8 100644 --- a/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp +++ b/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp @@ -220,8 +220,8 @@ void DirectoryWatcherLinux::InotifyPollingLoop() { // Multiple epoll_events can be received for a single file descriptor per // epoll_wait call. - for (const auto &EpollEvent : EpollEventBuffer) { - if (EpollEvent.data.fd == InotifyPollingStopSignal.FDRead) { + for (int i = 0; i < EpollWaitResult; ++i) { + if (EpollEventBuffer[i].data.fd == InotifyPollingStopSignal.FDRead) { StopWork(); return; } From b131ad0be2849fe367eac4ad7cc6eca198a08a28 Mon Sep 17 00:00:00 2001 From: Julie Hockett Date: Fri, 12 Jul 2019 22:19:02 +0000 Subject: [PATCH 004/451] [clang-doc] Fix failing tests on Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests on Windows were failing due to path separator differences. 
'/' was being used as separator in the expected output, paths in expected output are now changed to their native form before comparing them to the actual output. Committed on behalf of Diego Astiazarán (diegoaat97@gmail.com). Differential Revision: https://reviews.llvm.org/D64669 llvm-svn: 365967 --- .../unittests/clang-doc/HTMLGeneratorTest.cpp | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/clang-tools-extra/unittests/clang-doc/HTMLGeneratorTest.cpp b/clang-tools-extra/unittests/clang-doc/HTMLGeneratorTest.cpp index e95ceb878a68a..eabc6d45fe581 100644 --- a/clang-tools-extra/unittests/clang-doc/HTMLGeneratorTest.cpp +++ b/clang-tools-extra/unittests/clang-doc/HTMLGeneratorTest.cpp @@ -79,10 +79,11 @@ TEST(HTMLGeneratorTest, emitRecordHTML) { I.DefLoc = Location(10, llvm::SmallString<16>{"test.cpp"}); I.Loc.emplace_back(12, llvm::SmallString<16>{"test.cpp"}); + SmallString<16> PathTo; + llvm::sys::path::native("path/to", PathTo); I.Members.emplace_back("int", "X/Y", "X", AccessSpecifier::AS_private); I.TagType = TagTypeKind::TTK_Class; - I.Parents.emplace_back(EmptySID, "F", InfoType::IT_record, - llvm::SmallString<128>("path/to")); + I.Parents.emplace_back(EmptySID, "F", InfoType::IT_record, PathTo); I.VirtualParents.emplace_back(EmptySID, "G", InfoType::IT_record); I.ChildRecords.emplace_back(EmptySID, "ChildStruct", InfoType::IT_record); @@ -97,6 +98,10 @@ TEST(HTMLGeneratorTest, emitRecordHTML) { llvm::raw_string_ostream Actual(Buffer); auto Err = G->generateDocForInfo(&I, Actual); assert(!Err); + SmallString<16> PathToF; + llvm::sys::path::native("../../../path/to/F.html", PathToF); + SmallString<16> PathToInt; + llvm::sys::path::native("../int.html", PathToInt); std::string Expected = R"raw( class r @@ -107,12 +112,14 @@ TEST(HTMLGeneratorTest, emitRecordHTML) {

Inherits from - F + F , G

Members

    -
  • private int X
  • +
  • private int X

Records

    @@ -143,8 +150,10 @@ TEST(HTMLGeneratorTest, emitFunctionHTML) { I.DefLoc = Location(10, llvm::SmallString<16>{"test.cpp"}); I.Loc.emplace_back(12, llvm::SmallString<16>{"test.cpp"}); - I.ReturnType = TypeInfo(EmptySID, "float", InfoType::IT_default, "path/to"); - I.Params.emplace_back("int", "path/to", "P"); + SmallString<16> PathTo; + llvm::sys::path::native("path/to", PathTo); + I.ReturnType = TypeInfo(EmptySID, "float", InfoType::IT_default, PathTo); + I.Params.emplace_back("int", PathTo, "P"); I.IsMethod = true; I.Parent = Reference(EmptySID, "Parent", InfoType::IT_record); @@ -154,15 +163,21 @@ TEST(HTMLGeneratorTest, emitFunctionHTML) { llvm::raw_string_ostream Actual(Buffer); auto Err = G->generateDocForInfo(&I, Actual); assert(!Err); + SmallString<16> PathToFloat; + llvm::sys::path::native("path/to/float.html", PathToFloat); + SmallString<16> PathToInt; + llvm::sys::path::native("path/to/int.html", PathToInt); std::string Expected = R"raw(

    f

    - float + float f( - int + int P)

    From 000ba715ddbd2a7af17534105f8a0916d4168c3e Mon Sep 17 00:00:00 2001 From: Jan Korous Date: Fri, 12 Jul 2019 22:25:17 +0000 Subject: [PATCH 005/451] [DirectoryWatcher][NFC] Silence warnings in release build llvm-svn: 365968 --- .../linux/DirectoryWatcher-linux.cpp | 5 ++++- .../DirectoryWatcher/DirectoryWatcherTest.cpp | 13 ++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp b/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp index 87d133f46d8e8..0c9f799b638d6 100644 --- a/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp +++ b/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp @@ -55,7 +55,10 @@ struct SemaphorePipe { }; void signal() { - ssize_t Result = llvm::sys::RetryAfterSignal(-1, write, FDWrite, "A", 1); +#ifndef NDEBUG + ssize_t Result = +#endif + llvm::sys::RetryAfterSignal(-1, write, FDWrite, "A", 1); assert(Result != -1); } ~SemaphorePipe() { diff --git a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp index a2c50fc7d000f..0808ff47dee89 100644 --- a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp +++ b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp @@ -38,12 +38,18 @@ struct DirectoryWatcherTestFixture { DirectoryWatcherTestFixture() { SmallString<128> pathBuf; - std::error_code UniqDirRes = createUniqueDirectory("dirwatcher", pathBuf); +#ifndef NDEBUG + std::error_code UniqDirRes = +#endif + createUniqueDirectory("dirwatcher", pathBuf); assert(!UniqDirRes); TestRootDir = pathBuf.str(); path::append(pathBuf, "watch"); TestWatchedDir = pathBuf.str(); - std::error_code CreateDirRes = create_directory(TestWatchedDir, false); +#ifndef NDEBUG + std::error_code CreateDirRes = +#endif + create_directory(TestWatchedDir, false); assert(!CreateDirRes); } @@ -415,8 +421,9 @@ TEST(DirectoryWatcherTest, ChangeMetadata) { const int FD = HopefullyTheFD.get(); 
const TimePoint<> NewTimePt = std::chrono::system_clock::now() - std::chrono::minutes(1); - +#ifndef NDEBUG std::error_code setTimeRes = +#endif llvm::sys::fs::setLastAccessAndModificationTime(FD, NewTimePt, NewTimePt); assert(!setTimeRes); From 9178b10163f758cbf8a5290ea6a827990427ddc0 Mon Sep 17 00:00:00 2001 From: Alex Lorenz Date: Fri, 12 Jul 2019 22:29:44 +0000 Subject: [PATCH 006/451] NFC: utils/perf-training: Python 3 compatibility for lit.cfg The output of subprocess.check_output is now bytes. We need to decode it. llvm-svn: 365969 --- clang/utils/perf-training/lit.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/utils/perf-training/lit.cfg b/clang/utils/perf-training/lit.cfg index 671d44f83b948..be822d66e38ce 100644 --- a/clang/utils/perf-training/lit.cfg +++ b/clang/utils/perf-training/lit.cfg @@ -10,7 +10,7 @@ def getSysrootFlagsOnDarwin(config, lit_config): # default system root path. if 'darwin' in config.target_triple: try: - out = subprocess.check_output(['xcrun', '--show-sdk-path']).strip() + out = subprocess.check_output(['xcrun', '--show-sdk-path']).strip().decode() res = 0 except OSError: res = -1 From db101864bdc938deb1d63fe4f7da761bd38e5cae Mon Sep 17 00:00:00 2001 From: Alina Sbirlea Date: Fri, 12 Jul 2019 22:30:30 +0000 Subject: [PATCH 007/451] [MemorySSA] Use SetVector to avoid nondeterminism. Summary: Use a SetVector for DeadBlockSet. Resolves PR42574. 
Reviewers: george.burgess.iv, uabelho, dblaikie Subscribers: jlebar, Prazek, mgrang, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64601 llvm-svn: 365970 --- llvm/include/llvm/Analysis/MemorySSAUpdater.h | 3 +- llvm/lib/Analysis/MemorySSAUpdater.cpp | 2 +- .../lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 4 +- .../Transforms/Scalar/SimpleLoopUnswitch.cpp | 6 +- llvm/lib/Transforms/Utils/Local.cpp | 2 +- llvm/lib/Transforms/Utils/LoopSimplify.cpp | 3 +- .../test/Analysis/MemorySSA/nondeterminism.ll | 122 ++++++++++++++++++ 7 files changed, 133 insertions(+), 9 deletions(-) create mode 100644 llvm/test/Analysis/MemorySSA/nondeterminism.ll diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h index 6467d41cc0bf7..d4d8040c1ff66 100644 --- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h +++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h @@ -31,6 +31,7 @@ #ifndef LLVM_ANALYSIS_MEMORYSSAUPDATER_H #define LLVM_ANALYSIS_MEMORYSSAUPDATER_H +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -243,7 +244,7 @@ class MemorySSAUpdater { /// Deleted blocks still have successor info, but their predecessor edges and /// Phi nodes may already be updated. Instructions in DeadBlocks should be /// deleted after this call. - void removeBlocks(const SmallPtrSetImpl &DeadBlocks); + void removeBlocks(const SmallSetVector &DeadBlocks); /// Instruction I will be changed to an unreachable. 
Remove all accesses in /// I's block that follow I (inclusive), and update the Phis in the blocks' diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp index 19559a62eb9ea..4c1feee7fd9af 100644 --- a/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -1247,7 +1247,7 @@ void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA, bool OptimizePhis) { } void MemorySSAUpdater::removeBlocks( - const SmallPtrSetImpl &DeadBlocks) { + const SmallSetVector &DeadBlocks) { // First delete all uses of BB in MemoryPhis. for (BasicBlock *BB : DeadBlocks) { Instruction *TI = BB->getTerminator(); diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index c650abb412d9b..046f4c8af492e 100644 --- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -428,8 +428,8 @@ class ConstantTerminatorFoldingImpl { /// relevant updates to DT and LI. 
void deleteDeadLoopBlocks() { if (MSSAU) { - SmallPtrSet DeadLoopBlocksSet(DeadLoopBlocks.begin(), - DeadLoopBlocks.end()); + SmallSetVector DeadLoopBlocksSet(DeadLoopBlocks.begin(), + DeadLoopBlocks.end()); MSSAU->removeBlocks(DeadLoopBlocksSet); } diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 9715329223827..aeac6f548b32e 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -1463,8 +1463,8 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef ExitBlocks, // Remove all MemorySSA in the dead blocks if (MSSAU) { - SmallPtrSet DeadBlockSet(DeadBlocks.begin(), - DeadBlocks.end()); + SmallSetVector DeadBlockSet(DeadBlocks.begin(), + DeadBlocks.end()); MSSAU->removeBlocks(DeadBlockSet); } @@ -1482,7 +1482,7 @@ static void deleteDeadBlocksFromLoop(Loop &L, MemorySSAUpdater *MSSAU) { // Find all the dead blocks tied to this loop, and remove them from their // successors. - SmallPtrSet DeadBlockSet; + SmallSetVector DeadBlockSet; // Start with loop/exit blocks and get a transitive closure of reachable dead // blocks. 
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 91d33cb0f20ea..39b6b889f91c4 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2238,7 +2238,7 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, assert(Reachable.size() < F.size()); NumRemoved += F.size()-Reachable.size(); - SmallPtrSet DeadBlockSet; + SmallSetVector DeadBlockSet; for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) { auto *BB = &*I; if (Reachable.count(BB)) diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 5ec12aafff05b..7e6da02d57077 100644 --- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -681,7 +681,8 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl &Worklist, } DT->eraseNode(ExitingBlock); if (MSSAU) { - SmallPtrSet ExitBlockSet{ExitingBlock}; + SmallSetVector ExitBlockSet; + ExitBlockSet.insert(ExitingBlock); MSSAU->removeBlocks(ExitBlockSet); } diff --git a/llvm/test/Analysis/MemorySSA/nondeterminism.ll b/llvm/test/Analysis/MemorySSA/nondeterminism.ll new file mode 100644 index 0000000000000..0bb3df30b5878 --- /dev/null +++ b/llvm/test/Analysis/MemorySSA/nondeterminism.ll @@ -0,0 +1,122 @@ +; RUN: opt -simplifycfg -enable-mssa-loop-dependency -S --preserve-ll-uselistorder %s | FileCheck %s +; REQUIRES: x86-registered-target +; CHECK-LABEL: @n +; CHECK: uselistorder i16 0, { 3, 2, 4, 1, 5, 0, 6 } + +; Note: test was added in an effort to ensure determinism when updating memoryssa. See PR42574. +; If the uselistorder check becomes no longer relevant, the test can be disabled or removed. 
+ +%rec9 = type { i16, i32, i32 } + +@a = global [1 x [1 x %rec9]] zeroinitializer + +define i16 @n() { + br label %..split_crit_edge + +..split_crit_edge: ; preds = %0 + br label %.split + +bb4.us4: ; preds = %bb2.split.us32, %bb6.us28 + %i.4.01.us5 = phi i16 [ %_tmp49.us30, %bb6.us28 ] + br label %g.exit4.us21 + +bb1.i.us14: ; preds = %bb4.us4 + br label %g.exit4.us21 + +g.exit4.us21: ; preds = %bb1.i.us14, %g.exit4.critedge.us9 + %i.4.02.us22 = phi i16 [ %i.4.01.us5, %bb4.us4 ], [ %i.4.01.us5, %bb1.i.us14 ] + br label %bb6.us28 + +bb5.us26: ; preds = %g.exit4.us21 + br label %bb6.us28 + +bb6.us28: ; preds = %bb5.us26, %g.exit4.us21 + %i.4.03.us29 = phi i16 [ %i.4.02.us22, %bb5.us26 ], [ %i.4.02.us22, %g.exit4.us21 ] + %_tmp49.us30 = add nuw nsw i16 %i.4.03.us29, 1 + br label %bb4.us4 + +bb4.us.us: ; preds = %bb2.split.us.us, %bb6.us.us + %i.4.01.us.us = phi i16 [ %_tmp49.us.us, %bb6.us.us ] + br label %bb1.i.us.us + +bb1.i.us.us: ; preds = %bb4.us.us + br label %g.exit4.us.us + +g.exit4.us.us: ; preds = %bb1.i.us.us, %g.exit4.critedge.us.us + %i.4.02.us.us = phi i16 [ %i.4.01.us.us, %bb1.i.us.us ] + br label %bb5.us.us + +bb5.us.us: ; preds = %g.exit4.us.us + br label %bb6.us.us + +bb6.us.us: ; preds = %bb5.us.us, %g.exit4.us.us + %i.4.03.us.us = phi i16 [ %i.4.02.us.us, %bb5.us.us ] + %_tmp49.us.us = add nuw nsw i16 %i.4.03.us.us, 1 + br label %bb4.us.us + + +.split: ; preds = %..split_crit_edge + br label %bb2 + +bb2: ; preds = %.split, %bb7 + %h.3.0 = phi i16 [ undef, %.split ], [ %_tmp53, %bb7 ] + br label %bb2.bb2.split_crit_edge + +bb2.bb2.split_crit_edge: ; preds = %bb2 + br label %bb2.split + +bb2.split.us: ; preds = %bb2 + br label %bb4.us + +bb4.us: ; preds = %bb6.us, %bb2.split.us + %i.4.01.us = phi i16 [ 0, %bb2.split.us ] + br label %bb1.i.us + +g.exit4.critedge.us: ; preds = %bb4.us + br label %g.exit4.us + +bb1.i.us: ; preds = %bb4.us + br label %g.exit4.us + +g.exit4.us: ; preds = %bb1.i.us, %g.exit4.critedge.us + %i.4.02.us = phi i16 [ 
%i.4.01.us, %g.exit4.critedge.us ], [ %i.4.01.us, %bb1.i.us ] + br label %bb5.us + +bb5.us: ; preds = %g.exit4.us + br label %bb7 + +bb2.split: ; preds = %bb2.bb2.split_crit_edge + br label %bb4 + +bb4: ; preds = %bb2.split, %bb6 + %i.4.01 = phi i16 [ 0, %bb2.split ] + %_tmp16 = getelementptr [1 x [1 x %rec9]], [1 x [1 x %rec9]]* @a, i16 0, i16 %h.3.0, i16 %i.4.01, i32 0 + %_tmp17 = load i16, i16* %_tmp16, align 1 + br label %g.exit4.critedge + +bb1.i: ; preds = %bb4 + br label %g.exit4 + +g.exit4.critedge: ; preds = %bb4 + %_tmp28.c = getelementptr [1 x [1 x %rec9]], [1 x [1 x %rec9]]* @a, i16 0, i16 %h.3.0, i16 %i.4.01, i32 1 + %_tmp29.c = load i32, i32* %_tmp28.c, align 1 + %_tmp30.c = trunc i32 %_tmp29.c to i16 + br label %g.exit4 + +g.exit4: ; preds = %g.exit4.critedge, %bb1.i + %i.4.02 = phi i16 [ %i.4.01, %g.exit4.critedge ], [ %i.4.01, %bb1.i ] + %_tmp41 = getelementptr [1 x [1 x %rec9]], [1 x [1 x %rec9]]* @a, i16 0, i16 %h.3.0, i16 %i.4.02, i32 2 + br label %bb6 + +bb5: ; preds = %g.exit4 + br label %bb6 + +bb6: ; preds = %bb5, %g.exit4 + %i.4.03 = phi i16 [ %i.4.02, %bb5 ], [ %i.4.02, %g.exit4 ] + %_tmp49 = add nuw nsw i16 %i.4.03, 1 + br label %bb7 + +bb7: ; preds = %bb7.us-lcssa.us, %bb7.us-lcssa + %_tmp53 = add nsw i16 %h.3.0, 1 + br label %bb2 +} From b1bff76e22bd39eb46dcae49891fda1cf1cc0bd5 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 12 Jul 2019 22:37:55 +0000 Subject: [PATCH 008/451] isBytewiseValue checks ConstantVector element by element Summary: Vector of the same value with few undefs will still be considered "Bytewise" Reviewers: eugenis, pcc, jfb Reviewed By: jfb Subscribers: dexonsmith, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64031 llvm-svn: 365971 --- llvm/lib/Analysis/ValueTracking.cpp | 7 +------ llvm/unittests/Analysis/ValueTrackingTest.cpp | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp 
index cf8fa9cee3fc8..ad8034b2d7bc5 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -3253,12 +3253,7 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { return Val; } - if (isa(C)) { - Constant *Splat = cast(C)->getSplatValue(); - return Splat ? isBytewiseValue(Splat, DL) : nullptr; - } - - if (isa(C) || isa(C)) { + if (isa(C)) { Value *Val = UndefInt8; for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I), DL)))) diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index f4316cc8575c4..96b41d93d568a 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -878,7 +878,7 @@ const std::pair IsBytewiseValueTests[] = { "<4 x i8> ", }, { - "", + "i8 5", "<2 x i8> < i8 5, i8 undef >", }, { From 1dfae6fe505ffedf97e9f36d207cb8bbdc9255d8 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Fri, 12 Jul 2019 22:42:01 +0000 Subject: [PATCH 009/451] [AMDGPU] use v32f32 for 3 mfma intrinsics These should really use v32f32, but were defined as v32i32 due to the lack of the v32f32 type. 
Differential Revision: https://reviews.llvm.org/D64667 llvm-svn: 365972 --- llvm/include/llvm/IR/Intrinsics.td | 1 + llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 12 ++-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 9 +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 +- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 7 +- llvm/lib/Target/AMDGPU/SIInstructions.td | 12 ++++ llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 8 +-- .../CodeGen/AMDGPU/agpr-register-count.ll | 8 +-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll | 64 +++++++++---------- llvm/test/CodeGen/AMDGPU/spill-agpr.ll | 10 +-- .../test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll | 10 ++- 11 files changed, 87 insertions(+), 60 deletions(-) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 62e94108a7355..8276d7535c3b2 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -261,6 +261,7 @@ def llvm_v2f32_ty : LLVMType; // 2 x float def llvm_v4f32_ty : LLVMType; // 4 x float def llvm_v8f32_ty : LLVMType; // 8 x float def llvm_v16f32_ty : LLVMType; // 16 x float +def llvm_v32f32_ty : LLVMType; // 32 x float def llvm_v1f64_ty : LLVMType; // 1 x double def llvm_v2f64_ty : LLVMType; // 2 x double def llvm_v4f64_ty : LLVMType; // 4 x double diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 56878e1240749..43e827ec6ab99 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1663,8 +1663,8 @@ def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicNoRtn; def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicNoRtn; // llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp -def int_amdgcn_mfma_f32_32x32x1f32 : Intrinsic<[llvm_v32i32_ty], - [llvm_float_ty, llvm_float_ty, llvm_v32i32_ty, +def int_amdgcn_mfma_f32_32x32x1f32 : Intrinsic<[llvm_v32f32_ty], + [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], 
[IntrConvergent, IntrNoMem]>; def int_amdgcn_mfma_f32_16x16x1f32 : Intrinsic<[llvm_v16f32_ty], @@ -1683,8 +1683,8 @@ def int_amdgcn_mfma_f32_16x16x4f32 : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; -def int_amdgcn_mfma_f32_32x32x4f16 : Intrinsic<[llvm_v32i32_ty], - [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32i32_ty, +def int_amdgcn_mfma_f32_32x32x4f16 : Intrinsic<[llvm_v32f32_ty], + [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; def int_amdgcn_mfma_f32_16x16x4f16 : Intrinsic<[llvm_v16f32_ty], @@ -1723,8 +1723,8 @@ def int_amdgcn_mfma_i32_16x16x16i8 : Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; -def int_amdgcn_mfma_f32_32x32x2bf16 : Intrinsic<[llvm_v32i32_ty], - [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32i32_ty, +def int_amdgcn_mfma_f32_32x32x2bf16 : Intrinsic<[llvm_v32f32_ty], + [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; def int_amdgcn_mfma_f32_16x16x2bf16 : Intrinsic<[llvm_v16f32_ty], diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 56922b0505064..14ae62968c65b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -165,6 +165,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction(ISD::LOAD, MVT::v16f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32); + setOperationAction(ISD::LOAD, MVT::v32f32, Promote); + AddPromotedToType(ISD::LOAD, MVT::v32f32, MVT::v32i32); + setOperationAction(ISD::LOAD, MVT::i64, Promote); AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32); @@ -256,6 +259,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, 
setOperationAction(ISD::STORE, MVT::v16f32, Promote); AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32); + setOperationAction(ISD::STORE, MVT::v32f32, Promote); + AddPromotedToType(ISD::STORE, MVT::v32f32, MVT::v32i32); + setOperationAction(ISD::STORE, MVT::i64, Promote); AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32); @@ -355,7 +361,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v5i32, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f32, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom); setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 3eb1b1c91066c..b90a0d28e9ef0 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -153,6 +153,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, if (Subtarget->hasMAIInsts()) { addRegisterClass(MVT::v32i32, &AMDGPU::AReg_1024RegClass); + addRegisterClass(MVT::v32f32, &AMDGPU::AReg_1024RegClass); } computeRegisterProperties(Subtarget->getRegisterInfo()); @@ -263,8 +264,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, // We only support LOAD/STORE and vector manipulation ops for vectors // with > 4 elements. 
- for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, - MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16, MVT::v32i32 }) { + for (MVT VT : { MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, + MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16, + MVT::v32i32, MVT::v32f32 }) { for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { switch (Op) { case ISD::LOAD: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 98928f00a4568..c382c816e0b40 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2178,14 +2178,13 @@ def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>; def VOP_V4F32_F32_F32_V4F32 : VOPProfile <[v4f32, f32, f32, v4f32]>; def VOP_V16F32_F32_F32_V16F32 : VOPProfile <[v16f32, f32, f32, v16f32]>; -// TODO: define v32f32 -def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32i32, f32, f32, v32i32]>; +def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32f32, f32, f32, v32f32]>; def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile <[v4f32, v4f16, v4f16, v4f32]>; def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>; -def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32i32, v4f16, v4f16, v32i32]>; +def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>; def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile <[v4f32, v2i16, v2i16, v4f32]>; def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>; -def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32i32, v2i16, v2i16, v32i32]>; +def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>; def VOP_V4I32_I32_I32_V4I32 : VOPProfile <[v4i32, i32, i32, v4i32]>; def VOP_V16I32_I32_I32_V16I32 : VOPProfile <[v16i32, i32, i32, v16i32]>; def VOP_V32I32_I32_I32_V32I32 : VOPProfile <[v32i32, i32, i32, v32i32]>; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index fd4b6f5e3e31d..70f20bb693704 100644 --- 
a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -942,6 +942,14 @@ foreach Index = 0-31 in { def Insert_Element_v32i32_#Index : Insert_Element < i32, v32i32, Index, !cast(sub#Index) >; + + def Extract_Element_v32f32_#Index : Extract_Element < + f32, v32f32, Index, !cast(sub#Index) + >; + + def Insert_Element_v32f32_#Index : Insert_Element < + f32, v32f32, Index, !cast(sub#Index) + >; } // FIXME: Why do only some of these type combinations for SReg and @@ -1034,6 +1042,10 @@ def : BitConvert ; def : BitConvert ; def : BitConvert ; +// 1024-bit bitcast +def : BitConvert ; +def : BitConvert ; + /********** =================== **********/ /********** Src & Dst modifiers **********/ /********** =================== **********/ diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 14d41d84cbadc..4767f3c30ed32 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -757,11 +757,11 @@ def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 3 let isAllocatable = 0; } -def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32], 32, (add SGPR_1024Regs)> { +def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, (add SGPR_1024Regs)> { let AllocationPriority = 19; } -def SReg_1024 : RegisterClass<"AMDGPU", [v32i32], 32, +def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, (add SGPR_1024)> { let CopyCost = 16; let AllocationPriority = 19; @@ -812,7 +812,7 @@ def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> { let AllocationPriority = 7; } -def VReg_1024 : RegisterClass<"AMDGPU", [v32i32], 32, (add VGPR_1024)> { +def VReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, (add VGPR_1024)> { let Size = 1024; let CopyCost = 32; let AllocationPriority = 8; @@ -840,7 +840,7 @@ def AReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add AGPR_512)> { } // TODO: add v32f32 value type 
-def AReg_1024 : RegisterClass<"AMDGPU", [v32i32], 32, (add AGPR_1024)> { +def AReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, (add AGPR_1024)> { let Size = 1024; let CopyCost = 65; let AllocationPriority = 8; diff --git a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll index ab4fcc54f65c8..dfedd2402a03f 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll +++ b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll @@ -1,15 +1,15 @@ ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -declare <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x i32>, i32, i32, i32) +declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32) ; GCN-LABEL: {{^}}test_32_agprs: ; GCN: v_mfma_f32_32x32x1f32 a[0:31], {{v[0-9]+}}, {{v[0-9]+}}, 0 ; GCN-NOT: v28 ; GCN: NumVgprs: 32 ; GCN: VGPRBlocks: 7 -define amdgpu_kernel void @test_32_agprs(<32 x i32> addrspace(1)* %arg) { +define amdgpu_kernel void @test_32_agprs(<32 x float> addrspace(1)* %arg) { bb: - %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x i32> , i32 0, i32 0, i32 0) - store <32 x i32> %mai.1, <32 x i32> addrspace(1)* %arg + %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> , i32 0, i32 0, i32 0) + store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll index 0ce08777c14b5..5ac03632fbbe0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll @@ -1,11 +1,11 @@ ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -declare <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x i32>, i32, i32, i32) +declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, 
float, <32 x float>, i32, i32, i32) declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32, i32, i32) declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i32, i32, i32) declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float, float, <16 x float>, i32, i32, i32) declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float, float, <4 x float>, i32, i32, i32) -declare <32 x i32> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half>, <4 x half>, <32 x i32>, i32, i32, i32) +declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half>, <4 x half>, <32 x float>, i32, i32, i32) declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half>, <4 x half>, <16 x float>, i32, i32, i32) declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half>, <4 x half>, <4 x float>, i32, i32, i32) declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half>, <4 x half>, <16 x float>, i32, i32, i32) @@ -15,7 +15,7 @@ declare <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32, i32, <16 x i32>, i32, i3 declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32, i32, i32) declare <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32, i32, <16 x i32>, i32, i32, i32) declare <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32, i32, <4 x i32>, i32, i32, i32) -declare <32 x i32> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16>, <2 x i16>, <32 x i32>, i32, i32, i32) +declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16>, <2 x i16>, <32 x float>, i32, i32, i32) declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16>, <2 x i16>, <16 x float>, i32, i32, i32) declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16>, <2 x i16>, <4 x float>, i32, i32, i32) declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16>, <2 x i16>, <16 x float>, i32, i32, i32) @@ -100,11 +100,11 @@ declare i32 @llvm.amdgcn.workitem.id.x() ; GCN-DAG: global_store_dwordx4 ; GCN-DAG: global_store_dwordx4 ; GCN-DAG: global_store_dwordx4 -define 
amdgpu_kernel void @test_mfma_f32_32x32x1f32(<32 x i32> addrspace(1)* %arg) { +define amdgpu_kernel void @test_mfma_f32_32x32x1f32(<32 x float> addrspace(1)* %arg) { bb: - %in.1 = load <32 x i32>, <32 x i32> addrspace(1)* %arg - %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x i32> %in.1, i32 1, i32 2, i32 3) - store <32 x i32> %mai.1, <32 x i32> addrspace(1)* %arg + %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg + %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 1, i32 2, i32 3) + store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg ret void } @@ -326,14 +326,14 @@ bb: ; GCN-DAG: global_store_dwordx4 ; GCN-DAG: global_store_dwordx4 ; GCN-DAG: global_store_dwordx4 -define amdgpu_kernel void @test_mfma_f32_32x32x4f16(<32 x i32> addrspace(1)* %arg, <4 x half> addrspace(1)* %c) { +define amdgpu_kernel void @test_mfma_f32_32x32x4f16(<32 x float> addrspace(1)* %arg, <4 x half> addrspace(1)* %c) { bb: - %in.1 = load <32 x i32>, <32 x i32> addrspace(1)* %arg + %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg %c.1 = load <4 x half>, <4 x half> addrspace(1)* %c %c2p = getelementptr <4 x half>, <4 x half> addrspace(1)* %c, i64 1 %c.2 = load <4 x half>, <4 x half> addrspace(1)* %c2p - %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %c.1, <4 x half> %c.2, <32 x i32> %in.1, i32 1, i32 2, i32 3) - store <32 x i32> %mai.1, <32 x i32> addrspace(1)* %arg + %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %c.1, <4 x half> %c.2, <32 x float> %in.1, i32 1, i32 2, i32 3) + store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg ret void } @@ -794,13 +794,13 @@ bb: ; GCN-DAG: global_store_dwordx4 ; GCN-DAG: global_store_dwordx4 ; GCN-DAG: global_store_dwordx4 -define amdgpu_kernel void @test_mfma_f32_32x32x2bf16(<32 x i32> addrspace(1)* %arg) { +define amdgpu_kernel void @test_mfma_f32_32x32x2bf16(<32 
x float> addrspace(1)* %arg) { bb: - %in.1 = load <32 x i32>, <32 x i32> addrspace(1)* %arg + %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg %a = bitcast i32 1 to <2 x i16> %b = bitcast i32 2 to <2 x i16> - %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %a, <2 x i16> %b, <32 x i32> %in.1, i32 1, i32 2, i32 3) - store <32 x i32> %mai.1, <32 x i32> addrspace(1)* %arg + %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %a, <2 x i16> %b, <32 x float> %in.1, i32 1, i32 2, i32 3) + store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg ret void } @@ -957,12 +957,12 @@ bb: ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_forward_acc: ; GCN: v_mfma_f32_32x32x1f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}] ; GCN-NEXT: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]] -define amdgpu_kernel void @test_mfma_f32_32x32x1f32_forward_acc(<32 x i32> addrspace(1)* %arg) { +define amdgpu_kernel void @test_mfma_f32_32x32x1f32_forward_acc(<32 x float> addrspace(1)* %arg) { bb: - %in.1 = load <32 x i32>, <32 x i32> addrspace(1)* %arg - %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x i32> %in.1, i32 0, i32 0, i32 0) - %mai.2 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x i32> %mai.1, i32 0, i32 0, i32 0) - store <32 x i32> %mai.2, <32 x i32> addrspace(1)* %arg + %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg + %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0) + %mai.2 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %mai.1, i32 0, i32 0, i32 0) + store <32 x float> %mai.2, <32 x float> addrspace(1)* %arg ret void } @@ -1112,10 +1112,10 @@ bb: ; GCN-DAG: global_store_dwordx4 ; GCN-DAG: global_store_dwordx4 ; GCN-DAG: global_store_dwordx4 -define 
amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm_splat(<32 x i32> addrspace(1)* %arg) { +define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm_splat(<32 x float> addrspace(1)* %arg) { bb: - %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x i32> , i32 0, i32 0, i32 0) - store <32 x i32> %mai.1, <32 x i32> addrspace(1)* %arg + %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> , i32 0, i32 0, i32 0) + store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg ret void } @@ -1184,7 +1184,7 @@ bb: ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_imm: ; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0 -; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 1 +; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 1.0 ; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0 ; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0 ; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0 @@ -1256,10 +1256,10 @@ bb: ; GCN-DAG: global_store_dwordx4 ; GCN-DAG: global_store_dwordx4 ; GCN-DAG: global_store_dwordx4 -define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm(<32 x i32> addrspace(1)* %arg) { +define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm(<32 x float> addrspace(1)* %arg) { bb: - %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x i32> , i32 0, i32 0, i32 0) - store <32 x i32> %mai.1, <32 x i32> addrspace(1)* %arg + %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> , i32 0, i32 0, i32 0) + store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg ret void } @@ -1350,12 +1350,12 @@ bb: ; GCN-DAG: global_store_dwordx4 ; GCN-DAG: global_store_dwordx4 ; GCN-DAG: global_store_dwordx4 -define amdgpu_kernel void @test_mfma_f32_32x32x1f32_vecarg(<32 x i32> addrspace(1)* %arg) { +define amdgpu_kernel void @test_mfma_f32_32x32x1f32_vecarg(<32 x float> addrspace(1)* %arg) { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds 
<32 x i32>, <32 x i32> addrspace(1)* %arg, i32 %tid - %in.1 = load <32 x i32>, <32 x i32> addrspace(1)* %gep - %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x i32> %in.1, i32 1, i32 2, i32 3) - store <32 x i32> %mai.1, <32 x i32> addrspace(1)* %gep + %gep = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %arg, i32 %tid + %in.1 = load <32 x float>, <32 x float> addrspace(1)* %gep + %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 1, i32 2, i32 3) + store <32 x float> %mai.1, <32 x float> addrspace(1)* %gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.ll b/llvm/test/CodeGen/AMDGPU/spill-agpr.ll index b12a7bc72a819..9c7279a78e75e 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.ll @@ -84,23 +84,23 @@ define amdgpu_kernel void @max_10_vgprs_used_9a(i32 addrspace(1)* %p) #1 { ; A2M: buffer_load_dword v[[VSPILL:[0-9]+]], off, s[{{[0-9:]+}}], s{{[0-9]+}} offset:[[FI]] ; 4-byte Folded Reload ; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, v[[VSPILL]] ; A2V: ScratchSize: 0 -define amdgpu_kernel void @max_32regs_mfma32(i32 addrspace(1)* %arg) #3 { +define amdgpu_kernel void @max_32regs_mfma32(float addrspace(1)* %arg) #3 { bb: %v = call i32 asm sideeffect "", "=a"() br label %use use: - %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 1.0, <32 x i32> , i32 0, i32 0, i32 0) + %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 1.0, <32 x float> , i32 0, i32 0, i32 0) call void asm sideeffect "", "a"(i32 %v) - %elt1 = extractelement <32 x i32> %mai.1, i32 0 - store i32 %elt1, i32 addrspace(1)* %arg + %elt1 = extractelement <32 x float> %mai.1, i32 0 + store float %elt1, float addrspace(1)* %arg ret void } declare i32 @llvm.amdgcn.workitem.id.x() declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32, i32, i32) 
declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i32, i32, i32) -declare <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x i32>, i32, i32, i32) +declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32) attributes #0 = { nounwind "amdgpu-num-vgpr"="24" } attributes #1 = { nounwind "amdgpu-num-vgpr"="8" } diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll index b101e41833b8e..6eef782d1906d 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll @@ -233,19 +233,23 @@ define amdgpu_kernel void @max_256_vgprs_spill_9x32(<32 x float> addrspace(1)* % ret void } +; FIXME: adding an AReg_1024 register class for v32f32 and v32i32 +; produces unnecessary copies and we still have some amount +; of conventional spilling. + ; GCN-LABEL: {{^}}max_256_vgprs_spill_9x32_2bb: ; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0 ; GFX900-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1 -; GFX908-NOT: SCRATCH_RSRC +; GFX908-FIXME-NOT: SCRATCH_RSRC ; GFX908-DAG: v_accvgpr_write_b32 a0, v ; GFX900: buffer_store_dword v ; GFX900: buffer_load_dword v -; GFX908-NOT: buffer_ +; GFX908-FIXME-NOT: buffer_ ; GFX908-DAG v_accvgpr_read_b32 ; GCN: NumVgprs: 256 ; GFX900: ScratchSize: 580 -; GFX908: ScratchSize: 0 +; GFX908-FIXME: ScratchSize: 0 ; GCN: VGPRBlocks: 63 ; GCN: NumVGPRsForWavesPerEU: 256 define amdgpu_kernel void @max_256_vgprs_spill_9x32_2bb(<32 x float> addrspace(1)* %p) { From 882fdf68b74d3199cb84b062709b702ed610f547 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Fri, 12 Jul 2019 23:01:48 +0000 Subject: [PATCH 010/451] Fix non-conformance in `std::tuple`. Previously we implemented all one trillion tuple-like constructors using a single generic overload.
This worked fairly well, except that it differed in behavior from the standard version because it didn't consider both T&& and T const&. This was observable for certain types. This patch addresses that issue by splitting the generic constructor in two. We now provide both T&& and T const& versions of the tuple-like constructors (sort of). llvm-svn: 365973 --- libcxx/include/tuple | 51 +++++++++++-------- .../tuple.cnstr/convert_copy.pass.cpp | 16 ++++++ 2 files changed, 47 insertions(+), 20 deletions(-) diff --git a/libcxx/include/tuple b/libcxx/include/tuple index de30e86c72b48..031d25a9854fc 100644 --- a/libcxx/include/tuple +++ b/libcxx/include/tuple @@ -601,6 +601,25 @@ class _LIBCPP_TEMPLATE_VIS tuple } }; + template + using _EnableImplicitTupleLikeConstructor = _EnableIf< + _CheckTupleLikeConstructor< + __tuple_like_with_size<_Tuple, sizeof...(_Tp)>::value + && !_PackExpandsToThisTuple<_Tuple>::value + && (!is_lvalue_reference<_Tuple>::value || !_DisableIfLValue) + >::template __enable_implicit<_Tuple>(), + bool + >; + + template + using _EnableExplicitTupleLikeConstructor = _EnableIf< + _CheckTupleLikeConstructor< + __tuple_like_with_size<_Tuple, sizeof...(_Tp)>::value + && !_PackExpandsToThisTuple<_Tuple>::value + && (!is_lvalue_reference<_Tuple>::value || !_DisableIfLValue) + >::template __enable_explicit<_Tuple>(), + bool + >; template friend _LIBCPP_CONSTEXPR_AFTER_CXX11 typename tuple_element<_Jp, tuple<_Up...> >::type& get(tuple<_Up...>&) _NOEXCEPT; template friend _LIBCPP_CONSTEXPR_AFTER_CXX11 @@ -815,35 +834,27 @@ public: typename __make_tuple_types::type(), _VSTD::forward<_Up>(__u)...) 
{} - template ::value - && !_PackExpandsToThisTuple<_Tuple>::value - >::template __enable_implicit<_Tuple>(), - bool - >::type = false - > + template = false> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 tuple(_Tuple&& __t) _NOEXCEPT_((is_nothrow_constructible<_BaseT, _Tuple>::value)) : __base_(_VSTD::forward<_Tuple>(__t)) {} - template ::value - && !_PackExpandsToThisTuple<_Tuple>::value - >::template __enable_explicit<_Tuple>(), - bool - >::type = false - > + template = false> + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 + tuple(const _Tuple& __t) _NOEXCEPT_((is_nothrow_constructible<_BaseT, const _Tuple&>::value)) + : __base_(__t) {} + template = false> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 explicit tuple(_Tuple&& __t) _NOEXCEPT_((is_nothrow_constructible<_BaseT, _Tuple>::value)) : __base_(_VSTD::forward<_Tuple>(__t)) {} + template = false> + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX11 + explicit + tuple(const _Tuple& __t) _NOEXCEPT_((is_nothrow_constructible<_BaseT, const _Tuple&>::value)) + : __base_(__t) {} + template ::value>::type> + explicit ExplicitTwo(T) {} +}; + struct B { int id_; @@ -136,6 +145,13 @@ int main(int, char**) std::tuple t2 = t1; assert(std::get<0>(t2).value == 42); } + { + static_assert(std::is_convertible::value, ""); + static_assert(std::is_convertible&&, const std::tuple&>::value, ""); + ExplicitTwo e; + std::tuple t = std::tuple(std::move(e)); + ((void)t); + } return 0; } From 51a52b58930cd1bb2351bf7017adfd55073f6553 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 12 Jul 2019 23:30:55 +0000 Subject: [PATCH 011/451] PDB HashTable: Move TraitsT from class parameter to the methods that need it The traits object is only used by a few methods. Deserializing a hash table and walking it is possible without the traits object, so it shouldn't be required to build a dummy object for that use case. 
The TraitsT object used to be a function template parameter before r327647, this restores it to that state. This makes it clear that the traits object isn't needed at all in 1 of the current 3 uses of HashTable (and I am going to add another use that doesn't need it), and that the default PdbHashTraits isn't used outside of tests. While here, also re-enable 3 checks in the test that were commented out (which requires making HashTableInternals templated and giving FooBar an operator==). No intended behavior change. Differential Revision: https://reviews.llvm.org/D64640 llvm-svn: 365974 --- .../llvm/DebugInfo/PDB/Native/HashTable.h | 63 +++++----- .../DebugInfo/PDB/Native/NamedStreamMap.h | 2 +- .../DebugInfo/PDB/Native/PDBFileBuilder.h | 2 +- .../DebugInfo/PDB/Native/NamedStreamMap.cpp | 7 +- .../DebugInfo/PDB/Native/PDBFileBuilder.cpp | 5 +- .../unittests/DebugInfo/PDB/HashTableTest.cpp | 119 ++++++++++-------- 6 files changed, 102 insertions(+), 96 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h index 86c43a482b820..b00873b575b20 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h @@ -31,21 +31,21 @@ namespace pdb { Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V); Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec); -template class HashTable; +template class HashTable; -template +template class HashTableIterator - : public iterator_facade_base, + : public iterator_facade_base, std::forward_iterator_tag, std::pair> { - friend HashTable; + friend HashTable; - HashTableIterator(const HashTable &Map, uint32_t Index, + HashTableIterator(const HashTable &Map, uint32_t Index, bool IsEnd) : Map(&Map), Index(Index), IsEnd(IsEnd) {} public: - HashTableIterator(const HashTable &Map) : Map(&Map) { + HashTableIterator(const HashTable &Map) : Map(&Map) { int I = 
Map.Present.find_first(); if (I == -1) { Index = 0; @@ -87,22 +87,14 @@ class HashTableIterator bool isEnd() const { return IsEnd; } uint32_t index() const { return Index; } - const HashTable *Map; + const HashTable *Map; uint32_t Index; bool IsEnd; }; -template struct PdbHashTraits {}; - -template <> struct PdbHashTraits { - uint32_t hashLookupKey(uint32_t N) const { return N; } - uint32_t storageKeyToLookupKey(uint32_t N) const { return N; } - uint32_t lookupKeyToStorageKey(uint32_t N) { return N; } -}; - -template > +template class HashTable { - using iterator = HashTableIterator; + using iterator = HashTableIterator; friend iterator; struct Header { @@ -114,9 +106,7 @@ class HashTable { public: HashTable() { Buckets.resize(8); } - - explicit HashTable(TraitsT Traits) : HashTable(8, std::move(Traits)) {} - HashTable(uint32_t Capacity, TraitsT Traits) : Traits(Traits) { + explicit HashTable(uint32_t Capacity) { Buckets.resize(Capacity); } @@ -221,7 +211,8 @@ class HashTable { /// Find the entry whose key has the specified hash value, using the specified /// traits defining hash function and equality. - template iterator find_as(const Key &K) const { + template + iterator find_as(const Key &K, TraitsT &Traits) const { uint32_t H = Traits.hashLookupKey(K) % capacity(); uint32_t I = H; Optional FirstUnused; @@ -252,12 +243,14 @@ class HashTable { /// Set the entry using a key type that the specified Traits can convert /// from a real key to an internal key. 
- template bool set_as(const Key &K, ValueT V) { - return set_as_internal(K, std::move(V), None); + template + bool set_as(const Key &K, ValueT V, TraitsT &Traits) { + return set_as_internal(K, std::move(V), Traits, None); } - template ValueT get(const Key &K) const { - auto Iter = find_as(K); + template + ValueT get(const Key &K, TraitsT &Traits) const { + auto Iter = find_as(K, Traits); assert(Iter != end()); return (*Iter).second; } @@ -266,7 +259,6 @@ class HashTable { bool isPresent(uint32_t K) const { return Present.test(K); } bool isDeleted(uint32_t K) const { return Deleted.test(K); } - TraitsT Traits; BucketList Buckets; mutable SparseBitVector<> Present; mutable SparseBitVector<> Deleted; @@ -274,9 +266,10 @@ class HashTable { private: /// Set the entry using a key type that the specified Traits can convert /// from a real key to an internal key. - template - bool set_as_internal(const Key &K, ValueT V, Optional InternalKey) { - auto Entry = find_as(K); + template + bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits, + Optional InternalKey) { + auto Entry = find_as(K, Traits); if (Entry != end()) { assert(isPresent(Entry.index())); assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K); @@ -293,15 +286,16 @@ class HashTable { Present.set(Entry.index()); Deleted.reset(Entry.index()); - grow(); + grow(Traits); - assert((find_as(K)) != end()); + assert((find_as(K, Traits)) != end()); return true; } static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; } - void grow() { + template + void grow(TraitsT &Traits) { uint32_t S = size(); uint32_t MaxLoad = maxLoad(capacity()); if (S < maxLoad(capacity())) @@ -313,10 +307,11 @@ class HashTable { // Growing requires rebuilding the table and re-hashing every item. Make a // copy with a larger capacity, insert everything into the copy, then swap // it in. 
- HashTable NewMap(NewCapacity, Traits); + HashTable NewMap(NewCapacity); for (auto I : Present) { auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first); - NewMap.set_as_internal(LookupKey, Buckets[I].second, Buckets[I].first); + NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits, + Buckets[I].first); } Buckets.swap(NewMap.Buckets); diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h b/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h index c49d796356c7b..1df059ffa9fda 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h @@ -59,7 +59,7 @@ class NamedStreamMap { NamedStreamMapTraits HashTraits; /// Closed hash table from Offset -> StreamNumber, where Offset is the offset /// of the stream name in NamesBuffer. - HashTable OffsetIndexMap; + HashTable OffsetIndexMap; /// Buffer of string data. std::vector NamesBuffer; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h index 72000bdc011ac..2abaa5f4cdc47 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h @@ -97,7 +97,7 @@ class PDBFileBuilder { PDBStringTableBuilder Strings; StringTableHashTraits InjectedSourceHashTraits; - HashTable InjectedSourceTable; + HashTable InjectedSourceTable; SmallVector InjectedSources; diff --git a/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp index 1c044e0c26538..4a88391494cd2 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp @@ -46,8 +46,7 @@ uint32_t NamedStreamMapTraits::lookupKeyToStorageKey(StringRef S) { return NS->appendStringData(S); } -NamedStreamMap::NamedStreamMap() - : HashTraits(*this), OffsetIndexMap(1, HashTraits) {} +NamedStreamMap::NamedStreamMap() : HashTraits(*this), 
OffsetIndexMap(1) {} Error NamedStreamMap::load(BinaryStreamReader &Stream) { uint32_t StringBufferSize; @@ -99,7 +98,7 @@ uint32_t NamedStreamMap::hashString(uint32_t Offset) const { } bool NamedStreamMap::get(StringRef Stream, uint32_t &StreamNo) const { - auto Iter = OffsetIndexMap.find_as(Stream); + auto Iter = OffsetIndexMap.find_as(Stream, HashTraits); if (Iter == OffsetIndexMap.end()) return false; StreamNo = (*Iter).second; @@ -123,5 +122,5 @@ uint32_t NamedStreamMap::appendStringData(StringRef S) { } void NamedStreamMap::set(StringRef Stream, uint32_t StreamNo) { - OffsetIndexMap.set_as(Stream, support::ulittle32_t(StreamNo)); + OffsetIndexMap.set_as(Stream, support::ulittle32_t(StreamNo), HashTraits); } diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index 84eb4fbbfa631..8f5a048ea4b56 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -34,7 +34,7 @@ using namespace llvm::support; PDBFileBuilder::PDBFileBuilder(BumpPtrAllocator &Allocator) : Allocator(Allocator), InjectedSourceHashTraits(Strings), - InjectedSourceTable(2, InjectedSourceHashTraits) {} + InjectedSourceTable(2) {} PDBFileBuilder::~PDBFileBuilder() {} @@ -189,7 +189,8 @@ Error PDBFileBuilder::finalizeMsfLayout() { static_cast(PdbRaw_SrcHeaderBlockVer::SrcVerOne); Entry.CRC = CRC.getCRC(); StringRef VName = getStringTableBuilder().getStringForId(IS.VNameIndex); - InjectedSourceTable.set_as(VName, std::move(Entry)); + InjectedSourceTable.set_as(VName, std::move(Entry), + InjectedSourceHashTraits); } uint32_t SrcHeaderBlockSize = diff --git a/llvm/unittests/DebugInfo/PDB/HashTableTest.cpp b/llvm/unittests/DebugInfo/PDB/HashTableTest.cpp index 4ebde45ff9a62..5f0695bc4cb22 100644 --- a/llvm/unittests/DebugInfo/PDB/HashTableTest.cpp +++ b/llvm/unittests/DebugInfo/PDB/HashTableTest.cpp @@ -27,27 +27,35 @@ using namespace llvm::support; namespace { -class 
HashTableInternals : public HashTable { +struct IdentityHashTraits { + uint32_t hashLookupKey(uint32_t N) const { return N; } + uint32_t storageKeyToLookupKey(uint32_t N) const { return N; } + uint32_t lookupKeyToStorageKey(uint32_t N) { return N; } +}; + +template +class HashTableInternals : public HashTable { public: - using HashTable::Buckets; - using HashTable::Present; - using HashTable::Deleted; + using HashTable::Buckets; + using HashTable::Present; + using HashTable::Deleted; }; } TEST(HashTableTest, TestSimple) { - HashTableInternals Table; + HashTableInternals<> Table; EXPECT_EQ(0u, Table.size()); EXPECT_GT(Table.capacity(), 0u); - Table.set_as(3u, 7); + IdentityHashTraits Traits; + Table.set_as(3u, 7, Traits); EXPECT_EQ(1u, Table.size()); - ASSERT_NE(Table.end(), Table.find_as(3u)); - EXPECT_EQ(7u, Table.get(3u)); + ASSERT_NE(Table.end(), Table.find_as(3u, Traits)); + EXPECT_EQ(7u, Table.get(3u, Traits)); } TEST(HashTableTest, TestCollision) { - HashTableInternals Table; + HashTableInternals<> Table; EXPECT_EQ(0u, Table.size()); EXPECT_GT(Table.capacity(), 0u); @@ -57,33 +65,35 @@ TEST(HashTableTest, TestCollision) { uint32_t N1 = Table.capacity() + 1; uint32_t N2 = 2 * N1; - Table.set_as(N1, 7); - Table.set_as(N2, 12); + IdentityHashTraits Traits; + Table.set_as(N1, 7, Traits); + Table.set_as(N2, 12, Traits); EXPECT_EQ(2u, Table.size()); - ASSERT_NE(Table.end(), Table.find_as(N1)); - ASSERT_NE(Table.end(), Table.find_as(N2)); + ASSERT_NE(Table.end(), Table.find_as(N1, Traits)); + ASSERT_NE(Table.end(), Table.find_as(N2, Traits)); - EXPECT_EQ(7u, Table.get(N1)); - EXPECT_EQ(12u, Table.get(N2)); + EXPECT_EQ(7u, Table.get(N1, Traits)); + EXPECT_EQ(12u, Table.get(N2, Traits)); } TEST(HashTableTest, TestRemove) { - HashTableInternals Table; + HashTableInternals<> Table; EXPECT_EQ(0u, Table.size()); EXPECT_GT(Table.capacity(), 0u); - Table.set_as(1u, 2); - Table.set_as(3u, 4); + IdentityHashTraits Traits; + Table.set_as(1u, 2, Traits); + Table.set_as(3u, 4, 
Traits); EXPECT_EQ(2u, Table.size()); - ASSERT_NE(Table.end(), Table.find_as(1u)); - ASSERT_NE(Table.end(), Table.find_as(3u)); + ASSERT_NE(Table.end(), Table.find_as(1u, Traits)); + ASSERT_NE(Table.end(), Table.find_as(3u, Traits)); - EXPECT_EQ(2u, Table.get(1u)); - EXPECT_EQ(4u, Table.get(3u)); + EXPECT_EQ(2u, Table.get(1u, Traits)); + EXPECT_EQ(4u, Table.get(3u, Traits)); } TEST(HashTableTest, TestCollisionAfterMultipleProbes) { - HashTableInternals Table; + HashTableInternals<> Table; EXPECT_EQ(0u, Table.size()); EXPECT_GT(Table.capacity(), 0u); @@ -94,17 +104,18 @@ TEST(HashTableTest, TestCollisionAfterMultipleProbes) { uint32_t N2 = N1 + 1; uint32_t N3 = 2 * N1; - Table.set_as(N1, 7); - Table.set_as(N2, 11); - Table.set_as(N3, 13); + IdentityHashTraits Traits; + Table.set_as(N1, 7, Traits); + Table.set_as(N2, 11, Traits); + Table.set_as(N3, 13, Traits); EXPECT_EQ(3u, Table.size()); - ASSERT_NE(Table.end(), Table.find_as(N1)); - ASSERT_NE(Table.end(), Table.find_as(N2)); - ASSERT_NE(Table.end(), Table.find_as(N3)); + ASSERT_NE(Table.end(), Table.find_as(N1, Traits)); + ASSERT_NE(Table.end(), Table.find_as(N2, Traits)); + ASSERT_NE(Table.end(), Table.find_as(N3, Traits)); - EXPECT_EQ(7u, Table.get(N1)); - EXPECT_EQ(11u, Table.get(N2)); - EXPECT_EQ(13u, Table.get(N3)); + EXPECT_EQ(7u, Table.get(N1, Traits)); + EXPECT_EQ(11u, Table.get(N2, Traits)); + EXPECT_EQ(13u, Table.get(N3, Traits)); } TEST(HashTableTest, Grow) { @@ -112,24 +123,26 @@ TEST(HashTableTest, Grow) { // guaranteed to trigger a grow. Then verify that the size is the same, the // capacity is larger, and all the original items are still in the table. 
- HashTableInternals Table; + HashTableInternals<> Table; + IdentityHashTraits Traits; uint32_t OldCapacity = Table.capacity(); for (uint32_t I = 0; I < OldCapacity; ++I) { - Table.set_as(OldCapacity + I * 2 + 1, I * 2 + 3); + Table.set_as(OldCapacity + I * 2 + 1, I * 2 + 3, Traits); } EXPECT_EQ(OldCapacity, Table.size()); EXPECT_GT(Table.capacity(), OldCapacity); for (uint32_t I = 0; I < OldCapacity; ++I) { - ASSERT_NE(Table.end(), Table.find_as(OldCapacity + I * 2 + 1)); - EXPECT_EQ(I * 2 + 3, Table.get(OldCapacity + I * 2 + 1)); + ASSERT_NE(Table.end(), Table.find_as(OldCapacity + I * 2 + 1, Traits)); + EXPECT_EQ(I * 2 + 3, Table.get(OldCapacity + I * 2 + 1, Traits)); } } TEST(HashTableTest, Serialization) { - HashTableInternals Table; + HashTableInternals<> Table; + IdentityHashTraits Traits; uint32_t Cap = Table.capacity(); for (uint32_t I = 0; I < Cap; ++I) { - Table.set_as(Cap + I * 2 + 1, I * 2 + 3); + Table.set_as(Cap + I * 2 + 1, I * 2 + 3, Traits); } std::vector Buffer(Table.calculateSerializedLength()); @@ -139,7 +152,7 @@ TEST(HashTableTest, Serialization) { // We should have written precisely the number of bytes we calculated earlier. EXPECT_EQ(Buffer.size(), Writer.getOffset()); - HashTableInternals Table2; + HashTableInternals<> Table2; BinaryStreamReader Reader(Stream); EXPECT_THAT_ERROR(Table2.load(Reader), Succeeded()); // We should have read precisely the number of bytes we calculated earlier. 
@@ -192,20 +205,19 @@ TEST(HashTableTest, NamedStreamMap) { } while (std::next_permutation(Streams.begin(), Streams.end())); } -namespace { struct FooBar { uint32_t X; uint32_t Y; -}; -} // namespace + bool operator==(const FooBar &RHS) const { + return X == RHS.X && Y == RHS.Y; + } +}; -namespace llvm { -namespace pdb { -template <> struct PdbHashTraits { +struct FooBarHashTraits { std::vector Buffer; - PdbHashTraits() { Buffer.push_back(0); } + FooBarHashTraits() { Buffer.push_back(0); } uint32_t hashLookupKey(StringRef S) const { return llvm::pdb::hashStringV1(S); @@ -225,17 +237,16 @@ template <> struct PdbHashTraits { return N; } }; -} // namespace pdb -} // namespace llvm TEST(HashTableTest, NonTrivialValueType) { - HashTable Table; + HashTableInternals Table; + FooBarHashTraits Traits; uint32_t Cap = Table.capacity(); for (uint32_t I = 0; I < Cap; ++I) { FooBar F; F.X = I; F.Y = I + 1; - Table.set_as(utostr(I), F); + Table.set_as(utostr(I), F, Traits); } std::vector Buffer(Table.calculateSerializedLength()); @@ -245,7 +256,7 @@ TEST(HashTableTest, NonTrivialValueType) { // We should have written precisely the number of bytes we calculated earlier. EXPECT_EQ(Buffer.size(), Writer.getOffset()); - HashTable Table2; + HashTableInternals Table2; BinaryStreamReader Reader(Stream); EXPECT_THAT_ERROR(Table2.load(Reader), Succeeded()); // We should have read precisely the number of bytes we calculated earlier. 
@@ -253,7 +264,7 @@ TEST(HashTableTest, NonTrivialValueType) { EXPECT_EQ(Table.size(), Table2.size()); EXPECT_EQ(Table.capacity(), Table2.capacity()); - // EXPECT_EQ(Table.Buckets, Table2.Buckets); - // EXPECT_EQ(Table.Present, Table2.Present); - // EXPECT_EQ(Table.Deleted, Table2.Deleted); + EXPECT_EQ(Table.Buckets, Table2.Buckets); + EXPECT_EQ(Table.Present, Table2.Present); + EXPECT_EQ(Table.Deleted, Table2.Deleted); } From 5d9d7c59ee3353675ec89653c3a2b0175305d59a Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 12 Jul 2019 23:38:31 +0000 Subject: [PATCH 012/451] Re-land [JSONCompilationDatabase] Strip distcc/ccache/gomacc wrappers from parsed commands. Use //net/dir like other test cases for windows compatibility llvm-svn: 365975 --- clang/lib/Tooling/JSONCompilationDatabase.cpp | 50 +++++++++++++++++-- .../Tooling/CompilationDatabaseTest.cpp | 27 ++++++++++ 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/clang/lib/Tooling/JSONCompilationDatabase.cpp b/clang/lib/Tooling/JSONCompilationDatabase.cpp index 76a82b0fd9bd3..f19a0f7550b96 100644 --- a/clang/lib/Tooling/JSONCompilationDatabase.cpp +++ b/clang/lib/Tooling/JSONCompilationDatabase.cpp @@ -256,15 +256,57 @@ JSONCompilationDatabase::getAllCompileCommands() const { return Commands; } +static llvm::StringRef stripExecutableExtension(llvm::StringRef Name) { + Name.consume_back(".exe"); + return Name; +} + +// There are compiler-wrappers (ccache, distcc, gomacc) that take the "real" +// compiler as an argument, e.g. distcc gcc -O3 foo.c. +// These end up in compile_commands.json when people set CC="distcc gcc". +// Clang's driver doesn't understand this, so we need to unwrap. 
+static bool unwrapCommand(std::vector &Args) { + if (Args.size() < 2) + return false; + StringRef Wrapper = + stripExecutableExtension(llvm::sys::path::filename(Args.front())); + if (Wrapper == "distcc" || Wrapper == "gomacc" || Wrapper == "ccache") { + // Most of these wrappers support being invoked 3 ways: + // `distcc g++ file.c` This is the mode we're trying to match. + // We need to drop `distcc`. + // `distcc file.c` This acts like compiler is cc or similar. + // Clang's driver can handle this, no change needed. + // `g++ file.c` g++ is a symlink to distcc. + // We don't even notice this case, and all is well. + // + // We need to distinguish between the first and second case. + // The wrappers themselves don't take flags, so Args[1] is a compiler flag, + // an input file, or a compiler. Inputs have extensions, compilers don't. + bool HasCompiler = + (Args[1][0] != '-') && + !llvm::sys::path::has_extension(stripExecutableExtension(Args[1])); + if (HasCompiler) { + Args.erase(Args.begin()); + return true; + } + // If !HasCompiler, wrappers act like GCC. Fine: so do we. + } + return false; +} + static std::vector nodeToCommandLine(JSONCommandLineSyntax Syntax, const std::vector &Nodes) { SmallString<1024> Storage; - if (Nodes.size() == 1) - return unescapeCommandLine(Syntax, Nodes[0]->getValue(Storage)); std::vector Arguments; - for (const auto *Node : Nodes) - Arguments.push_back(Node->getValue(Storage)); + if (Nodes.size() == 1) + Arguments = unescapeCommandLine(Syntax, Nodes[0]->getValue(Storage)); + else + for (const auto *Node : Nodes) + Arguments.push_back(Node->getValue(Storage)); + // There may be multiple wrappers: using distcc and ccache together is common. 
+ while (unwrapCommand(Arguments)) + ; return Arguments; } diff --git a/clang/unittests/Tooling/CompilationDatabaseTest.cpp b/clang/unittests/Tooling/CompilationDatabaseTest.cpp index da7ae09917dfa..fde95445bdab0 100644 --- a/clang/unittests/Tooling/CompilationDatabaseTest.cpp +++ b/clang/unittests/Tooling/CompilationDatabaseTest.cpp @@ -370,6 +370,33 @@ TEST(findCompileArgsInJsonDatabase, FindsEntry) { EXPECT_EQ("command4", FoundCommand.CommandLine[0]) << ErrorMessage; } +TEST(findCompileArgsInJsonDatabase, ParsesCompilerWrappers) { + StringRef Directory("//net/dir"); + StringRef FileName("//net/dir/filename"); + std::vector> Cases = { + {"distcc gcc foo.c", "gcc foo.c"}, + {"gomacc clang++ foo.c", "clang++ foo.c"}, + {"ccache gcc foo.c", "gcc foo.c"}, + {"ccache.exe gcc foo.c", "gcc foo.c"}, + {"ccache g++.exe foo.c", "g++.exe foo.c"}, + {"ccache distcc gcc foo.c", "gcc foo.c"}, + + {"distcc foo.c", "distcc foo.c"}, + {"distcc -I/foo/bar foo.c", "distcc -I/foo/bar foo.c"}, + }; + std::string ErrorMessage; + + for (const auto &Case : Cases) { + std::string DB = + R"([{"directory":"//net/dir", "file":"//net/dir/foo.c", "command":")" + + Case.first + "\"}]"; + CompileCommand FoundCommand = + findCompileArgsInJsonDatabase("//net/dir/foo.c", DB, ErrorMessage); + EXPECT_EQ(Case.second, llvm::join(FoundCommand.CommandLine, " ")) + << Case.first; + } +} + static std::vector unescapeJsonCommandLine(StringRef Command) { std::string JsonDatabase = ("[{\"directory\":\"//net/root\", \"file\":\"test\", \"command\": \"" + From 4765aa14ff429db9301047296c9bd8e201bcb3a2 Mon Sep 17 00:00:00 2001 From: Jan Korous Date: Sat, 13 Jul 2019 00:09:04 +0000 Subject: [PATCH 013/451] [DirectoryWatcher][test][NFC] Add information to test failure reports llvm-svn: 365976 --- .../DirectoryWatcher/DirectoryWatcherTest.cpp | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp 
b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp index 0808ff47dee89..72bc86d4493cf 100644 --- a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp +++ b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp @@ -97,7 +97,9 @@ std::string eventKindToString(const DirectoryWatcher::Event::EventKind K) { struct VerifyingConsumer { std::vector ExpectedInitial; + const std::vector ExpectedInitialCopy; std::vector ExpectedNonInitial; + const std::vector ExpectedNonInitialCopy; std::vector OptionalNonInitial; std::vector UnexpectedInitial; std::vector UnexpectedNonInitial; @@ -108,8 +110,8 @@ struct VerifyingConsumer { const std::vector &ExpectedInitial, const std::vector &ExpectedNonInitial, const std::vector &OptionalNonInitial = {}) - : ExpectedInitial(ExpectedInitial), - ExpectedNonInitial(ExpectedNonInitial), + : ExpectedInitial(ExpectedInitial), ExpectedInitialCopy(ExpectedInitial), + ExpectedNonInitial(ExpectedNonInitial), ExpectedNonInitialCopy(ExpectedNonInitial), OptionalNonInitial(OptionalNonInitial) {} // This method is used by DirectoryWatcher. 
@@ -181,6 +183,26 @@ struct VerifyingConsumer { } void printUnmetExpectations(llvm::raw_ostream &OS) { + // If there was any issue, print the expected state + if ( + !ExpectedInitial.empty() + || + !ExpectedNonInitial.empty() + || + !UnexpectedInitial.empty() + || + !UnexpectedNonInitial.empty() + ) { + OS << "Expected initial events: \n"; + for (const auto &E : ExpectedInitialCopy) { + OS << eventKindToString(E.Kind) << " " << E.Filename << "\n"; + } + OS << "Expected non-initial events: \n"; + for (const auto &E : ExpectedNonInitialCopy) { + OS << eventKindToString(E.Kind) << " " << E.Filename << "\n"; + } + } + if (!ExpectedInitial.empty()) { OS << "Expected but not seen initial events: \n"; for (const auto &E : ExpectedInitial) { @@ -218,6 +240,7 @@ void checkEventualResultWithTimeout(VerifyingConsumer &TestConsumer) { EXPECT_TRUE(WaitForExpectedStateResult.wait_for(std::chrono::seconds(3)) == std::future_status::ready) << "The expected result state wasn't reached before the time-out."; + std::unique_lock L(TestConsumer.Mtx); EXPECT_TRUE(TestConsumer.result().hasValue()); if (TestConsumer.result().hasValue()) { EXPECT_TRUE(*TestConsumer.result()); From de85380fa02506ebb7ebbd46b4eb3d80f5619e38 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 13 Jul 2019 00:09:27 +0000 Subject: [PATCH 014/451] [Attributor][FIX] Lookup of (call site) argument attributes llvm-svn: 365977 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 3a8e88bd20ff5..0b72394e8e9ec 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -179,9 +179,12 @@ struct Attributor { assert(AAType::ID != Attribute::None && "Cannot lookup generic abstract attributes!"); - // Determine the argument number automatically for llvm::Arguments. 
+ // Determine the argument number automatically for llvm::Arguments if none + // is set. Do not override a given one as it could be a use of the argument + // in a call site. if (auto *Arg = dyn_cast(&V)) - ArgNo = Arg->getArgNo(); + if (ArgNo == -1) + ArgNo = Arg->getArgNo(); // If a function was given together with an argument number, perform the // lookup for the actual argument instead. Don't do it for variadic From b016de51e04fbade2b14d74e36b2cfc6edde9394 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Sat, 13 Jul 2019 00:12:22 +0000 Subject: [PATCH 015/451] [DWARFContext] Strip leading dot in section names The LLVM context doesn't expect the leading dot in the section name. llvm-svn: 365978 --- lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp index 2f693fe5c3243..eb307ce1cce1b 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp @@ -116,6 +116,8 @@ llvm::DWARFContext &DWARFContext::GetAsLLVM() { llvm::StringRef data = llvm::toStringRef(section_data.GetData()); llvm::StringRef name = section.GetName().GetStringRef(); + if (name.startswith(".")) + name = name.drop_front(); section_map.try_emplace( name, llvm::MemoryBuffer::getMemBuffer(data, name, false)); }; From 0291d309291f69525fdf61072b6790fd5e1d67bf Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Sat, 13 Jul 2019 00:20:34 +0000 Subject: [PATCH 016/451] [COFF] Add null check in case of symbols defined in LTO blobs The test case could probably be improved further if the failure path was better understood. 
Fixes PR42536 llvm-svn: 365979 --- lld/COFF/SymbolTable.cpp | 2 +- .../COFF/Inputs/undefined-symbol-lto-a.ll | 82 +++++++++++++++++++ .../COFF/Inputs/undefined-symbol-lto-b.ll | 29 +++++++ lld/test/COFF/undefined-symbol-lto.test | 30 +++++++ 4 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 lld/test/COFF/Inputs/undefined-symbol-lto-a.ll create mode 100644 lld/test/COFF/Inputs/undefined-symbol-lto-b.ll create mode 100644 lld/test/COFF/undefined-symbol-lto.test diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 280a9c28892c8..2173c10c1ca56 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -69,7 +69,7 @@ static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) { for (Symbol *s : sc->file->getSymbols()) { auto *d = dyn_cast_or_null(s); - if (!d || d->getChunk() != sc || d->getValue() > addr || + if (!d || !d->data || d->getChunk() != sc || d->getValue() > addr || (candidate && d->getValue() < candidate->getValue())) continue; diff --git a/lld/test/COFF/Inputs/undefined-symbol-lto-a.ll b/lld/test/COFF/Inputs/undefined-symbol-lto-a.ll new file mode 100644 index 0000000000000..6793ec718e806 --- /dev/null +++ b/lld/test/COFF/Inputs/undefined-symbol-lto-a.ll @@ -0,0 +1,82 @@ +; ModuleID = 't.obj' +source_filename = "t.cpp" +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc19.21.27702" + +%struct.Init = type { %struct.S } +%struct.S = type { i32 (...)** } +%rtti.CompleteObjectLocator = type { i32, i32, i32, i32, i32, i32 } +%rtti.TypeDescriptor7 = type { i8**, i8*, [8 x i8] } +%rtti.ClassHierarchyDescriptor = type { i32, i32, i32, i32 } +%rtti.BaseClassDescriptor = type { i32, i32, i32, i32, i32, i32, i32 } + +$"??_SS@@6B@" = comdat largest + +$"??_R4S@@6B@" = comdat any + +$"??_R0?AUS@@@8" = comdat any + +$"??_R3S@@8" = comdat any + +$"??_R2S@@8" = comdat any + +$"??_R1A@?0A@EA@S@@8" = comdat any + +@"?d@@3UInit@@A" = dso_local local_unnamed_addr global 
%struct.Init zeroinitializer, align 8 +@anon.bcb2691509de99310dddb690fcdb4cdc.0 = private unnamed_addr constant { [2 x i8*] } { [2 x i8*] [i8* bitcast (%rtti.CompleteObjectLocator* @"??_R4S@@6B@" to i8*), i8* bitcast (void (%struct.S*)* @"?foo@S@@UEAAXXZ" to i8*)] }, comdat($"??_SS@@6B@"), !type !0 +@"??_R4S@@6B@" = linkonce_odr constant %rtti.CompleteObjectLocator { i32 1, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor7* @"??_R0?AUS@@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.ClassHierarchyDescriptor* @"??_R3S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.CompleteObjectLocator* @"??_R4S@@6B@" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, comdat +@"??_7type_info@@6B@" = external constant i8* +@"??_R0?AUS@@@8" = linkonce_odr global %rtti.TypeDescriptor7 { i8** @"??_7type_info@@6B@", i8* null, [8 x i8] c".?AUS@@\00" }, comdat +@__ImageBase = external dso_local constant i8 +@"??_R3S@@8" = linkonce_odr constant %rtti.ClassHierarchyDescriptor { i32 0, i32 0, i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint ([2 x i32]* @"??_R2S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, comdat +@"??_R2S@@8" = linkonce_odr constant [2 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.BaseClassDescriptor* @"??_R1A@?0A@EA@S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0], comdat +@"??_R1A@?0A@EA@S@@8" = linkonce_odr constant %rtti.BaseClassDescriptor { i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor7* @"??_R0?AUS@@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 0, i32 -1, i32 0, i32 64, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.ClassHierarchyDescriptor* @"??_R3S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, comdat +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 
65535, void ()* @_GLOBAL__sub_I_t.cpp, i8* null }] + +@"??_SS@@6B@" = unnamed_addr alias i8*, getelementptr inbounds ({ [2 x i8*] }, { [2 x i8*] }* @anon.bcb2691509de99310dddb690fcdb4cdc.0, i32 0, i32 0, i32 1) + +declare dso_local void @"?undefined_ref@@YAXXZ"() local_unnamed_addr #0 + +declare dllimport void @"?foo@S@@UEAAXXZ"(%struct.S*) unnamed_addr #0 + +; Function Attrs: nounwind sspstrong uwtable +define internal void @_GLOBAL__sub_I_t.cpp() #1 { +entry: + store i32 (...)** bitcast (i8** @"??_SS@@6B@" to i32 (...)**), i32 (...)*** getelementptr inbounds (%struct.Init, %struct.Init* @"?d@@3UInit@@A", i64 0, i32 0, i32 0), align 8 + tail call void @"?undefined_ref@@YAXXZ"() #2 + ret void +} + +attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind sspstrong uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } + +!llvm.linker.options = !{!1, !2} +!llvm.module.flags = !{!3, !4, !5, !6} +!llvm.ident = !{!7} + +!0 = !{i64 8, !"?AUS@@"} +!1 = !{!"/DEFAULTLIB:libcmt.lib"} +!2 = !{!"/DEFAULTLIB:oldnames.lib"} +!3 = !{i32 1, !"wchar_size", i32 2} +!4 = !{i32 7, !"PIC Level", i32 2} +!5 = !{i32 1, !"ThinLTO", i32 0} +!6 = !{i32 1, 
!"EnableSplitLTOUnit", i32 0} +!7 = !{!"clang version 9.0.0 (git@github.com:llvm/llvm-project.git 1a285c27fdf6407ceed3398e015d00559f5f533d)"} + +^0 = module: (path: "t.obj", hash: (0, 0, 0, 0, 0)) +^1 = gv: (name: "__ImageBase") ; guid = 434928772013489304 +^2 = gv: (name: "??_R2S@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^1, ^6)))) ; guid = 2160898732728284029 +^3 = gv: (name: "llvm.global_ctors", summaries: (variable: (module: ^0, flags: (linkage: appending, notEligibleToImport: 1, live: 1, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^14)))) ; guid = 2412314959268824392 +^4 = gv: (name: "?foo@S@@UEAAXXZ") ; guid = 6578172636330484861 +^5 = gv: (name: "??_SS@@6B@", summaries: (alias: (module: ^0, flags: (linkage: external, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), aliasee: ^10))) ; guid = 8774897714842691026 +^6 = gv: (name: "??_R1A@?0A@EA@S@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^11, ^1, ^8)))) ; guid = 9397802696236423453 +^7 = gv: (name: "?undefined_ref@@YAXXZ") ; guid = 9774674600202276560 +^8 = gv: (name: "??_R3S@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^1, ^2)))) ; guid = 10685958509605791599 +^9 = gv: (name: "??_7type_info@@6B@") ; guid = 10826752452437539368 +^10 = gv: (name: "anon.bcb2691509de99310dddb690fcdb4cdc.0", summaries: (variable: (module: ^0, flags: (linkage: private, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), vTableFuncs: ((virtFunc: ^4, offset: 8)), refs: (^13, ^4)))) ; guid = 11510395461204283992 +^11 = gv: (name: 
"??_R0?AUS@@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^9)))) ; guid = 12346607659584231960 +^12 = gv: (name: "?d@@3UInit@@A", summaries: (variable: (module: ^0, flags: (linkage: external, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 1, writeonly: 1)))) ; guid = 14563354643524156382 +^13 = gv: (name: "??_R4S@@6B@", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^13, ^11, ^1, ^8)))) ; guid = 14703528065171087394 +^14 = gv: (name: "_GLOBAL__sub_I_t.cpp", summaries: (function: (module: ^0, flags: (linkage: internal, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 3, calls: ((callee: ^7)), refs: (^12, ^5)))) ; guid = 15085897428757412588 +^15 = typeidCompatibleVTable: (name: "?AUS@@", summary: ((offset: 8, ^10))) ; guid = 13986515119763165370 diff --git a/lld/test/COFF/Inputs/undefined-symbol-lto-b.ll b/lld/test/COFF/Inputs/undefined-symbol-lto-b.ll new file mode 100644 index 0000000000000..ff73e7c6ba680 --- /dev/null +++ b/lld/test/COFF/Inputs/undefined-symbol-lto-b.ll @@ -0,0 +1,29 @@ +; ModuleID = 'b.obj' +source_filename = "b.cpp" +target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc19.21.27702" + +%struct.S = type { i32 (...)** } + +; Function Attrs: norecurse nounwind readnone sspstrong uwtable +define dso_local void @"?foo@S@@UEAAXXZ"(%struct.S* nocapture %this) unnamed_addr #0 align 2 { +entry: + ret void +} + +attributes #0 = { norecurse nounwind readnone sspstrong uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" 
"no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.linker.options = !{!0, !1} +!llvm.module.flags = !{!2, !3, !4, !5} +!llvm.ident = !{!6} + +!0 = !{!"/DEFAULTLIB:libcmt.lib"} +!1 = !{!"/DEFAULTLIB:oldnames.lib"} +!2 = !{i32 1, !"wchar_size", i32 2} +!3 = !{i32 7, !"PIC Level", i32 2} +!4 = !{i32 1, !"ThinLTO", i32 0} +!5 = !{i32 1, !"EnableSplitLTOUnit", i32 0} +!6 = !{!"clang version 9.0.0 (git@github.com:llvm/llvm-project.git 1a285c27fdf6407ceed3398e015d00559f5f533d)"} + +^0 = module: (path: "b.obj", hash: (0, 0, 0, 0, 0)) +^1 = gv: (name: "?foo@S@@UEAAXXZ", summaries: (function: (module: ^0, flags: (linkage: external, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 1, funcFlags: (readNone: 1, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0)))) ; guid = 6578172636330484861 diff --git a/lld/test/COFF/undefined-symbol-lto.test b/lld/test/COFF/undefined-symbol-lto.test new file mode 100644 index 0000000000000..6911b121122a4 --- /dev/null +++ b/lld/test/COFF/undefined-symbol-lto.test @@ -0,0 +1,30 @@ +RUN: rm -rf %t && mkdir -p %t && cd %t +RUN: llvm-as %S/Inputs/undefined-symbol-lto-a.ll -o t.obj +RUN: llvm-as %S/Inputs/undefined-symbol-lto-b.ll -o b.obj +RUN: llvm-lib b.obj -out:b.lib +RUN: not lld-link t.obj b.lib -subsystem:console 2>&1 | FileCheck %s + +CHECK: undefined symbol: main +CHECK: referenced by +CHECK: undefined symbol: void __cdecl undefined_ref(void) +CHECK: referenced by + +Originally reported as PR42536. 
+ +a.ll corresponds to this C++: + +struct __declspec(dllimport) S { + virtual void foo(); +}; +void undefined_ref(); +struct Init { + Init() { undefined_ref(); } + S c; +} d; + +b.ll is from this C++: + +struct S { + virtual void foo(); +}; +void S::foo() {} From 41c22b4390c763f6fc36ec984f3786d465c434b5 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Sat, 13 Jul 2019 00:29:03 +0000 Subject: [PATCH 017/451] Extend function attributes bitset size from 64 to 96. Summary: We are going to add a function attribute number 64. Reviewers: pcc, jdoerfert, lebedev.ri Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64663 llvm-svn: 365980 --- llvm/lib/IR/AttributeImpl.h | 12 ++++++------ llvm/lib/IR/Attributes.cpp | 17 ++++++++++++----- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h index f6898476382dd..f989fa3b910e6 100644 --- a/llvm/lib/IR/AttributeImpl.h +++ b/llvm/lib/IR/AttributeImpl.h @@ -179,9 +179,9 @@ class AttributeSetNode final private TrailingObjects { friend TrailingObjects; - /// Bitset with a bit for each available attribute Attribute::AttrKind. - uint64_t AvailableAttrs; unsigned NumAttrs; ///< Number of attributes in this node. + /// Bitset with a bit for each available attribute Attribute::AttrKind. + uint8_t AvailableAttrs[12] = {}; AttributeSetNode(ArrayRef Attrs); @@ -200,7 +200,7 @@ class AttributeSetNode final unsigned getNumAttributes() const { return NumAttrs; } bool hasAttribute(Attribute::AttrKind Kind) const { - return AvailableAttrs & ((uint64_t)1) << Kind; + return AvailableAttrs[Kind / 8] & ((uint64_t)1) << (Kind % 8); } bool hasAttribute(StringRef Kind) const; bool hasAttributes() const { return NumAttrs != 0; } @@ -244,10 +244,10 @@ class AttributeListImpl final friend TrailingObjects; private: - /// Bitset with a bit for each available attribute Attribute::AttrKind. 
- uint64_t AvailableFunctionAttrs; LLVMContext &Context; unsigned NumAttrSets; ///< Number of entries in this set. + /// Bitset with a bit for each available attribute Attribute::AttrKind. + uint8_t AvailableFunctionAttrs[12] = {}; // Helper fn for TrailingObjects class. size_t numTrailingObjects(OverloadToken) { return NumAttrSets; } @@ -267,7 +267,7 @@ class AttributeListImpl final /// Return true if the AttributeSet or the FunctionIndex has an /// enum attribute of the given kind. bool hasFnAttribute(Attribute::AttrKind Kind) const { - return AvailableFunctionAttrs & ((uint64_t)1) << Kind; + return AvailableFunctionAttrs[Kind / 8] & ((uint64_t)1) << (Kind % 8); } using iterator = const AttributeSet *; diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp index 90b3c22e80f01..1ba703bb14c76 100644 --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -718,13 +718,18 @@ LLVM_DUMP_METHOD void AttributeSet::dump() const { //===----------------------------------------------------------------------===// AttributeSetNode::AttributeSetNode(ArrayRef Attrs) - : AvailableAttrs(0), NumAttrs(Attrs.size()) { + : NumAttrs(Attrs.size()) { // There's memory after the node where we can store the entries in. llvm::copy(Attrs, getTrailingObjects()); + static_assert(Attribute::EndAttrKinds <= + sizeof(AvailableAttrs) * CHAR_BIT, + "Too many attributes"); + for (const auto I : *this) { if (!I.isStringAttribute()) { - AvailableAttrs |= ((uint64_t)1) << I.getKindAsEnum(); + Attribute::AttrKind Kind = I.getKindAsEnum(); + AvailableAttrs[Kind / 8] |= 1ULL << (Kind % 8); } } } @@ -896,7 +901,7 @@ static constexpr unsigned attrIdxToArrayIdx(unsigned Index) { AttributeListImpl::AttributeListImpl(LLVMContext &C, ArrayRef Sets) - : AvailableFunctionAttrs(0), Context(C), NumAttrSets(Sets.size()) { + : Context(C), NumAttrSets(Sets.size()) { assert(!Sets.empty() && "pointless AttributeListImpl"); // There's memory after the node where we can store the entries in. 
@@ -909,8 +914,10 @@ AttributeListImpl::AttributeListImpl(LLVMContext &C, static_assert(attrIdxToArrayIdx(AttributeList::FunctionIndex) == 0U, "function should be stored in slot 0"); for (const auto I : Sets[0]) { - if (!I.isStringAttribute()) - AvailableFunctionAttrs |= 1ULL << I.getKindAsEnum(); + if (!I.isStringAttribute()) { + Attribute::AttrKind Kind = I.getKindAsEnum(); + AvailableFunctionAttrs[Kind / 8] |= 1ULL << (Kind % 8); + } } } From cafb5d24dfa0faf3fa7dfb7eefd7df08aeb01e55 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 13 Jul 2019 00:47:58 +0000 Subject: [PATCH 018/451] clang/test/Driver/fsanitize.c: Fix -fsanitize=vptr using default target The default implementation of getSupportedSanitizers isn't able to turn on the vptr sanitizer, and thus, any platform that runs this test will fail with the error: clang: error: unsupported option '-fsanitize=vptr' for target '' Patch by James Nagurne! llvm-svn: 365981 --- clang/test/Driver/fsanitize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c index 187d4bfd4301e..a275b576688e6 100644 --- a/clang/test/Driver/fsanitize.c +++ b/clang/test/Driver/fsanitize.c @@ -97,7 +97,7 @@ // RUN: %clang -target x86_64-linux-gnu -fsanitize=vptr -fsanitize-undefined-trap-on-error %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-VPTR-TRAP-UNDEF // CHECK-VPTR-TRAP-UNDEF: error: invalid argument '-fsanitize=vptr' not allowed with '-fsanitize-trap=undefined' -// RUN: %clang -fsanitize=vptr -fno-rtti %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-VPTR-NO-RTTI +// RUN: %clang -target x86_64-linux-gnu -fsanitize=vptr -fno-rtti %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-VPTR-NO-RTTI // CHECK-VPTR-NO-RTTI: '-fsanitize=vptr' not allowed with '-fno-rtti' // RUN: %clang -fsanitize=undefined -fno-rtti %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UNDEFINED-NO-RTTI From 4f519b6919d2e6e1fb164b87a0d1a5c390581cbe Mon Sep 17 00:00:00 2001 From: 
Julian Lettner Date: Sat, 13 Jul 2019 00:55:06 +0000 Subject: [PATCH 019/451] [TSan] Tiny cleanup of UnmangleLongJmpSp for Linux/x86_64 NFC. llvm-svn: 365982 --- compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc index 5d9284c525084..0f23da0e877fe 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc @@ -373,9 +373,7 @@ int ExtractRecvmsgFDs(void *msgp, int *fds, int nfd) { // Reverse operation of libc stack pointer mangling static uptr UnmangleLongJmpSp(uptr mangled_sp) { #if defined(__x86_64__) -# if SANITIZER_FREEBSD || SANITIZER_NETBSD - return mangled_sp; -# else // Linux +# if SANITIZER_LINUX // Reverse of: // xor %fs:0x30, %rsi // rol $0x11, %rsi @@ -385,6 +383,8 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) { : "=r" (sp) : "0" (mangled_sp)); return sp; +# else + return mangled_sp; # endif #elif defined(__aarch64__) # if SANITIZER_LINUX @@ -394,11 +394,11 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) { # endif #elif defined(__powerpc64__) // Reverse of: - // ld r4, -28696(r13) - // xor r4, r3, r4 - uptr xor_guard; - asm("ld %0, -28696(%%r13) \n" : "=r" (xor_guard)); - return mangled_sp ^ xor_guard; + // ld r4, -28696(r13) + // xor r4, r3, r4 + uptr xor_key; + asm("ld %0, -28696(%%r13)" : "=r" (xor_key)); + return mangled_sp ^ xor_key; #elif defined(__mips__) return mangled_sp; #else From 0a7f4cdce9fa07f9027181b19db59b5231932487 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 13 Jul 2019 01:09:21 +0000 Subject: [PATCH 020/451] [Attributor] Only return attributes with a valid state Attributor::getAAFor will now only return AbstractAttributes with a valid AbstractState. This simplifies call sites as they only need to check if the returned pointer is non-null. 
It also reduces the potential for accidental misuse. llvm-svn: 365983 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 8 ++++++-- llvm/lib/Transforms/IPO/Attributor.cpp | 5 ++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 0b72394e8e9ec..435aaca75d1c2 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -199,8 +199,12 @@ struct Attributor { const auto &KindToAbstractAttributeMap = AAMap.lookup({&V, ArgNo}); if (AAType *AA = static_cast( KindToAbstractAttributeMap.lookup(AAType::ID))) { - QueryMap[AA].insert(&QueryingAA); - return AA; + // Do not return an attribute with an invalid state. This minimizes checks + // at the calls sites and allows the fallback below to kick in. + if (AA->getState().isValidState()) { + QueryMap[AA].insert(&QueryingAA); + return AA; + } } // If no abstract attribute was found and we look for a call site argument, diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 85aa7a63ada57..5fea3d0b87a09 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -655,7 +655,7 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) { // Try to find a assumed unique return value for the called function. auto *RetCSAA = A.getAAFor(*this, *RV); - if (!RetCSAA || !RetCSAA->isValidState()) { + if (!RetCSAA) { HasOverdefinedReturnedCalls = true; LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned call site (" << *RV << ") with " << (RetCSAA ? 
"invalid" : "no") @@ -965,8 +965,7 @@ ChangeStatus AANoFreeFunction::updateImpl(Attributor &A) { auto ICS = ImmutableCallSite(I); auto *NoFreeAA = A.getAAFor(*this, *I); - if ((!NoFreeAA || !NoFreeAA->isValidState() || - !NoFreeAA->isAssumedNoFree()) && + if ((!NoFreeAA || !NoFreeAA->isAssumedNoFree()) && !ICS.hasFnAttr(Attribute::NoFree)) { indicatePessimisticFixpoint(); return ChangeStatus::CHANGED; From c7a1db329849b3a5763545a274ed9c91c592553b Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 13 Jul 2019 01:09:27 +0000 Subject: [PATCH 021/451] [Attributor][NFC] Run clang-format on the attributor files (.h/.cpp) The Attributor files are kept formatted with clang-format, we should try to keep this state. llvm-svn: 365984 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 4 +--- llvm/lib/Transforms/IPO/Attributor.cpp | 19 +++++++------------ 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 435aaca75d1c2..88b6af3abbd3d 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -693,9 +693,7 @@ struct AANoSync : public AbstractAttribute { : AbstractAttribute(V, InfoCache) {} /// See AbstractAttribute::getAttrKind(). - Attribute::AttrKind getAttrKind() const override { - return ID; - } + Attribute::AttrKind getAttrKind() const override { return ID; } static constexpr Attribute::AttrKind ID = Attribute::AttrKind(Attribute::NoSync); diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 5fea3d0b87a09..5a72865db9d0f 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -359,9 +359,7 @@ struct AANoUnwindFunction : AANoUnwind, BooleanState { /// } /// See AbstractAttribute::getManifestPosition(). 
- ManifestPosition getManifestPosition() const override { - return MP_FUNCTION; - } + ManifestPosition getManifestPosition() const override { return MP_FUNCTION; } const std::string getAsStr() const override { return getAssumed() ? "nounwind" : "may-unwind"; @@ -500,9 +498,7 @@ class AAReturnedValuesImpl final : public AAReturnedValues, AbstractState { const AbstractState &getState() const override { return *this; } /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { - return MP_ARGUMENT; - } + ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; } /// See AbstractAttribute::updateImpl(Attributor &A). ChangeStatus updateImpl(Attributor &A) override; @@ -742,9 +738,7 @@ struct AANoSyncFunction : AANoSync, BooleanState { /// } /// See AbstractAttribute::getManifestPosition(). - ManifestPosition getManifestPosition() const override { - return MP_FUNCTION; - } + ManifestPosition getManifestPosition() const override { return MP_FUNCTION; } const std::string getAsStr() const override { return getAssumed() ? "nosync" : "may-sync"; @@ -767,7 +761,8 @@ struct AANoSyncFunction : AANoSync, BooleanState { /// Helper function used to determine whether an instruction is volatile. static bool isVolatile(Instruction *I); - /// Helper function uset to check if intrinsic is volatile (memcpy, memmove, memset). + /// Helper function uset to check if intrinsic is volatile (memcpy, memmove, + /// memset). 
static bool isNoSyncIntrinsic(Instruction *I); }; @@ -870,7 +865,7 @@ ChangeStatus AANoSyncFunction::updateImpl(Attributor &A) { auto *NoSyncAA = A.getAAFor(*this, *I); if (isa(I) && isNoSyncIntrinsic(I)) - continue; + continue; if (ICS && (!NoSyncAA || !NoSyncAA->isAssumedNoSync()) && !ICS.hasFnAttr(Attribute::NoSync)) { @@ -878,7 +873,7 @@ ChangeStatus AANoSyncFunction::updateImpl(Attributor &A) { return ChangeStatus::CHANGED; } - if(ICS) + if (ICS) continue; if (!isVolatile(I) && !isNonRelaxedAtomic(I)) From 81b03d4a08b16217669fcccb96e7cc436ab3d74a Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Sat, 13 Jul 2019 01:47:15 +0000 Subject: [PATCH 022/451] [Sema] Diagnose default-initialization, destruction, and copying of non-trivial C union types This patch diagnoses uses of non-trivial C unions and structs/unions containing non-trivial C unions in the following contexts, which require default-initialization, destruction, or copying of the union objects, instead of disallowing fields of non-trivial types in C unions, which is what we currently do: - function parameters. - function returns. - assignments. - compound literals. - block captures except capturing of `__block` variables by non-escaping blocks. - local and global variable definitions. - lvalue-to-rvalue conversions of volatile types. See the discussion in https://reviews.llvm.org/D62988 for more background. 
rdar://problem/50679094 Differential Revision: https://reviews.llvm.org/D63753 llvm-svn: 365985 --- clang/include/clang/AST/Decl.h | 24 ++ clang/include/clang/AST/DeclBase.h | 9 +- clang/include/clang/AST/Type.h | 45 ++- .../clang/Basic/DiagnosticSemaKinds.td | 19 +- clang/include/clang/Sema/Sema.h | 42 +++ clang/lib/AST/Type.cpp | 60 +--- clang/lib/Sema/Sema.cpp | 18 +- clang/lib/Sema/SemaDecl.cpp | 329 ++++++++++++++++-- clang/lib/Sema/SemaExpr.cpp | 33 +- clang/lib/Sema/SemaType.cpp | 5 + clang/lib/Serialization/ASTReaderDecl.cpp | 3 + clang/lib/Serialization/ASTWriterDecl.cpp | 9 + .../test/CodeGenObjC/Inputs/strong_in_union.h | 10 - clang/test/CodeGenObjC/strong-in-c-struct.m | 15 +- clang/test/PCH/non-trivial-c-union.m | 24 ++ clang/test/SemaObjC/arc-decls.m | 6 +- clang/test/SemaObjC/non-trivial-c-union.m | 82 +++++ 17 files changed, 611 insertions(+), 122 deletions(-) delete mode 100644 clang/test/CodeGenObjC/Inputs/strong_in_union.h create mode 100644 clang/test/PCH/non-trivial-c-union.m create mode 100644 clang/test/SemaObjC/non-trivial-c-union.m diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index e593dafb5fc4d..02742801f37c2 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -3746,6 +3746,30 @@ class RecordDecl : public TagDecl { RecordDeclBits.NonTrivialToPrimitiveDestroy = V; } + bool hasNonTrivialToPrimitiveDefaultInitializeCUnion() const { + return RecordDeclBits.HasNonTrivialToPrimitiveDefaultInitializeCUnion; + } + + void setHasNonTrivialToPrimitiveDefaultInitializeCUnion(bool V) { + RecordDeclBits.HasNonTrivialToPrimitiveDefaultInitializeCUnion = V; + } + + bool hasNonTrivialToPrimitiveDestructCUnion() const { + return RecordDeclBits.HasNonTrivialToPrimitiveDestructCUnion; + } + + void setHasNonTrivialToPrimitiveDestructCUnion(bool V) { + RecordDeclBits.HasNonTrivialToPrimitiveDestructCUnion = V; + } + + bool hasNonTrivialToPrimitiveCopyCUnion() const { + return 
RecordDeclBits.HasNonTrivialToPrimitiveCopyCUnion; + } + + void setHasNonTrivialToPrimitiveCopyCUnion(bool V) { + RecordDeclBits.HasNonTrivialToPrimitiveCopyCUnion = V; + } + /// Determine whether this class can be passed in registers. In C++ mode, /// it must have at least one trivial, non-deleted copy or move constructor. /// FIXME: This should be set as part of completeDefinition. diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index 26edb7790c261..d64d0cb425db0 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -1440,6 +1440,13 @@ class DeclContext { uint64_t NonTrivialToPrimitiveCopy : 1; uint64_t NonTrivialToPrimitiveDestroy : 1; + /// The following bits indicate whether this is or contains a C union that + /// is non-trivial to default-initialize, destruct, or copy. These bits + /// imply the associated basic non-triviality predicates declared above. + uint64_t HasNonTrivialToPrimitiveDefaultInitializeCUnion : 1; + uint64_t HasNonTrivialToPrimitiveDestructCUnion : 1; + uint64_t HasNonTrivialToPrimitiveCopyCUnion : 1; + /// Indicates whether this struct is destroyed in the callee. uint64_t ParamDestroyedInCallee : 1; @@ -1448,7 +1455,7 @@ class DeclContext { }; /// Number of non-inherited bits in RecordDeclBitfields. - enum { NumRecordDeclBits = 11 }; + enum { NumRecordDeclBits = 14 }; /// Stores the bits used by OMPDeclareReductionDecl. /// If modified NumOMPDeclareReductionDeclBits and the accessor diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 00a2b7643370a..584655fe789e6 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -1129,12 +1129,6 @@ class QualType { PCK_Struct }; - /// Check if this is a non-trivial type that would cause a C struct - /// transitively containing this type to be non-trivial. This function can be - /// used to determine whether a field of this type can be declared inside a C - /// union. 
- bool isNonTrivialPrimitiveCType(const ASTContext &Ctx) const; - /// Check if this is a non-trivial type that would cause a C struct /// transitively containing this type to be non-trivial to copy and return the /// kind. @@ -1164,6 +1158,22 @@ class QualType { return isDestructedTypeImpl(*this); } + /// Check if this is or contains a C union that is non-trivial to + /// default-initialize, which is a union that has a member that is non-trivial + /// to default-initialize. If this returns true, + /// isNonTrivialToPrimitiveDefaultInitialize returns PDIK_Struct. + bool hasNonTrivialToPrimitiveDefaultInitializeCUnion() const; + + /// Check if this is or contains a C union that is non-trivial to destruct, + /// which is a union that has a member that is non-trivial to destruct. If + /// this returns true, isDestructedType returns DK_nontrivial_c_struct. + bool hasNonTrivialToPrimitiveDestructCUnion() const; + + /// Check if this is or contains a C union that is non-trivial to copy, which + /// is a union that has a member that is non-trivial to copy. If this returns + /// true, isNonTrivialToPrimitiveCopy returns PCK_Struct. + bool hasNonTrivialToPrimitiveCopyCUnion() const; + /// Determine whether expressions of the given type are forbidden /// from being lvalues in C. /// @@ -1236,6 +1246,11 @@ class QualType { const ASTContext &C); static QualType IgnoreParens(QualType T); static DestructionKind isDestructedTypeImpl(QualType type); + + /// Check if \param RD is or contains a non-trivial C union. 
+ static bool hasNonTrivialToPrimitiveDefaultInitializeCUnion(const RecordDecl *RD); + static bool hasNonTrivialToPrimitiveDestructCUnion(const RecordDecl *RD); + static bool hasNonTrivialToPrimitiveCopyCUnion(const RecordDecl *RD); }; } // namespace clang @@ -6249,6 +6264,24 @@ inline Qualifiers::GC QualType::getObjCGCAttr() const { return getQualifiers().getObjCGCAttr(); } +inline bool QualType::hasNonTrivialToPrimitiveDefaultInitializeCUnion() const { + if (auto *RD = getTypePtr()->getBaseElementTypeUnsafe()->getAsRecordDecl()) + return hasNonTrivialToPrimitiveDefaultInitializeCUnion(RD); + return false; +} + +inline bool QualType::hasNonTrivialToPrimitiveDestructCUnion() const { + if (auto *RD = getTypePtr()->getBaseElementTypeUnsafe()->getAsRecordDecl()) + return hasNonTrivialToPrimitiveDestructCUnion(RD); + return false; +} + +inline bool QualType::hasNonTrivialToPrimitiveCopyCUnion() const { + if (auto *RD = getTypePtr()->getBaseElementTypeUnsafe()->getAsRecordDecl()) + return hasNonTrivialToPrimitiveCopyCUnion(RD); + return false; +} + inline FunctionType::ExtInfo getFunctionExtInfo(const Type &t) { if (const auto *PT = t.getAs()) { if (const auto *FT = PT->getPointeeType()->getAs()) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 380db32ba4bf5..c68271b784da1 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -620,8 +620,23 @@ def warn_cstruct_memaccess : Warning< InGroup; def note_nontrivial_field : Note< "field is non-trivial to %select{copy|default-initialize}0">; -def err_nontrivial_primitive_type_in_union : Error< - "non-trivial C types are disallowed in union">; +def err_non_trivial_c_union_in_invalid_context : Error< + "cannot %select{" + "use type %1 for a function/method parameter|" + "use type %1 for function/method return|" + "default-initialize an object of type %1|" + "declare an automatic variable of type 
%1|" + "copy-initialize an object of type %1|" + "assign to a variable of type %1|" + "construct an automatic compound literal of type %1|" + "capture a variable of type %1|" + "cannot use volatile type %1 where it causes an lvalue-to-rvalue conversion" + "}3 " + "since it %select{contains|is}2 a union that is non-trivial to " + "%select{default-initialize|destruct|copy}0">; +def note_non_trivial_c_union : Note< + "%select{%2 has subobjects that are|%3 has type %2 that is}0 " + "non-trivial to %select{default-initialize|destruct|copy}1">; def warn_dyn_class_memaccess : Warning< "%select{destination for|source of|first operand of|second operand of}0 this " "%1 call is a pointer to %select{|class containing a }2dynamic class %3; " diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 8f66cda46b65c..af762f74d745c 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2114,6 +2114,48 @@ class Sema { bool SetParamDefaultArgument(ParmVarDecl *Param, Expr *DefaultArg, SourceLocation EqualLoc); + // Contexts where using non-trivial C union types can be disallowed. This is + // passed to err_non_trivial_c_union_in_invalid_context. + enum NonTrivialCUnionContext { + // Function parameter. + NTCUC_FunctionParam, + // Function return. + NTCUC_FunctionReturn, + // Default-initialized object. + NTCUC_DefaultInitializedObject, + // Variable with automatic storage duration. + NTCUC_AutoVar, + // Initializer expression that might copy from another object. + NTCUC_CopyInit, + // Assignment. + NTCUC_Assignment, + // Compound literal. + NTCUC_CompoundLiteral, + // Block capture. + NTCUC_BlockCapture, + // lvalue-to-rvalue conversion of volatile type. 
+ NTCUC_LValueToRValueVolatile, + }; + + /// Emit diagnostics if the initializer or any of its explicit or + /// implicitly-generated subexpressions require copying or + /// default-initializing a type that is or contains a C union type that is + /// non-trivial to copy or default-initialize. + void checkNonTrivialCUnionInInitializer(const Expr *Init, SourceLocation Loc); + + // These flags are passed to checkNonTrivialCUnion. + enum NonTrivialCUnionKind { + NTCUK_Init = 0x1, + NTCUK_Destruct = 0x2, + NTCUK_Copy = 0x4, + }; + + /// Emit diagnostics if a non-trivial C union type or a struct that contains + /// a non-trivial C union is used in an invalid context. + void checkNonTrivialCUnion(QualType QT, SourceLocation Loc, + NonTrivialCUnionContext UseContext, + unsigned NonTrivialKind); + void AddInitializerToDecl(Decl *dcl, Expr *init, bool DirectInit); void ActOnUninitializedDecl(Decl *dcl); void ActOnInitializerError(Decl *Dcl); diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 733ca232dd037..01e93c11aad87 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -2276,60 +2276,16 @@ bool QualType::isNonWeakInMRRWithObjCWeak(const ASTContext &Context) const { getObjCLifetime() != Qualifiers::OCL_Weak; } -namespace { -// Helper class that determines whether this is a type that is non-trivial to -// primitive copy or move, or is a struct type that has a field of such type. 
-template -struct IsNonTrivialCopyMoveVisitor - : CopiedTypeVisitor, IsMove, bool> { - using Super = - CopiedTypeVisitor, IsMove, bool>; - IsNonTrivialCopyMoveVisitor(const ASTContext &C) : Ctx(C) {} - void preVisit(QualType::PrimitiveCopyKind PCK, QualType QT) {} - - bool visitWithKind(QualType::PrimitiveCopyKind PCK, QualType QT) { - if (const auto *AT = this->Ctx.getAsArrayType(QT)) - return this->asDerived().visit(Ctx.getBaseElementType(AT)); - return Super::visitWithKind(PCK, QT); - } - - bool visitARCStrong(QualType QT) { return true; } - bool visitARCWeak(QualType QT) { return true; } - bool visitTrivial(QualType QT) { return false; } - // Volatile fields are considered trivial. - bool visitVolatileTrivial(QualType QT) { return false; } - - bool visitStruct(QualType QT) { - const RecordDecl *RD = QT->castAs()->getDecl(); - // We don't want to apply the C restriction in C++ because C++ - // (1) can apply the restriction at a finer grain by banning copying or - // destroying the union, and - // (2) allows users to override these restrictions by declaring explicit - // constructors/etc, which we're not proposing to add to C. 
- if (isa(RD)) - return false; - for (const FieldDecl *FD : RD->fields()) - if (this->asDerived().visit(FD->getType())) - return true; - return false; - } - - const ASTContext &Ctx; -}; +bool QualType::hasNonTrivialToPrimitiveDefaultInitializeCUnion(const RecordDecl *RD) { + return RD->hasNonTrivialToPrimitiveDefaultInitializeCUnion(); +} -} // namespace +bool QualType::hasNonTrivialToPrimitiveDestructCUnion(const RecordDecl *RD) { + return RD->hasNonTrivialToPrimitiveDestructCUnion(); +} -bool QualType::isNonTrivialPrimitiveCType(const ASTContext &Ctx) const { - if (isNonTrivialToPrimitiveDefaultInitialize()) - return true; - DestructionKind DK = isDestructedType(); - if (DK != DK_none && DK != DK_cxx_destructor) - return true; - if (IsNonTrivialCopyMoveVisitor(Ctx).visit(*this)) - return true; - if (IsNonTrivialCopyMoveVisitor(Ctx).visit(*this)) - return true; - return false; +bool QualType::hasNonTrivialToPrimitiveCopyCUnion(const RecordDecl *RD) { + return RD->hasNonTrivialToPrimitiveCopyCUnion(); } QualType::PrimitiveDefaultInitializeKind diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 3941643893af6..11fed28b52db0 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1658,12 +1658,24 @@ static void markEscapingByrefs(const FunctionScopeInfo &FSI, Sema &S) { // Set the EscapingByref flag of __block variables captured by // escaping blocks. for (const BlockDecl *BD : FSI.Blocks) { - if (BD->doesNotEscape()) - continue; for (const BlockDecl::Capture &BC : BD->captures()) { VarDecl *VD = BC.getVariable(); - if (VD->hasAttr()) + if (VD->hasAttr()) { + // Nothing to do if this is a __block variable captured by a + // non-escaping block. + if (BD->doesNotEscape()) + continue; VD->setEscapingByref(); + } + // Check whether the captured variable is or contains an object of + // non-trivial C union type. 
+ QualType CapType = BC.getVariable()->getType(); + if (CapType.hasNonTrivialToPrimitiveDestructCUnion() || + CapType.hasNonTrivialToPrimitiveCopyCUnion()) + S.checkNonTrivialCUnion(BC.getVariable()->getType(), + BD->getCaretLocation(), + Sema::NTCUC_BlockCapture, + Sema::NTCUK_Destruct|Sema::NTCUK_Copy); } } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index b2a0632c6e7f1..73407afb49f34 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -22,6 +22,7 @@ #include "clang/AST/DeclTemplate.h" #include "clang/AST/EvaluatedExprVisitor.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/NonTrivialTypeVisitor.h" #include "clang/AST/StmtCXX.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/PartialDiagnostic.h" @@ -6504,6 +6505,11 @@ NamedDecl *Sema::ActOnVariableDeclarator( if (D.isInvalidType()) NewVD->setInvalidDecl(); + + if (NewVD->getType().hasNonTrivialToPrimitiveDestructCUnion() && + NewVD->hasLocalStorage()) + checkNonTrivialCUnion(NewVD->getType(), NewVD->getLocation(), + NTCUC_AutoVar, NTCUK_Destruct); } else { bool Invalid = false; @@ -8924,6 +8930,12 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, << FunctionType::getNameForCallConv(CC); } } + + if (NewFD->getReturnType().hasNonTrivialToPrimitiveDestructCUnion() || + NewFD->getReturnType().hasNonTrivialToPrimitiveCopyCUnion()) + checkNonTrivialCUnion(NewFD->getReturnType(), + NewFD->getReturnTypeSourceRange().getBegin(), + NTCUC_FunctionReturn, NTCUK_Destruct|NTCUK_Copy); } else { // C++11 [replacement.functions]p3: // The program's definitions shall not be specified as inline. 
@@ -11070,6 +11082,263 @@ bool Sema::DeduceVariableDeclarationType(VarDecl *VDecl, bool DirectInit, return VDecl->isInvalidDecl(); } +void Sema::checkNonTrivialCUnionInInitializer(const Expr *Init, SourceLocation Loc) { + if (auto *CE = dyn_cast(Init)) + Init = CE->getSubExpr(); + + QualType InitType = Init->getType(); + assert((InitType.hasNonTrivialToPrimitiveDefaultInitializeCUnion() || + InitType.hasNonTrivialToPrimitiveCopyCUnion()) && + "shouldn't be called if type doesn't have a non-trivial C struct"); + if (auto *ILE = dyn_cast(Init)) { + for (auto I : ILE->inits()) { + if (!I->getType().hasNonTrivialToPrimitiveDefaultInitializeCUnion() && + !I->getType().hasNonTrivialToPrimitiveCopyCUnion()) + continue; + SourceLocation SL = I->getExprLoc(); + checkNonTrivialCUnionInInitializer(I, SL.isValid() ? SL : Loc); + } + return; + } + + if (isa(Init)) { + if (InitType.hasNonTrivialToPrimitiveDefaultInitializeCUnion()) + checkNonTrivialCUnion(InitType, Loc, NTCUC_DefaultInitializedObject, + NTCUK_Init); + } else { + // Assume all other explicit initializers involving copying some existing + // object. + // TODO: ignore any explicit initializers where we can guarantee + // copy-elision. 
+ if (InitType.hasNonTrivialToPrimitiveCopyCUnion()) + checkNonTrivialCUnion(InitType, Loc, NTCUC_CopyInit, NTCUK_Copy); + } +}; + +namespace { + +struct DiagNonTrivalCUnionDefaultInitializeVisitor + : DefaultInitializedTypeVisitor { + using Super = + DefaultInitializedTypeVisitor; + + DiagNonTrivalCUnionDefaultInitializeVisitor( + QualType OrigTy, SourceLocation OrigLoc, + Sema::NonTrivialCUnionContext UseContext, Sema &S) + : OrigTy(OrigTy), OrigLoc(OrigLoc), UseContext(UseContext), S(S) {} + + void visitWithKind(QualType::PrimitiveDefaultInitializeKind PDIK, QualType QT, + const FieldDecl *FD, bool InNonTrivialUnion) { + if (const auto *AT = S.Context.getAsArrayType(QT)) + return this->asDerived().visit(S.Context.getBaseElementType(AT), FD, + InNonTrivialUnion); + return Super::visitWithKind(PDIK, QT, FD, InNonTrivialUnion); + } + + void visitARCStrong(QualType QT, const FieldDecl *FD, + bool InNonTrivialUnion) { + if (InNonTrivialUnion) + S.Diag(FD->getLocation(), diag::note_non_trivial_c_union) + << 1 << 0 << QT << FD->getName(); + } + + void visitARCWeak(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) { + if (InNonTrivialUnion) + S.Diag(FD->getLocation(), diag::note_non_trivial_c_union) + << 1 << 0 << QT << FD->getName(); + } + + void visitStruct(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) { + const RecordDecl *RD = QT->castAs()->getDecl(); + if (RD->isUnion()) { + if (OrigLoc.isValid()) { + bool IsUnion = false; + if (auto *OrigRD = OrigTy->getAsRecordDecl()) + IsUnion = OrigRD->isUnion(); + S.Diag(OrigLoc, diag::err_non_trivial_c_union_in_invalid_context) + << 0 << OrigTy << IsUnion << UseContext; + // Reset OrigLoc so that this diagnostic is emitted only once. 
+ OrigLoc = SourceLocation(); + } + InNonTrivialUnion = true; + } + + if (InNonTrivialUnion) + S.Diag(RD->getLocation(), diag::note_non_trivial_c_union) + << 0 << 0 << QT.getUnqualifiedType() << ""; + + for (const FieldDecl *FD : RD->fields()) + asDerived().visit(FD->getType(), FD, InNonTrivialUnion); + } + + void visitTrivial(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {} + + // The non-trivial C union type or the struct/union type that contains a + // non-trivial C union. + QualType OrigTy; + SourceLocation OrigLoc; + Sema::NonTrivialCUnionContext UseContext; + Sema &S; +}; + +struct DiagNonTrivalCUnionDestructedTypeVisitor + : DestructedTypeVisitor { + using Super = + DestructedTypeVisitor; + + DiagNonTrivalCUnionDestructedTypeVisitor( + QualType OrigTy, SourceLocation OrigLoc, + Sema::NonTrivialCUnionContext UseContext, Sema &S) + : OrigTy(OrigTy), OrigLoc(OrigLoc), UseContext(UseContext), S(S) {} + + void visitWithKind(QualType::DestructionKind DK, QualType QT, + const FieldDecl *FD, bool InNonTrivialUnion) { + if (const auto *AT = S.Context.getAsArrayType(QT)) + return this->asDerived().visit(S.Context.getBaseElementType(AT), FD, + InNonTrivialUnion); + return Super::visitWithKind(DK, QT, FD, InNonTrivialUnion); + } + + void visitARCStrong(QualType QT, const FieldDecl *FD, + bool InNonTrivialUnion) { + if (InNonTrivialUnion) + S.Diag(FD->getLocation(), diag::note_non_trivial_c_union) + << 1 << 1 << QT << FD->getName(); + } + + void visitARCWeak(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) { + if (InNonTrivialUnion) + S.Diag(FD->getLocation(), diag::note_non_trivial_c_union) + << 1 << 1 << QT << FD->getName(); + } + + void visitStruct(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) { + const RecordDecl *RD = QT->castAs()->getDecl(); + if (RD->isUnion()) { + if (OrigLoc.isValid()) { + bool IsUnion = false; + if (auto *OrigRD = OrigTy->getAsRecordDecl()) + IsUnion = OrigRD->isUnion(); + S.Diag(OrigLoc, 
diag::err_non_trivial_c_union_in_invalid_context) + << 1 << OrigTy << IsUnion << UseContext; + // Reset OrigLoc so that this diagnostic is emitted only once. + OrigLoc = SourceLocation(); + } + InNonTrivialUnion = true; + } + + if (InNonTrivialUnion) + S.Diag(RD->getLocation(), diag::note_non_trivial_c_union) + << 0 << 1 << QT.getUnqualifiedType() << ""; + + for (const FieldDecl *FD : RD->fields()) + asDerived().visit(FD->getType(), FD, InNonTrivialUnion); + } + + void visitTrivial(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {} + void visitCXXDestructor(QualType QT, const FieldDecl *FD, + bool InNonTrivialUnion) {} + + // The non-trivial C union type or the struct/union type that contains a + // non-trivial C union. + QualType OrigTy; + SourceLocation OrigLoc; + Sema::NonTrivialCUnionContext UseContext; + Sema &S; +}; + +struct DiagNonTrivalCUnionCopyVisitor + : CopiedTypeVisitor { + using Super = CopiedTypeVisitor; + + DiagNonTrivalCUnionCopyVisitor(QualType OrigTy, SourceLocation OrigLoc, + Sema::NonTrivialCUnionContext UseContext, + Sema &S) + : OrigTy(OrigTy), OrigLoc(OrigLoc), UseContext(UseContext), S(S) {} + + void visitWithKind(QualType::PrimitiveCopyKind PCK, QualType QT, + const FieldDecl *FD, bool InNonTrivialUnion) { + if (const auto *AT = S.Context.getAsArrayType(QT)) + return this->asDerived().visit(S.Context.getBaseElementType(AT), FD, + InNonTrivialUnion); + return Super::visitWithKind(PCK, QT, FD, InNonTrivialUnion); + } + + void visitARCStrong(QualType QT, const FieldDecl *FD, + bool InNonTrivialUnion) { + if (InNonTrivialUnion) + S.Diag(FD->getLocation(), diag::note_non_trivial_c_union) + << 1 << 2 << QT << FD->getName(); + } + + void visitARCWeak(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) { + if (InNonTrivialUnion) + S.Diag(FD->getLocation(), diag::note_non_trivial_c_union) + << 1 << 2 << QT << FD->getName(); + } + + void visitStruct(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) { + const RecordDecl *RD = 
QT->castAs()->getDecl(); + if (RD->isUnion()) { + if (OrigLoc.isValid()) { + bool IsUnion = false; + if (auto *OrigRD = OrigTy->getAsRecordDecl()) + IsUnion = OrigRD->isUnion(); + S.Diag(OrigLoc, diag::err_non_trivial_c_union_in_invalid_context) + << 2 << OrigTy << IsUnion << UseContext; + // Reset OrigLoc so that this diagnostic is emitted only once. + OrigLoc = SourceLocation(); + } + InNonTrivialUnion = true; + } + + if (InNonTrivialUnion) + S.Diag(RD->getLocation(), diag::note_non_trivial_c_union) + << 0 << 2 << QT.getUnqualifiedType() << ""; + + for (const FieldDecl *FD : RD->fields()) + asDerived().visit(FD->getType(), FD, InNonTrivialUnion); + } + + void preVisit(QualType::PrimitiveCopyKind PCK, QualType QT, + const FieldDecl *FD, bool InNonTrivialUnion) {} + void visitTrivial(QualType QT, const FieldDecl *FD, bool InNonTrivialUnion) {} + void visitVolatileTrivial(QualType QT, const FieldDecl *FD, + bool InNonTrivialUnion) {} + + // The non-trivial C union type or the struct/union type that contains a + // non-trivial C union. 
+ QualType OrigTy; + SourceLocation OrigLoc; + Sema::NonTrivialCUnionContext UseContext; + Sema &S; +}; + +} // namespace + +void Sema::checkNonTrivialCUnion(QualType QT, SourceLocation Loc, + NonTrivialCUnionContext UseContext, + unsigned NonTrivialKind) { + assert((QT.hasNonTrivialToPrimitiveDefaultInitializeCUnion() || + QT.hasNonTrivialToPrimitiveDestructCUnion() || + QT.hasNonTrivialToPrimitiveCopyCUnion()) && + "shouldn't be called if type doesn't have a non-trivial C union"); + + if ((NonTrivialKind & NTCUK_Init) && + QT.hasNonTrivialToPrimitiveDefaultInitializeCUnion()) + DiagNonTrivalCUnionDefaultInitializeVisitor(QT, Loc, UseContext, *this) + .visit(QT, nullptr, false); + if ((NonTrivialKind & NTCUK_Destruct) && + QT.hasNonTrivialToPrimitiveDestructCUnion()) + DiagNonTrivalCUnionDestructedTypeVisitor(QT, Loc, UseContext, *this) + .visit(QT, nullptr, false); + if ((NonTrivialKind & NTCUK_Copy) && QT.hasNonTrivialToPrimitiveCopyCUnion()) + DiagNonTrivalCUnionCopyVisitor(QT, Loc, UseContext, *this) + .visit(QT, nullptr, false); +} + /// AddInitializerToDecl - Adds the initializer Init to the /// declaration dcl. If DirectInit is true, this is C++ direct /// initialization rather than copy initialization. 
@@ -11475,6 +11744,12 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) { CheckForConstantInitializer(Init, DclT); } + QualType InitType = Init->getType(); + if (!InitType.isNull() && + (InitType.hasNonTrivialToPrimitiveDefaultInitializeCUnion() || + InitType.hasNonTrivialToPrimitiveCopyCUnion())) + checkNonTrivialCUnionInInitializer(Init, Init->getExprLoc()); + // We will represent direct-initialization similarly to copy-initialization: // int x(1); -as-> int x = 1; // ClassType x(a,b,c); -as-> ClassType x = ClassType(a,b,c); @@ -11599,7 +11874,14 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) { return; } - switch (Var->isThisDeclarationADefinition()) { + VarDecl::DefinitionKind DefKind = Var->isThisDeclarationADefinition(); + if (!Var->isInvalidDecl() && DefKind != VarDecl::DeclarationOnly && + Var->getType().hasNonTrivialToPrimitiveDefaultInitializeCUnion()) + checkNonTrivialCUnion(Var->getType(), Var->getLocation(), + NTCUC_DefaultInitializedObject, NTCUK_Init); + + + switch (DefKind) { case VarDecl::Definition: if (!Var->isStaticDataMember() || !Var->getAnyInitializer()) break; @@ -12692,6 +12974,11 @@ ParmVarDecl *Sema::CheckParameter(DeclContext *DC, SourceLocation StartLoc, Context.getAdjustedParameterType(T), TSInfo, SC, nullptr); + if (New->getType().hasNonTrivialToPrimitiveDestructCUnion() || + New->getType().hasNonTrivialToPrimitiveCopyCUnion()) + checkNonTrivialCUnion(New->getType(), New->getLocation(), + NTCUC_FunctionParam, NTCUK_Destruct|NTCUK_Copy); + // Parameters can not be abstract class types. // For record types, this is done by the AbstractClassUsageDiagnoser once // the class has been completely parsed. @@ -15938,7 +16225,6 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl, // Verify that all the fields are okay. 
SmallVector RecFields; - bool ObjCFieldLifetimeErrReported = false; for (ArrayRef::iterator i = Fields.begin(), end = Fields.end(); i != end; ++i) { FieldDecl *FD = cast(*i); @@ -16077,38 +16363,12 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl, Record->setHasObjectMember(true); if (Record && FDTTy->getDecl()->hasVolatileMember()) Record->setHasVolatileMember(true); - if (Record && Record->isUnion() && - FD->getType().isNonTrivialPrimitiveCType(Context)) - Diag(FD->getLocation(), - diag::err_nontrivial_primitive_type_in_union); } else if (FDTy->isObjCObjectType()) { /// A field cannot be an Objective-c object Diag(FD->getLocation(), diag::err_statically_allocated_object) << FixItHint::CreateInsertion(FD->getLocation(), "*"); QualType T = Context.getObjCObjectPointerType(FD->getType()); FD->setType(T); - } else if (getLangOpts().allowsNonTrivialObjCLifetimeQualifiers() && - Record && !ObjCFieldLifetimeErrReported && Record->isUnion() && - !getLangOpts().CPlusPlus) { - // It's an error in ARC or Weak if a field has lifetime. - // We don't want to report this in a system header, though, - // so we just make the field unavailable. - // FIXME: that's really not sufficient; we need to make the type - // itself invalid to, say, initialize or copy. 
- QualType T = FD->getType(); - if (T.hasNonTrivialObjCLifetime()) { - SourceLocation loc = FD->getLocation(); - if (getSourceManager().isInSystemHeader(loc)) { - if (!FD->hasAttr()) { - FD->addAttr(UnavailableAttr::CreateImplicit(Context, "", - UnavailableAttr::IR_ARCFieldWithOwnership, loc)); - } - } else { - Diag(FD->getLocation(), diag::err_arc_objc_object_in_tag) - << T->isBlockPointerType() << Record->getTagKind(); - } - ObjCFieldLifetimeErrReported = true; - } } else if (getLangOpts().ObjC && getLangOpts().getGC() != LangOptions::NonGC && Record && !Record->hasObjectMember()) { @@ -16128,14 +16388,23 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl, if (Record && !getLangOpts().CPlusPlus && !FD->hasAttr()) { QualType FT = FD->getType(); - if (FT.isNonTrivialToPrimitiveDefaultInitialize()) + if (FT.isNonTrivialToPrimitiveDefaultInitialize()) { Record->setNonTrivialToPrimitiveDefaultInitialize(true); + if (FT.hasNonTrivialToPrimitiveDefaultInitializeCUnion() || + Record->isUnion()) + Record->setHasNonTrivialToPrimitiveDefaultInitializeCUnion(true); + } QualType::PrimitiveCopyKind PCK = FT.isNonTrivialToPrimitiveCopy(); - if (PCK != QualType::PCK_Trivial && PCK != QualType::PCK_VolatileTrivial) + if (PCK != QualType::PCK_Trivial && PCK != QualType::PCK_VolatileTrivial) { Record->setNonTrivialToPrimitiveCopy(true); + if (FT.hasNonTrivialToPrimitiveCopyCUnion() || Record->isUnion()) + Record->setHasNonTrivialToPrimitiveCopyCUnion(true); + } if (FT.isDestructedType()) { Record->setNonTrivialToPrimitiveDestroy(true); Record->setParamDestroyedInCallee(true); + if (FT.hasNonTrivialToPrimitiveDestructCUnion() || Record->isUnion()) + Record->setHasNonTrivialToPrimitiveDestructCUnion(true); } if (const auto *RT = FT->getAs()) { diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 746d3e7e11901..1e49a363ab330 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -6066,7 +6066,7 @@ 
Sema::BuildCompoundLiteralExpr(SourceLocation LParenLoc, TypeSourceInfo *TInfo, ILE->setInit(i, ConstantExpr::Create(Context, Init)); } - Expr *E = new (Context) CompoundLiteralExpr(LParenLoc, TInfo, literalType, + auto *E = new (Context) CompoundLiteralExpr(LParenLoc, TInfo, literalType, VK, LiteralExpr, isFileScope); if (isFileScope) { if (!LiteralExpr->isTypeDependent() && @@ -6084,6 +6084,19 @@ Sema::BuildCompoundLiteralExpr(SourceLocation LParenLoc, TypeSourceInfo *TInfo, return ExprError(); } + // Compound literals that have automatic storage duration are destroyed at + // the end of the scope. Emit diagnostics if it is or contains a C union type + // that is non-trivial to destruct. + if (!isFileScope) + if (E->getType().hasNonTrivialToPrimitiveDestructCUnion()) + checkNonTrivialCUnion(E->getType(), E->getExprLoc(), + NTCUC_CompoundLiteral, NTCUK_Destruct); + + if (E->getType().hasNonTrivialToPrimitiveDefaultInitializeCUnion() || + E->getType().hasNonTrivialToPrimitiveCopyCUnion()) + checkNonTrivialCUnionInInitializer(E->getInitializer(), + E->getInitializer()->getExprLoc()); + return MaybeBindToTemporary(E); } @@ -12533,6 +12546,10 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc, if (auto *VD = dyn_cast(DRE->getDecl())) if (VD->hasLocalStorage() && getCurScope()->isDeclScope(VD)) BE->getBlockDecl()->setCanAvoidCopyToHeap(); + + if (LHS.get()->getType().hasNonTrivialToPrimitiveCopyCUnion()) + checkNonTrivialCUnion(LHS.get()->getType(), LHS.get()->getExprLoc(), + NTCUC_Assignment, NTCUK_Copy); } RecordModifiableNonNullParam(*this, LHS.get()); break; @@ -13945,6 +13962,11 @@ ExprResult Sema::ActOnBlockStmtExpr(SourceLocation CaretLoc, !BD->isDependentContext()) computeNRVO(Body, BSI); + if (RetTy.hasNonTrivialToPrimitiveDestructCUnion() || + RetTy.hasNonTrivialToPrimitiveCopyCUnion()) + checkNonTrivialCUnion(RetTy, BD->getCaretLocation(), NTCUC_FunctionReturn, + NTCUK_Destruct|NTCUK_Copy); + PopDeclContext(); // Pop the block scope now but keep it 
alive to the end of this function. @@ -16196,6 +16218,15 @@ static ExprResult rebuildPotentialResultsAsNonOdrUsed(Sema &S, Expr *E, } ExprResult Sema::CheckLValueToRValueConversionOperand(Expr *E) { + // Check whether the operand is or contains an object of non-trivial C union + // type. + if (E->getType().isVolatileQualified() && + (E->getType().hasNonTrivialToPrimitiveDestructCUnion() || + E->getType().hasNonTrivialToPrimitiveCopyCUnion())) + checkNonTrivialCUnion(E->getType(), E->getExprLoc(), + Sema::NTCUC_LValueToRValueVolatile, + NTCUK_Destruct|NTCUK_Copy); + // C++2a [basic.def.odr]p4: // [...] an expression of non-volatile-qualified non-class type to which // the lvalue-to-rvalue conversion is applied [...] diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 88b544068a802..514cbd90d3b85 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -2456,6 +2456,11 @@ bool Sema::CheckFunctionReturnType(QualType T, SourceLocation Loc) { return true; } + if (T.hasNonTrivialToPrimitiveDestructCUnion() || + T.hasNonTrivialToPrimitiveCopyCUnion()) + checkNonTrivialCUnion(T, Loc, NTCUC_FunctionReturn, + NTCUK_Destruct|NTCUK_Copy); + return false; } diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index b40e3cf892650..3cac82ad421c0 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -794,6 +794,9 @@ ASTDeclReader::VisitRecordDeclImpl(RecordDecl *RD) { RD->setNonTrivialToPrimitiveDefaultInitialize(Record.readInt()); RD->setNonTrivialToPrimitiveCopy(Record.readInt()); RD->setNonTrivialToPrimitiveDestroy(Record.readInt()); + RD->setHasNonTrivialToPrimitiveDefaultInitializeCUnion(Record.readInt()); + RD->setHasNonTrivialToPrimitiveDestructCUnion(Record.readInt()); + RD->setHasNonTrivialToPrimitiveCopyCUnion(Record.readInt()); RD->setParamDestroyedInCallee(Record.readInt()); 
RD->setArgPassingRestrictions((RecordDecl::ArgPassingKind)Record.readInt()); return Redecl; diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 3d9dd7131b1de..b71315505de90 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -476,6 +476,9 @@ void ASTDeclWriter::VisitRecordDecl(RecordDecl *D) { Record.push_back(D->isNonTrivialToPrimitiveDefaultInitialize()); Record.push_back(D->isNonTrivialToPrimitiveCopy()); Record.push_back(D->isNonTrivialToPrimitiveDestroy()); + Record.push_back(D->hasNonTrivialToPrimitiveDefaultInitializeCUnion()); + Record.push_back(D->hasNonTrivialToPrimitiveDestructCUnion()); + Record.push_back(D->hasNonTrivialToPrimitiveCopyCUnion()); Record.push_back(D->isParamDestroyedInCallee()); Record.push_back(D->getArgPassingRestrictions()); @@ -1999,6 +2002,12 @@ void ASTWriter::WriteDeclAbbrevs() { Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isNonTrivialToPrimitiveDestroy Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); + // hasNonTrivialToPrimitiveDefaultInitializeCUnion + Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); + // hasNonTrivialToPrimitiveDestructCUnion + Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); + // hasNonTrivialToPrimitiveCopyCUnion + Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isParamDestroyedInCallee Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // getArgPassingRestrictions diff --git a/clang/test/CodeGenObjC/Inputs/strong_in_union.h b/clang/test/CodeGenObjC/Inputs/strong_in_union.h deleted file mode 100644 index abe4549055c60..0000000000000 --- a/clang/test/CodeGenObjC/Inputs/strong_in_union.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef STRONG_IN_UNION_H -#define STRONG_IN_UNION_H -#pragma clang system_header - -typedef union { - id f0; - int *f1; -} U; - -#endif // STRONG_IN_UNION_H diff --git a/clang/test/CodeGenObjC/strong-in-c-struct.m 
b/clang/test/CodeGenObjC/strong-in-c-struct.m index 19cc1037c4cad..8eeee4af0d30a 100644 --- a/clang/test/CodeGenObjC/strong-in-c-struct.m +++ b/clang/test/CodeGenObjC/strong-in-c-struct.m @@ -1,11 +1,10 @@ -// RUN: %clang_cc1 -triple arm64-apple-ios11 -fobjc-arc -fblocks -fobjc-runtime=ios-11.0 -emit-llvm -o - -DUSESTRUCT -I %S/Inputs %s | FileCheck %s +// RUN: %clang_cc1 -triple arm64-apple-ios11 -fobjc-arc -fblocks -fobjc-runtime=ios-11.0 -emit-llvm -o - -DUSESTRUCT %s | FileCheck %s -// RUN: %clang_cc1 -triple arm64-apple-ios11 -fobjc-arc -fblocks -fobjc-runtime=ios-11.0 -emit-pch -I %S/Inputs -o %t %s -// RUN: %clang_cc1 -triple arm64-apple-ios11 -fobjc-arc -fblocks -fobjc-runtime=ios-11.0 -include-pch %t -emit-llvm -o - -DUSESTRUCT -I %S/Inputs %s | FileCheck %s +// RUN: %clang_cc1 -triple arm64-apple-ios11 -fobjc-arc -fblocks -fobjc-runtime=ios-11.0 -emit-pch -o %t %s +// RUN: %clang_cc1 -triple arm64-apple-ios11 -fobjc-arc -fblocks -fobjc-runtime=ios-11.0 -include-pch %t -emit-llvm -o - -DUSESTRUCT %s | FileCheck %s #ifndef HEADER #define HEADER -#include "strong_in_union.h" typedef void (^BlockTy)(void); @@ -695,14 +694,6 @@ void test_copy_constructor_Bitfield1(Bitfield1 *a) { Bitfield1 t = *a; } -// CHECK: define void @test_strong_in_union() -// CHECK: alloca %{{.*}} -// CHECK-NEXT: ret void - -void test_strong_in_union() { - U t; -} - // CHECK: define void @test_copy_constructor_VolatileArray( // CHECK: call void @__copy_constructor_8_8_s0_AB8s4n16_tv64w32_AE( diff --git a/clang/test/PCH/non-trivial-c-union.m b/clang/test/PCH/non-trivial-c-union.m new file mode 100644 index 0000000000000..abd335497db26 --- /dev/null +++ b/clang/test/PCH/non-trivial-c-union.m @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -fblocks -fobjc-arc -fobjc-runtime-has-weak -emit-pch -o %t.pch %s +// RUN: %clang_cc1 -fblocks -fobjc-arc -fobjc-runtime-has-weak -include-pch %t.pch -verify %s + +#ifndef HEADER +#define HEADER + +typedef union { + id f0; +} U0; + +#else + +// expected-note@-6 
{{'U0' has subobjects that are non-trivial to destruct}} +// expected-note@-7 {{'U0' has subobjects that are non-trivial to copy}} +// expected-note@-8 {{'U0' has subobjects that are non-trivial to default-initialize}} +// expected-note@-8 {{f0 has type '__strong id' that is non-trivial to destruct}} +// expected-note@-9 {{f0 has type '__strong id' that is non-trivial to copy}} +// expected-note@-10 {{f0 has type '__strong id' that is non-trivial to default-initialize}} + +U0 foo0(void); // expected-error {{cannot use type 'U0' for function/method return since it is a union that is non-trivial to destruct}} expected-error {{cannot use type 'U0' for function/method return since it is a union that is non-trivial to copy}} + +U0 g0; // expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}} + +#endif diff --git a/clang/test/SemaObjC/arc-decls.m b/clang/test/SemaObjC/arc-decls.m index 0abd45dac33e2..28c3de996996d 100644 --- a/clang/test/SemaObjC/arc-decls.m +++ b/clang/test/SemaObjC/arc-decls.m @@ -8,11 +8,7 @@ }; union u { - id u; // expected-error {{ARC forbids Objective-C objects in union}} -}; - -union u_nontrivial_c { - struct A a; // expected-error {{non-trivial C types are disallowed in union}} + id u; }; // Volatile fields are fine. 
diff --git a/clang/test/SemaObjC/non-trivial-c-union.m b/clang/test/SemaObjC/non-trivial-c-union.m new file mode 100644 index 0000000000000..7bd82775451c8 --- /dev/null +++ b/clang/test/SemaObjC/non-trivial-c-union.m @@ -0,0 +1,82 @@ +// RUN: %clang_cc1 -fsyntax-only -fblocks -fobjc-arc -fobjc-runtime-has-weak -verify %s + +typedef union { // expected-note 12 {{'U0' has subobjects that are non-trivial to default-initialize}} expected-note 36 {{'U0' has subobjects that are non-trivial to destruct}} expected-note 28 {{'U0' has subobjects that are non-trivial to copy}} + id f0; // expected-note 12 {{f0 has type '__strong id' that is non-trivial to default-initialize}} expected-note 36 {{f0 has type '__strong id' that is non-trivial to destruct}} expected-note 28 {{f0 has type '__strong id' that is non-trivial to copy}} + __weak id f1; // expected-note 12 {{f1 has type '__weak id' that is non-trivial to default-initialize}} expected-note 36 {{f1 has type '__weak id' that is non-trivial to destruct}} expected-note 28 {{f1 has type '__weak id' that is non-trivial to copy}} +} U0; + +typedef struct { + U0 f0; + id f1; +} S0; + +id g0; +U0 ug0; // expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}} +U0 ug1 = { .f0 = 0 }; +S0 sg0; // expected-error {{cannot default-initialize an object of type 'S0' since it contains a union that is non-trivial to default-initialize}} +S0 sg1 = { .f0 = {0}, .f1 = 0 }; +S0 sg2 = { .f1 = 0 }; // expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}} + +U0 foo0(U0); // expected-error {{cannot use type 'U0' for a function/method parameter since it is a union that is non-trivial to destruct}} expected-error {{cannot use type 'U0' for a function/method parameter since it is a union that is non-trivial to copy}} expected-error {{cannot use type 'U0' for function/method return since it is a union 
that is non-trivial to destruct}} expected-error {{cannot use type 'U0' for function/method return since it is a union that is non-trivial to copy}} +S0 foo1(S0); // expected-error {{cannot use type 'S0' for a function/method parameter since it contains a union that is non-trivial to destruct}} expected-error {{cannot use type 'S0' for a function/method parameter since it contains a union that is non-trivial to copy}} expected-error {{cannot use type 'S0' for function/method return since it contains a union that is non-trivial to destruct}} expected-error {{cannot use type 'S0' for function/method return since it contains a union that is non-trivial to copy}} + +@interface C +-(U0)m0:(U0)arg; // expected-error {{cannot use type 'U0' for a function/method parameter since it is a union that is non-trivial to destruct}} expected-error {{cannot use type 'U0' for a function/method parameter since it is a union that is non-trivial to copy}} expected-error {{cannot use type 'U0' for function/method return since it is a union that is non-trivial to destruct}} expected-error {{cannot use type 'U0' for function/method return since it is a union that is non-trivial to copy}} +-(S0)m1:(S0)arg; // expected-error {{cannot use type 'S0' for a function/method parameter since it contains a union that is non-trivial to destruct}} expected-error {{cannot use type 'S0' for a function/method parameter since it contains a union that is non-trivial to copy}} expected-error {{cannot use type 'S0' for function/method return since it contains a union that is non-trivial to destruct}} expected-error {{cannot use type 'S0' for function/method return since it contains a union that is non-trivial to copy}} +@end + +void testBlockFunction(void) { + (void)^(U0 a){ return ug0; }; // expected-error {{cannot use type 'U0' for a function/method parameter since it is a union that is non-trivial to destruct}} expected-error {{cannot use type 'U0' for a function/method parameter since it is a union that 
is non-trivial to copy}} expected-error {{cannot use type 'U0' for function/method return since it is a union that is non-trivial to destruct}} expected-error {{cannot use type 'U0' for function/method return since it is a union that is non-trivial to copy}} + (void)^(S0 a){ return sg0; }; // expected-error {{cannot use type 'S0' for a function/method parameter since it contains a union that is non-trivial to destruct}} expected-error {{cannot use type 'S0' for a function/method parameter since it contains a union that is non-trivial to copy}} expected-error {{cannot use type 'S0' for function/method return since it contains a union that is non-trivial to destruct}} expected-error {{cannot use type 'S0' for function/method return since it contains a union that is non-trivial to copy}} +} +void testAutoVar(void) { + U0 u0; // expected-error {{cannot declare an automatic variable of type 'U0' since it is a union that is non-trivial to destruct}} expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}} + U0 u1 = ug0; // expected-error {{cannot declare an automatic variable of type 'U0' since it is a union that is non-trivial to destruct}} expected-error {{cannot copy-initialize an object of type 'U0' since it is a union that is non-trivial to copy}} + U0 u2 = { g0 }; // expected-error {{cannot declare an automatic variable of type 'U0' since it is a union that is non-trivial to destruct}} + U0 u3 = { .f1 = g0 }; // expected-error {{cannot declare an automatic variable of type 'U0' since it is a union that is non-trivial to destruct}} + S0 s0; // expected-error {{cannot declare an automatic variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot default-initialize an object of type 'S0' since it contains a union that is non-trivial to default-initialize}} + S0 s1 = sg0; // expected-error {{declare an automatic variable of type 'S0' since it 
contains a union that is non-trivial to destruct}} expected-error {{cannot copy-initialize an object of type 'S0' since it contains a union that is non-trivial to copy}} + S0 s2 = { ug0 }; // expected-error {{cannot declare an automatic variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot copy-initialize an object of type 'U0' since it is a union that is non-trivial to copy}} + S0 s3 = { .f0 = ug0 }; // expected-error {{cannot declare an automatic variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot copy-initialize an object of type 'U0' since it is a union that is non-trivial to copy}} + S0 s4 = { .f1 = g0 }; // expected-error {{cannot declare an automatic variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}} +} + +void testAssignment(void) { + ug0 = ug1; // expected-error {{cannot assign to a variable of type 'U0' since it is a union that is non-trivial to copy}} + sg0 = sg1; // expected-error {{cannot assign to a variable of type 'S0' since it contains a union that is non-trivial to copy}} +} + +U0 ug2 = (U0){ .f1 = 0 }; // expected-error {{cannot copy-initialize an object of type 'U0' since it is a union that is non-trivial to copy}} +S0 sg3 = (S0){ .f0 = {0}, .f1 = 0 }; // expected-error {{cannot copy-initialize an object of type 'S0' since it contains a union that is non-trivial to copy}} +S0 *sg4 = &(S0){ .f1 = 0 }; // expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}} + +void testCompoundLiteral(void) { + const U0 *t0 = &(U0){ .f0 = g0 }; // expected-error {{cannot construct an automatic compound literal of type 'U0' since it is a union that is non-trivial to destruct}} + const U0 *t1 = &(U0){ .f1 = g0 }; // 
expected-error {{cannot construct an automatic compound literal of type 'U0' since it is a union that is non-trivial to destruct}} + const S0 *t2 = &(S0){ .f0 = ug0 }; // expected-error {{cannot construct an automatic compound literal of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot copy-initialize an object of type 'U0' since it is a union that is non-trivial to copy}} + const S0 *t3 = &(S0){ .f1 = g0 }; // expected-error {{cannot construct an automatic compound literal of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}} +} + +typedef void (^BlockTy)(void); +void escapingFunc(BlockTy); +void noescapingFunc(__attribute__((noescape)) BlockTy); + +void testBlockCapture(void) { + U0 t0; // expected-error {{cannot declare an automatic variable of type 'U0' since it is a union that is non-trivial to destruct}} expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}} + S0 t1; // expected-error {{cannot declare an automatic variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot default-initialize an object of type 'S0' since it contains a union that is non-trivial to default-initialize}} + __block U0 t2; // expected-error {{cannot declare an automatic variable of type 'U0' since it is a union that is non-trivial to destruct}} expected-error {{cannot default-initialize an object of type 'U0' since it is a union that is non-trivial to default-initialize}} + __block S0 t3; // expected-error {{cannot declare an automatic variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot default-initialize an object of type 'S0' since it contains a union that is non-trivial to default-initialize}} + + escapingFunc(^{ g0 = 
t0.f0; }); // expected-error {{cannot capture a variable of type 'U0' since it is a union that is non-trivial to destruct}} expected-error {{cannot capture a variable of type 'U0' since it is a union that is non-trivial to copy}} + escapingFunc(^{ g0 = t1.f0.f0; }); // expected-error {{cannot capture a variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot capture a variable of type 'S0' since it contains a union that is non-trivial to copy}} + escapingFunc(^{ g0 = t2.f0; }); // expected-error {{cannot capture a variable of type 'U0' since it is a union that is non-trivial to destruct}} expected-error {{cannot capture a variable of type 'U0' since it is a union that is non-trivial to copy}} + escapingFunc(^{ g0 = t3.f0.f0; }); // expected-error {{cannot capture a variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot capture a variable of type 'S0' since it contains a union that is non-trivial to copy}} + noescapingFunc(^{ g0 = t0.f0; }); // expected-error {{cannot capture a variable of type 'U0' since it is a union that is non-trivial to destruct}} expected-error {{cannot capture a variable of type 'U0' since it is a union that is non-trivial to copy}} + noescapingFunc(^{ g0 = t1.f0.f0; }); // expected-error {{cannot capture a variable of type 'S0' since it contains a union that is non-trivial to destruct}} expected-error {{cannot capture a variable of type 'S0' since it contains a union that is non-trivial to copy}} + noescapingFunc(^{ g0 = t2.f0; }); + noescapingFunc(^{ g0 = t3.f0.f0; }); +} + +void testVolatileLValueToRValue(volatile U0 *a) { + (void)*a; // expected-error {{cannot use volatile type 'volatile U0' where it causes an lvalue-to-rvalue conversion since it is a union that is non-trivial to destruct}} // expected-error {{cannot use volatile type 'volatile U0' where it causes an lvalue-to-rvalue conversion since it is a union that is non-trivial to 
copy}} +} From 087b044c4915717a51f8c0adb18eca5ae7a4f994 Mon Sep 17 00:00:00 2001 From: Nathan Ridge Date: Sat, 13 Jul 2019 03:24:48 +0000 Subject: [PATCH 023/451] [clangd] Implement typeHierarchy/resolve for subtypes Summary: This allows the client to resolve subtypes one level at a time. For supertypes, this is not necessary, because we eagerly compute supertypes and return all levels. Reviewers: sammccall Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D64308 llvm-svn: 365986 --- clang-tools-extra/clangd/ClangdLSPServer.cpp | 8 +++ clang-tools-extra/clangd/ClangdLSPServer.h | 2 + clang-tools-extra/clangd/ClangdServer.cpp | 7 +++ clang-tools-extra/clangd/ClangdServer.h | 5 ++ clang-tools-extra/clangd/Protocol.cpp | 17 +++-- clang-tools-extra/clangd/Protocol.h | 23 +++++-- clang-tools-extra/clangd/XRefs.cpp | 25 +++++++- clang-tools-extra/clangd/XRefs.h | 4 ++ .../clangd/test/type-hierarchy.test | 63 ++++++++++++++++++- .../clangd/unittests/TypeHierarchyTests.cpp | 46 +++++++++++++- 10 files changed, 187 insertions(+), 13 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 2b25058c9672c..5f8b307f721a6 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -926,6 +926,13 @@ void ClangdLSPServer::onTypeHierarchy( Params.resolve, Params.direction, std::move(Reply)); } +void ClangdLSPServer::onResolveTypeHierarchy( + const ResolveTypeHierarchyItemParams &Params, + Callback> Reply) { + Server->resolveTypeHierarchy(Params.item, Params.resolve, Params.direction, + std::move(Reply)); +} + void ClangdLSPServer::applyConfiguration( const ConfigurationSettings &Settings) { // Per-file update to the compilation database. 
@@ -1021,6 +1028,7 @@ ClangdLSPServer::ClangdLSPServer( MsgHandler->bind("workspace/didChangeConfiguration", &ClangdLSPServer::onChangeConfiguration); MsgHandler->bind("textDocument/symbolInfo", &ClangdLSPServer::onSymbolInfo); MsgHandler->bind("textDocument/typeHierarchy", &ClangdLSPServer::onTypeHierarchy); + MsgHandler->bind("typeHierarchy/resolve", &ClangdLSPServer::onResolveTypeHierarchy); // clang-format on } diff --git a/clang-tools-extra/clangd/ClangdLSPServer.h b/clang-tools-extra/clangd/ClangdLSPServer.h index 43e05b3dc8e55..1c37750a18526 100644 --- a/clang-tools-extra/clangd/ClangdLSPServer.h +++ b/clang-tools-extra/clangd/ClangdLSPServer.h @@ -100,6 +100,8 @@ class ClangdLSPServer : private DiagnosticsConsumer { Callback>); void onTypeHierarchy(const TypeHierarchyParams &, Callback>); + void onResolveTypeHierarchy(const ResolveTypeHierarchyItemParams &, + Callback>); void onChangeConfiguration(const DidChangeConfigurationParams &); void onSymbolInfo(const TextDocumentPositionParams &, Callback>); diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index 451dac6f0df4b..10949ef001c02 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -528,6 +528,13 @@ void ClangdServer::typeHierarchy(PathRef File, Position Pos, int Resolve, WorkScheduler.runWithAST("Type Hierarchy", File, Bind(Action, std::move(CB))); } +void ClangdServer::resolveTypeHierarchy( + TypeHierarchyItem Item, int Resolve, TypeHierarchyDirection Direction, + Callback> CB) { + clangd::resolveTypeHierarchy(Item, Resolve, Direction, Index); + CB(Item); +} + void ClangdServer::onFileEvent(const DidChangeWatchedFilesParams &Params) { // FIXME: Do nothing for now. This will be used for indexing and potentially // invalidating other caches. 
diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h index ba39806eb72c4..fa6783b1f1c13 100644 --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -210,6 +210,11 @@ class ClangdServer { TypeHierarchyDirection Direction, Callback> CB); + /// Resolve type hierarchy item in the given direction. + void resolveTypeHierarchy(TypeHierarchyItem Item, int Resolve, + TypeHierarchyDirection Direction, + Callback> CB); + /// Retrieve the top symbols from the workspace matching a query. void workspaceSymbols(StringRef Query, int Limit, Callback> CB); diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp index 7c70afb567df9..600896b1eeee6 100644 --- a/clang-tools-extra/clangd/Protocol.cpp +++ b/clang-tools-extra/clangd/Protocol.cpp @@ -422,8 +422,7 @@ bool fromJSON(const llvm::json::Value &Params, bool fromJSON(const llvm::json::Value &Params, DocumentRangeFormattingParams &R) { llvm::json::ObjectMapper O(Params); - return O && O.map("textDocument", R.textDocument) && - O.map("range", R.range); + return O && O.map("textDocument", R.textDocument) && O.map("range", R.range); } bool fromJSON(const llvm::json::Value &Params, @@ -445,8 +444,8 @@ bool fromJSON(const llvm::json::Value &Params, DocumentSymbolParams &R) { llvm::json::Value toJSON(const DiagnosticRelatedInformation &DRI) { return llvm::json::Object{ - {"location", DRI.location}, - {"message", DRI.message}, + {"location", DRI.location}, + {"message", DRI.message}, }; } @@ -978,6 +977,8 @@ llvm::json::Value toJSON(const TypeHierarchyItem &I) { Result["parents"] = I.parents; if (I.children) Result["children"] = I.children; + if (I.data) + Result["data"] = I.data; return std::move(Result); } @@ -996,10 +997,18 @@ bool fromJSON(const llvm::json::Value &Params, TypeHierarchyItem &I) { O.map("deprecated", I.deprecated); O.map("parents", I.parents); O.map("children", I.children); + O.map("data", I.data); 
return true; } +bool fromJSON(const llvm::json::Value &Params, + ResolveTypeHierarchyItemParams &P) { + llvm::json::ObjectMapper O(Params); + return O && O.map("item", P.item) && O.map("resolve", P.resolve) && + O.map("direction", P.direction); +} + bool fromJSON(const llvm::json::Value &Params, ReferenceParams &R) { TextDocumentPositionParams &Base = R; return fromJSON(Params, Base); diff --git a/clang-tools-extra/clangd/Protocol.h b/clang-tools-extra/clangd/Protocol.h index 7a1a8c77d2591..a2c9438ea9d5d 100644 --- a/clang-tools-extra/clangd/Protocol.h +++ b/clang-tools-extra/clangd/Protocol.h @@ -1127,7 +1127,7 @@ struct TypeHierarchyItem { SymbolKind kind; /// `true` if the hierarchy item is deprecated. Otherwise, `false`. - bool deprecated; + bool deprecated = false; /// The URI of the text document where this type hierarchy item belongs to. URIForFile uri; @@ -1153,13 +1153,26 @@ struct TypeHierarchyItem { /// descendants. If not defined, the children have not been resolved. llvm::Optional> children; - /// The protocol has a slot here for an optional 'data' filed, which can - /// be used to identify a type hierarchy item in a resolve request. We don't - /// need this (the item itself is sufficient to identify what to resolve) - /// so don't declare it. + /// An optional 'data' field, which can be used to identify a type hierarchy + /// item in a resolve request. + llvm::Optional data; }; llvm::json::Value toJSON(const TypeHierarchyItem &); llvm::raw_ostream &operator<<(llvm::raw_ostream &, const TypeHierarchyItem &); +bool fromJSON(const llvm::json::Value &, TypeHierarchyItem &); + +/// Parameters for the `typeHierarchy/resolve` request. +struct ResolveTypeHierarchyItemParams { + /// The item to resolve. + TypeHierarchyItem item; + + /// The hierarchy levels to resolve. `0` indicates no level. + int resolve; + + /// The direction of the hierarchy levels to resolve.
+ TypeHierarchyDirection direction; +}; +bool fromJSON(const llvm::json::Value &, ResolveTypeHierarchyItemParams &); struct ReferenceParams : public TextDocumentPositionParams { // For now, no options like context.includeDeclaration are supported. diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp index 1d34499b36edf..59f07ee405eed 100644 --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -893,7 +893,7 @@ llvm::Optional getDeducedType(ParsedAST &AST, /// Retrieves the deduced type at a given location (auto, decltype). bool hasDeducedType(ParsedAST &AST, SourceLocation SourceLocationBeg) { - return (bool) getDeducedType(AST, SourceLocationBeg); + return (bool)getDeducedType(AST, SourceLocationBeg); } llvm::Optional getHover(ParsedAST &AST, Position Pos, @@ -1104,6 +1104,10 @@ symbolToTypeHierarchyItem(const Symbol &S, const SymbolIndex *Index, // (https://github.com/clangd/clangd/issues/59). THI.range = THI.selectionRange; THI.uri = Loc->uri; + // Store the SymbolID in the 'data' field. The client will + // send this back in typeHierarchy/resolve, allowing us to + // continue resolving additional levels of the type hierarchy. + THI.data = S.ID.str(); return std::move(THI); } @@ -1247,6 +1251,25 @@ getTypeHierarchy(ParsedAST &AST, Position Pos, int ResolveLevels, return Result; } +void resolveTypeHierarchy(TypeHierarchyItem &Item, int ResolveLevels, + TypeHierarchyDirection Direction, + const SymbolIndex *Index) { + // We only support typeHierarchy/resolve for children, because for parents + // we ignore ResolveLevels and return all levels of parents eagerly. + if (Direction == TypeHierarchyDirection::Parents || ResolveLevels == 0) + return; + + Item.children.emplace(); + + if (Index && Item.data) { + // We store the item's SymbolID in the 'data' field, and the client + // passes it back to us in typeHierarchy/resolve. 
+ if (Expected ID = SymbolID::fromStr(*Item.data)) { + fillSubTypes(*ID, *Item.children, Index, ResolveLevels, Item.uri.file()); + } + } +} + FormattedString HoverInfo::present() const { FormattedString Output; if (NamespaceScope) { diff --git a/clang-tools-extra/clangd/XRefs.h b/clang-tools-extra/clangd/XRefs.h index 318133a572a28..3044036c17f15 100644 --- a/clang-tools-extra/clangd/XRefs.h +++ b/clang-tools-extra/clangd/XRefs.h @@ -141,6 +141,10 @@ llvm::Optional getTypeHierarchy( ParsedAST &AST, Position Pos, int Resolve, TypeHierarchyDirection Direction, const SymbolIndex *Index = nullptr, PathRef TUPath = PathRef{}); +void resolveTypeHierarchy(TypeHierarchyItem &Item, int ResolveLevels, + TypeHierarchyDirection Direction, + const SymbolIndex *Index); + /// Retrieves the deduced type at a given location (auto, decltype). /// Retuns None unless SourceLocationBeg starts an auto/decltype token. /// It will return the underlying type. diff --git a/clang-tools-extra/clangd/test/type-hierarchy.test b/clang-tools-extra/clangd/test/type-hierarchy.test index 7161bd143bff3..b2e78ae249dcc 100644 --- a/clang-tools-extra/clangd/test/type-hierarchy.test +++ b/clang-tools-extra/clangd/test/type-hierarchy.test @@ -1,7 +1,7 @@ # RUN: clangd -lit-test < %s | FileCheck -strict-whitespace %s {"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"clangd","capabilities":{},"trace":"off"}} --- -{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"test:///main.cpp","languageId":"cpp","version":1,"text":"struct Parent {};\nstruct Child1 : Parent {};\nstruct Child2 : Child1 {};\nstruct Child3 : Child2 {};"}}} +{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"test:///main.cpp","languageId":"cpp","version":1,"text":"struct Parent {};\nstruct Child1 : Parent {};\nstruct Child2 : Child1 {};\nstruct Child3 : Child2 {};\nstruct Child4 : Child3 {};"}}} --- 
{"jsonrpc":"2.0","id":1,"method":"textDocument/typeHierarchy","params":{"textDocument":{"uri":"test:///main.cpp"},"position":{"line":2,"character":11},"direction":2,"resolve":1}} # CHECK: "id": 1 @@ -9,6 +9,7 @@ # CHECK-NEXT: "result": { # CHECK-NEXT: "children": [ # CHECK-NEXT: { +# CHECK-NEXT: "data": "A6576FE083F2949A", # CHECK-NEXT: "kind": 23, # CHECK-NEXT: "name": "Child3", # CHECK-NEXT: "range": { @@ -114,6 +115,64 @@ # CHECK-NEXT: "uri": "file:///clangd-test/main.cpp" # CHECK-NEXT: } --- -{"jsonrpc":"2.0","id":2,"method":"shutdown"} +{"jsonrpc":"2.0","id":2,"method":"typeHierarchy/resolve","params":{"item":{"uri":"test:///main.cpp","data":"A6576FE083F2949A","name":"Child3","kind":23,"range":{"end":{"character":13,"line":3},"start":{"character":7,"line":3}},"selectionRange":{"end":{"character":13,"line":3},"start":{"character":7,"line":3}}},"direction":0,"resolve":1}} +# CHECK: "id": 2 +# CHECK-NEXT: "jsonrpc": "2.0", +# CHECK-NEXT: "result": { +# CHECK-NEXT: "children": [ +# CHECK-NEXT: { +# CHECK-NEXT: "data": "5705B382DFC77CBC", +# CHECK-NEXT: "kind": 23, +# CHECK-NEXT: "name": "Child4", +# CHECK-NEXT: "range": { +# CHECK-NEXT: "end": { +# CHECK-NEXT: "character": 13, +# CHECK-NEXT: "line": 4 +# CHECK-NEXT: }, +# CHECK-NEXT: "start": { +# CHECK-NEXT: "character": 7, +# CHECK-NEXT: "line": 4 +# CHECK-NEXT: } +# CHECK-NEXT: }, +# CHECK-NEXT: "selectionRange": { +# CHECK-NEXT: "end": { +# CHECK-NEXT: "character": 13, +# CHECK-NEXT: "line": 4 +# CHECK-NEXT: }, +# CHECK-NEXT: "start": { +# CHECK-NEXT: "character": 7, +# CHECK-NEXT: "line": 4 +# CHECK-NEXT: } +# CHECK-NEXT: }, +# CHECK-NEXT: "uri": "file:///clangd-test/main.cpp" +# CHECK-NEXT: } +# CHECK-NEXT: ], +# CHECK-NEXT: "data": "A6576FE083F2949A", +# CHECK-NEXT: "kind": 23, +# CHECK-NEXT: "name": "Child3", +# CHECK-NEXT: "range": { +# CHECK-NEXT: "end": { +# CHECK-NEXT: "character": 13, +# CHECK-NEXT: "line": 3 +# CHECK-NEXT: }, +# CHECK-NEXT: "start": { +# CHECK-NEXT: "character": 7, +# CHECK-NEXT: 
"line": 3 +# CHECK-NEXT: } +# CHECK-NEXT: }, +# CHECK-NEXT: "selectionRange": { +# CHECK-NEXT: "end": { +# CHECK-NEXT: "character": 13, +# CHECK-NEXT: "line": 3 +# CHECK-NEXT: }, +# CHECK-NEXT: "start": { +# CHECK-NEXT: "character": 7, +# CHECK-NEXT: "line": 3 +# CHECK-NEXT: } +# CHECK-NEXT: }, +# CHECK-NEXT: "uri": "file:///clangd-test/main.cpp" +# CHECK-NEXT: } +--- +{"jsonrpc":"2.0","id":3,"method":"shutdown"} --- {"jsonrpc":"2.0","method":"exit"} diff --git a/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp b/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp index be16646a72036..633a25fe3b442 100644 --- a/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp +++ b/clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp @@ -42,8 +42,17 @@ MATCHER_P(WithKind, Kind, "") { return arg.kind == Kind; } MATCHER_P(SelectionRangeIs, R, "") { return arg.selectionRange == R; } template ::testing::Matcher Parents(ParentMatchers... ParentsM) { - return Field(&TypeHierarchyItem::parents, HasValue(ElementsAre(ParentsM...))); + return Field(&TypeHierarchyItem::parents, + HasValue(UnorderedElementsAre(ParentsM...))); } +template +::testing::Matcher Children(ChildMatchers... ChildrenM) { + return Field(&TypeHierarchyItem::children, + HasValue(UnorderedElementsAre(ChildrenM...))); +} +// Note: "not resolved" is differnt from "resolved but empty"! 
+MATCHER(ParentsNotResolved, "") { return !arg.parents; } +MATCHER(ChildrenNotResolved, "") { return !arg.children; } TEST(FindRecordTypeAt, TypeOrVariable) { Annotations Source(R"cpp( @@ -603,6 +612,41 @@ struct Child : Parent {}; EXPECT_THAT(collectSubtypes(Parent, Index.get()), ElementsAre(Child)); } +TEST(Subtypes, LazyResolution) { + Annotations Source(R"cpp( +struct P^arent {}; +struct Child1 : Parent {}; +struct Child2a : Child1 {}; +struct Child2b : Child1 {}; +)cpp"); + + TestTU TU = TestTU::withCode(Source.code()); + auto AST = TU.build(); + auto Index = TU.index(); + + llvm::Optional Result = getTypeHierarchy( + AST, Source.point(), /*ResolveLevels=*/1, + TypeHierarchyDirection::Children, Index.get(), testPath(TU.Filename)); + ASSERT_TRUE(bool(Result)); + EXPECT_THAT( + *Result, + AllOf(WithName("Parent"), WithKind(SymbolKind::Struct), Parents(), + Children(AllOf(WithName("Child1"), WithKind(SymbolKind::Struct), + ParentsNotResolved(), ChildrenNotResolved())))); + + resolveTypeHierarchy((*Result->children)[0], /*ResolveLevels=*/1, + TypeHierarchyDirection::Children, Index.get()); + + EXPECT_THAT( + (*Result->children)[0], + AllOf(WithName("Child1"), WithKind(SymbolKind::Struct), + ParentsNotResolved(), + Children(AllOf(WithName("Child2a"), WithKind(SymbolKind::Struct), + ParentsNotResolved(), ChildrenNotResolved()), + AllOf(WithName("Child2b"), WithKind(SymbolKind::Struct), + ParentsNotResolved(), ChildrenNotResolved())))); +} + } // namespace } // namespace clangd } // namespace clang From d1fdadb22685b88d885833f4a21cca5df45fa303 Mon Sep 17 00:00:00 2001 From: Nathan Ridge Date: Sat, 13 Jul 2019 03:24:54 +0000 Subject: [PATCH 024/451] [clangd] Mark type hierarchy as a supported feature in the docs Reviewers: sammccall Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D64614 llvm-svn: 365987 --- clang-tools-extra/docs/clangd/Features.rst | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/docs/clangd/Features.rst b/clang-tools-extra/docs/clangd/Features.rst index 3e6e745a691c8..87b6713c44ace 100644 --- a/clang-tools-extra/docs/clangd/Features.rst +++ b/clang-tools-extra/docs/clangd/Features.rst @@ -261,7 +261,7 @@ developed outside clangd or become clangd extensions to LSP. +-------------------------------------+------------+----------+ | Call hierarchy | No | No | +-------------------------------------+------------+----------+ -| Type hierarchy | No | No | +| Type hierarchy | No | Yes | +-------------------------------------+------------+----------+ | Organize Includes | No | No | +-------------------------------------+------------+----------+ From 497bb44fc41f2e9c4d3c5ed8f525da01fa476979 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Sat, 13 Jul 2019 03:30:55 +0000 Subject: [PATCH 025/451] Make Python version setting actually effective This needs to be outside the if to actually work. Also, this adjusts the list of versions to match LLVM. 
Patch by: Christian Biesinger Differential revision: https://reviews.llvm.org/D64578 llvm-svn: 365988 --- lldb/cmake/modules/LLDBStandalone.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/cmake/modules/LLDBStandalone.cmake b/lldb/cmake/modules/LLDBStandalone.cmake index 80075b91b6e40..803f6bda968b2 100644 --- a/lldb/cmake/modules/LLDBStandalone.cmake +++ b/lldb/cmake/modules/LLDBStandalone.cmake @@ -87,8 +87,8 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) include(CheckAtomic) include(LLVMDistributionSupport) + set(Python_ADDITIONAL_VERSIONS 3.7 3.6 3.5 2.7) if (PYTHON_EXECUTABLE STREQUAL "") - set(Python_ADDITIONAL_VERSIONS 3.7 3.6 3.5 3.4 3.3 3.2 3.1 3.0 2.7 2.6 2.5) include(FindPythonInterp) if( NOT PYTHONINTERP_FOUND ) message(FATAL_ERROR From 118ee5f2e06a9972bd9fd171444cc080c03b0b99 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Sat, 13 Jul 2019 03:59:55 +0000 Subject: [PATCH 026/451] Initialize the non-trivial C union bits I added to RecordDeclBitfields in r365985 These bits weren't being initialized in the RecordDecl's constructor, which probably caused test/Modules/stress1.cpp to fail on a couple of bots. 
llvm-svn: 365989 --- clang/lib/AST/Decl.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 21dd5425834a9..21cf9da18a8b2 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -4252,6 +4252,9 @@ RecordDecl::RecordDecl(Kind DK, TagKind TK, const ASTContext &C, setNonTrivialToPrimitiveDefaultInitialize(false); setNonTrivialToPrimitiveCopy(false); setNonTrivialToPrimitiveDestroy(false); + setHasNonTrivialToPrimitiveDefaultInitializeCUnion(false); + setHasNonTrivialToPrimitiveDestructCUnion(false); + setHasNonTrivialToPrimitiveCopyCUnion(false); setParamDestroyedInCallee(false); setArgPassingRestrictions(APK_CanPassInRegs); } From 1a6053ebc61cb0b8146f5ca27b74859a9a91e0a3 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Sat, 13 Jul 2019 05:31:48 +0000 Subject: [PATCH 027/451] Revert "[COFF] Add null check in case of symbols defined in LTO blobs" This reverts commit r365979: COFF/undefined-symbol-lto.test is failing. llvm-svn: 365990 --- lld/COFF/SymbolTable.cpp | 2 +- .../COFF/Inputs/undefined-symbol-lto-a.ll | 82 ------------------- .../COFF/Inputs/undefined-symbol-lto-b.ll | 29 ------- lld/test/COFF/undefined-symbol-lto.test | 30 ------- 4 files changed, 1 insertion(+), 142 deletions(-) delete mode 100644 lld/test/COFF/Inputs/undefined-symbol-lto-a.ll delete mode 100644 lld/test/COFF/Inputs/undefined-symbol-lto-b.ll delete mode 100644 lld/test/COFF/undefined-symbol-lto.test diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 2173c10c1ca56..280a9c28892c8 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -69,7 +69,7 @@ static Symbol *getSymbol(SectionChunk *sc, uint32_t addr) { for (Symbol *s : sc->file->getSymbols()) { auto *d = dyn_cast_or_null(s); - if (!d || !d->data || d->getChunk() != sc || d->getValue() > addr || + if (!d || d->getChunk() != sc || d->getValue() > addr || (candidate && d->getValue() < candidate->getValue())) continue; diff --git 
a/lld/test/COFF/Inputs/undefined-symbol-lto-a.ll b/lld/test/COFF/Inputs/undefined-symbol-lto-a.ll deleted file mode 100644 index 6793ec718e806..0000000000000 --- a/lld/test/COFF/Inputs/undefined-symbol-lto-a.ll +++ /dev/null @@ -1,82 +0,0 @@ -; ModuleID = 't.obj' -source_filename = "t.cpp" -target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-pc-windows-msvc19.21.27702" - -%struct.Init = type { %struct.S } -%struct.S = type { i32 (...)** } -%rtti.CompleteObjectLocator = type { i32, i32, i32, i32, i32, i32 } -%rtti.TypeDescriptor7 = type { i8**, i8*, [8 x i8] } -%rtti.ClassHierarchyDescriptor = type { i32, i32, i32, i32 } -%rtti.BaseClassDescriptor = type { i32, i32, i32, i32, i32, i32, i32 } - -$"??_SS@@6B@" = comdat largest - -$"??_R4S@@6B@" = comdat any - -$"??_R0?AUS@@@8" = comdat any - -$"??_R3S@@8" = comdat any - -$"??_R2S@@8" = comdat any - -$"??_R1A@?0A@EA@S@@8" = comdat any - -@"?d@@3UInit@@A" = dso_local local_unnamed_addr global %struct.Init zeroinitializer, align 8 -@anon.bcb2691509de99310dddb690fcdb4cdc.0 = private unnamed_addr constant { [2 x i8*] } { [2 x i8*] [i8* bitcast (%rtti.CompleteObjectLocator* @"??_R4S@@6B@" to i8*), i8* bitcast (void (%struct.S*)* @"?foo@S@@UEAAXXZ" to i8*)] }, comdat($"??_SS@@6B@"), !type !0 -@"??_R4S@@6B@" = linkonce_odr constant %rtti.CompleteObjectLocator { i32 1, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor7* @"??_R0?AUS@@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.ClassHierarchyDescriptor* @"??_R3S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.CompleteObjectLocator* @"??_R4S@@6B@" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, comdat -@"??_7type_info@@6B@" = external constant i8* -@"??_R0?AUS@@@8" = linkonce_odr global %rtti.TypeDescriptor7 { i8** @"??_7type_info@@6B@", i8* null, [8 x i8] c".?AUS@@\00" }, comdat 
-@__ImageBase = external dso_local constant i8 -@"??_R3S@@8" = linkonce_odr constant %rtti.ClassHierarchyDescriptor { i32 0, i32 0, i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint ([2 x i32]* @"??_R2S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, comdat -@"??_R2S@@8" = linkonce_odr constant [2 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.BaseClassDescriptor* @"??_R1A@?0A@EA@S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0], comdat -@"??_R1A@?0A@EA@S@@8" = linkonce_odr constant %rtti.BaseClassDescriptor { i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor7* @"??_R0?AUS@@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 0, i32 -1, i32 0, i32 64, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.ClassHierarchyDescriptor* @"??_R3S@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, comdat -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_t.cpp, i8* null }] - -@"??_SS@@6B@" = unnamed_addr alias i8*, getelementptr inbounds ({ [2 x i8*] }, { [2 x i8*] }* @anon.bcb2691509de99310dddb690fcdb4cdc.0, i32 0, i32 0, i32 1) - -declare dso_local void @"?undefined_ref@@YAXXZ"() local_unnamed_addr #0 - -declare dllimport void @"?foo@S@@UEAAXXZ"(%struct.S*) unnamed_addr #0 - -; Function Attrs: nounwind sspstrong uwtable -define internal void @_GLOBAL__sub_I_t.cpp() #1 { -entry: - store i32 (...)** bitcast (i8** @"??_SS@@6B@" to i32 (...)**), i32 (...)*** getelementptr inbounds (%struct.Init, %struct.Init* @"?d@@3UInit@@A", i64 0, i32 0, i32 0), align 8 - tail call void @"?undefined_ref@@YAXXZ"() #2 - ret void -} - -attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" 
"target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind sspstrong uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } - -!llvm.linker.options = !{!1, !2} -!llvm.module.flags = !{!3, !4, !5, !6} -!llvm.ident = !{!7} - -!0 = !{i64 8, !"?AUS@@"} -!1 = !{!"/DEFAULTLIB:libcmt.lib"} -!2 = !{!"/DEFAULTLIB:oldnames.lib"} -!3 = !{i32 1, !"wchar_size", i32 2} -!4 = !{i32 7, !"PIC Level", i32 2} -!5 = !{i32 1, !"ThinLTO", i32 0} -!6 = !{i32 1, !"EnableSplitLTOUnit", i32 0} -!7 = !{!"clang version 9.0.0 (git@github.com:llvm/llvm-project.git 1a285c27fdf6407ceed3398e015d00559f5f533d)"} - -^0 = module: (path: "t.obj", hash: (0, 0, 0, 0, 0)) -^1 = gv: (name: "__ImageBase") ; guid = 434928772013489304 -^2 = gv: (name: "??_R2S@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^1, ^6)))) ; guid = 2160898732728284029 -^3 = gv: (name: "llvm.global_ctors", summaries: (variable: (module: ^0, flags: (linkage: appending, notEligibleToImport: 1, live: 1, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^14)))) ; guid = 2412314959268824392 -^4 = gv: (name: "?foo@S@@UEAAXXZ") ; guid = 6578172636330484861 -^5 = gv: (name: "??_SS@@6B@", summaries: (alias: (module: ^0, flags: (linkage: external, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), aliasee: ^10))) ; guid = 8774897714842691026 
-^6 = gv: (name: "??_R1A@?0A@EA@S@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^11, ^1, ^8)))) ; guid = 9397802696236423453 -^7 = gv: (name: "?undefined_ref@@YAXXZ") ; guid = 9774674600202276560 -^8 = gv: (name: "??_R3S@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^1, ^2)))) ; guid = 10685958509605791599 -^9 = gv: (name: "??_7type_info@@6B@") ; guid = 10826752452437539368 -^10 = gv: (name: "anon.bcb2691509de99310dddb690fcdb4cdc.0", summaries: (variable: (module: ^0, flags: (linkage: private, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), vTableFuncs: ((virtFunc: ^4, offset: 8)), refs: (^13, ^4)))) ; guid = 11510395461204283992 -^11 = gv: (name: "??_R0?AUS@@@8", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^9)))) ; guid = 12346607659584231960 -^12 = gv: (name: "?d@@3UInit@@A", summaries: (variable: (module: ^0, flags: (linkage: external, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 1, writeonly: 1)))) ; guid = 14563354643524156382 -^13 = gv: (name: "??_R4S@@6B@", summaries: (variable: (module: ^0, flags: (linkage: linkonce_odr, notEligibleToImport: 1, live: 0, dsoLocal: 0, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0), refs: (^13, ^11, ^1, ^8)))) ; guid = 14703528065171087394 -^14 = gv: (name: "_GLOBAL__sub_I_t.cpp", summaries: (function: (module: ^0, flags: (linkage: internal, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 3, calls: ((callee: ^7)), refs: (^12, ^5)))) ; guid = 15085897428757412588 -^15 = typeidCompatibleVTable: (name: "?AUS@@", 
summary: ((offset: 8, ^10))) ; guid = 13986515119763165370 diff --git a/lld/test/COFF/Inputs/undefined-symbol-lto-b.ll b/lld/test/COFF/Inputs/undefined-symbol-lto-b.ll deleted file mode 100644 index ff73e7c6ba680..0000000000000 --- a/lld/test/COFF/Inputs/undefined-symbol-lto-b.ll +++ /dev/null @@ -1,29 +0,0 @@ -; ModuleID = 'b.obj' -source_filename = "b.cpp" -target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-pc-windows-msvc19.21.27702" - -%struct.S = type { i32 (...)** } - -; Function Attrs: norecurse nounwind readnone sspstrong uwtable -define dso_local void @"?foo@S@@UEAAXXZ"(%struct.S* nocapture %this) unnamed_addr #0 align 2 { -entry: - ret void -} - -attributes #0 = { norecurse nounwind readnone sspstrong uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!llvm.linker.options = !{!0, !1} -!llvm.module.flags = !{!2, !3, !4, !5} -!llvm.ident = !{!6} - -!0 = !{!"/DEFAULTLIB:libcmt.lib"} -!1 = !{!"/DEFAULTLIB:oldnames.lib"} -!2 = !{i32 1, !"wchar_size", i32 2} -!3 = !{i32 7, !"PIC Level", i32 2} -!4 = !{i32 1, !"ThinLTO", i32 0} -!5 = !{i32 1, !"EnableSplitLTOUnit", i32 0} -!6 = !{!"clang version 9.0.0 (git@github.com:llvm/llvm-project.git 1a285c27fdf6407ceed3398e015d00559f5f533d)"} - -^0 = module: (path: "b.obj", hash: (0, 0, 0, 0, 0)) -^1 = gv: (name: "?foo@S@@UEAAXXZ", summaries: (function: (module: ^0, flags: (linkage: external, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 1, funcFlags: (readNone: 1, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0)))) ; guid = 
6578172636330484861 diff --git a/lld/test/COFF/undefined-symbol-lto.test b/lld/test/COFF/undefined-symbol-lto.test deleted file mode 100644 index 6911b121122a4..0000000000000 --- a/lld/test/COFF/undefined-symbol-lto.test +++ /dev/null @@ -1,30 +0,0 @@ -RUN: rm -rf %t && mkdir -p %t && cd %t -RUN: llvm-as %S/Inputs/undefined-symbol-lto-a.ll -o t.obj -RUN: llvm-as %S/Inputs/undefined-symbol-lto-b.ll -o b.obj -RUN: llvm-lib b.obj -out:b.lib -RUN: not lld-link t.obj b.lib -subsystem:console 2>&1 | FileCheck %s - -CHECK: undefined symbol: main -CHECK: referenced by -CHECK: undefined symbol: void __cdecl undefined_ref(void) -CHECK: referenced by - -Originally reported as PR42536. - -a.ll corresponds to this C++: - -struct __declspec(dllimport) S { - virtual void foo(); -}; -void undefined_ref(); -struct Init { - Init() { undefined_ref(); } - S c; -} d; - -b.ll is from this C++: - -struct S { - virtual void foo(); -}; -void S::foo() {} From 1447b60eeb2b3026a0c96bef052843a71002d617 Mon Sep 17 00:00:00 2001 From: Michal Gorny Date: Sat, 13 Jul 2019 06:24:14 +0000 Subject: [PATCH 028/451] [lldb] [test] Un-XFAIL TestFormattersSBAPI on NetBSD llvm-svn: 365991 --- .../lldbsuite/test/python_api/formatters/TestFormattersSBAPI.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lldb/packages/Python/lldbsuite/test/python_api/formatters/TestFormattersSBAPI.py b/lldb/packages/Python/lldbsuite/test/python_api/formatters/TestFormattersSBAPI.py index dd12ac198cd35..8548506fdc463 100644 --- a/lldb/packages/Python/lldbsuite/test/python_api/formatters/TestFormattersSBAPI.py +++ b/lldb/packages/Python/lldbsuite/test/python_api/formatters/TestFormattersSBAPI.py @@ -22,7 +22,6 @@ def setUp(self): self.line = line_number('main.cpp', '// Set break point at this line.') @add_test_categories(['pyapi']) - @expectedFailureNetBSD def test_formatters_api(self): """Test Python APIs for working with formatters""" self.build() From 21a92a8a559ba27907290bafd181e490101a4fcb Mon Sep 17 00:00:00 2001 From: 
Sylvestre Ledru Date: Sat, 13 Jul 2019 06:27:35 +0000 Subject: [PATCH 029/451] This reverts commit 632a36bfcfc8273c1861f04ff6758d863c47c784. Some targets such as Python 2.7.16 still use VERSION in their builds. Without VERSION defined, the source code has syntax errors. Reverting as it will probably break many other things. Noticed by Sterling Augustine llvm-svn: 365992 --- clang/docs/LanguageExtensions.rst | 2 -- clang/docs/ReleaseNotes.rst | 10 ---------- clang/lib/Basic/Version.cpp | 2 ++ clang/lib/Frontend/InitPreprocessor.cpp | 6 ++++++ clang/test/Index/complete-exprs.c | 2 ++ clang/test/Preprocessor/init.c | 4 ++++ clang/utils/builtin-defines.c | 1 + 7 files changed, 15 insertions(+), 12 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 61152a251bdba..ecbf04c3c822a 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -324,8 +324,6 @@ option for a warning and returns true if that is a valid warning option. ... #endif -.. _languageextensions-builtin-macros: - Builtin Macros ============== diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7185030a92d7c..f0a35050dde08 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -56,11 +56,6 @@ Improvements to Clang's diagnostics Non-comprehensive list of changes in this release ------------------------------------------------- -- The ``__VERSION__`` macro has been removed. - Previously this macro was set to a string aiming to achieve compatibility with - GCC 4.2.1, but that should no longer be necessary. To get Clang's version, - use the :ref:`clang namespaced version macros `. - - ... @@ -84,11 +79,6 @@ Modified Compiler Flags - ... -Removed Compiler Options ------------------------- - -- ... 
- New Pragmas in Clang -------------------- diff --git a/clang/lib/Basic/Version.cpp b/clang/lib/Basic/Version.cpp index 5fd12762b6893..d6564582e7726 100644 --- a/clang/lib/Basic/Version.cpp +++ b/clang/lib/Basic/Version.cpp @@ -136,6 +136,8 @@ std::string getClangToolFullVersion(StringRef ToolName) { } std::string getClangFullCPPVersion() { + // The version string we report in __VERSION__ is just a compacted version of + // the one we report on the command line. std::string buf; llvm::raw_string_ostream OS(buf); #ifdef CLANG_VENDOR diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index a02c266c094a8..1741ba5e5203e 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -604,6 +604,12 @@ static void InitializePredefinedMacros(const TargetInfo &TI, // Support for #pragma redefine_extname (Sun compatibility) Builder.defineMacro("__PRAGMA_REDEFINE_EXTNAME", "1"); + // As sad as it is, enough software depends on the __VERSION__ for version + // checks that it is necessary to report 4.2.1 (the base GCC version we claim + // compatibility with) first. + Builder.defineMacro("__VERSION__", "\"4.2.1 Compatible " + + Twine(getClangFullCPPVersion()) + "\""); + // Initialize language-specific preprocessor defines. // Standard conforming mode? 
diff --git a/clang/test/Index/complete-exprs.c b/clang/test/Index/complete-exprs.c index 50f5025f1512f..9beb16deef99b 100644 --- a/clang/test/Index/complete-exprs.c +++ b/clang/test/Index/complete-exprs.c @@ -27,6 +27,7 @@ void f5(float f) { // RUN: c-index-test -code-completion-at=%s:7:10 -Xclang -code-completion-patterns %s | FileCheck -check-prefix=CHECK-CC1 %s // RUN: env CINDEXTEST_EDITING=1 CINDEXTEST_COMPLETION_CACHING=1 c-index-test -code-completion-at=%s:7:10 -Xclang -code-completion-patterns %s | FileCheck -check-prefix=CHECK-CC1 %s // CHECK-CC1: NotImplemented:{TypedText __PRETTY_FUNCTION__} (65) +// CHECK-CC1: macro definition:{TypedText __VERSION__} (70) // CHECK-CC1: FunctionDecl:{ResultType int}{TypedText f}{LeftParen (}{Placeholder int}{RightParen )} (12) (unavailable) // CHECK-CC1-NOT: NotImplemented:{TypedText float} (65) // CHECK-CC1: ParmDecl:{ResultType int}{TypedText j} (8) @@ -38,6 +39,7 @@ void f5(float f) { // RUN: c-index-test -code-completion-at=%s:7:18 -Xclang -code-completion-patterns %s | FileCheck -check-prefix=CHECK-CC1 %s // RUN: c-index-test -code-completion-at=%s:7:22 -Xclang -code-completion-patterns %s | FileCheck -check-prefix=CHECK-CC1 %s // RUN: c-index-test -code-completion-at=%s:7:2 -Xclang -code-completion-patterns %s | FileCheck -check-prefix=CHECK-CC2 %s +// CHECK-CC2: macro definition:{TypedText __VERSION__} (70) // CHECK-CC2: FunctionDecl:{ResultType int}{TypedText f}{LeftParen (}{Placeholder int}{RightParen )} (50) // CHECK-CC2: NotImplemented:{TypedText float} (50) // CHECK-CC2: ParmDecl:{ResultType int}{TypedText j} (34) diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c index 00a7c7b6bae95..8df3b4bd2ccf4 100644 --- a/clang/test/Preprocessor/init.c +++ b/clang/test/Preprocessor/init.c @@ -101,6 +101,7 @@ // COMMON:#define __ORDER_PDP_ENDIAN__ 3412 // COMMON:#define __STDC_HOSTED__ 1 // COMMON:#define __STDC__ 1 +// COMMON:#define __VERSION__ {{.*}} // COMMON:#define __clang__ 1 // 
COMMON:#define __clang_major__ {{[0-9]+}} // COMMON:#define __clang_minor__ {{[0-9]+}} @@ -8168,6 +8169,7 @@ // SPARC:#define __UINT_LEAST8_MAX__ 255 // SPARC:#define __UINT_LEAST8_TYPE__ unsigned char // SPARC:#define __USER_LABEL_PREFIX__ +// SPARC:#define __VERSION__ "4.2.1 Compatible{{.*}} // SPARC:#define __WCHAR_MAX__ 2147483647 // SPARC:#define __WCHAR_TYPE__ int // SPARC:#define __WCHAR_WIDTH__ 32 @@ -9039,6 +9041,7 @@ // X86_64-CLOUDABI:#define __UINT_LEAST8_MAX__ 255 // X86_64-CLOUDABI:#define __UINT_LEAST8_TYPE__ unsigned char // X86_64-CLOUDABI:#define __USER_LABEL_PREFIX__ +// X86_64-CLOUDABI:#define __VERSION__ "4.2.1 Compatible{{.*}} // X86_64-CLOUDABI:#define __WCHAR_MAX__ 2147483647 // X86_64-CLOUDABI:#define __WCHAR_TYPE__ int // X86_64-CLOUDABI:#define __WCHAR_WIDTH__ 32 @@ -10040,6 +10043,7 @@ // WEBASSEMBLY-NEXT:#define __UINT_LEAST8_MAX__ 255 // WEBASSEMBLY-NEXT:#define __UINT_LEAST8_TYPE__ unsigned char // WEBASSEMBLY-NEXT:#define __USER_LABEL_PREFIX__ +// WEBASSEMBLY-NEXT:#define __VERSION__ "{{.*}}" // WEBASSEMBLY-NEXT:#define __WCHAR_MAX__ 2147483647 // WEBASSEMBLY-NEXT:#define __WCHAR_TYPE__ int // WEBASSEMBLY-NOT:#define __WCHAR_UNSIGNED__ diff --git a/clang/utils/builtin-defines.c b/clang/utils/builtin-defines.c index 2936d631e61a7..9bbe5be250269 100644 --- a/clang/utils/builtin-defines.c +++ b/clang/utils/builtin-defines.c @@ -49,6 +49,7 @@ RUN: done; #undef __INT8_TYPE__ #undef __SSP__ #undef __APPLE_CC__ +#undef __VERSION__ #undef __clang__ #undef __llvm__ #undef __nocona From 36fbd0da5fb7ac70146d2118165556d4af19fd8b Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 13 Jul 2019 07:23:12 +0000 Subject: [PATCH 030/451] Simplify with llvm::is_contained. 
NFC llvm-svn: 365993 --- .../clang-tidy/bugprone/AssertSideEffectCheck.cpp | 3 +-- .../bugprone/ForwardingReferenceOverloadCheck.cpp | 2 +- clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp | 7 ++----- clang-tools-extra/modularize/Modularize.cpp | 2 +- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp index a28ef1138e539..4e3f76544dc8b 100644 --- a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp @@ -108,8 +108,7 @@ void AssertSideEffectCheck::check(const MatchFinder::MatchResult &Result) { StringRef MacroName = Lexer::getImmediateMacroName(Loc, SM, LangOpts); // Check if this macro is an assert. - if (std::find(AssertMacros.begin(), AssertMacros.end(), MacroName) != - AssertMacros.end()) { + if (llvm::is_contained(AssertMacros, MacroName)) { AssertMacroName = MacroName; break; } diff --git a/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.cpp index 57055ff2191e8..2773d38420be3 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.cpp @@ -105,7 +105,7 @@ void ForwardingReferenceOverloadCheck::check( // template as the function parameter of that type. (This implies that type // deduction will happen on the type.) const TemplateParameterList *Params = FuncTemplate->getTemplateParameters(); - if (std::find(Params->begin(), Params->end(), TypeParmDecl) == Params->end()) + if (!llvm::is_contained(*Params, TypeParmDecl)) return; // Every parameter after the first must have a default value. 
diff --git a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp index 45e59c3ec51ab..d0a95d95ec4b4 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.cpp @@ -242,10 +242,8 @@ class CastSequenceVisitor : public RecursiveASTVisitor { getOutermostMacroName(StartLoc, SM, Context.getLangOpts()); // Check to see if the user wants to replace the macro being expanded. - if (std::find(NullMacros.begin(), NullMacros.end(), OutermostMacroName) == - NullMacros.end()) { + if (!llvm::is_contained(NullMacros, OutermostMacroName)) return skipSubTree(); - } StartLoc = SM.getFileLoc(StartLoc); EndLoc = SM.getFileLoc(EndLoc); @@ -327,8 +325,7 @@ class CastSequenceVisitor : public RecursiveASTVisitor { StringRef Name = Lexer::getImmediateMacroName(OldArgLoc, SM, Context.getLangOpts()); - return std::find(NullMacros.begin(), NullMacros.end(), Name) != - NullMacros.end(); + return llvm::is_contained(NullMacros, Name); } MacroLoc = SM.getExpansionRange(ArgLoc).getBegin(); diff --git a/clang-tools-extra/modularize/Modularize.cpp b/clang-tools-extra/modularize/Modularize.cpp index 59fc5c351ff27..866356d055b62 100644 --- a/clang-tools-extra/modularize/Modularize.cpp +++ b/clang-tools-extra/modularize/Modularize.cpp @@ -369,7 +369,7 @@ getModularizeArgumentsAdjuster(DependencyMap &Dependencies) { // Ignore warnings. (Insert after "clang_tool" at beginning.) NewArgs.insert(NewArgs.begin() + 1, "-w"); // Since we are compiling .h files, assume C++ unless given a -x option. 
- if (std::find(NewArgs.begin(), NewArgs.end(), "-x") == NewArgs.end()) { + if (!llvm::is_contained(NewArgs, "-x")) { NewArgs.insert(NewArgs.begin() + 2, "-x"); NewArgs.insert(NewArgs.begin() + 3, "c++"); } From 20d34eacf3f118d1e94165beaf3da24a5c1e8011 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Sat, 13 Jul 2019 08:07:10 +0000 Subject: [PATCH 031/451] [CMake][Fuchsia] Define asan+noexcept multilib Using noexcept multilib with -fno-exceptions can lead to significant space savings when statically linking libc++abi because we don't need all the unwinding and demangling code. When compiling with ASan, we already get a lot of overhead from the instrumentation itself, when statically linking libc++abi, that overhead is even larger. Having the noexcept variant for ASan can help significantly, we've seen more than 50% size reduction in our system image, which offsets the cost of having to build another multilib. Differential Revision: https://reviews.llvm.org/D64140 llvm-svn: 365994 --- clang/cmake/caches/Fuchsia-stage2.cmake | 10 +++++++++- clang/lib/Driver/ToolChains/Fuchsia.cpp | 5 +++++ .../lib/aarch64-fuchsia/c++/asan+noexcept/libc++.so | 0 .../lib/x86_64-fuchsia/c++/asan+noexcept/libc++.so | 0 clang/test/Driver/fuchsia.cpp | 3 ++- 5 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 clang/test/Driver/Inputs/basic_fuchsia_tree/lib/aarch64-fuchsia/c++/asan+noexcept/libc++.so create mode 100644 clang/test/Driver/Inputs/basic_fuchsia_tree/lib/x86_64-fuchsia/c++/asan+noexcept/libc++.so diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake index e93b6e4f07ad8..1f8a9e78763fa 100644 --- a/clang/cmake/caches/Fuchsia-stage2.cmake +++ b/clang/cmake/caches/Fuchsia-stage2.cmake @@ -153,13 +153,21 @@ if(FUCHSIA_SDK) set(RUNTIMES_${target}-unknown-fuchsia+noexcept_LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE BOOL "") set(RUNTIMES_${target}-unknown-fuchsia+noexcept_LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "") + 
set(RUNTIMES_${target}-unknown-fuchsia+asan+noexcept_LLVM_BUILD_COMPILER_RT OFF CACHE BOOL "") + set(RUNTIMES_${target}-unknown-fuchsia+asan+noexcept_LLVM_USE_SANITIZER "Address" CACHE STRING "") + set(RUNTIMES_${target}-unknown-fuchsia+asan+noexcept_LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "") + set(RUNTIMES_${target}-unknown-fuchsia+asan+noexcept_LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "") + set(RUNTIMES_${target}-unknown-fuchsia+asan+noexcept_LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE BOOL "") + set(RUNTIMES_${target}-unknown-fuchsia+asan+noexcept_LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "") + # Use .build-id link. list(APPEND RUNTIME_BUILD_ID_LINK "${target}-unknown-fuchsia") endforeach() - set(LLVM_RUNTIME_MULTILIBS "asan;noexcept" CACHE STRING "") + set(LLVM_RUNTIME_MULTILIBS "asan;noexcept;asan+noexcept" CACHE STRING "") set(LLVM_RUNTIME_MULTILIB_asan_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") set(LLVM_RUNTIME_MULTILIB_noexcept_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") + set(LLVM_RUNTIME_MULTILIB_asan+noexcept_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") endif() set(LLVM_BUILTIN_TARGETS "${BUILTIN_TARGETS}" CACHE STRING "") diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index 2344a69adb962..1f5ec9ebb16d5 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -192,6 +192,11 @@ Fuchsia::Fuchsia(const Driver &D, const llvm::Triple &Triple, // ASan has higher priority because we always want the instrumentated version. Multilibs.push_back(Multilib("asan", {}, {}, 2) .flag("+fsanitize=address")); + // Use the asan+noexcept variant with ASan and -fno-exceptions. 
+ Multilibs.push_back(Multilib("asan+noexcept", {}, {}, 3) + .flag("+fsanitize=address") + .flag("-fexceptions") + .flag("+fno-exceptions")); Multilibs.FilterOut([&](const Multilib &M) { std::vector RD = FilePaths(M); return std::all_of(RD.begin(), RD.end(), [&](std::string P) { diff --git a/clang/test/Driver/Inputs/basic_fuchsia_tree/lib/aarch64-fuchsia/c++/asan+noexcept/libc++.so b/clang/test/Driver/Inputs/basic_fuchsia_tree/lib/aarch64-fuchsia/c++/asan+noexcept/libc++.so new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/basic_fuchsia_tree/lib/x86_64-fuchsia/c++/asan+noexcept/libc++.so b/clang/test/Driver/Inputs/basic_fuchsia_tree/lib/x86_64-fuchsia/c++/asan+noexcept/libc++.so new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/fuchsia.cpp b/clang/test/Driver/fuchsia.cpp index 823ded4b91544..a5297e76964e4 100644 --- a/clang/test/Driver/fuchsia.cpp +++ b/clang/test/Driver/fuchsia.cpp @@ -70,8 +70,9 @@ // RUN: -ccc-install-dir %S/Inputs/basic_fuchsia_tree/bin \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ // RUN: -fuse-ld=lld 2>&1\ -// RUN: | FileCheck %s -check-prefixes=CHECK-MULTILIB-X86,CHECK-MULTILIB-ASAN-X86 +// RUN: | FileCheck %s -check-prefixes=CHECK-MULTILIB-X86,CHECK-MULTILIB-ASAN-NOEXCEPT-X86 // CHECK-MULTILIB-X86: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" // CHECK-MULTILIB-ASAN-X86: "-L{{.*}}{{/|\\\\}}..{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}c++{{/|\\\\}}asan" // CHECK-MULTILIB-NOEXCEPT-X86: "-L{{.*}}{{/|\\\\}}..{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}c++{{/|\\\\}}noexcept" +// CHECK-MULTILIB-ASAN-NOEXCEPT-X86: "-L{{.*}}{{/|\\\\}}..{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}c++{{/|\\\\}}asan+noexcept" // CHECK-MULTILIB-X86: "-L{{.*}}{{/|\\\\}}..{{/|\\\\}}lib{{/|\\\\}}x86_64-fuchsia{{/|\\\\}}c++" From f1d865398b1cf5c082486fe51b52e0b41986640b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 13 Jul 2019 08:08:43 +0000 Subject: 
[PATCH 032/451] Fix -Wdocumentation warning. NFCI. llvm-svn: 365995 --- clang/include/clang/DirectoryWatcher/DirectoryWatcher.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/include/clang/DirectoryWatcher/DirectoryWatcher.h b/clang/include/clang/DirectoryWatcher/DirectoryWatcher.h index 0bf966bb832ab..e74443e0bc81c 100644 --- a/clang/include/clang/DirectoryWatcher/DirectoryWatcher.h +++ b/clang/include/clang/DirectoryWatcher/DirectoryWatcher.h @@ -98,8 +98,7 @@ class DirectoryWatcher { : Kind(Kind), Filename(Filename) {} }; - /// Returns nullptr if \param Path doesn't exist. - /// Returns nullptr if \param Path isn't a directory. + /// Returns nullptr if \param Path doesn't exist or isn't a directory. /// Returns nullptr if OS kernel API told us we can't start watching. In such /// case it's unclear whether just retrying has any chance to succeeed. static std::unique_ptr From 16ac7a5a27c7c3a668c67c776f9230de409bf004 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 13 Jul 2019 09:23:35 +0000 Subject: [PATCH 033/451] [Object] isNotObjectErrorInvalidFileType: fix use-after-move llvm-svn: 365996 --- llvm/lib/Object/Error.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Object/Error.cpp b/llvm/lib/Object/Error.cpp index f2a009000c58d..ab10d23036a26 100644 --- a/llvm/lib/Object/Error.cpp +++ b/llvm/lib/Object/Error.cpp @@ -91,5 +91,5 @@ llvm::Error llvm::object::isNotObjectErrorInvalidFileType(llvm::Error Err) { return Error(std::move(M)); })) return Err2; - return Err; + return Error::success(); } From 327db23b6642499fab917014a4c9934c1649e120 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 13 Jul 2019 09:28:33 +0000 Subject: [PATCH 034/451] [Object] isNotObjectErrorInvalidFileType: simplify llvm-svn: 365997 --- llvm/lib/Object/Error.cpp | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Object/Error.cpp b/llvm/lib/Object/Error.cpp index 
ab10d23036a26..010c5b42dac25 100644 --- a/llvm/lib/Object/Error.cpp +++ b/llvm/lib/Object/Error.cpp @@ -78,18 +78,15 @@ const std::error_category &object::object_category() { } llvm::Error llvm::object::isNotObjectErrorInvalidFileType(llvm::Error Err) { - if (auto Err2 = - handleErrors(std::move(Err), [](std::unique_ptr M) -> Error { - // Try to handle 'M'. If successful, return a success value from - // the handler. - if (M->convertToErrorCode() == object_error::invalid_file_type) - return Error::success(); + return handleErrors(std::move(Err), [](std::unique_ptr M) -> Error { + // Try to handle 'M'. If successful, return a success value from + // the handler. + if (M->convertToErrorCode() == object_error::invalid_file_type) + return Error::success(); - // We failed to handle 'M' - return it from the handler. - // This value will be passed back from catchErrors and - // wind up in Err2, where it will be returned from this function. - return Error(std::move(M)); - })) - return Err2; - return Error::success(); + // We failed to handle 'M' - return it from the handler. + // This value will be passed back from catchErrors and + // wind up in Err2, where it will be returned from this function. 
+ return Error(std::move(M)); + }); } From 2097f75eabb94c7eafcfba9cbfd6b60f08a4ded6 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sat, 13 Jul 2019 12:04:52 +0000 Subject: [PATCH 035/451] [x86] simplify cmov with same true/false operands llvm-svn: 365998 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++++ llvm/test/CodeGen/X86/combine-sbb.ll | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6b152fe9d7ac1..e0bcf70248948 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -36783,6 +36783,10 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2); SDValue Cond = N->getOperand(3); + // cmov X, X, ?, ? --> X + if (TrueOp == FalseOp) + return TrueOp; + // Try to simplify the EFLAGS and condition code operands. // We can't always do this as FCMOV only supports a subset of X86 cond. 
if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) { diff --git a/llvm/test/CodeGen/X86/combine-sbb.ll b/llvm/test/CodeGen/X86/combine-sbb.ll index f9ac10755aaf0..9e68ab4beb16b 100644 --- a/llvm/test/CodeGen/X86/combine-sbb.ll +++ b/llvm/test/CodeGen/X86/combine-sbb.ll @@ -291,9 +291,8 @@ define i32 @PR40483_sub5(i32*, i32) { ; ; X64-LABEL: PR40483_sub5: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: subl %esi, (%rdi) -; X64-NEXT: cmovael %eax, %eax +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: retq %3 = load i32, i32* %0, align 8 %4 = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1) From 0f6148df23edcd3081f5e761de19edd4f823f16d Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sat, 13 Jul 2019 12:54:48 +0000 Subject: [PATCH 036/451] [InstCombine] add tests for umin/umax via usub.sat; NFC llvm-svn: 365999 --- .../InstCombine/saturating-add-sub.ll | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll index 364c80d205f2d..56e10626104cf 100644 --- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll @@ -1614,3 +1614,75 @@ define i32 @unsigned_sat_constant_using_min_wrong_constant(i32 %x) { %r = add i32 %s, -42 ret i32 %r } + +define i8 @umax(i8 %a, i8 %b) { +; CHECK-LABEL: @umax( +; CHECK-NEXT: [[USUB:%.*]] = tail call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]]) +; CHECK-NEXT: [[R:%.*]] = add i8 [[USUB]], [[B]] +; CHECK-NEXT: ret i8 [[R]] +; + %usub = tail call i8 @llvm.usub.sat.i8(i8 %a, i8 %b) + %r = add i8 %usub, %b + ret i8 %r +} + +define <2 x i8> @umax_vec(<2 x i8> %a, <2 x i8> %b) { +; CHECK-LABEL: @umax_vec( +; CHECK-NEXT: [[USUB:%.*]] = tail call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) +; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[USUB]], [[B]] +; CHECK-NEXT: ret <2 x i8> [[R]] +; + %usub = tail call 
<2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a, <2 x i8> %b) + %r = add <2 x i8> %usub, %b + ret <2 x i8> %r +} + +define i8 @umax_extra_use(i8 %a, i8 %b) { +; CHECK-LABEL: @umax_extra_use( +; CHECK-NEXT: [[USUB:%.*]] = tail call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]]) +; CHECK-NEXT: call void @use(i8 [[USUB]]) +; CHECK-NEXT: [[R:%.*]] = add i8 [[USUB]], [[B]] +; CHECK-NEXT: ret i8 [[R]] +; + %usub = tail call i8 @llvm.usub.sat.i8(i8 %a, i8 %b) + call void @use(i8 %usub) + %r = add i8 %usub, %b + ret i8 %r +} + +define i8 @umin(i8 %a, i8 %b) { +; CHECK-LABEL: @umin( +; CHECK-NEXT: [[USUB:%.*]] = tail call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]]) +; CHECK-NEXT: [[R:%.*]] = sub i8 [[A]], [[USUB]] +; CHECK-NEXT: ret i8 [[R]] +; + %usub = tail call i8 @llvm.usub.sat.i8(i8 %a, i8 %b) + %r = sub i8 %a, %usub + ret i8 %r +} + +define <2 x i8> @umin_vec(<2 x i8> %a, <2 x i8> %b) { +; CHECK-LABEL: @umin_vec( +; CHECK-NEXT: [[USUB:%.*]] = tail call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) +; CHECK-NEXT: [[R:%.*]] = sub <2 x i8> [[A]], [[USUB]] +; CHECK-NEXT: ret <2 x i8> [[R]] +; + %usub = tail call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a, <2 x i8> %b) + %r = sub <2 x i8> %a, %usub + ret <2 x i8> %r +} + +define i8 @umin_extra_use(i8 %a, i8 %b) { +; CHECK-LABEL: @umin_extra_use( +; CHECK-NEXT: [[USUB:%.*]] = tail call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]]) +; CHECK-NEXT: call void @use(i8 [[USUB]]) +; CHECK-NEXT: [[R:%.*]] = sub i8 [[A]], [[USUB]] +; CHECK-NEXT: ret i8 [[R]] +; + %usub = tail call i8 @llvm.usub.sat.i8(i8 %a, i8 %b) + call void @use(i8 %usub) + %r = sub i8 %a, %usub + ret i8 %r +} + +declare void @use(i8) From 22cc1030f6a9afd14cc48ec0b935ebe8678c0c2e Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sat, 13 Jul 2019 13:16:46 +0000 Subject: [PATCH 037/451] Revert "[InstCombine] add tests for umin/umax via usub.sat; NFC" This reverts commit rL365999 / 0f6148df23edcd3081f5e761de19edd4f823f16d. 
The tests already exist in this file, and the hoped-for transform (mentioned in D62871) is invalid because of undef as discussed in D63060. llvm-svn: 366000 --- .../InstCombine/saturating-add-sub.ll | 72 ------------------- 1 file changed, 72 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll index 56e10626104cf..364c80d205f2d 100644 --- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll @@ -1614,75 +1614,3 @@ define i32 @unsigned_sat_constant_using_min_wrong_constant(i32 %x) { %r = add i32 %s, -42 ret i32 %r } - -define i8 @umax(i8 %a, i8 %b) { -; CHECK-LABEL: @umax( -; CHECK-NEXT: [[USUB:%.*]] = tail call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]]) -; CHECK-NEXT: [[R:%.*]] = add i8 [[USUB]], [[B]] -; CHECK-NEXT: ret i8 [[R]] -; - %usub = tail call i8 @llvm.usub.sat.i8(i8 %a, i8 %b) - %r = add i8 %usub, %b - ret i8 %r -} - -define <2 x i8> @umax_vec(<2 x i8> %a, <2 x i8> %b) { -; CHECK-LABEL: @umax_vec( -; CHECK-NEXT: [[USUB:%.*]] = tail call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) -; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[USUB]], [[B]] -; CHECK-NEXT: ret <2 x i8> [[R]] -; - %usub = tail call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a, <2 x i8> %b) - %r = add <2 x i8> %usub, %b - ret <2 x i8> %r -} - -define i8 @umax_extra_use(i8 %a, i8 %b) { -; CHECK-LABEL: @umax_extra_use( -; CHECK-NEXT: [[USUB:%.*]] = tail call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]]) -; CHECK-NEXT: call void @use(i8 [[USUB]]) -; CHECK-NEXT: [[R:%.*]] = add i8 [[USUB]], [[B]] -; CHECK-NEXT: ret i8 [[R]] -; - %usub = tail call i8 @llvm.usub.sat.i8(i8 %a, i8 %b) - call void @use(i8 %usub) - %r = add i8 %usub, %b - ret i8 %r -} - -define i8 @umin(i8 %a, i8 %b) { -; CHECK-LABEL: @umin( -; CHECK-NEXT: [[USUB:%.*]] = tail call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]]) -; CHECK-NEXT: [[R:%.*]] = sub i8 
[[A]], [[USUB]] -; CHECK-NEXT: ret i8 [[R]] -; - %usub = tail call i8 @llvm.usub.sat.i8(i8 %a, i8 %b) - %r = sub i8 %a, %usub - ret i8 %r -} - -define <2 x i8> @umin_vec(<2 x i8> %a, <2 x i8> %b) { -; CHECK-LABEL: @umin_vec( -; CHECK-NEXT: [[USUB:%.*]] = tail call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) -; CHECK-NEXT: [[R:%.*]] = sub <2 x i8> [[A]], [[USUB]] -; CHECK-NEXT: ret <2 x i8> [[R]] -; - %usub = tail call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a, <2 x i8> %b) - %r = sub <2 x i8> %a, %usub - ret <2 x i8> %r -} - -define i8 @umin_extra_use(i8 %a, i8 %b) { -; CHECK-LABEL: @umin_extra_use( -; CHECK-NEXT: [[USUB:%.*]] = tail call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 [[B:%.*]]) -; CHECK-NEXT: call void @use(i8 [[USUB]]) -; CHECK-NEXT: [[R:%.*]] = sub i8 [[A]], [[USUB]] -; CHECK-NEXT: ret i8 [[R]] -; - %usub = tail call i8 @llvm.usub.sat.i8(i8 %a, i8 %b) - call void @use(i8 %usub) - %r = sub i8 %a, %usub - ret i8 %r -} - -declare void @use(i8) From 2a7f5204602938ae89b0860e9412603d1951d945 Mon Sep 17 00:00:00 2001 From: Thomas Preud'homme Date: Sat, 13 Jul 2019 13:24:30 +0000 Subject: [PATCH 038/451] FileCheck [7/12]: Arbitrary long numeric expressions Summary: This patch is part of a patch series to add support for FileCheck numeric expressions. This specific patch extends numeric expressions to support an arbitrary number of operands, either variables or literals. 
Copyright: - Linaro (changes up to diff 183612 of revision D55940) - GraphCore (changes in later versions of revision D55940 and in new revision created off D55940) Reviewers: jhenderson, chandlerc, jdenny, probinson, grimar, arichardson, rnk Subscribers: hiraditya, llvm-commits, probinson, dblaikie, grimar, arichardson, tra, rnk, kristina, hfinkel, rogfer01, JonChesterfield Tags: #llvm Differential Revision: https://reviews.llvm.org/D60387 llvm-svn: 366001 --- llvm/docs/CommandGuide/FileCheck.rst | 25 +- llvm/include/llvm/Support/FileCheck.h | 208 ++++++++++------ llvm/lib/Support/FileCheck.cpp | 208 +++++++++------- llvm/test/FileCheck/line-count.txt | 2 +- llvm/test/FileCheck/numeric-expression.txt | 20 +- llvm/test/FileCheck/var-scope.txt | 4 +- llvm/unittests/Support/FileCheckTest.cpp | 273 ++++++++++++--------- 7 files changed, 446 insertions(+), 294 deletions(-) diff --git a/llvm/docs/CommandGuide/FileCheck.rst b/llvm/docs/CommandGuide/FileCheck.rst index a424606d4ce0a..0aa2d89fbcf08 100644 --- a/llvm/docs/CommandGuide/FileCheck.rst +++ b/llvm/docs/CommandGuide/FileCheck.rst @@ -107,10 +107,12 @@ and from the command line. Sets a filecheck pattern variable ``VAR`` with value ``VALUE`` that can be used in ``CHECK:`` lines. -.. option:: -D#= +.. option:: -D#= - Sets a filecheck numeric variable ``NUMVAR`` to ```` that can be used - in ``CHECK:`` lines. + Sets a filecheck numeric variable ``NUMVAR`` to the result of evaluating + ```` that can be used in ``CHECK:`` lines. See section + ``FileCheck Numeric Variables and Expressions`` for details on the format + and meaning of ````. .. option:: -version @@ -590,18 +592,15 @@ For example: would match ``mov r5, 42`` and set ``REG`` to the value ``5``. -The syntax of a numeric substitution is ``[[#]]`` where: +The syntax of a numeric substitution is ``[[#]]`` where ```` is an +expression. An expression is recursively defined as: -* ```` is the name of a defined numeric variable. 
+* a numeric operand, or +* an expression followed by an operator and a numeric operand. -* ```` is an optional operation to perform on the value of ````. - Currently supported operations are ``+`` and ``-``. - -* ```` is the immediate value that constitutes the second operand of - the operation ````. It must be present if ```` is present, absent - otherwise. - -Spaces are accepted before, after and between any of these elements. +A numeric operand is a previously defined numeric variable, or an integer +literal. The supported operators are ``+`` and ``-``. Spaces are accepted +before, after and between any of these elements. For example: diff --git a/llvm/include/llvm/Support/FileCheck.h b/llvm/include/llvm/Support/FileCheck.h index b3a8433b54e65..caff50b0ca466 100644 --- a/llvm/include/llvm/Support/FileCheck.h +++ b/llvm/include/llvm/Support/FileCheck.h @@ -40,6 +40,54 @@ struct FileCheckRequest { // Numeric substitution handling code. //===----------------------------------------------------------------------===// +/// Base class representing the AST of a given expression. +class FileCheckExpressionAST { +public: + virtual ~FileCheckExpressionAST() = default; + + /// Evaluates and \returns the value of the expression represented by this + /// AST or an error if evaluation fails. + virtual Expected eval() const = 0; +}; + +/// Class representing an unsigned literal in the AST of an expression. +class FileCheckExpressionLiteral : public FileCheckExpressionAST { +private: + /// Actual value of the literal. + uint64_t Value; + +public: + /// Constructs a literal with the specified value. + FileCheckExpressionLiteral(uint64_t Val) : Value(Val) {} + + /// \returns the literal's value. + Expected eval() const { return Value; } +}; + +/// Class to represent an undefined variable error, which quotes that +/// variable's name when printed. 
+class FileCheckUndefVarError : public ErrorInfo { +private: + StringRef VarName; + +public: + static char ID; + + FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {} + + StringRef getVarName() const { return VarName; } + + std::error_code convertToErrorCode() const override { + return inconvertibleErrorCode(); + } + + /// Print name of variable associated with this error. + void log(raw_ostream &OS) const override { + OS << "\""; + OS.write_escaped(VarName) << "\""; + } +}; + /// Class representing a numeric variable and its associated current value. class FileCheckNumericVariable { private: @@ -81,56 +129,53 @@ class FileCheckNumericVariable { size_t getDefLineNumber() { return DefLineNumber; } }; -/// Type of functions evaluating a given binary operation. -using binop_eval_t = uint64_t (*)(uint64_t, uint64_t); - -/// Class to represent an undefined variable error which prints that variable's -/// name between quotes when printed. -class FileCheckUndefVarError : public ErrorInfo { +/// Class representing the use of a numeric variable in the AST of an +/// expression. +class FileCheckNumericVariableUse : public FileCheckExpressionAST { private: - StringRef VarName; - -public: - static char ID; - - FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {} + /// Name of the numeric variable. + StringRef Name; - StringRef getVarName() const { return VarName; } + /// Pointer to the class instance for the variable this use is about. + FileCheckNumericVariable *NumericVariable; - std::error_code convertToErrorCode() const override { - return inconvertibleErrorCode(); - } +public: + FileCheckNumericVariableUse(StringRef Name, + FileCheckNumericVariable *NumericVariable) + : Name(Name), NumericVariable(NumericVariable) {} - /// Print name of variable associated with this error. - void log(raw_ostream &OS) const override { - OS << "\""; - OS.write_escaped(VarName) << "\""; - } + /// \returns the value of the variable referenced by this instance. 
+ Expected eval() const; }; -/// Class representing an expression consisting of either a single numeric -/// variable or a binary operation between a numeric variable and an -/// immediate. -class FileCheckExpression { +/// Type of functions evaluating a given binary operation. +using binop_eval_t = uint64_t (*)(uint64_t, uint64_t); + +/// Class representing a single binary operation in the AST of an expression. +class FileCheckASTBinop : public FileCheckExpressionAST { private: /// Left operand. - FileCheckNumericVariable *LeftOp; + std::unique_ptr LeftOperand; /// Right operand. - uint64_t RightOp; + std::unique_ptr RightOperand; /// Pointer to function that can evaluate this binary operation. binop_eval_t EvalBinop; public: - FileCheckExpression(binop_eval_t EvalBinop, - FileCheckNumericVariable *OperandLeft, - uint64_t OperandRight) - : LeftOp(OperandLeft), RightOp(OperandRight), EvalBinop(EvalBinop) {} - - /// Evaluates the value of this expression, using EvalBinop to perform the - /// binary operation it consists of. \returns an error if the numeric - /// variable used is undefined, or the expression value otherwise. + FileCheckASTBinop(binop_eval_t EvalBinop, + std::unique_ptr LeftOp, + std::unique_ptr RightOp) + : EvalBinop(EvalBinop) { + LeftOperand = std::move(LeftOp); + RightOperand = std::move(RightOp); + } + + /// Evaluates the value of the binary operation represented by this AST, + /// using EvalBinop on the result of recursively evaluating the operands. + /// \returns the expression value or an error if an undefined numeric + /// variable is used in one of the operands. Expected eval() const; }; @@ -187,15 +232,15 @@ class FileCheckNumericSubstitution : public FileCheckSubstitution { private: /// Pointer to the class representing the expression whose value is to be /// substituted. 
- FileCheckExpression *Expression; + std::unique_ptr ExpressionAST; public: - FileCheckNumericSubstitution(FileCheckPatternContext *Context, - StringRef ExpressionStr, - FileCheckExpression *Expression, + FileCheckNumericSubstitution(FileCheckPatternContext *Context, StringRef Expr, + std::unique_ptr ExprAST, size_t InsertIdx) - : FileCheckSubstitution(Context, ExpressionStr, InsertIdx), - Expression(Expression) {} + : FileCheckSubstitution(Context, Expr, InsertIdx) { + ExpressionAST = std::move(ExprAST); + } /// \returns a string containing the result of evaluating the expression in /// this substitution, or an error if evaluation failed. @@ -278,10 +323,6 @@ class FileCheckPatternContext { /// easily updating its value. FileCheckNumericVariable *LineVariable = nullptr; - /// Vector holding pointers to all parsed expressions. Used to automatically - /// free the expressions once they are guaranteed to no longer be used. - std::vector> Expressions; - /// Vector holding pointers to all parsed numeric variables. Used to /// automatically free them once they are guaranteed to no longer be used. std::vector> NumericVariables; @@ -313,12 +354,6 @@ class FileCheckPatternContext { void clearLocalVars(); private: - /// Makes a new expression instance and registers it for destruction when - /// the context is destroyed. - FileCheckExpression *makeExpression(binop_eval_t EvalBinop, - FileCheckNumericVariable *OperandLeft, - uint64_t OperandRight); - /// Makes a new numeric variable and registers it for destruction when the /// context is destroyed. template @@ -333,7 +368,8 @@ class FileCheckPatternContext { /// the context is destroyed. 
FileCheckSubstitution * makeNumericSubstitution(StringRef ExpressionStr, - FileCheckExpression *Expression, size_t InsertIdx); + std::unique_ptr ExpressionAST, + size_t InsertIdx); }; /// Class to represent an error holding a diagnostic with location information @@ -458,13 +494,20 @@ class FileCheckPattern { /// \returns whether \p C is a valid first character for a variable name. static bool isValidVarNameStart(char C); + + /// Parsing information about a variable. + struct VariableProperties { + StringRef Name; + bool IsPseudo; + }; + /// Parses the string at the start of \p Str for a variable name. \returns - /// an error holding a diagnostic against \p SM if parsing fail, or the - /// name of the variable otherwise. In the latter case, sets \p IsPseudo to - /// indicate if it is a pseudo variable and strips \p Str from the variable - /// name. - static Expected parseVariable(StringRef &Str, bool &IsPseudo, - const SourceMgr &SM); + /// a VariableProperties structure holding the variable name and whether it + /// is the name of a pseudo variable, or an error holding a diagnostic + /// against \p SM if parsing fail. If parsing was successful, also strips + /// \p Str from the variable name. + static Expected parseVariable(StringRef &Str, + const SourceMgr &SM); /// Parses \p Expr for the name of a numeric variable to be defined at line /// \p LineNumber. \returns a pointer to the class instance representing that /// variable, creating it if needed, or an error holding a diagnostic against @@ -473,16 +516,19 @@ class FileCheckPattern { parseNumericVariableDefinition(StringRef &Expr, FileCheckPatternContext *Context, size_t LineNumber, const SourceMgr &SM); - /// Parses \p Expr for a numeric substitution block. \returns the class - /// representing the AST of the expression whose value must be substituted, - /// or an error holding a diagnostic against \p SM if parsing fails. 
If - /// substitution was successful, sets \p DefinedNumericVariable to point to - /// the class representing the numeric variable defined in this numeric + /// Parses \p Expr for a numeric substitution block. Parameter + /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE + /// expression. \returns a pointer to the class instance representing the AST + /// of the expression whose value must be substituted, or an error holding a + /// diagnostic against \p SM if parsing fails. If substitution was + /// successful, sets \p DefinedNumericVariable to point to the class + /// representing the numeric variable being defined in this numeric /// substitution block, or None if this block does not define any variable. - Expected parseNumericSubstitutionBlock( + Expected> + parseNumericSubstitutionBlock( StringRef Expr, Optional &DefinedNumericVariable, - const SourceMgr &SM) const; + bool IsLegacyLineExpr, const SourceMgr &SM) const; /// Parses the pattern in \p PatternStr and initializes this FileCheckPattern /// instance accordingly. /// @@ -507,7 +553,7 @@ class FileCheckPattern { Expected match(StringRef Buffer, size_t &MatchLen, const SourceMgr &SM) const; /// Prints the value of successful substitutions or the name of the undefined - /// string or numeric variable preventing a successful substitution. + /// string or numeric variables preventing a successful substitution. void printSubstitutions(const SourceMgr &SM, StringRef Buffer, SMRange MatchRange = None) const; void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer, @@ -536,16 +582,28 @@ class FileCheckPattern { /// was not found. size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); - /// Parses \p Expr for the use of a numeric variable. \returns the pointer to - /// the class instance representing that variable if successful, or an error + /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use. 
+ /// \returns the pointer to the class instance representing that variable if + /// successful, or an error holding a diagnostic against \p SM otherwise. + Expected> + parseNumericVariableUse(StringRef Name, bool IsPseudo, + const SourceMgr &SM) const; + enum class AllowedOperand { LineVar, Literal, Any }; + /// Parses \p Expr for use of a numeric operand. Accepts both literal values + /// and numeric variables, depending on the value of \p AO. \returns the + /// class representing that operand in the AST of the expression or an error /// holding a diagnostic against \p SM otherwise. - Expected - parseNumericVariableUse(StringRef &Expr, const SourceMgr &SM) const; - /// Parses \p Expr for a binary operation. - /// \returns the class representing the binary operation of the expression, - /// or an error holding a diagnostic against \p SM otherwise. - Expected parseBinop(StringRef &Expr, - const SourceMgr &SM) const; + Expected> + parseNumericOperand(StringRef &Expr, AllowedOperand AO, + const SourceMgr &SM) const; + /// Parses \p Expr for a binary operation. The left operand of this binary + /// operation is given in \p LeftOp and \p IsLegacyLineExpr indicates whether + /// we are parsing a legacy @LINE expression. \returns the class representing + /// the binary operation in the AST of the expression, or an error holding a + /// diagnostic against \p SM otherwise. 
+ Expected> + parseBinop(StringRef &Expr, std::unique_ptr LeftOp, + bool IsLegacyLineExpr, const SourceMgr &SM) const; }; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Support/FileCheck.cpp b/llvm/lib/Support/FileCheck.cpp index 5ec126f934e65..9fb4d798849d7 100644 --- a/llvm/lib/Support/FileCheck.cpp +++ b/llvm/lib/Support/FileCheck.cpp @@ -35,17 +35,33 @@ void FileCheckNumericVariable::clearValue() { Value = None; } -Expected FileCheckExpression::eval() const { - assert(LeftOp && "Evaluating an empty expression"); - Optional LeftOpValue = LeftOp->getValue(); - // Variable is undefined. - if (!LeftOpValue) - return make_error(LeftOp->getName()); - return EvalBinop(*LeftOpValue, RightOp); +Expected FileCheckNumericVariableUse::eval() const { + Optional Value = NumericVariable->getValue(); + if (Value) + return *Value; + return make_error(Name); +} + +Expected FileCheckASTBinop::eval() const { + Expected LeftOp = LeftOperand->eval(); + Expected RightOp = RightOperand->eval(); + + // Bubble up any error (e.g. undefined variables) in the recursive + // evaluation. 
+ if (!LeftOp || !RightOp) { + Error Err = Error::success(); + if (!LeftOp) + Err = joinErrors(std::move(Err), LeftOp.takeError()); + if (!RightOp) + Err = joinErrors(std::move(Err), RightOp.takeError()); + return std::move(Err); + } + + return EvalBinop(*LeftOp, *RightOp); } Expected FileCheckNumericSubstitution::getResult() const { - Expected EvaluatedValue = Expression->eval(); + Expected EvaluatedValue = ExpressionAST->eval(); if (!EvaluatedValue) return EvaluatedValue.takeError(); return utostr(*EvaluatedValue); @@ -63,15 +79,14 @@ bool FileCheckPattern::isValidVarNameStart(char C) { return C == '_' || isalpha(C); } -Expected FileCheckPattern::parseVariable(StringRef &Str, - bool &IsPseudo, - const SourceMgr &SM) { +Expected +FileCheckPattern::parseVariable(StringRef &Str, const SourceMgr &SM) { if (Str.empty()) return FileCheckErrorDiagnostic::get(SM, Str, "empty variable name"); bool ParsedOneChar = false; unsigned I = 0; - IsPseudo = Str[0] == '@'; + bool IsPseudo = Str[0] == '@'; // Global vars start with '$'. 
if (Str[0] == '$' || IsPseudo) @@ -89,7 +104,7 @@ Expected FileCheckPattern::parseVariable(StringRef &Str, StringRef Name = Str.take_front(I); Str = Str.substr(I); - return Name; + return VariableProperties {Name, IsPseudo}; } // StringRef holding all characters considered as horizontal whitespaces by @@ -111,13 +126,12 @@ Expected FileCheckPattern::parseNumericVariableDefinition( StringRef &Expr, FileCheckPatternContext *Context, size_t LineNumber, const SourceMgr &SM) { - bool IsPseudo; - Expected ParseVarResult = parseVariable(Expr, IsPseudo, SM); + Expected ParseVarResult = parseVariable(Expr, SM); if (!ParseVarResult) return ParseVarResult.takeError(); - StringRef Name = *ParseVarResult; + StringRef Name = ParseVarResult->Name; - if (IsPseudo) + if (ParseVarResult->IsPseudo) return FileCheckErrorDiagnostic::get( SM, Name, "definition of pseudo numeric variable unsupported"); @@ -143,15 +157,9 @@ FileCheckPattern::parseNumericVariableDefinition( return DefinedNumericVariable; } -Expected -FileCheckPattern::parseNumericVariableUse(StringRef &Expr, +Expected> +FileCheckPattern::parseNumericVariableUse(StringRef Name, bool IsPseudo, const SourceMgr &SM) const { - bool IsPseudo; - Expected ParseVarResult = parseVariable(Expr, IsPseudo, SM); - if (!ParseVarResult) - return ParseVarResult.takeError(); - StringRef Name = *ParseVarResult; - if (IsPseudo && !Name.equals("@LINE")) return FileCheckErrorDiagnostic::get( SM, Name, "invalid pseudo numeric variable '" + Name + "'"); @@ -178,7 +186,32 @@ FileCheckPattern::parseNumericVariableUse(StringRef &Expr, SM, Name, "numeric variable '" + Name + "' defined on the same line as used"); - return NumericVariable; + return llvm::make_unique(Name, NumericVariable); +} + +Expected> +FileCheckPattern::parseNumericOperand(StringRef &Expr, AllowedOperand AO, + const SourceMgr &SM) const { + if (AO == AllowedOperand::LineVar || AO == AllowedOperand::Any) { + // Try to parse as a numeric variable use. 
+ Expected ParseVarResult = + parseVariable(Expr, SM); + if (ParseVarResult) + return parseNumericVariableUse(ParseVarResult->Name, + ParseVarResult->IsPseudo, SM); + if (AO == AllowedOperand::LineVar) + return ParseVarResult.takeError(); + // Ignore the error and retry parsing as a literal. + consumeError(ParseVarResult.takeError()); + } + + // Otherwise, parse it as a literal. + uint64_t LiteralValue; + if (!Expr.consumeInteger(/*Radix=*/10, LiteralValue)) + return llvm::make_unique(LiteralValue); + + return FileCheckErrorDiagnostic::get(SM, Expr, + "invalid operand format '" + Expr + "'"); } static uint64_t add(uint64_t LeftOp, uint64_t RightOp) { @@ -189,20 +222,16 @@ static uint64_t sub(uint64_t LeftOp, uint64_t RightOp) { return LeftOp - RightOp; } -Expected -FileCheckPattern::parseBinop(StringRef &Expr, const SourceMgr &SM) const { - Expected LeftParseResult = - parseNumericVariableUse(Expr, SM); - if (!LeftParseResult) { - return LeftParseResult.takeError(); - } - FileCheckNumericVariable *LeftOp = *LeftParseResult; +Expected> +FileCheckPattern::parseBinop(StringRef &Expr, + std::unique_ptr LeftOp, + bool IsLegacyLineExpr, const SourceMgr &SM) const { + Expr = Expr.ltrim(SpaceChars); + if (Expr.empty()) + return std::move(LeftOp); // Check if this is a supported operation and select a function to perform // it. 
- Expr = Expr.ltrim(SpaceChars); - if (Expr.empty()) - return Context->makeExpression(add, LeftOp, 0); SMLoc OpLoc = SMLoc::getFromPointer(Expr.data()); char Operator = popFront(Expr); binop_eval_t EvalBinop; @@ -223,22 +252,24 @@ FileCheckPattern::parseBinop(StringRef &Expr, const SourceMgr &SM) const { if (Expr.empty()) return FileCheckErrorDiagnostic::get(SM, Expr, "missing operand in expression"); - uint64_t RightOp; - if (Expr.consumeInteger(10, RightOp)) - return FileCheckErrorDiagnostic::get( - SM, Expr, "invalid offset in expression '" + Expr + "'"); - Expr = Expr.ltrim(SpaceChars); - if (!Expr.empty()) - return FileCheckErrorDiagnostic::get( - SM, Expr, "unexpected characters at end of expression '" + Expr + "'"); + // The second operand in a legacy @LINE expression is always a literal. + AllowedOperand AO = + IsLegacyLineExpr ? AllowedOperand::Literal : AllowedOperand::Any; + Expected> RightOpResult = + parseNumericOperand(Expr, AO, SM); + if (!RightOpResult) + return RightOpResult; - return Context->makeExpression(EvalBinop, LeftOp, RightOp); + Expr = Expr.ltrim(SpaceChars); + return llvm::make_unique(EvalBinop, std::move(LeftOp), + std::move(*RightOpResult)); } -Expected FileCheckPattern::parseNumericSubstitutionBlock( +Expected> +FileCheckPattern::parseNumericSubstitutionBlock( StringRef Expr, Optional &DefinedNumericVariable, - const SourceMgr &SM) const { + bool IsLegacyLineExpr, const SourceMgr &SM) const { // Parse the numeric variable definition. DefinedNumericVariable = None; size_t DefEnd = Expr.find(':'); @@ -259,12 +290,29 @@ Expected FileCheckPattern::parseNumericSubstitutionBlock( return ParseResult.takeError(); DefinedNumericVariable = *ParseResult; - return Context->makeExpression(add, nullptr, 0); + return nullptr; } // Parse the expression itself. Expr = Expr.ltrim(SpaceChars); - return parseBinop(Expr, SM); + // The first operand in a legacy @LINE expression is always the @LINE pseudo + // variable. 
+ AllowedOperand AO = + IsLegacyLineExpr ? AllowedOperand::LineVar : AllowedOperand::Any; + Expected> ParseResult = + parseNumericOperand(Expr, AO, SM); + while (ParseResult && !Expr.empty()) { + ParseResult = + parseBinop(Expr, std::move(*ParseResult), IsLegacyLineExpr, SM); + // Legacy @LINE expressions only allow 2 operands. + if (ParseResult && IsLegacyLineExpr && !Expr.empty()) + return FileCheckErrorDiagnostic::get( + SM, Expr, + "unexpected characters at end of expression '" + Expr + "'"); + } + if (!ParseResult) + return ParseResult; + return std::move(*ParseResult); } bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, @@ -375,12 +423,15 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, PatternStr = UnparsedPatternStr.substr(End + 2); bool IsDefinition = false; + // Whether the substitution block is a legacy use of @LINE with string + // substitution block syntax. + bool IsLegacyLineExpr = false; StringRef DefName; StringRef SubstStr; StringRef MatchRegexp; size_t SubstInsertIdx = RegExStr.size(); - // Parse string variable or legacy expression. + // Parse string variable or legacy @LINE expression. if (!IsNumBlock) { size_t VarEndIdx = MatchStr.find(":"); size_t SpacePos = MatchStr.substr(0, VarEndIdx).find_first_of(" \t"); @@ -391,15 +442,15 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, } // Get the name (e.g. "foo") and verify it is well formed. 
- bool IsPseudo; StringRef OrigMatchStr = MatchStr; - Expected ParseVarResult = - parseVariable(MatchStr, IsPseudo, SM); + Expected ParseVarResult = + parseVariable(MatchStr, SM); if (!ParseVarResult) { logAllUnhandledErrors(ParseVarResult.takeError(), errs()); return true; } - StringRef Name = *ParseVarResult; + StringRef Name = ParseVarResult->Name; + bool IsPseudo = ParseVarResult->IsPseudo; IsDefinition = (VarEndIdx != StringRef::npos); if (IsDefinition) { @@ -424,23 +475,24 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, } else { if (IsPseudo) { MatchStr = OrigMatchStr; - IsNumBlock = true; + IsLegacyLineExpr = IsNumBlock = true; } else SubstStr = Name; } } // Parse numeric substitution block. - FileCheckExpression *Expression; + std::unique_ptr ExpressionAST; Optional DefinedNumericVariable; if (IsNumBlock) { - Expected ParseResult = - parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable, SM); + Expected> ParseResult = + parseNumericSubstitutionBlock(MatchStr, DefinedNumericVariable, + IsLegacyLineExpr, SM); if (!ParseResult) { logAllUnhandledErrors(ParseResult.takeError(), errs()); return true; } - Expression = *ParseResult; + ExpressionAST = std::move(*ParseResult); if (DefinedNumericVariable) { IsDefinition = true; DefName = (*DefinedNumericVariable)->getName(); @@ -468,8 +520,8 @@ bool FileCheckPattern::parsePattern(StringRef PatternStr, StringRef Prefix, // previous CHECK patterns, and substitution of expressions. FileCheckSubstitution *Substitution = IsNumBlock - ? Context->makeNumericSubstitution(SubstStr, Expression, - SubstInsertIdx) + ? 
Context->makeNumericSubstitution( + SubstStr, std::move(ExpressionAST), SubstInsertIdx) : Context->makeStringSubstitution(SubstStr, SubstInsertIdx); Substitutions.push_back(Substitution); } @@ -660,7 +712,7 @@ void FileCheckPattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer, Expected MatchedValue = Substitution->getResult(); // Substitution failed or is not known at match time, print the undefined - // variable it uses. + // variables it uses. if (!MatchedValue) { bool UndefSeen = false; handleAllErrors(MatchedValue.takeError(), @@ -669,13 +721,11 @@ void FileCheckPattern::printSubstitutions(const SourceMgr &SM, StringRef Buffer, [](const FileCheckErrorDiagnostic &E) {}, [&](const FileCheckUndefVarError &E) { if (!UndefSeen) { - OS << "uses undefined variable "; + OS << "uses undefined variable(s):"; UndefSeen = true; } + OS << " "; E.log(OS); - }, - [](const ErrorInfoBase &E) { - llvm_unreachable("Unexpected error"); }); } else { // Substitution succeeded. Print substituted value. @@ -768,15 +818,6 @@ FileCheckPatternContext::getPatternVarValue(StringRef VarName) { return VarIter->second; } -FileCheckExpression * -FileCheckPatternContext::makeExpression(binop_eval_t EvalBinop, - FileCheckNumericVariable *OperandLeft, - uint64_t OperandRight) { - Expressions.push_back(llvm::make_unique( - EvalBinop, OperandLeft, OperandRight)); - return Expressions.back().get(); -} - template FileCheckNumericVariable * FileCheckPatternContext::makeNumericVariable(Types... 
args) { @@ -794,10 +835,10 @@ FileCheckPatternContext::makeStringSubstitution(StringRef VarName, } FileCheckSubstitution *FileCheckPatternContext::makeNumericSubstitution( - StringRef ExpressionStr, FileCheckExpression *Expression, - size_t InsertIdx) { + StringRef ExpressionStr, + std::unique_ptr ExpressionAST, size_t InsertIdx) { Substitutions.push_back(llvm::make_unique( - this, ExpressionStr, Expression, InsertIdx)); + this, ExpressionStr, std::move(ExpressionAST), InsertIdx)); return Substitutions.back().get(); } @@ -1777,9 +1818,8 @@ Error FileCheckPatternContext::defineCmdlineVariables( std::pair CmdlineNameVal = CmdlineDef.split('='); StringRef CmdlineName = CmdlineNameVal.first; StringRef OrigCmdlineName = CmdlineName; - bool IsPseudo; - Expected ParseVarResult = - FileCheckPattern::parseVariable(CmdlineName, IsPseudo, SM); + Expected ParseVarResult = + FileCheckPattern::parseVariable(CmdlineName, SM); if (!ParseVarResult) { Errs = joinErrors(std::move(Errs), ParseVarResult.takeError()); continue; @@ -1787,7 +1827,7 @@ Error FileCheckPatternContext::defineCmdlineVariables( // Check that CmdlineName does not denote a pseudo variable is only // composed of the parsed numeric variable. This catches cases like // "FOO+2" in a "FOO+2=10" definition. - if (IsPseudo || !CmdlineName.empty()) { + if (ParseVarResult->IsPseudo || !CmdlineName.empty()) { Errs = joinErrors(std::move(Errs), FileCheckErrorDiagnostic::get( SM, OrigCmdlineName, @@ -1795,7 +1835,7 @@ Error FileCheckPatternContext::defineCmdlineVariables( OrigCmdlineName + "'")); continue; } - StringRef Name = *ParseVarResult; + StringRef Name = ParseVarResult->Name; // Detect collisions between string and numeric variables when the former // is created later than the latter. 
diff --git a/llvm/test/FileCheck/line-count.txt b/llvm/test/FileCheck/line-count.txt index 7b34e00bef404..0c7be7ebc99b9 100644 --- a/llvm/test/FileCheck/line-count.txt +++ b/llvm/test/FileCheck/line-count.txt @@ -50,7 +50,7 @@ 50 ERR9: line-count.txt:[[#@LINE-1]]:17: error: unsupported operation '*' 51 52 BAD10: [[@LINE-x]] -53 ERR10: line-count.txt:[[#@LINE-1]]:19: error: invalid offset in expression 'x' +53 ERR10: line-count.txt:[[#@LINE-1]]:19: error: invalid operand format 'x' 54 55 BAD11: [[@LINE-1x]] 56 ERR11: line-count.txt:[[#@LINE-1]]:20: error: unexpected characters at end of expression 'x' diff --git a/llvm/test/FileCheck/numeric-expression.txt b/llvm/test/FileCheck/numeric-expression.txt index 5e10d31dc2a0e..3ff7519e51193 100644 --- a/llvm/test/FileCheck/numeric-expression.txt +++ b/llvm/test/FileCheck/numeric-expression.txt @@ -59,8 +59,8 @@ CHECK-NEXT: [[# VAR1 -1]] CHECK-NEXT: [[# VAR1 - 1]] CHECK-NEXT: [[# VAR1 - 1 ]] -; Numeric expressions using variables defined on the command-line and an -; immediate interpreted as an unsigned value. +; Numeric expressions using variables defined on other lines and an immediate +; interpreted as an unsigned value. ; Note: 9223372036854775819 = 0x8000000000000000 + 11 ; 9223372036854775808 = 0x8000000000000000 USE UNSIGNED IMM @@ -68,21 +68,29 @@ USE UNSIGNED IMM CHECK-LABEL: USE UNSIGNED IMM CHECK-NEXT: [[#VAR1+9223372036854775808]] -; Numeric expression using undefined variable. +; Numeric expressions using more than one variable defined on other lines. +USE MULTI VAR +31 +42 +CHECK-LABEL: USE MULTI VAR +CHECK-NEXT: [[#VAR2:]] +CHECK-NEXT: [[#VAR1+VAR2]] + +; Numeric expression using undefined variables. 
RUN: not FileCheck --check-prefix UNDEF-USE --input-file %s %s 2>&1 \ RUN: | FileCheck --strict-whitespace --check-prefix UNDEF-USE-MSG %s UNDEF VAR USE UNDEFVAR: 11 UNDEF-USE-LABEL: UNDEF VAR USE -UNDEF-USE-NEXT: UNDEFVAR: [[#UNDEFVAR]] +UNDEF-USE-NEXT: UNDEFVAR: [[#UNDEFVAR1+UNDEFVAR2]] UNDEF-USE-MSG: numeric-expression.txt:[[#@LINE-1]]:17: error: {{U}}NDEF-USE-NEXT: expected string not found in input -UNDEF-USE-MSG-NEXT: {{U}}NDEF-USE-NEXT: UNDEFVAR: {{\[\[#UNDEFVAR\]\]}} +UNDEF-USE-MSG-NEXT: {{U}}NDEF-USE-NEXT: UNDEFVAR: {{\[\[#UNDEFVAR1\+UNDEFVAR2\]\]}} UNDEF-USE-MSG-NEXT: {{^ \^$}} UNDEF-USE-MSG-NEXT: numeric-expression.txt:[[#@LINE-6]]:1: note: scanning from here UNDEF-USE-MSG-NEXT: UNDEFVAR: 11 UNDEF-USE-MSG-NEXT: {{^\^$}} -UNDEF-USE-MSG-NEXT: numeric-expression.txt:[[#@LINE-9]]:1: note: uses undefined variable "UNDEFVAR" +UNDEF-USE-MSG-NEXT: numeric-expression.txt:[[#@LINE-9]]:1: note: uses undefined variable(s): "UNDEFVAR1" "UNDEFVAR2" UNDEF-USE-MSG-NEXT: UNDEFVAR: 11 UNDEF-USE-MSG-NEXT: {{^\^$}} diff --git a/llvm/test/FileCheck/var-scope.txt b/llvm/test/FileCheck/var-scope.txt index c45a384812659..3fa8a73e157d8 100644 --- a/llvm/test/FileCheck/var-scope.txt +++ b/llvm/test/FileCheck/var-scope.txt @@ -34,5 +34,5 @@ LOCAL3: [[LOCAL]][[#LOCNUM+2]] GLOBAL: [[$GLOBAL]][[#$GLOBNUM+2]] ERRUNDEF: expected string not found in input -ERRUNDEFLOCAL: uses undefined variable "LOCAL" -ERRUNDEFLOCNUM: uses undefined variable "LOCNUM" +ERRUNDEFLOCAL: uses undefined variable(s): "LOCAL" +ERRUNDEFLOCNUM: uses undefined variable(s): "LOCNUM" diff --git a/llvm/unittests/Support/FileCheckTest.cpp b/llvm/unittests/Support/FileCheckTest.cpp index 0cc729da46acb..8df4603dbd118 100644 --- a/llvm/unittests/Support/FileCheckTest.cpp +++ b/llvm/unittests/Support/FileCheckTest.cpp @@ -8,56 +8,112 @@ #include "llvm/Support/FileCheck.h" #include "gtest/gtest.h" +#include using namespace llvm; namespace { class FileCheckTest : public ::testing::Test {}; +TEST_F(FileCheckTest, Literal) { 
+ // Eval returns the literal's value. + FileCheckExpressionLiteral Ten(10); + Expected Value = Ten.eval(); + EXPECT_TRUE(bool(Value)); + EXPECT_EQ(10U, *Value); + + // Max value can be correctly represented. + FileCheckExpressionLiteral Max(std::numeric_limits::max()); + Value = Max.eval(); + EXPECT_TRUE(bool(Value)); + EXPECT_EQ(std::numeric_limits::max(), *Value); +} + +static std::string toString(const std::unordered_set &Set) { + bool First = true; + std::string Str; + for (StringRef S : Set) { + Str += Twine(First ? "{" + S : ", " + S).str(); + First = false; + } + Str += '}'; + return Str; +} + +static void +expectUndefErrors(std::unordered_set ExpectedUndefVarNames, + Error Err) { + handleAllErrors(std::move(Err), [&](const FileCheckUndefVarError &E) { + ExpectedUndefVarNames.erase(E.getVarName()); + }); + EXPECT_TRUE(ExpectedUndefVarNames.empty()) << toString(ExpectedUndefVarNames); +} + +static void expectUndefError(const Twine &ExpectedUndefVarName, Error Err) { + expectUndefErrors({ExpectedUndefVarName.str()}, std::move(Err)); +} + TEST_F(FileCheckTest, NumericVariable) { - // Undefined variable: getValue fails, setValue does not trigger assert. + // Undefined variable: getValue and eval fail, error returned by eval holds + // the name of the undefined variable and setValue does not trigger assert. FileCheckNumericVariable FooVar = FileCheckNumericVariable(1, "FOO"); EXPECT_EQ("FOO", FooVar.getName()); - llvm::Optional Value = FooVar.getValue(); - EXPECT_FALSE(Value); - FooVar.clearValue(); + FileCheckNumericVariableUse FooVarUse = + FileCheckNumericVariableUse("FOO", &FooVar); + EXPECT_FALSE(FooVar.getValue()); + Expected EvalResult = FooVarUse.eval(); + EXPECT_FALSE(EvalResult); + expectUndefError("FOO", EvalResult.takeError()); FooVar.setValue(42); - // Defined variable: getValue returns value set. - Value = FooVar.getValue(); - EXPECT_TRUE(Value); + // Defined variable: getValue and eval return value set. 
+ Optional Value = FooVar.getValue(); + EXPECT_TRUE(bool(Value)); EXPECT_EQ(42U, *Value); + EvalResult = FooVarUse.eval(); + EXPECT_TRUE(bool(EvalResult)); + EXPECT_EQ(42U, *EvalResult); - // Clearing variable: getValue fails. + // Clearing variable: getValue and eval fail. Error returned by eval holds + // the name of the cleared variable. FooVar.clearValue(); Value = FooVar.getValue(); EXPECT_FALSE(Value); + EvalResult = FooVarUse.eval(); + EXPECT_FALSE(EvalResult); + expectUndefError("FOO", EvalResult.takeError()); } uint64_t doAdd(uint64_t OpL, uint64_t OpR) { return OpL + OpR; } -static void expectUndefError(const Twine &ExpectedStr, Error Err) { - handleAllErrors(std::move(Err), [&](const FileCheckUndefVarError &E) { - EXPECT_EQ(ExpectedStr.str(), E.getVarName()); - }); -} - -TEST_F(FileCheckTest, Expression) { +TEST_F(FileCheckTest, Binop) { FileCheckNumericVariable FooVar = FileCheckNumericVariable("FOO", 42); - FileCheckExpression Expression = FileCheckExpression(doAdd, &FooVar, 18); + std::unique_ptr FooVarUse = + llvm::make_unique("FOO", &FooVar); + FileCheckNumericVariable BarVar = FileCheckNumericVariable("BAR", 18); + std::unique_ptr BarVarUse = + llvm::make_unique("BAR", &BarVar); + FileCheckASTBinop Binop = + FileCheckASTBinop(doAdd, std::move(FooVarUse), std::move(BarVarUse)); // Defined variable: eval returns right value. - Expected Value = Expression.eval(); + Expected Value = Binop.eval(); EXPECT_TRUE(bool(Value)); EXPECT_EQ(60U, *Value); - // Undefined variable: eval fails, undefined variable returned. We call - // getUndefVarName first to check that it can be called without calling - // eval() first. + // 1 undefined variable: eval fails, error contains name of undefined + // variable. 
FooVar.clearValue(); - Error EvalError = Expression.eval().takeError(); - EXPECT_TRUE(errorToBool(std::move(EvalError))); - expectUndefError("FOO", std::move(EvalError)); + Value = Binop.eval(); + EXPECT_FALSE(Value); + expectUndefError("FOO", Value.takeError()); + + // 2 undefined variables: eval fails, error contains names of all undefined + // variables. + BarVar.clearValue(); + Value = Binop.eval(); + EXPECT_FALSE(Value); + expectUndefErrors({"FOO", "BAR"}, Value.takeError()); } TEST_F(FileCheckTest, ValidVarNameStart) { @@ -84,77 +140,69 @@ TEST_F(FileCheckTest, ParseVar) { SourceMgr SM; StringRef OrigVarName = bufferize(SM, "GoodVar42"); StringRef VarName = OrigVarName; - bool IsPseudo = true; - Expected ParsedName = - FileCheckPattern::parseVariable(VarName, IsPseudo, SM); - EXPECT_TRUE(bool(ParsedName)); - EXPECT_EQ(*ParsedName, OrigVarName); + Expected ParsedVarResult = + FileCheckPattern::parseVariable(VarName, SM); + EXPECT_TRUE(bool(ParsedVarResult)); + EXPECT_EQ(ParsedVarResult->Name, OrigVarName); EXPECT_TRUE(VarName.empty()); - EXPECT_FALSE(IsPseudo); + EXPECT_FALSE(ParsedVarResult->IsPseudo); VarName = OrigVarName = bufferize(SM, "$GoodGlobalVar"); - IsPseudo = true; - ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM); - EXPECT_TRUE(bool(ParsedName)); - EXPECT_EQ(*ParsedName, OrigVarName); + ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM); + EXPECT_TRUE(bool(ParsedVarResult)); + EXPECT_EQ(ParsedVarResult->Name, OrigVarName); EXPECT_TRUE(VarName.empty()); - EXPECT_FALSE(IsPseudo); + EXPECT_FALSE(ParsedVarResult->IsPseudo); VarName = OrigVarName = bufferize(SM, "@GoodPseudoVar"); - IsPseudo = true; - ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM); - EXPECT_TRUE(bool(ParsedName)); - EXPECT_EQ(*ParsedName, OrigVarName); + ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM); + EXPECT_TRUE(bool(ParsedVarResult)); + EXPECT_EQ(ParsedVarResult->Name, OrigVarName); EXPECT_TRUE(VarName.empty()); 
- EXPECT_TRUE(IsPseudo); + EXPECT_TRUE(ParsedVarResult->IsPseudo); VarName = bufferize(SM, "42BadVar"); - ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM); - EXPECT_TRUE(errorToBool(ParsedName.takeError())); + ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM); + EXPECT_TRUE(errorToBool(ParsedVarResult.takeError())); VarName = bufferize(SM, "$@"); - ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM); - EXPECT_TRUE(errorToBool(ParsedName.takeError())); + ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM); + EXPECT_TRUE(errorToBool(ParsedVarResult.takeError())); VarName = OrigVarName = bufferize(SM, "B@dVar"); - IsPseudo = true; - ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM); - EXPECT_TRUE(bool(ParsedName)); + ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM); + EXPECT_TRUE(bool(ParsedVarResult)); EXPECT_EQ(VarName, OrigVarName.substr(1)); - EXPECT_EQ(*ParsedName, "B"); - EXPECT_FALSE(IsPseudo); + EXPECT_EQ(ParsedVarResult->Name, "B"); + EXPECT_FALSE(ParsedVarResult->IsPseudo); VarName = OrigVarName = bufferize(SM, "B$dVar"); - IsPseudo = true; - ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM); - EXPECT_TRUE(bool(ParsedName)); + ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM); + EXPECT_TRUE(bool(ParsedVarResult)); EXPECT_EQ(VarName, OrigVarName.substr(1)); - EXPECT_EQ(*ParsedName, "B"); - EXPECT_FALSE(IsPseudo); + EXPECT_EQ(ParsedVarResult->Name, "B"); + EXPECT_FALSE(ParsedVarResult->IsPseudo); VarName = bufferize(SM, "BadVar+"); - IsPseudo = true; - ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM); - EXPECT_TRUE(bool(ParsedName)); + ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM); + EXPECT_TRUE(bool(ParsedVarResult)); EXPECT_EQ(VarName, "+"); - EXPECT_EQ(*ParsedName, "BadVar"); - EXPECT_FALSE(IsPseudo); + EXPECT_EQ(ParsedVarResult->Name, "BadVar"); + EXPECT_FALSE(ParsedVarResult->IsPseudo); VarName = 
bufferize(SM, "BadVar-"); - IsPseudo = true; - ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM); - EXPECT_TRUE(bool(ParsedName)); + ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM); + EXPECT_TRUE(bool(ParsedVarResult)); EXPECT_EQ(VarName, "-"); - EXPECT_EQ(*ParsedName, "BadVar"); - EXPECT_FALSE(IsPseudo); + EXPECT_EQ(ParsedVarResult->Name, "BadVar"); + EXPECT_FALSE(ParsedVarResult->IsPseudo); VarName = bufferize(SM, "BadVar:"); - IsPseudo = true; - ParsedName = FileCheckPattern::parseVariable(VarName, IsPseudo, SM); - EXPECT_TRUE(bool(ParsedName)); + ParsedVarResult = FileCheckPattern::parseVariable(VarName, SM); + EXPECT_TRUE(bool(ParsedVarResult)); EXPECT_EQ(VarName, ":"); - EXPECT_EQ(*ParsedName, "BadVar"); - EXPECT_FALSE(IsPseudo); + EXPECT_EQ(ParsedVarResult->Name, "BadVar"); + EXPECT_FALSE(ParsedVarResult->IsPseudo); } class PatternTester { @@ -197,7 +245,7 @@ class PatternTester { StringRef ExprBufferRef = bufferize(SM, Expr); Optional DefinedNumericVariable; return errorToBool(P.parseNumericSubstitutionBlock( - ExprBufferRef, DefinedNumericVariable, SM) + ExprBufferRef, DefinedNumericVariable, false, SM) .takeError()); } @@ -269,15 +317,12 @@ TEST_F(FileCheckTest, ParseExpr) { // Missing offset operand. EXPECT_TRUE(Tester.parseSubstExpect("@LINE+")); - // Cannot parse offset operand. - EXPECT_TRUE(Tester.parseSubstExpect("@LINE+x")); - - // Unexpected string at end of numeric expression. - EXPECT_TRUE(Tester.parseSubstExpect("@LINE+5x")); - // Valid expression. 
EXPECT_FALSE(Tester.parseSubstExpect("@LINE+5")); EXPECT_FALSE(Tester.parseSubstExpect("FOO+4")); + Tester.initNextPattern(); + EXPECT_FALSE(Tester.parsePatternExpect("[[#FOO+FOO]]")); + EXPECT_FALSE(Tester.parsePatternExpect("[[#FOO+3-FOO]]")); } TEST_F(FileCheckTest, ParsePattern) { @@ -306,7 +351,6 @@ TEST_F(FileCheckTest, ParsePattern) { EXPECT_TRUE(Tester.parsePatternExpect("[[#42INVALID]]")); EXPECT_TRUE(Tester.parsePatternExpect("[[#@FOO]]")); EXPECT_TRUE(Tester.parsePatternExpect("[[#@LINE/2]]")); - EXPECT_TRUE(Tester.parsePatternExpect("[[#2+@LINE]]")); EXPECT_TRUE(Tester.parsePatternExpect("[[#YUP:@LINE]]")); // Valid numeric expressions and numeric variable definition. @@ -365,35 +409,37 @@ TEST_F(FileCheckTest, Substitution) { // the right value. FileCheckNumericVariable LineVar = FileCheckNumericVariable("@LINE", 42); FileCheckNumericVariable NVar = FileCheckNumericVariable("N", 10); - FileCheckExpression LineExpression = FileCheckExpression(doAdd, &LineVar, 0); - FileCheckExpression NExpression = FileCheckExpression(doAdd, &NVar, 3); - FileCheckNumericSubstitution SubstitutionLine = - FileCheckNumericSubstitution(&Context, "@LINE", &LineExpression, 12); + auto LineVarUse = + llvm::make_unique("@LINE", &LineVar); + auto NVarUse = llvm::make_unique("N", &NVar); + FileCheckNumericSubstitution SubstitutionLine = FileCheckNumericSubstitution( + &Context, "@LINE", std::move(LineVarUse), 12); FileCheckNumericSubstitution SubstitutionN = - FileCheckNumericSubstitution(&Context, "N", &NExpression, 30); - Expected Value = SubstitutionLine.getResult(); - EXPECT_TRUE(bool(Value)); - EXPECT_EQ("42", *Value); - Value = SubstitutionN.getResult(); - EXPECT_TRUE(bool(Value)); - EXPECT_EQ("13", *Value); - - // Substitution of an undefined numeric variable fails. 
+ FileCheckNumericSubstitution(&Context, "N", std::move(NVarUse), 30); + SubstValue = SubstitutionLine.getResult(); + EXPECT_TRUE(bool(SubstValue)); + EXPECT_EQ("42", *SubstValue); + SubstValue = SubstitutionN.getResult(); + EXPECT_TRUE(bool(SubstValue)); + EXPECT_EQ("10", *SubstValue); + + // Substitution of an undefined numeric variable fails, error holds name of + // undefined variable. LineVar.clearValue(); - SubstValue = SubstitutionLine.getResult().takeError(); + SubstValue = SubstitutionLine.getResult(); EXPECT_FALSE(bool(SubstValue)); expectUndefError("@LINE", SubstValue.takeError()); NVar.clearValue(); - SubstValue = SubstitutionN.getResult().takeError(); + SubstValue = SubstitutionN.getResult(); EXPECT_FALSE(bool(SubstValue)); expectUndefError("N", SubstValue.takeError()); // Substitution of a defined string variable returns the right value. FileCheckPattern P = FileCheckPattern(Check::CheckPlain, &Context, 1); StringSubstitution = FileCheckStringSubstitution(&Context, "FOO", 42); - Value = StringSubstitution.getResult(); - EXPECT_TRUE(bool(Value)); - EXPECT_EQ("BAR", *Value); + SubstValue = StringSubstitution.getResult(); + EXPECT_TRUE(bool(SubstValue)); + EXPECT_EQ("BAR", *SubstValue); } TEST_F(FileCheckTest, FileCheckContext) { @@ -456,14 +502,15 @@ TEST_F(FileCheckTest, FileCheckContext) { Expected LocalVar = Cxt.getPatternVarValue(LocalVarStr); FileCheckPattern P = FileCheckPattern(Check::CheckPlain, &Cxt, 1); Optional DefinedNumericVariable; - Expected Expression = P.parseNumericSubstitutionBlock( - LocalNumVarRef, DefinedNumericVariable, SM); - Expected EmptyVar = Cxt.getPatternVarValue(EmptyVarStr); - Expected UnknownVar = Cxt.getPatternVarValue(UnknownVarStr); + Expected> ExpressionAST = + P.parseNumericSubstitutionBlock(LocalNumVarRef, DefinedNumericVariable, + /*IsLegacyLineExpr=*/false, SM); EXPECT_TRUE(bool(LocalVar)); EXPECT_EQ(*LocalVar, "FOO"); - EXPECT_TRUE(bool(Expression)); - Expected ExpressionVal = (*Expression)->eval(); + Expected 
EmptyVar = Cxt.getPatternVarValue(EmptyVarStr); + Expected UnknownVar = Cxt.getPatternVarValue(UnknownVarStr); + EXPECT_TRUE(bool(ExpressionAST)); + Expected ExpressionVal = (*ExpressionAST)->eval(); EXPECT_TRUE(bool(ExpressionVal)); EXPECT_EQ(*ExpressionVal, 18U); EXPECT_TRUE(bool(EmptyVar)); @@ -478,12 +525,12 @@ TEST_F(FileCheckTest, FileCheckContext) { // local variables, if it was created before. This is important because local // variable clearing due to --enable-var-scope happens after numeric // expressions are linked to the numeric variables they use. - EXPECT_TRUE(errorToBool((*Expression)->eval().takeError())); + EXPECT_TRUE(errorToBool((*ExpressionAST)->eval().takeError())); P = FileCheckPattern(Check::CheckPlain, &Cxt, 2); - Expression = P.parseNumericSubstitutionBlock(LocalNumVarRef, - DefinedNumericVariable, SM); - EXPECT_TRUE(bool(Expression)); - ExpressionVal = (*Expression)->eval(); + ExpressionAST = P.parseNumericSubstitutionBlock( + LocalNumVarRef, DefinedNumericVariable, /*IsLegacyLineExpr=*/false, SM); + EXPECT_TRUE(bool(ExpressionAST)); + ExpressionVal = (*ExpressionAST)->eval(); EXPECT_TRUE(errorToBool(ExpressionVal.takeError())); EmptyVar = Cxt.getPatternVarValue(EmptyVarStr); EXPECT_TRUE(errorToBool(EmptyVar.takeError())); @@ -501,10 +548,10 @@ TEST_F(FileCheckTest, FileCheckContext) { EXPECT_TRUE(bool(GlobalVar)); EXPECT_EQ(*GlobalVar, "BAR"); P = FileCheckPattern(Check::CheckPlain, &Cxt, 3); - Expression = P.parseNumericSubstitutionBlock(GlobalNumVarRef, - DefinedNumericVariable, SM); - EXPECT_TRUE(bool(Expression)); - ExpressionVal = (*Expression)->eval(); + ExpressionAST = P.parseNumericSubstitutionBlock( + GlobalNumVarRef, DefinedNumericVariable, /*IsLegacyLineExpr=*/false, SM); + EXPECT_TRUE(bool(ExpressionAST)); + ExpressionVal = (*ExpressionAST)->eval(); EXPECT_TRUE(bool(ExpressionVal)); EXPECT_EQ(*ExpressionVal, 36U); @@ -512,10 +559,10 @@ TEST_F(FileCheckTest, FileCheckContext) { Cxt.clearLocalVars(); 
EXPECT_FALSE(errorToBool(Cxt.getPatternVarValue(GlobalVarStr).takeError())); P = FileCheckPattern(Check::CheckPlain, &Cxt, 4); - Expression = P.parseNumericSubstitutionBlock(GlobalNumVarRef, - DefinedNumericVariable, SM); - EXPECT_TRUE(bool(Expression)); - ExpressionVal = (*Expression)->eval(); + ExpressionAST = P.parseNumericSubstitutionBlock( + GlobalNumVarRef, DefinedNumericVariable, /*IsLegacyLineExpr=*/false, SM); + EXPECT_TRUE(bool(ExpressionAST)); + ExpressionVal = (*ExpressionAST)->eval(); EXPECT_TRUE(bool(ExpressionVal)); EXPECT_EQ(*ExpressionVal, 36U); } From ec8af0db6c97362a10994ab17ba9d175ac833f6c Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 13 Jul 2019 14:29:02 +0000 Subject: [PATCH 039/451] [ARM] MVE minnm and maxnm instructions This adds the patterns for minnm and maxnm from the fminnum and fmaxnum nodes, similar to scalar types. Original patch by Simon Tatham Differential Revision: https://reviews.llvm.org/D63870 llvm-svn: 366002 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 13 ++-- llvm/lib/Target/ARM/ARMInstrMVE.td | 15 ++++ llvm/test/CodeGen/Thumb2/mve-minmax.ll | 95 +++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/mve-minmax.ll diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index dd11ed6ede75d..b7c894c2a8abb 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -281,6 +281,9 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal); if (HasMVEFP) { + setOperationAction(ISD::FMINNUM, VT, Legal); + setOperationAction(ISD::FMAXNUM, VT, Legal); + // No native support for these. 
setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); @@ -1254,10 +1257,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FRINT, MVT::f32, Legal); setOperationAction(ISD::FMINNUM, MVT::f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); - setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal); - setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal); - setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); - setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); + if (Subtarget->hasNEON()) { + setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); + } if (Subtarget->hasFP64()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 07ba61798d4d1..7ae91423fecbe 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -863,9 +863,24 @@ class MVE_VMINMAXNM; def MVE_VMAXNMf16 : MVE_VMINMAXNM<"vmaxnm", "f16", 0b1, 0b0>; +let Predicates = [HasMVEFloat] in { + def : Pat<(v4f32 (fmaxnum (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))), + (v4f32 (MVE_VMAXNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>; + def : Pat<(v8f16 (fmaxnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))), + (v8f16 (MVE_VMAXNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>; +} + def MVE_VMINNMf32 : MVE_VMINMAXNM<"vminnm", "f32", 0b0, 0b1>; def MVE_VMINNMf16 : MVE_VMINMAXNM<"vminnm", "f16", 0b1, 0b1>; +let Predicates = [HasMVEFloat] in { + def : Pat<(v4f32 (fminnum (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))), + (v4f32 (MVE_VMINNMf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>; + def : Pat<(v8f16 (fminnum (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))), + (v8f16 (MVE_VMINNMf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>; +} + + class MVE_VMINMAX size, bit bit_4, list pattern=[]> : 
MVE_comp { diff --git a/llvm/test/CodeGen/Thumb2/mve-minmax.ll b/llvm/test/CodeGen/Thumb2/mve-minmax.ll new file mode 100644 index 0000000000000..38648afaabfa0 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-minmax.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP + +define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x float> %src2) { +; CHECK-MVE-LABEL: maxnm_float32_t: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vmaxnm.f32 s11, s7, s3 +; CHECK-MVE-NEXT: vmaxnm.f32 s10, s6, s2 +; CHECK-MVE-NEXT: vmaxnm.f32 s9, s5, s1 +; CHECK-MVE-NEXT: vmaxnm.f32 s8, s4, s0 +; CHECK-MVE-NEXT: vmov q0, q2 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: maxnm_float32_t: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vmaxnm.f32 q0, q1, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %cmp = fcmp fast ogt <4 x float> %src2, %src1 + %0 = select <4 x i1> %cmp, <4 x float> %src2, <4 x float> %src1 + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @minnm_float16_t(<8 x half> %src1, <8 x half> %src2) { +; CHECK-MVE-LABEL: minnm_float16_t: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vmov.u16 r0, q0[0] +; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[0] +; CHECK-MVE-NEXT: vmov s10, r0 +; CHECK-MVE-NEXT: vmov.u16 r2, q1[1] +; CHECK-MVE-NEXT: vminnm.f16 s8, s10, s8 +; CHECK-MVE-NEXT: vmov s10, r2 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov s8, r1 +; CHECK-MVE-NEXT: vminnm.f16 s8, s10, s8 +; CHECK-MVE-NEXT: vmov r1, s8 +; CHECK-MVE-NEXT: vmov.16 q2[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: 
vmov.16 q2[1], r1 +; CHECK-MVE-NEXT: vmov s12, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[2] +; CHECK-MVE-NEXT: vmov s14, r0 +; CHECK-MVE-NEXT: vminnm.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.16 q2[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s12, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[3] +; CHECK-MVE-NEXT: vmov s14, r0 +; CHECK-MVE-NEXT: vminnm.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.16 q2[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s12, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[4] +; CHECK-MVE-NEXT: vmov s14, r0 +; CHECK-MVE-NEXT: vminnm.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.16 q2[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s12, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[5] +; CHECK-MVE-NEXT: vmov s14, r0 +; CHECK-MVE-NEXT: vminnm.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.16 q2[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s12, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[6] +; CHECK-MVE-NEXT: vmov s14, r0 +; CHECK-MVE-NEXT: vminnm.f16 s12, s14, s12 +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.16 q2[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q1[7] +; CHECK-MVE-NEXT: vmov s2, r0 +; CHECK-MVE-NEXT: vminnm.f16 s0, s2, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q2[7], r0 +; CHECK-MVE-NEXT: vmov q0, q2 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: minnm_float16_t: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vminnm.f16 q0, q1, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %cmp = fcmp fast ogt <8 x half> %src2, %src1 + %0 = select <8 x i1> %cmp, <8 x half> %src1, <8 x half> %src2 + ret <8 x half> %0 +} From ac5bcbeb9f8c6ffd49f533d10241c23837336de7 Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 13 Jul 2019 14:38:53 +0000 Subject: [PATCH 040/451] [ARM] MVE 
VRINT support This adds support for the floor/ceil/trunc/... series of instructions, converting to various forms of VRINT. They use the same suffixes as their floating point counterparts. There is no VRINTR, so nearbyint is expanded. Also added a copysign test, to show it is expanded. Differential Revision: https://reviews.llvm.org/D63985 llvm-svn: 366003 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 + llvm/lib/Target/ARM/ARMInstrMVE.td | 23 ++ llvm/test/CodeGen/Thumb2/mve-fmath.ll | 177 ++++++++++ llvm/test/CodeGen/Thumb2/mve-frint.ll | 450 ++++++++++++++++++++++++ 4 files changed, 652 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/mve-frint.ll diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index b7c894c2a8abb..2d8fadb724119 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -283,6 +283,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { if (HasMVEFP) { setOperationAction(ISD::FMINNUM, VT, Legal); setOperationAction(ISD::FMAXNUM, VT, Legal); + setOperationAction(ISD::FROUND, VT, Legal); // No native support for these. 
setOperationAction(ISD::FDIV, VT, Expand); @@ -296,6 +297,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::FLOG10, VT, Expand); setOperationAction(ISD::FEXP, VT, Expand); setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FNEARBYINT, VT, Expand); } } diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 7ae91423fecbe..a6cc8cee65f1d 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2325,6 +2325,29 @@ multiclass MVE_VRINT_ops size, list pattern=[]> { defm MVE_VRINTf16 : MVE_VRINT_ops<"f16", 0b01>; defm MVE_VRINTf32 : MVE_VRINT_ops<"f32", 0b10>; +let Predicates = [HasMVEFloat] in { + def : Pat<(v4f32 (frint (v4f32 MQPR:$val1))), + (v4f32 (MVE_VRINTf32X (v4f32 MQPR:$val1)))>; + def : Pat<(v8f16 (frint (v8f16 MQPR:$val1))), + (v8f16 (MVE_VRINTf16X (v8f16 MQPR:$val1)))>; + def : Pat<(v4f32 (fround (v4f32 MQPR:$val1))), + (v4f32 (MVE_VRINTf32A (v4f32 MQPR:$val1)))>; + def : Pat<(v8f16 (fround (v8f16 MQPR:$val1))), + (v8f16 (MVE_VRINTf16A (v8f16 MQPR:$val1)))>; + def : Pat<(v4f32 (ftrunc (v4f32 MQPR:$val1))), + (v4f32 (MVE_VRINTf32Z (v4f32 MQPR:$val1)))>; + def : Pat<(v8f16 (ftrunc (v8f16 MQPR:$val1))), + (v8f16 (MVE_VRINTf16Z (v8f16 MQPR:$val1)))>; + def : Pat<(v4f32 (ffloor (v4f32 MQPR:$val1))), + (v4f32 (MVE_VRINTf32M (v4f32 MQPR:$val1)))>; + def : Pat<(v8f16 (ffloor (v8f16 MQPR:$val1))), + (v8f16 (MVE_VRINTf16M (v8f16 MQPR:$val1)))>; + def : Pat<(v4f32 (fceil (v4f32 MQPR:$val1))), + (v4f32 (MVE_VRINTf32P (v4f32 MQPR:$val1)))>; + def : Pat<(v8f16 (fceil (v8f16 MQPR:$val1))), + (v8f16 (MVE_VRINTf16P (v8f16 MQPR:$val1)))>; +} + class MVEFloatArithNeon pattern=[]> diff --git a/llvm/test/CodeGen/Thumb2/mve-fmath.ll b/llvm/test/CodeGen/Thumb2/mve-fmath.ll index 5ce4e73d5c4ec..41054e2d34d14 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fmath.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fmath.ll @@ -1165,6 +1165,181 @@ entry: ret <8 x half> %0 } 
+define arm_aapcs_vfpcc <4 x float> @copysign_float32_t(<4 x float> %src1, <4 x float> %src2) { +; CHECK-LABEL: copysign_float32_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: .pad #32 +; CHECK-NEXT: sub sp, #32 +; CHECK-NEXT: vstr s5, [sp, #8] +; CHECK-NEXT: ldr.w r12, [sp, #8] +; CHECK-NEXT: vstr s6, [sp, #16] +; CHECK-NEXT: ldr.w lr, [sp, #16] +; CHECK-NEXT: vstr s7, [sp, #24] +; CHECK-NEXT: lsr.w r2, r12, #31 +; CHECK-NEXT: ldr r6, [sp, #24] +; CHECK-NEXT: vstr s3, [sp, #28] +; CHECK-NEXT: ldr r3, [sp, #28] +; CHECK-NEXT: vstr s4, [sp] +; CHECK-NEXT: ldr r0, [sp] +; CHECK-NEXT: vstr s0, [sp, #4] +; CHECK-NEXT: ldr r1, [sp, #4] +; CHECK-NEXT: vstr s1, [sp, #12] +; CHECK-NEXT: lsrs r0, r0, #31 +; CHECK-NEXT: vstr s2, [sp, #20] +; CHECK-NEXT: bfi r1, r0, #31, #1 +; CHECK-NEXT: ldr r4, [sp, #12] +; CHECK-NEXT: ldr r5, [sp, #20] +; CHECK-NEXT: bfi r4, r2, #31, #1 +; CHECK-NEXT: lsr.w r2, lr, #31 +; CHECK-NEXT: bfi r5, r2, #31, #1 +; CHECK-NEXT: lsrs r2, r6, #31 +; CHECK-NEXT: bfi r3, r2, #31, #1 +; CHECK-NEXT: vmov s3, r3 +; CHECK-NEXT: vmov s2, r5 +; CHECK-NEXT: vmov s1, r4 +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: add sp, #32 +; CHECK-NEXT: pop {r4, r5, r6, pc} +entry: + %0 = call fast <4 x float> @llvm.copysign.v4f32(<4 x float> %src1, <4 x float> %src2) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @copysign_float16_t(<8 x half> %src1, <8 x half> %src2) { +; CHECK-LABEL: copysign_float16_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .pad #32 +; CHECK-NEXT: sub sp, #32 +; CHECK-NEXT: vmov.u16 r0, q1[1] +; CHECK-NEXT: vmov.u16 r1, q0[0] +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: vmov.u16 r0, q1[0] +; CHECK-NEXT: vstr.16 s8, [sp, #24] +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: vmov.u16 r0, q1[2] +; CHECK-NEXT: vstr.16 s8, [sp, #28] +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: vmov.u16 r0, q1[3] +; CHECK-NEXT: vstr.16 s8, [sp, #20] +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: vmov.u16 
r0, q1[4] +; CHECK-NEXT: vstr.16 s8, [sp, #16] +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: vmov.u16 r0, q1[5] +; CHECK-NEXT: vstr.16 s8, [sp, #12] +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: vmov.u16 r0, q1[6] +; CHECK-NEXT: vstr.16 s8, [sp, #8] +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: vmov.u16 r0, q1[7] +; CHECK-NEXT: vmov s4, r0 +; CHECK-NEXT: vstr.16 s8, [sp, #4] +; CHECK-NEXT: vstr.16 s4, [sp] +; CHECK-NEXT: vmov.u16 r0, q0[1] +; CHECK-NEXT: vmov s4, r0 +; CHECK-NEXT: ldrb.w r0, [sp, #25] +; CHECK-NEXT: vabs.f16 s4, s4 +; CHECK-NEXT: ands r0, r0, #128 +; CHECK-NEXT: vneg.f16 s6, s4 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: vseleq.f16 s4, s4, s6 +; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: vmov s4, r1 +; CHECK-NEXT: ldrb.w r1, [sp, #29] +; CHECK-NEXT: vabs.f16 s4, s4 +; CHECK-NEXT: ands r1, r1, #128 +; CHECK-NEXT: vneg.f16 s6, s4 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: vseleq.f16 s4, s4, s6 +; CHECK-NEXT: vmov r1, s4 +; CHECK-NEXT: vmov.16 q1[0], r1 +; CHECK-NEXT: vmov.16 q1[1], r0 +; CHECK-NEXT: vmov.u16 r0, q0[2] +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: ldrb.w r0, [sp, #21] +; CHECK-NEXT: vabs.f16 s8, s8 +; CHECK-NEXT: ands r0, r0, #128 +; CHECK-NEXT: vneg.f16 s10, s8 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: vseleq.f16 s8, s8, s10 +; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmov.16 q1[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: ldrb.w r0, [sp, #17] +; CHECK-NEXT: vabs.f16 s8, s8 +; CHECK-NEXT: ands r0, r0, #128 +; CHECK-NEXT: vneg.f16 s10, s8 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: vseleq.f16 s8, s8, s10 +; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmov.16 q1[3], r0 +; CHECK-NEXT: vmov.u16 r0, q0[4] +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: ldrb.w r0, [sp, #13] +; CHECK-NEXT: vabs.f16 s8, s8 +; CHECK-NEXT: ands r0, r0, #128 +; CHECK-NEXT: 
vneg.f16 s10, s8 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: vseleq.f16 s8, s8, s10 +; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmov.16 q1[4], r0 +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: ldrb.w r0, [sp, #9] +; CHECK-NEXT: vabs.f16 s8, s8 +; CHECK-NEXT: ands r0, r0, #128 +; CHECK-NEXT: vneg.f16 s10, s8 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: vseleq.f16 s8, s8, s10 +; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmov.16 q1[5], r0 +; CHECK-NEXT: vmov.u16 r0, q0[6] +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: ldrb.w r0, [sp, #5] +; CHECK-NEXT: vabs.f16 s8, s8 +; CHECK-NEXT: ands r0, r0, #128 +; CHECK-NEXT: vneg.f16 s10, s8 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: vseleq.f16 s8, s8, s10 +; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmov.16 q1[6], r0 +; CHECK-NEXT: vmov.u16 r0, q0[7] +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: ldrb.w r0, [sp, #1] +; CHECK-NEXT: vabs.f16 s0, s0 +; CHECK-NEXT: ands r0, r0, #128 +; CHECK-NEXT: vneg.f16 s2, s0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: vseleq.f16 s0, s0, s2 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: vmov.16 q1[7], r0 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: add sp, #32 +; CHECK-NEXT: bx lr +entry: + %0 = call fast <8 x half> @llvm.copysign.v8f16(<8 x half> %src1, <8 x half> %src2) + ret <8 x half> %0 +} + declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) declare <4 x float> @llvm.cos.v4f32(<4 x float>) declare <4 x float> @llvm.sin.v4f32(<4 x float>) @@ -1174,6 +1349,7 @@ declare <4 x float> @llvm.log.v4f32(<4 x float>) declare <4 x float> @llvm.log2.v4f32(<4 x float>) declare <4 x float> @llvm.log10.v4f32(<4 x float>) declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) +declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) declare <8 x half> @llvm.sqrt.v8f16(<8 x half>) declare <8 x half> 
@llvm.cos.v8f16(<8 x half>) declare <8 x half> @llvm.sin.v8f16(<8 x half>) @@ -1183,4 +1359,5 @@ declare <8 x half> @llvm.log.v8f16(<8 x half>) declare <8 x half> @llvm.log2.v8f16(<8 x half>) declare <8 x half> @llvm.log10.v8f16(<8 x half>) declare <8 x half> @llvm.pow.v8f16(<8 x half>, <8 x half>) +declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>) diff --git a/llvm/test/CodeGen/Thumb2/mve-frint.ll b/llvm/test/CodeGen/Thumb2/mve-frint.ll new file mode 100644 index 0000000000000..847d7ede1d73c --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-frint.ll @@ -0,0 +1,450 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP + +define arm_aapcs_vfpcc <4 x float> @fceil_float32_t(<4 x float> %src) { +; CHECK-MVE-LABEL: fceil_float32_t: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vrintp.f32 s7, s3 +; CHECK-MVE-NEXT: vrintp.f32 s6, s2 +; CHECK-MVE-NEXT: vrintp.f32 s5, s1 +; CHECK-MVE-NEXT: vrintp.f32 s4, s0 +; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: fceil_float32_t: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vrintp.f32 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %0 = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> %src) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @fceil_float16_t(<8 x half> %src) { +; CHECK-MVE-LABEL: fceil_float16_t: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vmov.u16 r0, q0[0] +; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] +; CHECK-MVE-NEXT: vmov s4, r0 +; CHECK-MVE-NEXT: vrintp.f16 s4, s4 +; CHECK-MVE-NEXT: vmov r0, s4 +; CHECK-MVE-NEXT: vmov s4, r1 +; CHECK-MVE-NEXT: vrintp.f16 s4, s4 +; CHECK-MVE-NEXT: vmov r1, s4 +; 
CHECK-MVE-NEXT: vmov.16 q1[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q1[1], r1 +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintp.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintp.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintp.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintp.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintp.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vrintp.f16 s0, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q1[7], r0 +; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: fceil_float16_t: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vrintp.f16 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %0 = call fast <8 x half> @llvm.ceil.v8f16(<8 x half> %src) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @ftrunc_float32_t(<4 x float> %src) { +; CHECK-MVE-LABEL: ftrunc_float32_t: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vrintz.f32 s7, s3 +; CHECK-MVE-NEXT: vrintz.f32 s6, s2 +; CHECK-MVE-NEXT: vrintz.f32 s5, s1 +; CHECK-MVE-NEXT: vrintz.f32 s4, s0 +; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: ftrunc_float32_t: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vrintz.f32 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %0 = call fast <4 x float> @llvm.trunc.v4f32(<4 x float> %src) + ret <4 x float> %0 +} + 
+define arm_aapcs_vfpcc <8 x half> @ftrunc_float16_t(<8 x half> %src) { +; CHECK-MVE-LABEL: ftrunc_float16_t: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vmov.u16 r0, q0[0] +; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] +; CHECK-MVE-NEXT: vmov s4, r0 +; CHECK-MVE-NEXT: vrintz.f16 s4, s4 +; CHECK-MVE-NEXT: vmov r0, s4 +; CHECK-MVE-NEXT: vmov s4, r1 +; CHECK-MVE-NEXT: vrintz.f16 s4, s4 +; CHECK-MVE-NEXT: vmov r1, s4 +; CHECK-MVE-NEXT: vmov.16 q1[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q1[1], r1 +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintz.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintz.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintz.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintz.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintz.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vrintz.f16 s0, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q1[7], r0 +; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: ftrunc_float16_t: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vrintz.f16 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %0 = call fast <8 x half> @llvm.trunc.v8f16(<8 x half> %src) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @frint_float32_t(<4 x float> %src) { +; CHECK-MVE-LABEL: frint_float32_t: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: 
vrintx.f32 s7, s3 +; CHECK-MVE-NEXT: vrintx.f32 s6, s2 +; CHECK-MVE-NEXT: vrintx.f32 s5, s1 +; CHECK-MVE-NEXT: vrintx.f32 s4, s0 +; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: frint_float32_t: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vrintx.f32 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %0 = call fast <4 x float> @llvm.rint.v4f32(<4 x float> %src) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @frint_float16_t(<8 x half> %src) { +; CHECK-MVE-LABEL: frint_float16_t: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vmov.u16 r0, q0[0] +; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] +; CHECK-MVE-NEXT: vmov s4, r0 +; CHECK-MVE-NEXT: vrintx.f16 s4, s4 +; CHECK-MVE-NEXT: vmov r0, s4 +; CHECK-MVE-NEXT: vmov s4, r1 +; CHECK-MVE-NEXT: vrintx.f16 s4, s4 +; CHECK-MVE-NEXT: vmov r1, s4 +; CHECK-MVE-NEXT: vmov.16 q1[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q1[1], r1 +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintx.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintx.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintx.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintx.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintx.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vrintx.f16 s0, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q1[7], r0 +; CHECK-MVE-NEXT: vmov q0, q1 +; 
CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: frint_float16_t: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vrintx.f16 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %0 = call fast <8 x half> @llvm.rint.v8f16(<8 x half> %src) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @fnearbyint_float32_t(<4 x float> %src) { +; CHECK-LABEL: fnearbyint_float32_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vrintr.f32 s7, s3 +; CHECK-NEXT: vrintr.f32 s6, s2 +; CHECK-NEXT: vrintr.f32 s5, s1 +; CHECK-NEXT: vrintr.f32 s4, s0 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = call fast <4 x float> @llvm.nearbyint.v4f32(<4 x float> %src) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @fnearbyint_float16_t(<8 x half> %src) { +; CHECK-LABEL: fnearbyint_float16_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.u16 r0, q0[0] +; CHECK-NEXT: vmov.u16 r1, q0[1] +; CHECK-NEXT: vmov s4, r0 +; CHECK-NEXT: vrintr.f16 s4, s4 +; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: vmov s4, r1 +; CHECK-NEXT: vrintr.f16 s4, s4 +; CHECK-NEXT: vmov r1, s4 +; CHECK-NEXT: vmov.16 q1[0], r0 +; CHECK-NEXT: vmov.u16 r0, q0[2] +; CHECK-NEXT: vmov.16 q1[1], r1 +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: vrintr.f16 s8, s8 +; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmov.16 q1[2], r0 +; CHECK-NEXT: vmov.u16 r0, q0[3] +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: vrintr.f16 s8, s8 +; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmov.16 q1[3], r0 +; CHECK-NEXT: vmov.u16 r0, q0[4] +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: vrintr.f16 s8, s8 +; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmov.16 q1[4], r0 +; CHECK-NEXT: vmov.u16 r0, q0[5] +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: vrintr.f16 s8, s8 +; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmov.16 q1[5], r0 +; CHECK-NEXT: vmov.u16 r0, q0[6] +; CHECK-NEXT: vmov s8, r0 +; CHECK-NEXT: vrintr.f16 s8, s8 +; CHECK-NEXT: vmov r0, s8 +; CHECK-NEXT: vmov.16 q1[6], r0 +; CHECK-NEXT: vmov.u16 r0, q0[7] +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vrintr.f16 s0, s0 
+; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: vmov.16 q1[7], r0 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = call fast <8 x half> @llvm.nearbyint.v8f16(<8 x half> %src) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @ffloor_float32_t(<4 x float> %src) { +; CHECK-MVE-LABEL: ffloor_float32_t: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vrintm.f32 s7, s3 +; CHECK-MVE-NEXT: vrintm.f32 s6, s2 +; CHECK-MVE-NEXT: vrintm.f32 s5, s1 +; CHECK-MVE-NEXT: vrintm.f32 s4, s0 +; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: ffloor_float32_t: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vrintm.f32 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %0 = call fast <4 x float> @llvm.floor.v4f32(<4 x float> %src) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @ffloor_float16_t(<8 x half> %src) { +; CHECK-MVE-LABEL: ffloor_float16_t: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vmov.u16 r0, q0[0] +; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] +; CHECK-MVE-NEXT: vmov s4, r0 +; CHECK-MVE-NEXT: vrintm.f16 s4, s4 +; CHECK-MVE-NEXT: vmov r0, s4 +; CHECK-MVE-NEXT: vmov s4, r1 +; CHECK-MVE-NEXT: vrintm.f16 s4, s4 +; CHECK-MVE-NEXT: vmov r1, s4 +; CHECK-MVE-NEXT: vmov.16 q1[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q1[1], r1 +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintm.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintm.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintm.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintm.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[5], r0 +; 
CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrintm.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vrintm.f16 s0, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q1[7], r0 +; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: ffloor_float16_t: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vrintm.f16 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %0 = call fast <8 x half> @llvm.floor.v8f16(<8 x half> %src) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <4 x float> @fround_float32_t(<4 x float> %src) { +; CHECK-MVE-LABEL: fround_float32_t: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vrinta.f32 s7, s3 +; CHECK-MVE-NEXT: vrinta.f32 s6, s2 +; CHECK-MVE-NEXT: vrinta.f32 s5, s1 +; CHECK-MVE-NEXT: vrinta.f32 s4, s0 +; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: fround_float32_t: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vrinta.f32 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %0 = call fast <4 x float> @llvm.round.v4f32(<4 x float> %src) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @fround_float16_t(<8 x half> %src) { +; CHECK-MVE-LABEL: fround_float16_t: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vmov.u16 r0, q0[0] +; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] +; CHECK-MVE-NEXT: vmov s4, r0 +; CHECK-MVE-NEXT: vrinta.f16 s4, s4 +; CHECK-MVE-NEXT: vmov r0, s4 +; CHECK-MVE-NEXT: vmov s4, r1 +; CHECK-MVE-NEXT: vrinta.f16 s4, s4 +; CHECK-MVE-NEXT: vmov r1, s4 +; CHECK-MVE-NEXT: vmov.16 q1[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q1[1], r1 +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrinta.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrinta.f16 s8, s8 
+; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrinta.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrinta.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vrinta.f16 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vrinta.f16 s0, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q1[7], r0 +; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: fround_float16_t: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vrinta.f16 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %0 = call fast <8 x half> @llvm.round.v8f16(<8 x half> %src) + ret <8 x half> %0 +} + +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) +declare <4 x float> @llvm.rint.v4f32(<4 x float>) +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) +declare <4 x float> @llvm.floor.v4f32(<4 x float>) +declare <4 x float> @llvm.round.v4f32(<4 x float>) +declare <8 x half> @llvm.ceil.v8f16(<8 x half>) +declare <8 x half> @llvm.trunc.v8f16(<8 x half>) +declare <8 x half> @llvm.rint.v8f16(<8 x half>) +declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>) +declare <8 x half> @llvm.floor.v8f16(<8 x half>) +declare <8 x half> @llvm.round.v8f16(<8 x half>) From 701bf714dbcab718067deaf4f343ce3e872b8578 Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 13 Jul 2019 14:48:54 +0000 Subject: [PATCH 041/451] [ARM] MVE integer min and max This simply makes the MVE integer min and max instructions legal and adds the relevant patterns for them. 
Differential Revision: https://reviews.llvm.org/D64026 llvm-svn: 366004 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 4 + llvm/lib/Target/ARM/ARMInstrMVE.td | 30 ++++++ llvm/test/CodeGen/Thumb2/mve-minmax.ll | 134 ++++++++++++++++++++++++ 3 files changed, 168 insertions(+) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 2d8fadb724119..e538353fc7669 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -250,6 +250,10 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::SMIN, VT, Legal); + setOperationAction(ISD::SMAX, VT, Legal); + setOperationAction(ISD::UMIN, VT, Legal); + setOperationAction(ISD::UMAX, VT, Legal); // No native support for these. setOperationAction(ISD::UDIV, VT, Expand); diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index a6cc8cee65f1d..bc02fdae97b81 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -907,6 +907,36 @@ multiclass MVE_VMINMAX_all_sizes { defm MVE_VMAX : MVE_VMINMAX_all_sizes<"vmax", 0b0>; defm MVE_VMIN : MVE_VMINMAX_all_sizes<"vmin", 0b1>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (smin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), + (v16i8 (MVE_VMINs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; + def : Pat<(v8i16 (smin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), + (v8i16 (MVE_VMINs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; + def : Pat<(v4i32 (smin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), + (v4i32 (MVE_VMINs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; + + def : Pat<(v16i8 (smax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), + (v16i8 (MVE_VMAXs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; + def : Pat<(v8i16 (smax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), + 
(v8i16 (MVE_VMAXs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; + def : Pat<(v4i32 (smax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), + (v4i32 (MVE_VMAXs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; + + def : Pat<(v16i8 (umin (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), + (v16i8 (MVE_VMINu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; + def : Pat<(v8i16 (umin (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), + (v8i16 (MVE_VMINu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; + def : Pat<(v4i32 (umin (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), + (v4i32 (MVE_VMINu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; + + def : Pat<(v16i8 (umax (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), + (v16i8 (MVE_VMAXu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; + def : Pat<(v8i16 (umax (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))), + (v8i16 (MVE_VMAXu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; + def : Pat<(v4i32 (umax (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), + (v4i32 (MVE_VMAXu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; +} + // end of mve_comp instructions // start of mve_imm_shift instructions diff --git a/llvm/test/CodeGen/Thumb2/mve-minmax.ll b/llvm/test/CodeGen/Thumb2/mve-minmax.ll index 38648afaabfa0..38990d3571744 100644 --- a/llvm/test/CodeGen/Thumb2/mve-minmax.ll +++ b/llvm/test/CodeGen/Thumb2/mve-minmax.ll @@ -2,6 +2,140 @@ ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP +define arm_aapcs_vfpcc <16 x i8> @smin_v16i8(<16 x i8> %s1, <16 x i8> %s2) { +; CHECK-LABEL: smin_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.s8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp slt <16 x i8> %s1, %s2 + %1 = select <16 x i1> %0, <16 x i8> %s1, <16 x i8> %s2 + ret <16 x i8> %1 +} + +define arm_aapcs_vfpcc <8 x i16> @smin_v8i16(<8 x i16> 
%s1, <8 x i16> %s2) { +; CHECK-LABEL: smin_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.s16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp slt <8 x i16> %s1, %s2 + %1 = select <8 x i1> %0, <8 x i16> %s1, <8 x i16> %s2 + ret <8 x i16> %1 +} + +define arm_aapcs_vfpcc <4 x i32> @smin_v4i32(<4 x i32> %s1, <4 x i32> %s2) { +; CHECK-LABEL: smin_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.s32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp slt <4 x i32> %s1, %s2 + %1 = select <4 x i1> %0, <4 x i32> %s1, <4 x i32> %s2 + ret <4 x i32> %1 +} + +define arm_aapcs_vfpcc <16 x i8> @umin_v16i8(<16 x i8> %s1, <16 x i8> %s2) { +; CHECK-LABEL: umin_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.u8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp ult <16 x i8> %s1, %s2 + %1 = select <16 x i1> %0, <16 x i8> %s1, <16 x i8> %s2 + ret <16 x i8> %1 +} + +define arm_aapcs_vfpcc <8 x i16> @umin_v8i16(<8 x i16> %s1, <8 x i16> %s2) { +; CHECK-LABEL: umin_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.u16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp ult <8 x i16> %s1, %s2 + %1 = select <8 x i1> %0, <8 x i16> %s1, <8 x i16> %s2 + ret <8 x i16> %1 +} + +define arm_aapcs_vfpcc <4 x i32> @umin_v4i32(<4 x i32> %s1, <4 x i32> %s2) { +; CHECK-LABEL: umin_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.u32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp ult <4 x i32> %s1, %s2 + %1 = select <4 x i1> %0, <4 x i32> %s1, <4 x i32> %s2 + ret <4 x i32> %1 +} + + +define arm_aapcs_vfpcc <16 x i8> @smax_v16i8(<16 x i8> %s1, <16 x i8> %s2) { +; CHECK-LABEL: smax_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmax.s8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sgt <16 x i8> %s1, %s2 + %1 = select <16 x i1> %0, <16 x i8> %s1, <16 x i8> %s2 + ret <16 x i8> %1 +} + +define arm_aapcs_vfpcc <8 x i16> @smax_v8i16(<8 x i16> %s1, <8 x i16> %s2) { +; CHECK-LABEL: smax_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmax.s16 q0, q0, q1 +; 
CHECK-NEXT: bx lr +entry: + %0 = icmp sgt <8 x i16> %s1, %s2 + %1 = select <8 x i1> %0, <8 x i16> %s1, <8 x i16> %s2 + ret <8 x i16> %1 +} + +define arm_aapcs_vfpcc <4 x i32> @smax_v4i32(<4 x i32> %s1, <4 x i32> %s2) { +; CHECK-LABEL: smax_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmax.s32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp sgt <4 x i32> %s1, %s2 + %1 = select <4 x i1> %0, <4 x i32> %s1, <4 x i32> %s2 + ret <4 x i32> %1 +} + +define arm_aapcs_vfpcc <16 x i8> @umax_v16i8(<16 x i8> %s1, <16 x i8> %s2) { +; CHECK-LABEL: umax_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmax.u8 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp ugt <16 x i8> %s1, %s2 + %1 = select <16 x i1> %0, <16 x i8> %s1, <16 x i8> %s2 + ret <16 x i8> %1 +} + +define arm_aapcs_vfpcc <8 x i16> @umax_v8i16(<8 x i16> %s1, <8 x i16> %s2) { +; CHECK-LABEL: umax_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmax.u16 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp ugt <8 x i16> %s1, %s2 + %1 = select <8 x i1> %0, <8 x i16> %s1, <8 x i16> %s2 + ret <8 x i16> %1 +} + +define arm_aapcs_vfpcc <4 x i32> @umax_v4i32(<4 x i32> %s1, <4 x i32> %s2) { +; CHECK-LABEL: umax_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmax.u32 q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = icmp ugt <4 x i32> %s1, %s2 + %1 = select <4 x i1> %0, <4 x i32> %s1, <4 x i32> %s2 + ret <4 x i32> %1 +} + + define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x float> %src2) { ; CHECK-MVE-LABEL: maxnm_float32_t: ; CHECK-MVE: @ %bb.0: @ %entry From 4ce648b5e84cdbfbc1d386166bda27892f3aabba Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 13 Jul 2019 14:58:32 +0000 Subject: [PATCH 042/451] [ARM] MVE integer abs Similar to floating point abs, we also have instructions for integers. 
Differential Revision: https://reviews.llvm.org/D64027 llvm-svn: 366005 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 1 + llvm/lib/Target/ARM/ARMInstrMVE.td | 9 ++++++ llvm/test/CodeGen/Thumb2/mve-abs.ll | 38 +++++++++++++++++++++++++ 3 files changed, 48 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/mve-abs.ll diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index e538353fc7669..e8526d1f31ccc 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -254,6 +254,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::SMAX, VT, Legal); setOperationAction(ISD::UMIN, VT, Legal); setOperationAction(ISD::UMAX, VT, Legal); + setOperationAction(ISD::ABS, VT, Legal); // No native support for these. setOperationAction(ISD::UDIV, VT, Expand); diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index bc02fdae97b81..e261b74fbf800 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2189,6 +2189,15 @@ def MVE_VABSs8 : MVE_VABSNEG_int<"vabs", "s8", 0b00, 0b0>; def MVE_VABSs16 : MVE_VABSNEG_int<"vabs", "s16", 0b01, 0b0>; def MVE_VABSs32 : MVE_VABSNEG_int<"vabs", "s32", 0b10, 0b0>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (abs (v16i8 MQPR:$v))), + (v16i8 (MVE_VABSs8 $v))>; + def : Pat<(v8i16 (abs (v8i16 MQPR:$v))), + (v8i16 (MVE_VABSs16 $v))>; + def : Pat<(v4i32 (abs (v4i32 MQPR:$v))), + (v4i32 (MVE_VABSs32 $v))>; +} + def MVE_VNEGs8 : MVE_VABSNEG_int<"vneg", "s8", 0b00, 0b1>; def MVE_VNEGs16 : MVE_VABSNEG_int<"vneg", "s16", 0b01, 0b1>; def MVE_VNEGs32 : MVE_VABSNEG_int<"vneg", "s32", 0b10, 0b1>; diff --git a/llvm/test/CodeGen/Thumb2/mve-abs.ll b/llvm/test/CodeGen/Thumb2/mve-abs.ll new file mode 100644 index 0000000000000..e0af56abf123b --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-abs.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @abs_v16i8(<16 x i8> %s1) { +; CHECK-LABEL: abs_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vabs.s8 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = icmp slt <16 x i8> %s1, zeroinitializer + %1 = sub nsw <16 x i8> zeroinitializer, %s1 + %2 = select <16 x i1> %0, <16 x i8> %1, <16 x i8> %s1 + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @abs_v8i16(<8 x i16> %s1) { +; CHECK-LABEL: abs_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vabs.s16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = icmp slt <8 x i16> %s1, zeroinitializer + %1 = sub nsw <8 x i16> zeroinitializer, %s1 + %2 = select <8 x i1> %0, <8 x i16> %1, <8 x i16> %s1 + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @abs_v4i32(<4 x i32> %s1) { +; CHECK-LABEL: abs_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vabs.s32 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = icmp slt <4 x i32> %s1, zeroinitializer + %1 = sub nsw <4 x i32> zeroinitializer, %s1 + %2 = select <4 x i1> %0, <4 x i32> %1, <4 x i32> %s1 + ret <4 x i32> %2 +} From 07a7ec202144ecde55016c3334651c72e77a9cce Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 13 Jul 2019 15:26:51 +0000 Subject: [PATCH 043/451] [ARM] MVE VNEG instruction patterns This selects integer VNEG instructions, which can be especially useful with shifts. 
Differential Revision: https://reviews.llvm.org/D64204 llvm-svn: 366006 --- llvm/lib/Target/ARM/ARMInstrMVE.td | 9 ++++++++ llvm/test/CodeGen/Thumb2/mve-neg.ll | 32 +++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/mve-neg.ll diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index e261b74fbf800..dcee5d749bb11 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2202,6 +2202,15 @@ def MVE_VNEGs8 : MVE_VABSNEG_int<"vneg", "s8", 0b00, 0b1>; def MVE_VNEGs16 : MVE_VABSNEG_int<"vneg", "s16", 0b01, 0b1>; def MVE_VNEGs32 : MVE_VABSNEG_int<"vneg", "s32", 0b10, 0b1>; +let Predicates = [HasMVEInt] in { + def : Pat<(v16i8 (vnegq (v16i8 MQPR:$v))), + (v16i8 (MVE_VNEGs8 $v))>; + def : Pat<(v8i16 (vnegq (v8i16 MQPR:$v))), + (v8i16 (MVE_VNEGs16 $v))>; + def : Pat<(v4i32 (vnegq (v4i32 MQPR:$v))), + (v4i32 (MVE_VNEGs32 $v))>; +} + class MVE_VQABSNEG size, bit negate, list pattern=[]> : MVEIntSingleSrc { diff --git a/llvm/test/CodeGen/Thumb2/mve-neg.ll b/llvm/test/CodeGen/Thumb2/mve-neg.ll new file mode 100644 index 0000000000000..f1c4352e3edb0 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-neg.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @neg_v16i8(<16 x i8> %s1) { +; CHECK-LABEL: neg_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vneg.s8 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = sub nsw <16 x i8> zeroinitializer, %s1 + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @neg_v8i16(<8 x i16> %s1) { +; CHECK-LABEL: neg_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vneg.s16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = sub nsw <8 x i16> zeroinitializer, %s1 + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @neg_v4i32(<4 x i32> %s1) { +; 
CHECK-LABEL: neg_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vneg.s32 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = sub nsw <4 x i32> zeroinitializer, %s1 + ret <4 x i32> %0 +} From f6ce7ddecbc593a3911eb119f84e4b437aad8536 Mon Sep 17 00:00:00 2001 From: Mike Spertus Date: Sat, 13 Jul 2019 15:27:53 +0000 Subject: [PATCH 044/451] Template-related improvements to Visual Studio visualizers llvm-svn: 366007 --- clang/utils/ClangVisualizers/clang.natvis | 55 ++++++++++++++++++++--- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/clang/utils/ClangVisualizers/clang.natvis b/clang/utils/ClangVisualizers/clang.natvis index 42b674e7d7bb2..8b2bf49b41bb9 100644 --- a/clang/utils/ClangVisualizers/clang.natvis +++ b/clang/utils/ClangVisualizers/clang.natvis @@ -196,7 +196,11 @@ For later versions of Visual Studio, no setup is required--> (not yet known if parameter pack) ... - {*this,view(TorC)} {*this,view(MaybeEllipses)}{Name,view(cpp)} + {(TypeSourceInfo *)(DefaultArgument.ValueOrInherited.Val.Value&~3LL),view(cpp)} + {{InheritedInitializer}} + = {this,view(DefaultArg)na} + + {*this,view(TorC)} {*this,view(MaybeEllipses)}{Name,view(cpp)} {this,view(Initializer)na} template{TemplateParams,na} {*TemplatedDecl}; @@ -288,7 +292,7 @@ For later versions of Visual Studio, no setup is required--> - {*Replaced,view(cpp)} <= {CanonicalType,view(cpp)} + {{{*Replaced,view(cpp)} <= {CanonicalType,view(cpp)}}} *(clang::Type *)this, view(cmn) *Replaced @@ -353,7 +357,7 @@ For later versions of Visual Studio, no setup is required--> - {InjectedType,view(cpp)} + {Decl,view(cpp)} Decl InjectedType @@ -424,12 +428,21 @@ For later versions of Visual Studio, no setup is required--> Ty - - {Argument} + + {(QualType *)&Ty,na} - Argument + (QualType *)&Ty + Data + + Not building anything + Building a {LastTy} + + + {Argument,view(cpp)} + {Argument} + {*(clang::QualType *)&TypeOrValue.V,view(cpp)} {(clang::TemplateArgument::ArgKind)TypeOrValue.Kind,en} template argument: 
{*(clang::QualType *)&TypeOrValue.V} @@ -459,6 +472,21 @@ For later versions of Visual Studio, no setup is required--> + + + {((TemplateArgumentLoc*)Arguments.BeginX)[0],view(cpp)}{*this,view(elt1)} + + , {((TemplateArgumentLoc*)Arguments.BeginX)[1],view(cpp)}{*this,view(elt2)} + + , {((TemplateArgumentLoc*)Arguments.BeginX)[2],view(cpp)}{*this,view(elt3)} + + , {((TemplateArgumentLoc*)Arguments.BeginX)[3],view(cpp)}{*this,view(elt4)} + + , ... + empty + <{*this,view(elt0)}> + Uninitialized + {Arguments[0],view(cpp)}{*this,view(arg1)} @@ -483,6 +511,17 @@ For later versions of Visual Studio, no setup is required--> , {Data[2],view(cpp)}, ... <{*this,view(arg0)}> + + Length + + + + Length + Data + + + + @@ -558,6 +597,7 @@ For later versions of Visual Studio, no setup is required--> TemplateSpecializationTypeBits.NumArgs (clang::TemplateArgument *)(this+1) + *(clang::Type *)this, view(cmn) @@ -745,7 +785,7 @@ For later versions of Visual Studio, no setup is required--> {this,view(retType)nand} {Name,view(cpp)nd}({*this,view(parm0)nd}) (clang::DeclaratorDecl *)this,nd - *this,view(retType)nd + ((clang::FunctionProtoType *)((clang::ExtQualsTypeCommonBase *)(((uintptr_t)DeclType.Value.Value) & ~15))->BaseType)->ResultType {*this,view(parm0)nd} @@ -755,6 +795,7 @@ For later versions of Visual Studio, no setup is required--> + TemplateOrSpecialization From 458a720ec17234f01d6e92e557436187e6f32c4b Mon Sep 17 00:00:00 2001 From: David Green Date: Sat, 13 Jul 2019 15:43:00 +0000 Subject: [PATCH 045/451] [ARM] Add sign and zero extend patterns for MVE The vmovlb instructions can be uses to sign or zero extend vector registers between types. This adds some patterns for them and relevant testing. The VBICIMM generation is also put behind a hasNEON check (as is already done for VORRIMM). Code originally by David Sherwood. 
Differential Revision: https://reviews.llvm.org/D64069 llvm-svn: 366008 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 +- llvm/lib/Target/ARM/ARMInstrMVE.td | 17 +++++ llvm/test/CodeGen/Thumb2/mve-sext.ll | 93 +++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/Thumb2/mve-sext.ll diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index e8526d1f31ccc..5e2de61e288f9 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -11180,7 +11180,7 @@ static SDValue PerformANDCombine(SDNode *N, APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; - if (BVN && + if (BVN && Subtarget->hasNEON() && BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { EVT VbicVT; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index dcee5d749bb11..10ed876f484a0 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1002,6 +1002,23 @@ defm MVE_VMOVLu8 : MVE_VMOVL_shift_half<"vmovl", "u8", 0b01, 0b1>; defm MVE_VMOVLs16 : MVE_VMOVL_shift_half<"vmovl", "s16", 0b10, 0b0>; defm MVE_VMOVLu16 : MVE_VMOVL_shift_half<"vmovl", "u16", 0b10, 0b1>; +let Predicates = [HasMVEInt] in { + def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16), + (MVE_VMOVLs16bh MQPR:$src)>; + def : Pat<(sext_inreg (v8i16 MQPR:$src), v8i8), + (MVE_VMOVLs8bh MQPR:$src)>; + def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i8), + (MVE_VMOVLs16bh (MVE_VMOVLs8bh MQPR:$src))>; + + // zext_inreg 16 -> 32 + def : Pat<(and (v4i32 MQPR:$src), (v4i32 (ARMvmovImm (i32 0xCFF)))), + (MVE_VMOVLu16bh MQPR:$src)>; + // zext_inreg 8 -> 16 + def : Pat<(and (v8i16 MQPR:$src), (v8i16 (ARMvmovImm (i32 0x8FF)))), + (MVE_VMOVLu8bh MQPR:$src)>; +} + + class MVE_VSHLL_imm pattern=[]> : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$Qm), immops), diff --git 
a/llvm/test/CodeGen/Thumb2/mve-sext.ll b/llvm/test/CodeGen/Thumb2/mve-sext.ll new file mode 100644 index 0000000000000..9458fdc47e582 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-sext.ll @@ -0,0 +1,93 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s + +define arm_aapcs_vfpcc <8 x i16> @sext_v8i8_v8i16(<8 x i8> %src) { +; CHECK-LABEL: sext_v8i8_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.s8 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = sext <8 x i8> %src to <8 x i16> + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @sext_v4i16_v4i32(<4 x i16> %src) { +; CHECK-LABEL: sext_v4i16_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.s16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = sext <4 x i16> %src to <4 x i32> + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @sext_v4i8_v4i32(<4 x i8> %src) { +; CHECK-LABEL: sext_v4i8_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.s8 q0, q0 +; CHECK-NEXT: vmovlb.s16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = sext <4 x i8> %src to <4 x i32> + ret <4 x i32> %0 +} + + +define arm_aapcs_vfpcc <8 x i16> @zext_v8i8_v8i16(<8 x i8> %src) { +; CHECK-LABEL: zext_v8i8_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u8 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = zext <8 x i8> %src to <8 x i16> + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @zext_v4i16_v4i32(<4 x i16> %src) { +; CHECK-LABEL: zext_v4i16_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = zext <4 x i16> %src to <4 x i32> + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @zext_v4i8_v4i32(<4 x i8> %src) { +; CHECK-LABEL: zext_v4i8_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q1, #0xff +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext <4 x i8> %src to <4 x i32> + ret <4 x i32> %0 +} + + 
+define arm_aapcs_vfpcc <8 x i8> @trunc_v8i16_v8i8(<8 x i16> %src) { +; CHECK-LABEL: trunc_v8i16_v8i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = trunc <8 x i16> %src to <8 x i8> + ret <8 x i8> %0 +} + +define arm_aapcs_vfpcc <4 x i16> @trunc_v4i32_v4i16(<4 x i32> %src) { +; CHECK-LABEL: trunc_v4i32_v4i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = trunc <4 x i32> %src to <4 x i16> + ret <4 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i8> @trunc_v4i32_v4i8(<4 x i32> %src) { +; CHECK-LABEL: trunc_v4i32_v4i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = trunc <4 x i32> %src to <4 x i8> + ret <4 x i8> %0 +} From e0363adb75f593b23300991c7b8a47b16c228be0 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 13 Jul 2019 17:01:00 +0000 Subject: [PATCH 046/451] [Attributor][Fix] Never override given argument numbers llvm-svn: 366009 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 88b6af3abbd3d..9e8a680d82900 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -182,8 +182,8 @@ struct Attributor { // Determine the argument number automatically for llvm::Arguments if none // is set. Do not override a given one as it could be a use of the argument // in a call site. - if (auto *Arg = dyn_cast(&V)) - if (ArgNo == -1) + if (ArgNo == -1) + if (auto *Arg = dyn_cast(&V)) ArgNo = Arg->getArgNo(); // If a function was given together with an argument number, perform the @@ -232,10 +232,13 @@ struct Attributor { "'AbstractAttribute'!"); // Determine the anchor value and the argument number which are used to - // lookup the attribute together with AAType::ID. + // lookup the attribute together with AAType::ID. 
If passed an argument, + // use its argument number but do not override a given one as it could be a + // use of the argument at a call site. Value &AnchoredVal = AA.getAnchoredValue(); - if (auto *Arg = dyn_cast(&AnchoredVal)) - ArgNo = Arg->getArgNo(); + if (ArgNo == -1) + if (auto *Arg = dyn_cast(&AnchoredVal)) + ArgNo = Arg->getArgNo(); // Put the attribute in the lookup map structure and the container we use to // keep track of all attributes. From 124cae7d3fc5249a22ab309c48262413865db266 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Sat, 13 Jul 2019 19:49:39 +0000 Subject: [PATCH 047/451] Remove extra ';' to silent compiler warning. - Plus extra style formatting. llvm-svn: 366010 --- clang/lib/Sema/SemaDecl.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 73407afb49f34..1324abb04a741 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -11082,7 +11082,8 @@ bool Sema::DeduceVariableDeclarationType(VarDecl *VDecl, bool DirectInit, return VDecl->isInvalidDecl(); } -void Sema::checkNonTrivialCUnionInInitializer(const Expr *Init, SourceLocation Loc) { +void Sema::checkNonTrivialCUnionInInitializer(const Expr *Init, + SourceLocation Loc) { if (auto *CE = dyn_cast(Init)) Init = CE->getSubExpr(); @@ -11113,7 +11114,7 @@ void Sema::checkNonTrivialCUnionInInitializer(const Expr *Init, SourceLocation L if (InitType.hasNonTrivialToPrimitiveCopyCUnion()) checkNonTrivialCUnion(InitType, Loc, NTCUC_CopyInit, NTCUK_Copy); } -}; +} namespace { From 9450b0084af2cb0d4273e8accfdb3f11b947206e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 14 Jul 2019 04:13:33 +0000 Subject: [PATCH 048/451] [X86] Remove offset of 8 from the call to FuseInst for UNPCKLPDrr folding added in r365287. This was copy/pasted from above and I forgot to change it. We just need the default offset of 0 here. Fixes PR42616. 
llvm-svn: 366011 --- llvm/lib/Target/X86/X86InstrInfo.cpp | 2 +- llvm/test/CodeGen/X86/pr42616.ll | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/pr42616.ll diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 38b66d8c8af93..e5d3a09c291b9 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4641,7 +4641,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom( unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8; if ((Size == 0 || Size >= 16) && RCSize >= 16 && Align < 16) { MachineInstr *NewMI = - FuseInst(MF, X86::MOVHPDrm, OpNum, MOs, InsertPt, MI, *this, 8); + FuseInst(MF, X86::MOVHPDrm, OpNum, MOs, InsertPt, MI, *this); return NewMI; } } diff --git a/llvm/test/CodeGen/X86/pr42616.ll b/llvm/test/CodeGen/X86/pr42616.ll new file mode 100644 index 0000000000000..961d99b207d62 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr42616.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=sse2 | FileCheck %s + +define <2 x double> @pr42616(<2 x double> %a0, <2 x double> %a1, <2 x double>* %p) { + ;CHECK-LABEL: pr42616 + ;CHECK: movhpd (%esp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = load <2 x double>, <2 x double>* %p, align 1 + %3 = shufflevector <2 x double> %a1, <2 x double> %2, <2 x i32> + %4 = fadd <2 x double> %a0, %3 + ret <2 x double> %4 +} From c7f9559d659f5c889cd19921b57f0201109e494d Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 14 Jul 2019 06:46:46 +0000 Subject: [PATCH 049/451] [Driver] Simplify -lgcc & -lgcc_s gcc defaults to -shared-libgcc in C++ mode. Letting getLibGccType() return SharedLibGcc simplifies the logic. 
llvm-svn: 366012 --- clang/lib/Driver/ToolChains/CommonArgs.cpp | 30 ++++++++++------------ 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index d0c9d7d396272..b6a0afd7e5ecc 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1135,16 +1135,12 @@ bool tools::isObjCAutoRefCount(const ArgList &Args) { enum class LibGccType { UnspecifiedLibGcc, StaticLibGcc, SharedLibGcc }; -static LibGccType getLibGccType(const ArgList &Args) { - bool Static = Args.hasArg(options::OPT_static_libgcc) || - Args.hasArg(options::OPT_static) || - Args.hasArg(options::OPT_static_pie); - - bool Shared = Args.hasArg(options::OPT_shared_libgcc); - if (Shared) - return LibGccType::SharedLibGcc; - if (Static) +static LibGccType getLibGccType(const Driver &D, const ArgList &Args) { + if (Args.hasArg(options::OPT_static_libgcc) || + Args.hasArg(options::OPT_static) || Args.hasArg(options::OPT_static_pie)) return LibGccType::StaticLibGcc; + if (Args.hasArg(options::OPT_shared_libgcc) || D.CCCIsCXX()) + return LibGccType::SharedLibGcc; return LibGccType::UnspecifiedLibGcc; } @@ -1170,8 +1166,8 @@ static void AddUnwindLibrary(const ToolChain &TC, const Driver &D, UNW == ToolChain::UNW_None) return; - LibGccType LGT = getLibGccType(Args); - bool AsNeeded = D.CCCIsCC() && LGT == LibGccType::UnspecifiedLibGcc && + LibGccType LGT = getLibGccType(D, Args); + bool AsNeeded = LGT == LibGccType::UnspecifiedLibGcc && !TC.getTriple().isAndroid() && !TC.getTriple().isOSCygMing(); if (AsNeeded) CmdArgs.push_back("--as-needed"); @@ -1180,11 +1176,11 @@ static void AddUnwindLibrary(const ToolChain &TC, const Driver &D, case ToolChain::UNW_None: return; case ToolChain::UNW_Libgcc: { - LibGccType LGT = getLibGccType(Args); - if (LGT == LibGccType::UnspecifiedLibGcc || LGT == LibGccType::SharedLibGcc) - CmdArgs.push_back("-lgcc_s"); - else if (LGT == 
LibGccType::StaticLibGcc) + LibGccType LGT = getLibGccType(D, Args); + if (LGT == LibGccType::StaticLibGcc) CmdArgs.push_back("-lgcc_eh"); + else + CmdArgs.push_back("-lgcc_s"); break; } case ToolChain::UNW_CompilerRT: @@ -1200,7 +1196,7 @@ static void AddLibgcc(const ToolChain &TC, const Driver &D, ArgStringList &CmdArgs, const ArgList &Args) { bool isAndroid = TC.getTriple().isAndroid(); - LibGccType LGT = getLibGccType(Args); + LibGccType LGT = getLibGccType(D, Args); bool LibGccFirst = (D.CCCIsCC() && LGT == LibGccType::UnspecifiedLibGcc) || LGT == LibGccType::StaticLibGcc; if (LibGccFirst) @@ -1216,7 +1212,7 @@ static void AddLibgcc(const ToolChain &TC, const Driver &D, // // NOTE: This fixes a link error on Android MIPS as well. The non-static // libgcc for MIPS relies on _Unwind_Find_FDE and dl_iterate_phdr from libdl. - if (isAndroid && getLibGccType(Args) != LibGccType::StaticLibGcc) + if (isAndroid && getLibGccType(D, Args) != LibGccType::StaticLibGcc) CmdArgs.push_back("-ldl"); } From e9dc9c2bebb1a7503bf71befe7d8c4daae964c1e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 14 Jul 2019 07:16:13 +0000 Subject: [PATCH 050/451] [Driver] Simplify AddLibgcc llvm-svn: 366013 --- clang/lib/Driver/ToolChains/CommonArgs.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index b6a0afd7e5ecc..99691cb43dc42 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1194,17 +1194,11 @@ static void AddUnwindLibrary(const ToolChain &TC, const Driver &D, static void AddLibgcc(const ToolChain &TC, const Driver &D, ArgStringList &CmdArgs, const ArgList &Args) { - bool isAndroid = TC.getTriple().isAndroid(); - LibGccType LGT = getLibGccType(D, Args); - bool LibGccFirst = (D.CCCIsCC() && LGT == LibGccType::UnspecifiedLibGcc) || - LGT == LibGccType::StaticLibGcc; - if (LibGccFirst) + if (LGT != 
LibGccType::SharedLibGcc) CmdArgs.push_back("-lgcc"); - AddUnwindLibrary(TC, D, CmdArgs, Args); - - if (!LibGccFirst) + if (LGT == LibGccType::SharedLibGcc) CmdArgs.push_back("-lgcc"); // According to Android ABI, we have to link with libdl if we are @@ -1212,7 +1206,7 @@ static void AddLibgcc(const ToolChain &TC, const Driver &D, // // NOTE: This fixes a link error on Android MIPS as well. The non-static // libgcc for MIPS relies on _Unwind_Find_FDE and dl_iterate_phdr from libdl. - if (isAndroid && getLibGccType(D, Args) != LibGccType::StaticLibGcc) + if (TC.getTriple().isAndroid() && LGT != LibGccType::StaticLibGcc) CmdArgs.push_back("-ldl"); } From 44a4bf95ddf16cfc9cad36a5fd1936fd67a9b06a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 14 Jul 2019 11:10:04 +0000 Subject: [PATCH 051/451] [MachineOutliner] Add missing initializers for OutlinedFunction. NFCI. Appeases MSVC/cppcheck. llvm-svn: 366014 --- llvm/include/llvm/CodeGen/MachineOutliner.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h index 377df4e18a2a0..3868fa4155799 100644 --- a/llvm/include/llvm/CodeGen/MachineOutliner.h +++ b/llvm/include/llvm/CodeGen/MachineOutliner.h @@ -171,13 +171,13 @@ struct OutlinedFunction { /// Represents the size of a sequence in bytes. (Some instructions vary /// widely in size, so just counting the instructions isn't very useful.) - unsigned SequenceSize; + unsigned SequenceSize = 0; /// Target-defined overhead of constructing a frame for this function. - unsigned FrameOverhead; + unsigned FrameOverhead = 0; /// Target-defined identifier for constructing a frame for this function. - unsigned FrameConstructionID; + unsigned FrameConstructionID = 0; /// Return the number of candidates for this \p OutlinedFunction. 
unsigned getOccurrenceCount() const { return Candidates.size(); } From a3f9240bfbbf5d6f2fb1bc329e00f2c20328d3d9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 14 Jul 2019 11:41:52 +0000 Subject: [PATCH 052/451] SlotIndexes - add missing initializer. NFCI. llvm-svn: 366015 --- llvm/include/llvm/CodeGen/SlotIndexes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/SlotIndexes.h b/llvm/include/llvm/CodeGen/SlotIndexes.h index 10ab4cca8319a..2b32a4d30dff2 100644 --- a/llvm/include/llvm/CodeGen/SlotIndexes.h +++ b/llvm/include/llvm/CodeGen/SlotIndexes.h @@ -347,7 +347,7 @@ class raw_ostream; public: static char ID; - SlotIndexes() : MachineFunctionPass(ID) { + SlotIndexes() : MachineFunctionPass(ID), mf(nullptr) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); } From f66f5ff38ab25043aed6e379b27a298196e764b9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 14 Jul 2019 11:47:36 +0000 Subject: [PATCH 053/451] VirtRegMap - add missing initializers. NFCI. 
llvm-svn: 366016 --- llvm/include/llvm/CodeGen/VirtRegMap.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h b/llvm/include/llvm/CodeGen/VirtRegMap.h index 7a64d674ecac0..70eb048f05ebe 100644 --- a/llvm/include/llvm/CodeGen/VirtRegMap.h +++ b/llvm/include/llvm/CodeGen/VirtRegMap.h @@ -67,8 +67,10 @@ class TargetInstrInfo; public: static char ID; - VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG), - Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) {} + VirtRegMap() + : MachineFunctionPass(ID), MRI(nullptr), TII(nullptr), TRI(nullptr), + MF(nullptr), Virt2PhysMap(NO_PHYS_REG), + Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) {} VirtRegMap(const VirtRegMap &) = delete; VirtRegMap &operator=(const VirtRegMap &) = delete; From 864474c9c72a647e1d9bc7546df86103ce043f4f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 14 Jul 2019 12:35:50 +0000 Subject: [PATCH 054/451] [BitcodeReader] Use tighter upper bound to validate forward references. At the moment, bitcode files with invalid forward reference can easily cause the bitcode reader to run out of memory, by creating a forward reference with a very high index. We can use the size of the bitcode file as an upper bound, because a valid bitcode file can never contain more records. This should be sufficient to fail early in most cases. The only exception is large files with invalid forward references close to the file size. There are a couple of clusterfuzz runs that fail with out-of-memory because of very high forward references and they should be fixed by this patch. A concrete example for this is D64507, which causes out-of-memory on systems with low memory, like the hexagon upstream bots. 
Reviewers: t.p.northover, thegameg, jfb, efriedma, hfinkel Reviewed By: jfb Differential Revision: https://reviews.llvm.org/D64577 llvm-svn: 366017 --- llvm/include/llvm/Bitstream/BitstreamReader.h | 8 ++++++-- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 2 +- llvm/lib/Bitcode/Reader/MetadataLoader.cpp | 20 +++++++++++++++---- llvm/lib/Bitcode/Reader/ValueList.cpp | 8 ++++++-- llvm/lib/Bitcode/Reader/ValueList.h | 9 ++++++++- llvm/test/Bitcode/pr18704.ll | 2 +- 6 files changed, 38 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/Bitstream/BitstreamReader.h b/llvm/include/llvm/Bitstream/BitstreamReader.h index ccb4a492b9d56..ee82e7ec1ba23 100644 --- a/llvm/include/llvm/Bitstream/BitstreamReader.h +++ b/llvm/include/llvm/Bitstream/BitstreamReader.h @@ -294,6 +294,9 @@ class SimpleBitstreamCursor { BitsInCurWord = 0; } + /// Return the size of the stream in bytes. + size_t SizeInBytes() const { return BitcodeBytes.size(); } + /// Skip to the end of the file. void skipToEnd() { NextChar = BitcodeBytes.size(); } }; @@ -364,17 +367,18 @@ class BitstreamCursor : SimpleBitstreamCursor { explicit BitstreamCursor(MemoryBufferRef BitcodeBytes) : SimpleBitstreamCursor(BitcodeBytes) {} - using SimpleBitstreamCursor::canSkipToPos; using SimpleBitstreamCursor::AtEndOfStream; + using SimpleBitstreamCursor::canSkipToPos; + using SimpleBitstreamCursor::fillCurWord; using SimpleBitstreamCursor::getBitcodeBytes; using SimpleBitstreamCursor::GetCurrentBitNo; using SimpleBitstreamCursor::getCurrentByteNo; using SimpleBitstreamCursor::getPointerToByte; using SimpleBitstreamCursor::JumpToBit; - using SimpleBitstreamCursor::fillCurWord; using SimpleBitstreamCursor::Read; using SimpleBitstreamCursor::ReadVBR; using SimpleBitstreamCursor::ReadVBR64; + using SimpleBitstreamCursor::SizeInBytes; /// Return the number of bits used to encode an abbrev #. 
unsigned getAbbrevIDWidth() const { return CurCodeSize; } diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 09bd0f4ec71cd..d07edefcffacc 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -858,7 +858,7 @@ BitcodeReader::BitcodeReader(BitstreamCursor Stream, StringRef Strtab, StringRef ProducerIdentification, LLVMContext &Context) : BitcodeReaderBase(std::move(Stream), Strtab), Context(Context), - ValueList(Context) { + ValueList(Context, Stream.SizeInBytes()) { this->ProducerIdentification = ProducerIdentification; } diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 24620ed10d747..108f71189585f 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -130,8 +130,15 @@ class BitcodeReaderMetadataList { LLVMContext &Context; + /// Maximum number of valid references. Forward references exceeding the + /// maximum must be invalid. + unsigned RefsUpperBound; + public: - BitcodeReaderMetadataList(LLVMContext &C) : Context(C) {} + BitcodeReaderMetadataList(LLVMContext &C, size_t RefsUpperBound) + : Context(C), + RefsUpperBound(std::min((size_t)std::numeric_limits::max(), + RefsUpperBound)) {} // vector compatibility methods unsigned size() const { return MetadataPtrs.size(); } @@ -218,6 +225,10 @@ void BitcodeReaderMetadataList::assignValue(Metadata *MD, unsigned Idx) { } Metadata *BitcodeReaderMetadataList::getMetadataFwdRef(unsigned Idx) { + // Bail out for a clearly invalid value. 
+ if (Idx >= RefsUpperBound) + return nullptr; + if (Idx >= size()) resize(Idx + 1); @@ -625,9 +636,10 @@ class MetadataLoader::MetadataLoaderImpl { BitcodeReaderValueList &ValueList, std::function getTypeByID, bool IsImporting) - : MetadataList(TheModule.getContext()), ValueList(ValueList), - Stream(Stream), Context(TheModule.getContext()), TheModule(TheModule), - getTypeByID(std::move(getTypeByID)), IsImporting(IsImporting) {} + : MetadataList(TheModule.getContext(), Stream.SizeInBytes()), + ValueList(ValueList), Stream(Stream), Context(TheModule.getContext()), + TheModule(TheModule), getTypeByID(std::move(getTypeByID)), + IsImporting(IsImporting) {} Error parseMetadata(bool ModuleLevel); diff --git a/llvm/lib/Bitcode/Reader/ValueList.cpp b/llvm/lib/Bitcode/Reader/ValueList.cpp index da2d24d103b20..431995fd40ac7 100644 --- a/llvm/lib/Bitcode/Reader/ValueList.cpp +++ b/llvm/lib/Bitcode/Reader/ValueList.cpp @@ -97,6 +97,10 @@ void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx, Type *FullTy) { } Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, Type *Ty) { + // Bail out for a clearly invalid value. + if (Idx >= RefsUpperBound) + return nullptr; + if (Idx >= size()) resize(Idx + 1); @@ -114,8 +118,8 @@ Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, Type *Ty) { Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty, Type **FullTy) { - // Bail out for a clearly invalid value. This would make us call resize(0) - if (Idx == std::numeric_limits::max()) + // Bail out for a clearly invalid value. 
+ if (Idx >= RefsUpperBound) return nullptr; if (Idx >= size()) diff --git a/llvm/lib/Bitcode/Reader/ValueList.h b/llvm/lib/Bitcode/Reader/ValueList.h index 1c54911650fe8..49900498c2944 100644 --- a/llvm/lib/Bitcode/Reader/ValueList.h +++ b/llvm/lib/Bitcode/Reader/ValueList.h @@ -46,8 +46,15 @@ class BitcodeReaderValueList { ResolveConstantsTy ResolveConstants; LLVMContext &Context; + /// Maximum number of valid references. Forward references exceeding the + /// maximum must be invalid. + unsigned RefsUpperBound; + public: - BitcodeReaderValueList(LLVMContext &C) : Context(C) {} + BitcodeReaderValueList(LLVMContext &C, size_t RefsUpperBound) + : Context(C), + RefsUpperBound(std::min((size_t)std::numeric_limits::max(), + RefsUpperBound)) {} ~BitcodeReaderValueList() { assert(ResolveConstants.empty() && "Constants not resolved?"); diff --git a/llvm/test/Bitcode/pr18704.ll b/llvm/test/Bitcode/pr18704.ll index e57ce3cec4c05..1f1abfaab9478 100644 --- a/llvm/test/Bitcode/pr18704.ll +++ b/llvm/test/Bitcode/pr18704.ll @@ -1,6 +1,6 @@ ; RUN: not llvm-dis < %s.bc 2>&1 | FileCheck %s -; CHECK: llvm-dis{{(\.EXE|\.exe)?}}: error: Never resolved value found in function +; CHECK: llvm-dis{{(\.EXE|\.exe)?}}: error: Invalid record ; pr18704.ll.bc has an instruction referring to invalid type. ; The test checks that LLVM reports the error and doesn't access freed memory From 19d3fdb08b722a4a66b21b3e08d2008c95f968e8 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 14 Jul 2019 14:06:25 +0000 Subject: [PATCH 055/451] Recommit "[BitcodeReader] Validate OpNum, before accessing Record array." This recommits r365750 (git commit 8b222ecf2769ee133691f208f6166ce118c4a164) Original message: Currently invalid bitcode files can cause a crash, when OpNum exceeds the number of elements in Record, like in the attached bitcode file. 
The test case was generated by clusterfuzz: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=15698 Reviewers: t.p.northover, thegameg, jfb Reviewed By: jfb Differential Revision: https://reviews.llvm.org/D64507 llvm-svn: 365750jkkkk llvm-svn: 366018 --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 4 ++++ llvm/test/Bitcode/Inputs/invalid-fcmp-opnum.bc | Bin 0 -> 908 bytes llvm/test/Bitcode/invalid.test | 5 +++++ 3 files changed, 9 insertions(+) create mode 100644 llvm/test/Bitcode/Inputs/invalid-fcmp-opnum.bc diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index d07edefcffacc..6cad3b94e5e74 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -4171,6 +4171,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) { popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS)) return error("Invalid record"); + if (OpNum >= Record.size()) + return error( + "Invalid record: operand number exceeded available operands"); + unsigned PredVal = Record[OpNum]; bool IsFP = LHS->getType()->isFPOrFPVectorTy(); FastMathFlags FMF; diff --git a/llvm/test/Bitcode/Inputs/invalid-fcmp-opnum.bc b/llvm/test/Bitcode/Inputs/invalid-fcmp-opnum.bc new file mode 100644 index 0000000000000000000000000000000000000000..454a14b8611ac37baabb23893b20fa275fe4b2fd GIT binary patch literal 908 zcmXX_O=uHO6rb%T*|m+6)do#;ciHU+H;rXfu$ zNYY)I=s(ITJ>=lQfd7NH96WdsX_8t)34S9zlwxQMBDDlRz=NQ(tqKi)yw6*^BpS`DUG1eOPi*^8B;F#-;V_4OThI9K+F5ji=Lrh;U=m$5mnZQgYclkDO zw&*#W0(OaZcR}eBXmj#cj3ptO+y#KJg9J(J^U)c0zQM+wL@1~D^=dwnp{CN*L}A?C z0*)^X{GNe6dxWo)xfew?E?GCiwB2fjhk zV9X%ZKMg$s^UcQ2r3SY`@a7dibm6y%ZC7W) zgZZMS&!`0@gxfADfmhX;7Q0Z~`YCK)XG`QZ>j1AA0Bksiss|rv@;yur8M?0pF;Pa9 z!2Xy301g4&mdGB{l30@iQ9|!OJYbF|Y?a?w91AMx&6P`pXaQ39ut0irk}&g6Gb%;^ w#)ZvXFJyXjP&St~&1 RUN: FileCheck --check-prefix=NONPOINTER-ATOMICRMW %s NONPOINTER-ATOMICRMW: Invalid record + +RUN: not llvm-dis -disable-output %p/Inputs/invalid-fcmp-opnum.bc 2>&1 | 
\ +RUN: FileCheck --check-prefix=INVALID-FCMP-OPNUM %s + +INVALID-FCMP-OPNUM: Invalid record: operand number exceeded available operands From 03d5e28fe9438c327b49c67d3022a31ff2b53dc9 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 14 Jul 2019 14:08:39 +0000 Subject: [PATCH 056/451] [x86] add test for sub-with-flags opportunity (PR40483); NFC llvm-svn: 366019 --- llvm/test/CodeGen/X86/combine-sbb.ll | 47 +++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/X86/combine-sbb.ll b/llvm/test/CodeGen/X86/combine-sbb.ll index 9e68ab4beb16b..bba72c5660943 100644 --- a/llvm/test/CodeGen/X86/combine-sbb.ll +++ b/llvm/test/CodeGen/X86/combine-sbb.ll @@ -280,7 +280,7 @@ define i32 @PR40483_sub4(i32*, i32) nounwind { ; Verify that a bogus cmov is simplified. -define i32 @PR40483_sub5(i32*, i32) { +define i32 @PR40483_sub5(i32*, i32) nounwind { ; X86-LABEL: PR40483_sub5: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -306,4 +306,49 @@ define i32 @PR40483_sub5(i32*, i32) { ret i32 %10 } +define i32 @PR40483_sub6(i32*, i32) nounwind { +; X86-LABEL: PR40483_sub6: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl (%edx), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: subl %edi, %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: subl %edi, %esi +; X86-NEXT: movl %esi, (%edx) +; X86-NEXT: jae .LBB8_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: addl %ecx, %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: .LBB8_2: +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: PR40483_sub6: +; X64: # %bb.0: +; X64-NEXT: movl (%rdi), %ecx +; X64-NEXT: movl %ecx, %edx +; X64-NEXT: subl %esi, %edx +; X64-NEXT: addl %edx, %edx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: subl %esi, %ecx +; X64-NEXT: movl %ecx, (%rdi) +; X64-NEXT: cmovbl %edx, %eax +; X64-NEXT: retq + %3 = load i32, i32* %0, align 8 + %4 = 
tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %3, i32 %1) + %5 = extractvalue { i8, i32 } %4, 1 + store i32 %5, i32* %0, align 8 + %6 = extractvalue { i8, i32 } %4, 0 + %7 = icmp eq i8 %6, 0 + %8 = sub i32 %3, %1 + %9 = add i32 %8, %5 + %10 = select i1 %7, i32 0, i32 %9 + ret i32 %10 +} + declare { i8, i32 } @llvm.x86.subborrow.32(i8, i32, i32) From 34c067331e465bb992693de7c6c4f9ee46f6de49 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 14 Jul 2019 15:05:05 +0000 Subject: [PATCH 057/451] [Hashing] hash_1to3_bytes - avoid trunc(v + zext(x)) NFCI. MSVC complains about the extension to uint64_t for an addition followed by truncation back to uint32_t - add an explicit uint32_t cast to avoid this. llvm-svn: 366020 --- llvm/include/llvm/ADT/Hashing.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h index f639aa2054c4f..008188bfa2109 100644 --- a/llvm/include/llvm/ADT/Hashing.h +++ b/llvm/include/llvm/ADT/Hashing.h @@ -191,7 +191,7 @@ inline uint64_t hash_1to3_bytes(const char *s, size_t len, uint64_t seed) { uint8_t b = s[len >> 1]; uint8_t c = s[len - 1]; uint32_t y = static_cast(a) + (static_cast(b) << 8); - uint32_t z = len + (static_cast(c) << 2); + uint32_t z = static_cast(len) + (static_cast(c) << 2); return shift_mix(y * k2 ^ z * k3 ^ seed) * k2; } From 57190b3974fbc07cdb5b61aeaf023de94c2afe7e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 14 Jul 2019 15:55:32 +0000 Subject: [PATCH 058/451] [InstCombine] Add assume context test; NFC Baseline test for D37215. 
llvm-svn: 366021 --- .../InstCombine/assume_inevitable.ll | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/assume_inevitable.ll diff --git a/llvm/test/Transforms/InstCombine/assume_inevitable.ll b/llvm/test/Transforms/InstCombine/assume_inevitable.ll new file mode 100644 index 0000000000000..a063775b05729 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/assume_inevitable.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +; Check that assume is propagated backwards through all +; operations that are `isGuaranteedToTransferExecutionToSuccessor` +; (it should reach the load and mark it as `align 32`). +define i32 @assume_inevitable(i32* %a, i32* %b, i8* %c) { +; CHECK-LABEL: @assume_inevitable( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[M:%.*]] = alloca i64, align 8 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4 +; CHECK-NEXT: [[LOADRES:%.*]] = load i32, i32* [[B:%.*]], align 4 +; CHECK-NEXT: [[LOADRES2:%.*]] = call i32 @llvm.annotation.i32(i32 [[LOADRES]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i64 0, i64 0), i32 2) +; CHECK-NEXT: store i32 [[LOADRES2]], i32* [[A]], align 4 +; CHECK-NEXT: [[DUMMY_EQ:%.*]] = icmp ugt i32 [[LOADRES]], 42 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[DUMMY_EQ]]) +; CHECK-NEXT: [[M_I8:%.*]] = bitcast i64* [[M]] to i8* +; CHECK-NEXT: [[M_A:%.*]] = call i8* @llvm.ptr.annotation.p0i8(i8* nonnull [[M_I8]], i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i64 0, i64 0), i32 2) +; CHECK-NEXT: [[M_X:%.*]] = bitcast i8* [[M_A]] to i64* +; CHECK-NEXT: [[OBJSZ:%.*]] = call i64 @llvm.objectsize.i64.p0i8(i8* [[C:%.*]], i1 false, i1 false, i1 false) +; CHECK-NEXT: store i64 [[OBJSZ]], i64* [[M_X]], align 4 +; 
CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 +; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 +; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + %dummy = alloca i8, align 4 + %m = alloca i64 + %0 = load i32, i32* %a, align 4 + + ; START perform a bunch of inevitable operations + %loadres = load i32, i32* %b + %loadres2 = call i32 @llvm.annotation.i32(i32 %loadres, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i32 0, i32 0), i32 2) + store i32 %loadres2, i32* %a + + %dummy_eq = icmp ugt i32 %loadres, 42 + tail call void @llvm.assume(i1 %dummy_eq) + + call void @llvm.lifetime.start.p0i8(i64 1, i8* %dummy) + %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %dummy) + call void @llvm.invariant.end.p0i8({}* %i, i64 1, i8* %dummy) + call void @llvm.lifetime.end.p0i8(i64 1, i8* %dummy) + + %m_i8 = bitcast i64* %m to i8* + %m_a = call i8* @llvm.ptr.annotation.p0i8(i8* %m_i8, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i32 0, i32 0), i32 2) + %m_x = bitcast i8* %m_a to i64* + %objsz = call i64 @llvm.objectsize.i64.p0i8(i8* %c, i1 false) + store i64 %objsz, i64* %m_x + ; END perform a bunch of inevitable operations + + ; AND here's the assume: + %ptrint = ptrtoint i32* %a to i64 + %maskedptr = and i64 %ptrint, 31 + %maskcond = icmp eq i64 %maskedptr, 0 + tail call void @llvm.assume(i1 %maskcond) + + ret i32 %0 +} + +@.str = private unnamed_addr constant [4 x i8] c"sth\00", section "llvm.metadata" +@.str1 = private unnamed_addr constant [4 x i8] c"t.c\00", section "llvm.metadata" + +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) +declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32) +declare i8* @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32) + +declare void 
@llvm.lifetime.start.p0i8(i64, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + +declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) +declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) +declare void @llvm.assume(i1) From 194b337f3327691ee88007061c71225c934b3af0 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Sun, 14 Jul 2019 18:21:15 +0000 Subject: [PATCH 059/451] Avoid eager template instantiation caused by the variant narrowing checks. The standard disallows narrowing conversions when constructing a variant. This is checked by attempting to perform braced initialization of the destination type from the argument type. However, braced initialization can force the compiler (mostly clang) to eagerly instantiate the constructors of the destination type -- which can lead to errors in a non-immediate context. However, as variant is currently specified, the narrowing checks only observably apply when the destination type is arithmetic. Meaning we can skip the check for class types. Hence avoiding the hard errors. In order to cause fewer build breakages, this patch avoids the narrowing check except when the destination type is arithmetic.
llvm-svn: 366022 --- libcxx/include/variant | 36 ++++++++++++++----- .../variant.variant/variant.ctor/T.pass.cpp | 14 +++++++- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/libcxx/include/variant b/libcxx/include/variant index 420e8c2611f67..21871ae3c1231 100644 --- a/libcxx/include/variant +++ b/libcxx/include/variant @@ -1095,19 +1095,39 @@ struct __overload; template <> struct __overload<> { void operator()() const; }; + + +struct __no_narrowing_check { + template + using _Apply = __identity<_Dest>; +}; + +struct __narrowing_check { + template + static auto __test_impl(_Dest (&&)[1]) -> __identity<_Dest>; + template + using _Apply = decltype(__test_impl<_Dest>({std::declval<_Source>()})); +}; + +template +using __check_for_narrowing = typename _If< +#ifdef _LIBCPP_ENABLE_NARROWING_CONVERSIONS_IN_VARIANT + false && +#endif + is_arithmetic<_Dest>::value, + __narrowing_check, + __no_narrowing_check + >::template _Apply<_Dest, _Source>; + + template struct __overload<_Tp, _Types...> : __overload<_Types...> { using __overload<_Types...>::operator(); - static auto __test(_Tp (&&)[1]) -> __identity<_Tp>; - template - auto operator()(_Tp, _Up&& __t) const -#ifndef _LIBCPP_ENABLE_NARROWING_CONVERSIONS_IN_VARIANT - -> decltype(__test({ _VSTD::forward<_Up>(__t) })); -#else - -> __identity<_Tp>; -#endif + auto operator()(_Tp, _Up&&) const -> + + __check_for_narrowing<_Tp, _Up>; }; template diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp index 55f8d11c1159b..ef07482858133 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp @@ -177,10 +177,22 @@ void test_T_ctor_basic() { #endif } +struct BoomOnInt { + template + constexpr BoomOnInt(T) { static_assert(!std::is_same::value, ""); } +}; + +void test_no_narrowing_check_for_class_types() 
{ + using V = std::variant; + V v(42); + assert(v.index() == 0); + assert(std::get<0>(v) == 42); +} + int main(int, char**) { test_T_ctor_basic(); test_T_ctor_noexcept(); test_T_ctor_sfinae(); - + test_no_narrowing_check_for_class_types(); return 0; } From 8eb86a15c5cfdcf5052e507916c65b52d4ca086e Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sun, 14 Jul 2019 18:24:19 +0000 Subject: [PATCH 060/451] [GitSVN][NFC] Mark dry-run commits as such in the log output Summary: This helps to avoid worries about the "dry run flag" while testing. Reviewers: jyknight, rnk, mehdi_amini Subscribers: bollu, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64697 llvm-svn: 366023 --- llvm/utils/git-svn/git-llvm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/utils/git-svn/git-llvm b/llvm/utils/git-svn/git-llvm index 399e55247b553..289898d15b5fb 100755 --- a/llvm/utils/git-svn/git-llvm +++ b/llvm/utils/git-svn/git-llvm @@ -420,8 +420,8 @@ def cmd_push(args): rev_range = args.rev_range dry_run = args.dry_run revs = get_revs_to_push(rev_range) - log('Pushing %d %s commit%s:\n%s' % - (len(revs), + log('%sPushing %d %s commit%s:\n%s' % + ('[DryRun] ' if dry_run else '', len(revs), 'split-repo (%s)' % split_repo_path if split_repo_path else 'monorepo', 's' if len(revs) != 1 else '', From 8f1d7d1c55c76d315fb766128b8174389dbd46f5 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 14 Jul 2019 18:25:09 +0000 Subject: [PATCH 061/451] consistency in the release notes llvm-svn: 366024 --- clang/docs/ReleaseNotes.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index f0a35050dde08..f89447fc96a3c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -184,8 +184,8 @@ AST Matchers clang-format ------------ -- Add language support for clang-formatting C# files -- Add Microsoft coding style to encapsulate default C# formatting 
style +- Add language support for clang-formatting C# files. +- Add Microsoft coding style to encapsulate default C# formatting style. - Added new option `PPDIS_BeforeHash` (in configuration: `BeforeHash`) to `IndentPPDirectives` which indents preprocessor directives before the hash. - Added new option `AlignConsecutiveMacros` to align the C/C++ preprocessor From 273857d1ea9a39ab976b6dda446b8564e88d068a Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Sun, 14 Jul 2019 18:30:34 +0000 Subject: [PATCH 062/451] Harden variant test added in r366022 The test was brittle since it only went boom for one specific type, when really it should go boom for all of them. llvm-svn: 366025 --- .../variant/variant.variant/variant.ctor/T.pass.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp index ef07482858133..d05e800b302ea 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp @@ -177,13 +177,13 @@ void test_T_ctor_basic() { #endif } -struct BoomOnInt { +struct BoomOnAnything { template - constexpr BoomOnInt(T) { static_assert(!std::is_same::value, ""); } + constexpr BoomOnAnything(T) { static_assert(!std::is_same::value, ""); } }; void test_no_narrowing_check_for_class_types() { - using V = std::variant; + using V = std::variant; V v(42); assert(v.index() == 0); assert(std::get<0>(v) == 42); From aae0cb67ed79bb7dddcda3438afd1263104cc689 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Sun, 14 Jul 2019 18:31:55 +0000 Subject: [PATCH 063/451] Cleanup whitespace in <variant>. NFC.
llvm-svn: 366026 --- libcxx/include/variant | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/libcxx/include/variant b/libcxx/include/variant index 21871ae3c1231..88a625df71e1c 100644 --- a/libcxx/include/variant +++ b/libcxx/include/variant @@ -1089,14 +1089,6 @@ private: } }; -template -struct __overload; - -template <> -struct __overload<> { void operator()() const; }; - - - struct __no_narrowing_check { template using _Apply = __identity<_Dest>; @@ -1120,14 +1112,18 @@ using __check_for_narrowing = typename _If< >::template _Apply<_Dest, _Source>; +template +struct __overload; + +template <> +struct __overload<> { void operator()() const; }; + template struct __overload<_Tp, _Types...> : __overload<_Types...> { using __overload<_Types...>::operator(); template - auto operator()(_Tp, _Up&&) const -> - - __check_for_narrowing<_Tp, _Up>; + auto operator()(_Tp, _Up&&) const -> __check_for_narrowing<_Tp, _Up>; }; template From 24cacf9c56f0b55534e98941cc8675a9a7489c37 Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas Date: Sun, 14 Jul 2019 18:32:42 +0000 Subject: [PATCH 064/451] [clang][Driver][ARM] Favor -mfpu over default CPU features When processing the command line options march, mcpu and mfpu, we store the implied target features on a vector. The change D62998 introduced a temporary vector, where the processed features get accumulated. When calling DecodeARMFeaturesFromCPU, which sets the default features for the specified CPU, we certainly don't want to override the features that have been explicitly specified on the command line. Therefore, the default features should appear first in the final vector. This problem became evident once I added the missing (unhandled) target features in ARM::getExtensionFeatures. 
Differential Revision: https://reviews.llvm.org/D63936 llvm-svn: 366027 --- clang/lib/Driver/ToolChains/Arch/ARM.cpp | 6 +++- clang/test/CodeGen/arm-target-features.c | 2 +- llvm/include/llvm/Support/ARMTargetParser.def | 1 + llvm/lib/Support/ARMTargetParser.cpp | 30 ++++--------------- llvm/unittests/Support/TargetParserTest.cpp | 15 +++++----- 5 files changed, 21 insertions(+), 33 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp index 2e7562c6ee272..d1db583e52802 100644 --- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -376,7 +376,11 @@ void arm::getARMTargetFeatures(const ToolChain &TC, Features.push_back( Args.MakeArgString((F.second ? "+" : "-") + F.first())); } else if (!CPUName.empty()) { - DecodeARMFeaturesFromCPU(D, CPUName, ExtensionFeatures); + // This sets the default features for the specified CPU. We certainly don't + // want to override the features that have been explicitly specified on the + // command line. Therefore, process them directly instead of appending them + // at the end later. 
+ DecodeARMFeaturesFromCPU(D, CPUName, Features); } if (CPUArg) diff --git a/clang/test/CodeGen/arm-target-features.c b/clang/test/CodeGen/arm-target-features.c index ec165aeec2881..a0fbafc5d0c54 100644 --- a/clang/test/CodeGen/arm-target-features.c +++ b/clang/test/CodeGen/arm-target-features.c @@ -32,7 +32,7 @@ // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V82 // RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu exynos-m5 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V82 -// CHECK-BASIC-V82: "target-features"="+armv8.2-a,+crc,+crypto,+d32,+dotprod,+dsp,+fp-armv8,+fp-armv8d16,+fp-armv8d16sp,+fp-armv8sp,+fp16,+fp64,+fpregs,+hwdiv,+hwdiv-arm,+neon,+ras,+thumb-mode,+vfp2,+vfp2d16,+vfp2d16sp,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp" +// CHECK-BASIC-V82: "target-features"="+armv8.2-a,+crc,+crypto,+d32,+dotprod,+dsp,+fp-armv8,+fp-armv8d16,+fp-armv8d16sp,+fp-armv8sp,+fp16,+fp64,+fpregs,+fullfp16,+hwdiv,+hwdiv-arm,+neon,+ras,+thumb-mode,+vfp2,+vfp2d16,+vfp2d16sp,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp" // RUN: %clang_cc1 -triple armv8-linux-gnueabi -target-cpu cortex-a53 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-BASIC-V8-ARM // CHECK-BASIC-V8-ARM: "target-features"="+armv8-a,+crc,+crypto,+d32,+dsp,+fp-armv8,+fp-armv8d16,+fp-armv8d16sp,+fp-armv8sp,+fp16,+fp64,+fpregs,+hwdiv,+hwdiv-arm,+neon,+vfp2,+vfp2d16,+vfp2d16sp,+vfp2sp,+vfp3,+vfp3d16,+vfp3d16sp,+vfp3sp,+vfp4,+vfp4d16,+vfp4d16sp,+vfp4sp,-thumb-mode" diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def index 593480f2f1892..6e17c8cbfd784 100644 --- a/llvm/include/llvm/Support/ARMTargetParser.def +++ b/llvm/include/llvm/Support/ARMTargetParser.def @@ -148,6 +148,7 @@ ARM_ARCH_EXT_NAME("aes", ARM::AEK_AES, "+aes", "-aes") ARM_ARCH_EXT_NAME("dotprod", ARM::AEK_DOTPROD, 
"+dotprod","-dotprod") ARM_ARCH_EXT_NAME("dsp", ARM::AEK_DSP, "+dsp", "-dsp") ARM_ARCH_EXT_NAME("fp", ARM::AEK_FP, nullptr, nullptr) +ARM_ARCH_EXT_NAME("fp.dp", ARM::AEK_FP_DP, nullptr, nullptr) ARM_ARCH_EXT_NAME("mve", ARM::AEK_SIMD, "+mve", "-mve") ARM_ARCH_EXT_NAME("mve.fp", (ARM::AEK_SIMD | ARM::AEK_FP), "+mve.fp", "-mve.fp") ARM_ARCH_EXT_NAME("idiv", (ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB), nullptr, nullptr) diff --git a/llvm/lib/Support/ARMTargetParser.cpp b/llvm/lib/Support/ARMTargetParser.cpp index e91b508eefd51..27d1e5527be47 100644 --- a/llvm/lib/Support/ARMTargetParser.cpp +++ b/llvm/lib/Support/ARMTargetParser.cpp @@ -409,30 +409,12 @@ bool ARM::getExtensionFeatures(unsigned Extensions, if (Extensions == AEK_INVALID) return false; - if (Extensions & AEK_CRC) - Features.push_back("+crc"); - else - Features.push_back("-crc"); - - if (Extensions & AEK_DSP) - Features.push_back("+dsp"); - else - Features.push_back("-dsp"); - - if (Extensions & AEK_FP16FML) - Features.push_back("+fp16fml"); - else - Features.push_back("-fp16fml"); - - if (Extensions & AEK_RAS) - Features.push_back("+ras"); - else - Features.push_back("-ras"); - - if (Extensions & AEK_DOTPROD) - Features.push_back("+dotprod"); - else - Features.push_back("-dotprod"); + for (const auto AE : ARCHExtNames) { + if ((Extensions & AE.ID) == AE.ID && AE.Feature) + Features.push_back(AE.Feature); + else if (AE.NegFeature) + Features.push_back(AE.NegFeature); + } return getHWDivFeatures(Extensions, Features); } diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp index b11d9ae7f0061..34c7a8a4fd1c8 100644 --- a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -571,17 +571,18 @@ TEST(TargetParserTest, ARMFPURestriction) { TEST(TargetParserTest, ARMExtensionFeatures) { std::map> Extensions; - Extensions[ARM::AEK_CRC] = { "+crc", "-crc" }; - Extensions[ARM::AEK_DSP] = { "+dsp", "-dsp" }; + for (auto &Ext : 
ARM::ARCHExtNames) { + if (Ext.Feature && Ext.NegFeature) + Extensions[Ext.ID] = { StringRef(Ext.Feature), + StringRef(Ext.NegFeature) }; + } + Extensions[ARM::AEK_HWDIVARM] = { "+hwdiv-arm", "-hwdiv-arm" }; Extensions[ARM::AEK_HWDIVTHUMB] = { "+hwdiv", "-hwdiv" }; - Extensions[ARM::AEK_RAS] = { "+ras", "-ras" }; - Extensions[ARM::AEK_FP16FML] = { "+fp16fml", "-fp16fml" }; - Extensions[ARM::AEK_DOTPROD] = { "+dotprod", "-dotprod" }; std::vector Features; - EXPECT_FALSE(AArch64::getExtensionFeatures(ARM::AEK_INVALID, Features)); + EXPECT_FALSE(ARM::getExtensionFeatures(ARM::AEK_INVALID, Features)); for (auto &E : Extensions) { // test +extension @@ -598,7 +599,7 @@ TEST(TargetParserTest, ARMExtensionFeatures) { Found = std::find(std::begin(Features), std::end(Features), E.second.at(1)); EXPECT_TRUE(Found != std::end(Features)); EXPECT_TRUE(Extensions.size() == Features.size()); - } + } } TEST(TargetParserTest, ARMFPUFeatures) { From fff5dc0b173fc35ce9a75a737a980875be125566 Mon Sep 17 00:00:00 2001 From: JF Bastien Date: Sun, 14 Jul 2019 18:33:51 +0000 Subject: [PATCH 065/451] Support __seg_fs and __seg_gs on x86 Summary: GCC supports named address spaces macros: https://gcc.gnu.org/onlinedocs/gcc/Named-Address-Spaces.html clang does as well with address spaces: https://clang.llvm.org/docs/LanguageExtensions.html#memory-references-to-specified-segments Add the __seg_fs and __seg_gs macros for compatibility with GCC. 
Subscribers: jkorous, dexonsmith, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D64676 llvm-svn: 366028 --- clang/docs/LanguageExtensions.rst | 4 ++++ clang/lib/Basic/Targets/X86.cpp | 5 +++++ clang/test/Preprocessor/x86_seg_fs_gs.c | 7 +++++++ 3 files changed, 16 insertions(+) create mode 100644 clang/test/Preprocessor/x86_seg_fs_gs.c diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index ecbf04c3c822a..266309c6ce248 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -2465,6 +2465,10 @@ Which compiles to (on X86-32): movl %gs:(%eax), %eax ret +You can also use the GCC compatibility macros ``__seg_fs`` and ``__seg_gs`` for +the same purpose. The preprocessor symbols ``__SEG_FS`` and ``__SEG_GS`` +indicate their support. + PowerPC Language Extensions ------------------------------ diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 76d8ab8146ffb..d618c90b05c02 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -917,6 +917,11 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, DefineStd(Builder, "i386", Opts); } + Builder.defineMacro("__SEG_GS"); + Builder.defineMacro("__SEG_FS"); + Builder.defineMacro("__seg_gs", "__attribute__((address_space(256)))"); + Builder.defineMacro("__seg_fs", "__attribute__((address_space(257)))"); + // Subtarget options. // FIXME: We are hard-coding the tune parameters based on the CPU, but they // truly should be based on -mtune options. 
diff --git a/clang/test/Preprocessor/x86_seg_fs_gs.c b/clang/test/Preprocessor/x86_seg_fs_gs.c new file mode 100644 index 0000000000000..b7a586c820245 --- /dev/null +++ b/clang/test/Preprocessor/x86_seg_fs_gs.c @@ -0,0 +1,7 @@ +// RUN: %clang -target i386-unknown-unknown -x c -E -dM -o - %s | FileCheck -match-full-lines %s +// RUN: %clang -target x86_64-unknown-unknown -x c -E -dM -o - %s | FileCheck -match-full-lines %s + +// CHECK: #define __SEG_FS 1 +// CHECK: #define __SEG_GS 1 +// CHECK: #define __seg_fs __attribute__((address_space(257))) +// CHECK: #define __seg_gs __attribute__((address_space(256))) From 8111807a03c7ecc340fe2d8497b422b09e111fe9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 14 Jul 2019 19:13:09 +0000 Subject: [PATCH 066/451] Fix uninitialized variable analyzer warning. NFCI. llvm-svn: 366029 --- clang/lib/Sema/SemaDecl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 1324abb04a741..ee7950de45e73 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -12192,7 +12192,7 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) { // Cache the result of checking for constant initialization. Optional CacheHasConstInit; - const Expr *CacheCulprit; + const Expr *CacheCulprit = nullptr; auto checkConstInit = [&]() mutable { if (!CacheHasConstInit) CacheHasConstInit = var->getInit()->isConstantInitializer( From 9428d95ce7f84844a076fe13219db96a78e3bd44 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 14 Jul 2019 20:12:36 +0000 Subject: [PATCH 067/451] [LV] Exclude loop-invariant inputs from scalar cost computation. Loop invariant operands do not need to be scalarized, as we are using the values outside the loop. We should ignore them when computing the scalarization overhead. 
Fixes PR41294 Reviewers: hsaito, rengolin, dcaballe, Ayal Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D59995 llvm-svn: 366030 --- .../Transforms/Vectorize/LoopVectorize.cpp | 64 ++++++---- .../extractvalue-no-scalarization-required.ll | 109 ++++++++++++++++++ 2 files changed, 151 insertions(+), 22 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index c1bb43bc5bdb8..22cf9c7db9490 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1179,7 +1179,7 @@ class LoopVectorizationCostModel { /// VF. Return the cost of the instruction, including scalarization overhead /// if it's needed. The flag NeedToScalarize shows if the call needs to be /// scalarized - - // i.e. either vector version isn't available, or is too expensive. + /// i.e. either vector version isn't available, or is too expensive. unsigned getVectorCallCost(CallInst *CI, unsigned VF, bool &NeedToScalarize); private: @@ -1332,6 +1332,30 @@ class LoopVectorizationCostModel { DecisionList WideningDecisions; + /// Returns true if \p V is expected to be vectorized and it needs to be + /// extracted. + bool needsExtract(Value *V, unsigned VF) const { + Instruction *I = dyn_cast(V); + if (VF == 1 || !I || !TheLoop->contains(I) || TheLoop->isLoopInvariant(I)) + return false; + + // Assume we can vectorize V (and hence we need extraction) if the + // scalars are not computed yet. This can happen, because it is called + // via getScalarizationOverhead from setCostBasedWideningDecision, before + // the scalars are collected. That should be a safe assumption in most + // cases, because we check if the operands have vectorizable types + // beforehand in LoopVectorizationLegality. 
+ return Scalars.find(VF) == Scalars.end() || + !isScalarAfterVectorization(I, VF); + }; + + /// Returns a range containing only operands needing to be extracted. + SmallVector filterExtractingOperands(Instruction::op_range Ops, + unsigned VF) { + return SmallVector(make_filter_range( + Ops, [this, VF](Value *V) { return this->needsExtract(V, VF); })); + } + public: /// The loop that we evaluate. Loop *TheLoop; @@ -3125,8 +3149,11 @@ unsigned LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI, if (auto *FPMO = dyn_cast(CI)) FMF = FPMO->getFastMathFlags(); - SmallVector Operands(CI->arg_operands()); - return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF); + // Skip operands that do not require extraction/scalarization and do not incur + // any overhead. + return TTI.getIntrinsicInstrCost( + ID, CI->getType(), filterExtractingOperands(CI->arg_operands(), VF), FMF, + VF); } static Type *smallestIntegerVectorType(Type *T1, Type *T2) { @@ -5346,15 +5373,6 @@ int LoopVectorizationCostModel::computePredInstDiscount( return true; }; - // Returns true if an operand that cannot be scalarized must be extracted - // from a vector. We will account for this scalarization overhead below. Note - // that the non-void predicated instructions are placed in their own blocks, - // and their return values are inserted into vectors. Thus, an extract would - // still be required. - auto needsExtract = [&](Instruction *I) -> bool { - return TheLoop->contains(I) && !isScalarAfterVectorization(I, VF); - }; - // Compute the expected cost discount from scalarizing the entire expression // feeding the predicated instruction. We currently only consider expressions // that are single-use instruction chains. 
@@ -5394,7 +5412,7 @@ int LoopVectorizationCostModel::computePredInstDiscount( "Instruction has non-scalar type"); if (canBeScalarized(J)) Worklist.push_back(J); - else if (needsExtract(J)) + else if (needsExtract(J, VF)) ScalarCost += TTI.getScalarizationOverhead( ToVectorTy(J->getType(),VF), false, true); } @@ -5684,16 +5702,18 @@ unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I, if (isa(I) && !TTI.prefersVectorizedAddressing()) return Cost; - if (CallInst *CI = dyn_cast(I)) { - SmallVector Operands(CI->arg_operands()); - Cost += TTI.getOperandsScalarizationOverhead(Operands, VF); - } else if (!isa(I) || - !TTI.supportsEfficientVectorElementLoadStore()) { - SmallVector Operands(I->operand_values()); - Cost += TTI.getOperandsScalarizationOverhead(Operands, VF); - } + // Some targets support efficient element stores. + if (isa(I) && TTI.supportsEfficientVectorElementLoadStore()) + return Cost; - return Cost; + // Collect operands to consider. + CallInst *CI = dyn_cast(I); + Instruction::op_range Ops = CI ? CI->arg_operands() : I->operands(); + + // Skip operands that do not require extraction/scalarization and do not incur + // any overhead. 
+ return Cost + TTI.getOperandsScalarizationOverhead( + filterExtractingOperands(Ops, VF), VF); } void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll new file mode 100644 index 0000000000000..c3ad5b078ae0b --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll @@ -0,0 +1,109 @@ +; REQUIRES: asserts + +; RUN: opt -loop-vectorize -mtriple=arm64-apple-ios %s -S -debug -disable-output 2>&1 | FileCheck --check-prefix=CM %s +; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 %s -S | FileCheck --check-prefix=FORCED %s + +; Test case from PR41294. + +; Check scalar cost for extractvalue. The constant and loop invariant operands are free, +; leaving cost 3 for scalarizing the result + 2 for executing the op with VF 2. + +; CM: LV: Scalar loop costs: 7. +; CM: LV: Found an estimated cost of 5 for VF 2 For instruction: %a = extractvalue { i64, i64 } %sv, 0 +; CM-NEXT: LV: Found an estimated cost of 5 for VF 2 For instruction: %b = extractvalue { i64, i64 } %sv, 1 + +; Check that the extractvalue operands are actually free in vector code. 
+ +; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph +; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] +; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i32> undef, i32 %index, i32 0 +; FORCED-NEXT: %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> undef, <2 x i32> zeroinitializer +; FORCED-NEXT: %induction = add <2 x i32> %broadcast.splat, <i32 0, i32 1> +; FORCED-NEXT: %0 = add i32 %index, 0 +; FORCED-NEXT: %1 = extractvalue { i64, i64 } %sv, 0 +; FORCED-NEXT: %2 = extractvalue { i64, i64 } %sv, 0 +; FORCED-NEXT: %3 = insertelement <2 x i64> undef, i64 %1, i32 0 +; FORCED-NEXT: %4 = insertelement <2 x i64> %3, i64 %2, i32 1 +; FORCED-NEXT: %5 = extractvalue { i64, i64 } %sv, 1 +; FORCED-NEXT: %6 = extractvalue { i64, i64 } %sv, 1 +; FORCED-NEXT: %7 = insertelement <2 x i64> undef, i64 %5, i32 0 +; FORCED-NEXT: %8 = insertelement <2 x i64> %7, i64 %6, i32 1 +; FORCED-NEXT: %9 = getelementptr i64, i64* %dst, i32 %0 +; FORCED-NEXT: %10 = add <2 x i64> %4, %8 +; FORCED-NEXT: %11 = getelementptr i64, i64* %9, i32 0 +; FORCED-NEXT: %12 = bitcast i64* %11 to <2 x i64>* +; FORCED-NEXT: store <2 x i64> %10, <2 x i64>* %12, align 4 +; FORCED-NEXT: %index.next = add i32 %index, 2 +; FORCED-NEXT: %13 = icmp eq i32 %index.next, 0 +; FORCED-NEXT: br i1 %13, label %middle.block, label %vector.body, !llvm.loop !0 + +define void @test1(i64* %dst, {i64, i64} %sv) { +entry: + br label %loop.body + +loop.body: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ] + %a = extractvalue { i64, i64 } %sv, 0 + %b = extractvalue { i64, i64 } %sv, 1 + %addr = getelementptr i64, i64* %dst, i32 %iv + %add = add i64 %a, %b + store i64 %add, i64* %addr + %iv.next = add nsw i32 %iv, 1 + %cond = icmp ne i32 %iv.next, 0 + br i1 %cond, label %loop.body, label %exit + +exit: + ret void +} + + +; Similar to the test case above, but checks getVectorCallCost as well.
+declare float @pow(float, float) readnone nounwind + +; CM: LV: Scalar loop costs: 16. +; CM: LV: Found an estimated cost of 5 for VF 2 For instruction: %a = extractvalue { float, float } %sv, 0 +; CM-NEXT: LV: Found an estimated cost of 5 for VF 2 For instruction: %b = extractvalue { float, float } %sv, 1 + +; FORCED-LABEL: define void @test_getVectorCallCost + +; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph +; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] +; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i32> undef, i32 %index, i32 0 +; FORCED-NEXT: %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> undef, <2 x i32> zeroinitializer +; FORCED-NEXT: %induction = add <2 x i32> %broadcast.splat, <i32 0, i32 1> +; FORCED-NEXT: %0 = add i32 %index, 0 +; FORCED-NEXT: %1 = extractvalue { float, float } %sv, 0 +; FORCED-NEXT: %2 = extractvalue { float, float } %sv, 0 +; FORCED-NEXT: %3 = insertelement <2 x float> undef, float %1, i32 0 +; FORCED-NEXT: %4 = insertelement <2 x float> %3, float %2, i32 1 +; FORCED-NEXT: %5 = extractvalue { float, float } %sv, 1 +; FORCED-NEXT: %6 = extractvalue { float, float } %sv, 1 +; FORCED-NEXT: %7 = insertelement <2 x float> undef, float %5, i32 0 +; FORCED-NEXT: %8 = insertelement <2 x float> %7, float %6, i32 1 +; FORCED-NEXT: %9 = getelementptr float, float* %dst, i32 %0 +; FORCED-NEXT: %10 = call <2 x float> @llvm.pow.v2f32(<2 x float> %4, <2 x float> %8) +; FORCED-NEXT: %11 = getelementptr float, float* %9, i32 0 +; FORCED-NEXT: %12 = bitcast float* %11 to <2 x float>* +; FORCED-NEXT: store <2 x float> %10, <2 x float>* %12, align 4 +; FORCED-NEXT: %index.next = add i32 %index, 2 +; FORCED-NEXT: %13 = icmp eq i32 %index.next, 0 +; FORCED-NEXT: br i1 %13, label %middle.block, label %vector.body, !llvm.loop !4 + +define void @test_getVectorCallCost(float* %dst, {float, float} %sv) { +entry: + br label %loop.body + +loop.body: + %iv = phi i32 [ 0, %entry ], [
%iv.next, %loop.body ] + %a = extractvalue { float, float } %sv, 0 + %b = extractvalue { float, float } %sv, 1 + %addr = getelementptr float, float* %dst, i32 %iv + %p = call float @pow(float %a, float %b) + store float %p, float* %addr + %iv.next = add nsw i32 %iv, 1 + %cond = icmp ne i32 %iv.next, 0 + br i1 %cond, label %loop.body, label %exit + +exit: + ret void +} From 951bb68ce262545bdb0bff536256e0514daf0046 Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas Date: Sun, 14 Jul 2019 20:31:15 +0000 Subject: [PATCH 068/451] [TargetParser][ARM] Account dependencies when processing target features Teaches ARM::appendArchExtFeatures to account dependencies when processing target features: i.e. when you say -march=armv8.1-m.main+mve.fp+nofp it means mve.fp should get discarded too. (Split from D63936) Differential Revision: https://reviews.llvm.org/D64048 llvm-svn: 366031 --- clang/test/Preprocessor/arm-target-features.c | 29 +++++++++++++++---- llvm/include/llvm/Support/ARMTargetParser.def | 4 +-- llvm/lib/Support/ARMTargetParser.cpp | 26 +++++++++++++---- 3 files changed, 45 insertions(+), 14 deletions(-) diff --git a/clang/test/Preprocessor/arm-target-features.c b/clang/test/Preprocessor/arm-target-features.c index 95231e2a8125a..df5af4a933762 100644 --- a/clang/test/Preprocessor/arm-target-features.c +++ b/clang/test/Preprocessor/arm-target-features.c @@ -762,12 +762,29 @@ // CHECK-V81M-MVE: #define __ARM_FEATURE_MVE 1 // CHECK-V81M-MVE: #define __ARM_FEATURE_SIMD32 1 -// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve.fp -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V81M-MVE-FP %s -// CHECK-V81M-MVE-FP: #define __ARM_FEATURE_DSP 1 -// CHECK-V81M-MVE-FP: #define __ARM_FEATURE_FP16_SCALAR_ARITHMETIC 1 -// CHECK-V81M-MVE-FP: #define __ARM_FEATURE_MVE 3 -// CHECK-V81M-MVE-FP: #define __ARM_FEATURE_SIMD32 1 -// CHECK-V81M-MVE-FP: #define __ARM_FPV5__ 1 +// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve.fp -x 
c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V81M-MVEFP %s +// CHECK-V81M-MVEFP: #define __ARM_FEATURE_DSP 1 +// CHECK-V81M-MVEFP: #define __ARM_FEATURE_FP16_SCALAR_ARITHMETIC 1 +// CHECK-V81M-MVEFP: #define __ARM_FEATURE_MVE 3 +// CHECK-V81M-MVEFP: #define __ARM_FEATURE_SIMD32 1 +// CHECK-V81M-MVEFP: #define __ARM_FPV5__ 1 + +// nofp discards mve.fp +// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve.fp+nofp -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V81M-MVEFP-NOFP %s +// CHECK-V81M-MVEFP-NOFP-NOT: #define __ARM_FEATURE_MVE + +// nomve discards mve.fp +// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve.fp+nomve -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V81M-MVEFP-NOMVE %s +// CHECK-V81M-MVEFP-NOMVE-NOT: #define __ARM_FEATURE_MVE + +// mve+fp doesn't imply mve.fp +// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve+fp -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V81M-MVE-FP %s +// CHECK-V81M-MVE-FP: #define __ARM_FEATURE_MVE 1 + +// nodsp discards both dsp and mve +// RUN: %clang -target arm-arm-none-eabi -march=armv8.1-m.main+mve+nodsp -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V81M-MVE-NODSP %s +// CHECK-V81M-MVE-NODSP-NOT: #define __ARM_FEATURE_MVE +// CHECK-V81M-MVE-NODSP-NOT: #define __ARM_FEATURE_DSP // RUN: %clang -target armv8.1a-none-none-eabi -x c -E -dM %s -o - | FileCheck -match-full-lines --check-prefix=CHECK-V81A %s // CHECK-V81A: #define __ARM_ARCH 8 diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def index 6e17c8cbfd784..f466b32527481 100644 --- a/llvm/include/llvm/Support/ARMTargetParser.def +++ b/llvm/include/llvm/Support/ARMTargetParser.def @@ -149,8 +149,8 @@ ARM_ARCH_EXT_NAME("dotprod", ARM::AEK_DOTPROD, "+dotprod","-dotprod") ARM_ARCH_EXT_NAME("dsp", ARM::AEK_DSP, "+dsp", "-dsp") ARM_ARCH_EXT_NAME("fp", 
ARM::AEK_FP, nullptr, nullptr) ARM_ARCH_EXT_NAME("fp.dp", ARM::AEK_FP_DP, nullptr, nullptr) -ARM_ARCH_EXT_NAME("mve", ARM::AEK_SIMD, "+mve", "-mve") -ARM_ARCH_EXT_NAME("mve.fp", (ARM::AEK_SIMD | ARM::AEK_FP), "+mve.fp", "-mve.fp") +ARM_ARCH_EXT_NAME("mve", (ARM::AEK_DSP | ARM::AEK_SIMD), "+mve", "-mve") +ARM_ARCH_EXT_NAME("mve.fp", (ARM::AEK_DSP | ARM::AEK_SIMD | ARM::AEK_FP), "+mve.fp", "-mve.fp") ARM_ARCH_EXT_NAME("idiv", (ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB), nullptr, nullptr) ARM_ARCH_EXT_NAME("mp", ARM::AEK_MP, nullptr, nullptr) ARM_ARCH_EXT_NAME("simd", ARM::AEK_SIMD, nullptr, nullptr) diff --git a/llvm/lib/Support/ARMTargetParser.cpp b/llvm/lib/Support/ARMTargetParser.cpp index 27d1e5527be47..be948cfc95d43 100644 --- a/llvm/lib/Support/ARMTargetParser.cpp +++ b/llvm/lib/Support/ARMTargetParser.cpp @@ -490,16 +490,30 @@ static unsigned findDoublePrecisionFPU(unsigned InputFPUKind) { return ARM::FK_INVALID; } +static unsigned getAEKID(StringRef ArchExtName) { + for (const auto AE : ARM::ARCHExtNames) + if (AE.getName() == ArchExtName) + return AE.ID; + return ARM::AEK_INVALID; +} + bool ARM::appendArchExtFeatures( StringRef CPU, ARM::ArchKind AK, StringRef ArchExt, std::vector &Features) { - StringRef StandardFeature = getArchExtFeature(ArchExt); - if (!StandardFeature.empty()) { - Features.push_back(StandardFeature); - return true; - } + size_t StartingNumFeatures = Features.size(); const bool Negated = stripNegationPrefix(ArchExt); + unsigned ID = getAEKID(ArchExt); + + if (ID == AEK_INVALID) + return false; + + for (const auto AE : ARCHExtNames) { + if (Negated && (AE.ID & ID) == ID && AE.NegFeature) + Features.push_back(AE.NegFeature); + else if (AE.ID == ID && AE.Feature) + Features.push_back(AE.Feature); + } if (CPU == "") CPU = "generic"; @@ -519,7 +533,7 @@ bool ARM::appendArchExtFeatures( } return ARM::getFPUFeatures(FPUKind, Features); } - return false; + return StartingNumFeatures != Features.size(); } StringRef ARM::getHWDivName(unsigned 
HWDivKind) { From 3c0e2bb0cba205166f85811140da0f6c17ae8280 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Sun, 14 Jul 2019 20:59:51 +0000 Subject: [PATCH 069/451] Add test for variant construction with duplicate types. llvm-svn: 366032 --- .../variant/variant.variant/variant.ctor/T.pass.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp index d05e800b302ea..42a31f3345075 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp @@ -189,10 +189,22 @@ void test_no_narrowing_check_for_class_types() { assert(std::get<0>(v) == 42); } +struct Bar {}; +struct Baz {}; +void test_construction_with_repeated_types() { + using V = std::variant; + static_assert(!std::is_constructible::value, ""); + static_assert(!std::is_constructible::value, ""); + // OK, the selected type appears only once and so it shouldn't + // be affected by the duplicate types. + static_assert(std::is_constructible::value, ""); +} + int main(int, char**) { test_T_ctor_basic(); test_T_ctor_noexcept(); test_T_ctor_sfinae(); test_no_narrowing_check_for_class_types(); + test_construction_with_repeated_types(); return 0; } From 4066978cb7a493abe303f81d930b1de8ee639909 Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Sun, 14 Jul 2019 21:29:39 +0000 Subject: [PATCH 070/451] Improve compile time of variant. In particular, improve the compile time of the overload set builder that variant uses to determine which alternative to construct. Instead of having the __overload type construct itself recursively, this patch uses a flat construction for the overload set. 
llvm-svn: 366033 --- libcxx/include/variant | 67 +++++----- .../stress_test_variant_overloads_impl.sh.cpp | 118 ++++++++++++++++++ 2 files changed, 154 insertions(+), 31 deletions(-) create mode 100644 libcxx/test/libcxx/utilities/meta/stress_tests/stress_test_variant_overloads_impl.sh.cpp diff --git a/libcxx/include/variant b/libcxx/include/variant index 88a625df71e1c..98a62c992fa12 100644 --- a/libcxx/include/variant +++ b/libcxx/include/variant @@ -1098,59 +1098,64 @@ struct __narrowing_check { template static auto __test_impl(_Dest (&&)[1]) -> __identity<_Dest>; template - using _Apply = decltype(__test_impl<_Dest>({std::declval<_Source>()})); + using _Apply _LIBCPP_NODEBUG_TYPE = decltype(__test_impl<_Dest>({std::declval<_Source>()})); }; template -using __check_for_narrowing = typename _If< +using __check_for_narrowing _LIBCPP_NODEBUG_TYPE = + typename _If< #ifdef _LIBCPP_ENABLE_NARROWING_CONVERSIONS_IN_VARIANT false && #endif is_arithmetic<_Dest>::value, __narrowing_check, __no_narrowing_check - >::template _Apply<_Dest, _Source>; - - -template -struct __overload; - -template <> -struct __overload<> { void operator()() const; }; - -template -struct __overload<_Tp, _Types...> : __overload<_Types...> { - using __overload<_Types...>::operator(); + >::template _Apply<_Dest, _Source>; +template +struct __overload { template auto operator()(_Tp, _Up&&) const -> __check_for_narrowing<_Tp, _Up>; }; -template -struct __overload_bool : _Base { - using _Base::operator(); - +template +struct __overload_bool { template > auto operator()(bool, _Up&&) const -> enable_if_t, __identity<_Tp>>; }; -template -struct __overload - : __overload_bool<__overload<_Types...>, bool> {}; -template -struct __overload - : __overload_bool<__overload<_Types...>, bool const> {}; -template -struct __overload - : __overload_bool<__overload<_Types...>, bool volatile> {}; -template -struct __overload - : __overload_bool<__overload<_Types...>, bool const volatile> {}; +template +struct __overload 
: __overload_bool {}; +template +struct __overload : __overload_bool {}; +template +struct __overload : __overload_bool {}; +template +struct __overload : __overload_bool {}; + +template +struct __all_overloads : _Bases... { + void operator()() const; + using _Bases::operator()...; +}; + +template +struct __make_overloads_imp; + +template +struct __make_overloads_imp<__tuple_indices<_Idx...> > { + template + using _Apply _LIBCPP_NODEBUG_TYPE = __all_overloads<__overload<_Types, _Idx>...>; +}; + +template +using _MakeOverloads _LIBCPP_NODEBUG_TYPE = typename __make_overloads_imp< + __make_indices_imp >::template _Apply<_Types...>; template using __best_match_t = - typename invoke_result_t<__overload<_Types...>, _Tp, _Tp>::type; + typename invoke_result_t<_MakeOverloads<_Types...>, _Tp, _Tp>::type; } // __variant_detail diff --git a/libcxx/test/libcxx/utilities/meta/stress_tests/stress_test_variant_overloads_impl.sh.cpp b/libcxx/test/libcxx/utilities/meta/stress_tests/stress_test_variant_overloads_impl.sh.cpp new file mode 100644 index 0000000000000..013d434f40d7b --- /dev/null +++ b/libcxx/test/libcxx/utilities/meta/stress_tests/stress_test_variant_overloads_impl.sh.cpp @@ -0,0 +1,118 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is a dummy feature that prevents this test from running by default. +// REQUIRES: template-cost-testing + +// Test the cost of the mechanism used to create an overload set used by variant +// to determine which alternative to construct. + +// The table below compares the compile time and object size for each of the +// variants listed in the RUN script. 
+// +// Impl Compile Time Object Size +// ----------------------------------------------------- +// flat: 959 ms 792 KiB +// recursive: 23,444 ms 23,000 KiB +// ----------------------------------------------------- +// variant_old: 16,894 ms 17,000 KiB +// variant_new: 1,105 ms 828 KiB + + +// RUN: %cxx %flags %compile_flags -std=c++17 -c %s \ +// RUN: -ggdb -ggnu-pubnames -ftemplate-depth=5000 -ftime-trace -g \ +// RUN: -DTEST_NS=flat_impl -o %S/flat.o +// RUN: %cxx %flags %compile_flags -std=c++17 -c %s \ +// RUN: -ggdb -ggnu-pubnames -ftemplate-depth=5000 -ftime-trace -g \ +// RUN: -DTEST_NS=rec_impl -o %S/rec.o +// RUN: %cxx %flags %compile_flags -std=c++17 -c %s \ +// RUN: -ggdb -ggnu-pubnames -ftemplate-depth=5000 -ftime-trace -g \ +// RUN: -DTEST_NS=variant_impl -o %S/variant.o + +#include +#include +#include +#include + +#include "test_macros.h" +#include "template_cost_testing.h" + +template +struct TestType {}; + +template +struct ID { + using type = T; +}; + +namespace flat_impl { + +struct OverloadBase { void operator()() const; }; + +template +struct Overload { + auto operator()(Tp, Tp) const -> ID; +}; + +template +struct AllOverloads : OverloadBase, Bases... 
{}; + +template +struct MakeOverloads; + +template +struct MakeOverloads > { + template + using Apply = AllOverloads...>; +}; + +template +using Overloads = typename MakeOverloads< + std::__make_indices_imp >::template Apply; + +} // namespace flat_impl + + +namespace rec_impl { + +template struct Overload; + +template <> +struct Overload<> { void operator()() const; }; + +template +struct Overload : Overload { + using Overload::operator(); + auto operator()(Tp, Tp) const -> ID; +}; + +template +using Overloads = Overload; + +} // namespace rec_impl + +namespace variant_impl { + template + using Overloads = std::__variant_detail::_MakeOverloads; +} // namespace variant_impl + +#ifndef TEST_NS +#error TEST_NS must be defined +#endif + +#define TEST_TYPE() TestType< __COUNTER__ >, +using T1 = TEST_NS::Overloads, TestType<1>, int>; +static_assert(__COUNTER__ >= 1000, ""); + +void fn1(T1 x) { DoNotOptimize(&x); } +void fn2(typename std::invoke_result_t::type x) { DoNotOptimize(&x); } + +int main() { + DoNotOptimize(&fn1); + DoNotOptimize(&fn2); +} From 635d103e0be69342b65c251b7aa0e07bed418010 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 15 Jul 2019 02:02:31 +0000 Subject: [PATCH 071/451] [X86] Separate the memory size of vzext_load/vextract_store from the element size of the result type. Use them to improve the codegen of v2f32 loads/stores with sse1 only. Summary: SSE1 only supports v4f32. But does have instructions like movlps/movhps that load/store 64-bits of memory. This patch breaks the connection between the node VT of the vzext_load/vextract_store patterns and the memory VT. Enabling a v4f32 node with a 64-bit memory VT. I've used i64 as the memory VT here. I've written the PatFrag predicate to just check the store size not the specific VT. I think the VT will only matter for CSE purposes. We could use v2f32, but if we want to start using these operations in more places a simple integer type might make the most sense.
I'd like to maybe use this same thing for SSE2 and later as well, but that will need more work to be supported by EltsFromConsecutiveLoads to avoid regressing lit tests. I'd maybe also like to combine bitcasts with these load/stores nodes now that the types are disconnected. And I'd also like to consider canonicalizing (scalar_to_vector + load) to vzext_load. If you want I can split the mechanical tablegen stuff where I added the 32/64 off from the sse1 change. Reviewers: spatel, RKSimon Reviewed By: RKSimon Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64528 llvm-svn: 366034 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 73 +++++++++----- llvm/lib/Target/X86/X86InstrAVX512.td | 96 +++++++++---------- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 26 +++-- llvm/lib/Target/X86/X86InstrSSE.td | 88 ++++++++++------- .../CodeGen/X86/sse-intrinsics-fast-isel.ll | 73 +++----------- llvm/test/CodeGen/X86/vector-shuffle-sse1.ll | 19 +--- 6 files changed, 178 insertions(+), 197 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e0bcf70248948..8be441fe28e08 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -794,6 +794,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::SELECT, MVT::v4f32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); + + setOperationAction(ISD::LOAD, MVT::v2f32, Custom); + setOperationAction(ISD::STORE, MVT::v2f32, Custom); } if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { @@ -971,11 +974,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // We want to legalize this to an f64 load rather than an i64 load on // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for // store. 
- setOperationAction(ISD::LOAD, MVT::v2f32, Custom); setOperationAction(ISD::LOAD, MVT::v2i32, Custom); setOperationAction(ISD::LOAD, MVT::v4i16, Custom); setOperationAction(ISD::LOAD, MVT::v8i8, Custom); - setOperationAction(ISD::STORE, MVT::v2f32, Custom); setOperationAction(ISD::STORE, MVT::v2i32, Custom); setOperationAction(ISD::STORE, MVT::v4i16, Custom); setOperationAction(ISD::STORE, MVT::v8i8, Custom); @@ -21267,21 +21268,29 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget, TargetLowering::TypeWidenVector) return SDValue(); - // Widen the vector, cast to a v2x64 type, extract the single 64-bit element - // and store it. MVT WideVT = MVT::getVectorVT(StoreVT.getVectorElementType(), StoreVT.getVectorNumElements() * 2); StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal, DAG.getUNDEF(StoreVT)); - MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64; - MVT CastVT = MVT::getVectorVT(StVT, 2); - StoredVal = DAG.getBitcast(CastVT, StoredVal); - StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal, - DAG.getIntPtrConstant(0, dl)); - return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), - St->getPointerInfo(), St->getAlignment(), - St->getMemOperand()->getFlags()); + if (Subtarget.hasSSE2()) { + // Widen the vector, cast to a v2x64 type, extract the single 64-bit element + // and store it. + MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? 
MVT::i64 : MVT::f64; + MVT CastVT = MVT::getVectorVT(StVT, 2); + StoredVal = DAG.getBitcast(CastVT, StoredVal); + StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal, + DAG.getIntPtrConstant(0, dl)); + + return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), + St->getPointerInfo(), St->getAlignment(), + St->getMemOperand()->getFlags()); + } + assert(Subtarget.hasSSE1() && "Expected SSE"); + SDVTList Tys = DAG.getVTList(MVT::Other); + SDValue Ops[] = {St->getChain(), StoredVal, St->getBasePtr()}; + return DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops, MVT::i64, + St->getMemOperand()); } // Lower vector extended loads using a shuffle. If SSSE3 is not available we @@ -28155,19 +28164,28 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, if (!ISD::isNON_EXTLoad(N)) return; auto *Ld = cast(N); - MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64; - SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(), - Ld->getPointerInfo(), - Ld->getAlignment(), - Ld->getMemOperand()->getFlags()); - SDValue Chain = Res.getValue(1); - MVT WideVT = MVT::getVectorVT(LdVT, 2); - Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res); - MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(), - VT.getVectorNumElements() * 2); - Res = DAG.getBitcast(CastVT, Res); + if (Subtarget.hasSSE2()) { + MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? 
MVT::i64 : MVT::f64; + SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(), + Ld->getPointerInfo(), Ld->getAlignment(), + Ld->getMemOperand()->getFlags()); + SDValue Chain = Res.getValue(1); + MVT WideVT = MVT::getVectorVT(LdVT, 2); + Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res); + MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(), + VT.getVectorNumElements() * 2); + Res = DAG.getBitcast(CastVT, Res); + Results.push_back(Res); + Results.push_back(Chain); + return; + } + assert(Subtarget.hasSSE1() && "Expected SSE"); + SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other); + SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()}; + SDValue Res = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, + MVT::i64, Ld->getMemOperand()); Results.push_back(Res); - Results.push_back(Chain); + Results.push_back(Res.getValue(1)); return; } } @@ -32016,8 +32034,11 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, // directly if we don't shuffle the lower element and we shuffle the upper // (zero) elements within themselves. 
if (V1.getOpcode() == X86ISD::VZEXT_LOAD && - (V1.getScalarValueSizeInBits() % MaskEltSizeInBits) == 0) { - unsigned Scale = V1.getScalarValueSizeInBits() / MaskEltSizeInBits; + (cast(V1)->getMemoryVT().getScalarSizeInBits() % + MaskEltSizeInBits) == 0) { + unsigned Scale = + cast(V1)->getMemoryVT().getScalarSizeInBits() / + MaskEltSizeInBits; ArrayRef HiMask(Mask.data() + Scale, NumMaskElts - Scale); if (isSequentialOrUndefInRange(Mask, 0, Scale, 0) && isUndefOrZeroOrInRange(HiMask, Scale, NumMaskElts)) { diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 56aa2ecffe071..54eddeacaa173 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1352,15 +1352,15 @@ multiclass avx512_subvec_broadcast_rm_dq opc, string OpcodeStr, let Predicates = [HasAVX512] in { // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. - def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))), + def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), (VPBROADCASTQZm addr:$src)>; } let Predicates = [HasVLX] in { // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. 
- def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))), + def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), (VPBROADCASTQZ128m addr:$src)>; - def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))), + def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), (VPBROADCASTQZ256m addr:$src)>; } let Predicates = [HasVLX, HasBWI] in { @@ -3838,7 +3838,7 @@ def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}", (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>; let Predicates = [HasAVX512] in { - def : Pat<(X86vextractstore (v2i64 VR128X:$src), addr:$dst), + def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst), (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>; } @@ -3873,7 +3873,7 @@ def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", // AVX-512 MOVSS, MOVSD //===----------------------------------------------------------------------===// -multiclass avx512_move_scalar { let Predicates = [HasAVX512, OptForSize] in def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), @@ -3901,7 +3901,7 @@ multiclass avx512_move_scalar, EVEX, Sched<[WriteFLoad]>; // _alt version uses FR32/FR64 register class. 
let isCodeGenOnly = 1 in @@ -3935,10 +3935,10 @@ multiclass avx512_move_scalar, +defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>, VEX_LIG, XS, EVEX_CD8<32, CD8VT1>; -defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>, +defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>, VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>; @@ -4319,16 +4319,16 @@ let Predicates = [HasAVX512] in { // Represent the same patterns above but in the form they appear for // 256-bit types - def : Pat<(v8f32 (X86vzload addr:$src)), + def : Pat<(v8f32 (X86vzload32 addr:$src)), (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; - def : Pat<(v4f64 (X86vzload addr:$src)), + def : Pat<(v4f64 (X86vzload64 addr:$src)), (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>; // Represent the same patterns above but in the form they appear for // 512-bit types - def : Pat<(v16f32 (X86vzload addr:$src)), + def : Pat<(v16f32 (X86vzload32 addr:$src)), (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>; - def : Pat<(v8f64 (X86vzload addr:$src)), + def : Pat<(v8f64 (X86vzload64 addr:$src)), (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>; } @@ -4351,21 +4351,21 @@ let Predicates = [HasAVX512] in { // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. 
def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))), (VMOVDI2PDIZrm addr:$src)>; - def : Pat<(v4i32 (X86vzload addr:$src)), + def : Pat<(v4i32 (X86vzload32 addr:$src)), (VMOVDI2PDIZrm addr:$src)>; - def : Pat<(v8i32 (X86vzload addr:$src)), + def : Pat<(v8i32 (X86vzload32 addr:$src)), (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>; def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))), (VMOVZPQILo2PQIZrr VR128X:$src)>; - def : Pat<(v2i64 (X86vzload addr:$src)), + def : Pat<(v2i64 (X86vzload64 addr:$src)), (VMOVQI2PQIZrm addr:$src)>; - def : Pat<(v4i64 (X86vzload addr:$src)), + def : Pat<(v4i64 (X86vzload64 addr:$src)), (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>; // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext. - def : Pat<(v16i32 (X86vzload addr:$src)), + def : Pat<(v16i32 (X86vzload32 addr:$src)), (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>; - def : Pat<(v8i64 (X86vzload addr:$src)), + def : Pat<(v8i64 (X86vzload64 addr:$src)), (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>; def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))), @@ -6353,11 +6353,11 @@ let Predicates = [HasAVX512] in { def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload addr:$src2))), + def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))), (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; // VMOVLPD patterns - def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload addr:$src2))), + def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))), (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; } @@ -8292,47 +8292,47 @@ let Predicates = [HasVLX] in { } let Predicates = [HasDQI, HasVLX] in { - def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src))))), + def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 
(v2f64 (X86vzload64 addr:$src))))), (VCVTPS2QQZ128rm addr:$src)>; def : Pat<(v2i64 (vselect VK2WM:$mask, - (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src)))), + (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), VR128X:$src0)), (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; def : Pat<(v2i64 (vselect VK2WM:$mask, - (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload addr:$src)))), + (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), v2i64x_info.ImmAllZerosV)), (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src))))), + def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), (VCVTPS2UQQZ128rm addr:$src)>; def : Pat<(v2i64 (vselect VK2WM:$mask, - (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src)))), + (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), VR128X:$src0)), (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; def : Pat<(v2i64 (vselect VK2WM:$mask, - (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload addr:$src)))), + (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), v2i64x_info.ImmAllZerosV)), (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src))))), + def : Pat<(v2i64 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), (VCVTTPS2QQZ128rm addr:$src)>; def : Pat<(v2i64 (vselect VK2WM:$mask, - (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src)))), + (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), VR128X:$src0)), (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; def : Pat<(v2i64 (vselect VK2WM:$mask, - (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload addr:$src)))), + (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), v2i64x_info.ImmAllZerosV)), (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src))))), + def : Pat<(v2i64 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))), 
(VCVTTPS2UQQZ128rm addr:$src)>; def : Pat<(v2i64 (vselect VK2WM:$mask, - (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src)))), + (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), VR128X:$src0)), (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; def : Pat<(v2i64 (vselect VK2WM:$mask, - (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload addr:$src)))), + (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))), v2i64x_info.ImmAllZerosV)), (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>; } @@ -8375,25 +8375,25 @@ def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))), } let Predicates = [HasVLX] in { - def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))), + def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), (VCVTDQ2PDZ128rm addr:$src)>; def : Pat<(v2f64 (vselect VK2WM:$mask, - (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src)))), + (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), VR128X:$src0)), (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; def : Pat<(v2f64 (vselect VK2WM:$mask, - (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src)))), + (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), v2f64x_info.ImmAllZerosV)), (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))), + def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), (VCVTUDQ2PDZ128rm addr:$src)>; def : Pat<(v2f64 (vselect VK2WM:$mask, - (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src)))), + (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), VR128X:$src0)), (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; def : Pat<(v2f64 (vselect VK2WM:$mask, - (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src)))), + (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), v2f64x_info.ImmAllZerosV)), (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; } @@ -8562,7 +8562,7 @@ let Predicates = [HasVLX] in { EVEX_CD8<32, CD8VH>; // 
Pattern match vcvtph2ps of a scalar i64 load. - def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))), + def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))), (VCVTPH2PSZ128rm addr:$src)>; def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), @@ -9626,13 +9626,13 @@ multiclass AVX512_pmovx_patterns(OpcPrefix#BWZ128rm) addr:$src)>; def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), (!cast(OpcPrefix#BWZ128rm) addr:$src)>; - def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), + def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))), (!cast(OpcPrefix#BWZ128rm) addr:$src)>; } let Predicates = [HasVLX] in { def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), (!cast(OpcPrefix#BDZ128rm) addr:$src)>; - def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))), + def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))), (!cast(OpcPrefix#BDZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))), @@ -9642,35 +9642,35 @@ multiclass AVX512_pmovx_patterns(OpcPrefix#WDZ128rm) addr:$src)>; def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), (!cast(OpcPrefix#WDZ128rm) addr:$src)>; - def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))), + def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))), (!cast(OpcPrefix#WDZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), (!cast(OpcPrefix#WQZ128rm) addr:$src)>; - def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v4i32 addr:$src)))), + def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))), (!cast(OpcPrefix#WQZ128rm) addr:$src)>; def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast(OpcPrefix#DQZ128rm) addr:$src)>; def : Pat<(v2i64 
(InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), (!cast(OpcPrefix#DQZ128rm) addr:$src)>; - def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))), + def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), (!cast(OpcPrefix#DQZ128rm) addr:$src)>; } let Predicates = [HasVLX] in { def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast(OpcPrefix#BDZ256rm) addr:$src)>; - def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), + def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))), (!cast(OpcPrefix#BDZ256rm) addr:$src)>; def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), (!cast(OpcPrefix#BQZ256rm) addr:$src)>; - def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v4i32 addr:$src)))), + def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))), (!cast(OpcPrefix#BQZ256rm) addr:$src)>; def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast(OpcPrefix#WQZ256rm) addr:$src)>; - def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))), + def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))), (!cast(OpcPrefix#WQZ256rm) addr:$src)>; } // 512-bit patterns @@ -10873,7 +10873,7 @@ def : Pat<(v2f64 (X86VBroadcast f64:$src)), (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>; def : Pat<(v2f64 (X86VBroadcast (v2f64 (nonvolatile_load addr:$src)))), (VMOVDDUPZ128rm addr:$src)>; -def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))), +def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))), (VMOVDDUPZ128rm addr:$src)>; def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 4077bdc92f86e..096cc27861caa 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -99,10 +99,10 @@ 
def X86insertps : SDNode<"X86ISD::INSERTPS", def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; -def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, - [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; -def X86vextractstore : SDNode<"X86ISD::VEXTRACT_STORE", SDTStore, - [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def X86vzld : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def X86vextractst : SDNode<"X86ISD::VEXTRACT_STORE", SDTStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCisInt<1>, @@ -939,10 +939,20 @@ def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>; def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>; def bc_v16f32 : PatFrag<(ops node:$in), (v16f32 (bitconvert node:$in))>; -def vzload_v4i32 : PatFrag<(ops node:$src), - (bitconvert (v4i32 (X86vzload node:$src)))>; -def vzload_v2i64 : PatFrag<(ops node:$src), - (bitconvert (v2i64 (X86vzload node:$src)))>; +def X86vzload32 : PatFrag<(ops node:$src), + (X86vzld node:$src), [{ + return cast(N)->getMemoryVT().getStoreSize() == 4; +}]>; + +def X86vzload64 : PatFrag<(ops node:$src), + (X86vzld node:$src), [{ + return cast(N)->getMemoryVT().getStoreSize() == 8; +}]>; + +def X86vextractstore64 : PatFrag<(ops node:$val, node:$ptr), + (X86vextractst node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT().getStoreSize() == 8; +}]>; def fp32imm0 : PatLeaf<(f32 fpimm), [{ diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index d25d216db1942..7d0a5b87baf49 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -226,14 +226,15 @@ multiclass sse12_move { + PatFrag mem_pat, PatFrag vzloadfrag, string OpcodeStr, + Domain d> { def V#NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 
- [(set VR128:$dst, (vt (X86vzload addr:$src)))], d>, + [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>, VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG; def NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (vt (X86vzload addr:$src)))], d>, + [(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>, Sched<[WriteFLoad]>; // _alt version uses FR32/FR64 register class. @@ -255,9 +256,9 @@ defm MOVSD : sse12_move, XD; let canFoldAsLoad = 1, isReMaterializable = 1 in { - defm MOVSS : sse12_move_rm, XS; - defm MOVSD : sse12_move_rm, XD; } @@ -270,9 +271,9 @@ let Predicates = [UseAVX] in { // Represent the same patterns above but in the form they appear for // 256-bit types - def : Pat<(v8f32 (X86vzload addr:$src)), + def : Pat<(v8f32 (X86vzload32 addr:$src)), (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; - def : Pat<(v4f64 (X86vzload addr:$src)), + def : Pat<(v4f64 (X86vzload64 addr:$src)), (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>; } @@ -663,6 +664,13 @@ let Predicates = [UseSSE1] in { def : Pat<(X86Shufp (v4f32 (nonvolatile_load addr:$src2)), VR128:$src1, (i8 -28)), (MOVLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(X86Shufp (v4f32 (X86vzload64 addr:$src2)), VR128:$src1, (i8 -28)), + (MOVLPSrm VR128:$src1, addr:$src2)>; + + def : Pat<(v4f32 (X86vzload64 addr:$src)), + (MOVLPSrm (v4f32 (V_SET0)), addr:$src)>; + def : Pat<(X86vextractstore64 (v4f32 VR128:$src), addr:$dst), + (MOVLPSmr addr:$dst, VR128:$src)>; } //===----------------------------------------------------------------------===// @@ -702,7 +710,7 @@ let Predicates = [UseAVX] in { def : Pat<(v2f64 (X86Unpckl VR128:$src1, (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), (VMOVHPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload addr:$src2))), + def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))), (VMOVHPDrm VR128:$src1, addr:$src2)>; def : 
Pat<(store (f64 (extractelt @@ -711,7 +719,7 @@ let Predicates = [UseAVX] in { (VMOVHPDmr addr:$dst, VR128:$src)>; // MOVLPD patterns - def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload addr:$src2))), + def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))), (VMOVLPDrm VR128:$src1, addr:$src2)>; } @@ -721,6 +729,12 @@ let Predicates = [UseSSE1] in { // No need for aligned load, we're only loading 64-bits. def : Pat<(X86Movlhps VR128:$src1, (v4f32 (nonvolatile_load addr:$src2))), (MOVHPSrm VR128:$src1, addr:$src2)>; + def : Pat<(X86Movlhps VR128:$src1, (v4f32 (X86vzload64 addr:$src2))), + (MOVHPSrm VR128:$src1, addr:$src2)>; + + def : Pat<(X86vextractstore64 (v4f32 (X86Movhlps VR128:$src, VR128:$src)), + addr:$dst), + (MOVHPSmr addr:$dst, VR128:$src)>; } let Predicates = [UseSSE2] in { @@ -731,7 +745,7 @@ let Predicates = [UseSSE2] in { def : Pat<(v2f64 (X86Unpckl VR128:$src1, (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), (MOVHPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload addr:$src2))), + def : Pat<(v2f64 (X86Unpckl VR128:$src1, (X86vzload64 addr:$src2))), (MOVHPDrm VR128:$src1, addr:$src2)>; def : Pat<(store (f64 (extractelt @@ -740,7 +754,7 @@ let Predicates = [UseSSE2] in { (MOVHPDmr addr:$dst, VR128:$src)>; // MOVLPD patterns - def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload addr:$src2))), + def : Pat<(v2f64 (X86Movsd VR128:$src1, (X86vzload64 addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>; } @@ -1631,13 +1645,13 @@ def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), // AVX register conversion intrinsics let Predicates = [HasAVX, NoVLX] in { - def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))), + def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), (VCVTDQ2PDrm addr:$src)>; } // Predicates = [HasAVX, NoVLX] // SSE2 register conversion intrinsics let Predicates = [UseSSE2] in { - def : Pat<(v2f64 (X86VSintToFP 
(bc_v4i32 (v2i64 (X86vzload addr:$src))))), + def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), (CVTDQ2PDrm addr:$src)>; } // Predicates = [UseSSE2] @@ -4124,9 +4138,9 @@ let Predicates = [UseAVX] in { // These instructions also write zeros in the high part of a 256-bit register. def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))), (VMOVDI2PDIrm addr:$src)>; - def : Pat<(v4i32 (X86vzload addr:$src)), + def : Pat<(v4i32 (X86vzload32 addr:$src)), (VMOVDI2PDIrm addr:$src)>; - def : Pat<(v8i32 (X86vzload addr:$src)), + def : Pat<(v8i32 (X86vzload32 addr:$src)), (SUBREG_TO_REG (i64 0), (v4i32 (VMOVDI2PDIrm addr:$src)), sub_xmm)>; } @@ -4138,7 +4152,7 @@ let Predicates = [UseSSE2] in { (MOV64toPQIrr GR64:$src)>; def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))), (MOVDI2PDIrm addr:$src)>; - def : Pat<(v4i32 (X86vzload addr:$src)), + def : Pat<(v4i32 (X86vzload32 addr:$src)), (MOVDI2PDIrm addr:$src)>; } @@ -4206,19 +4220,19 @@ def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}", (MOVPQI2QIrr VR128:$dst, VR128:$src), 0>; let Predicates = [UseAVX] in { - def : Pat<(v2i64 (X86vzload addr:$src)), + def : Pat<(v2i64 (X86vzload64 addr:$src)), (VMOVQI2PQIrm addr:$src)>; - def : Pat<(v4i64 (X86vzload addr:$src)), + def : Pat<(v4i64 (X86vzload64 addr:$src)), (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIrm addr:$src)), sub_xmm)>; - def : Pat<(X86vextractstore (v2i64 VR128:$src), addr:$dst), + def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst), (VMOVPQI2QImr addr:$dst, VR128:$src)>; } let Predicates = [UseSSE2] in { - def : Pat<(v2i64 (X86vzload addr:$src)), (MOVQI2PQIrm addr:$src)>; + def : Pat<(v2i64 (X86vzload64 addr:$src)), (MOVQI2PQIrm addr:$src)>; - def : Pat<(X86vextractstore (v2i64 VR128:$src), addr:$dst), + def : Pat<(X86vextractstore64 (v2i64 VR128:$src), addr:$dst), (MOVPQI2QImr addr:$dst, VR128:$src)>; } @@ -4368,7 +4382,7 @@ defm MOVDDUP : sse3_replicate_dfp<"movddup", 
SchedWriteFShuffle>; let Predicates = [HasAVX, NoVLX] in { def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; - def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))), + def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; } @@ -4376,7 +4390,7 @@ let Predicates = [UseSSE3] in { // No need for aligned memory as this only loads 64-bits. def : Pat<(X86Movddup (v2f64 (nonvolatile_load addr:$src))), (MOVDDUPrm addr:$src)>; - def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))), + def : Pat<(X86Movddup (v2f64 (X86vzload64 addr:$src))), (MOVDDUPrm addr:$src)>; } @@ -4953,7 +4967,7 @@ multiclass SS41I_pmovx_avx2_patterns(OpcPrefix#BDYrm) addr:$src)>; - def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), + def : Pat<(v8i32 (InVecOp (v16i8 (X86vzload64 addr:$src)))), (!cast(OpcPrefix#BDYrm) addr:$src)>; def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))), @@ -4961,12 +4975,12 @@ multiclass SS41I_pmovx_avx2_patterns(OpcPrefix#BQYrm) addr:$src)>; - def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))), + def : Pat<(v4i64 (InVecOp (v16i8 (X86vzload64 addr:$src)))), (!cast(OpcPrefix#BQYrm) addr:$src)>; def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (!cast(OpcPrefix#WQYrm) addr:$src)>; - def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))), + def : Pat<(v4i64 (InVecOp (v8i16 (X86vzload64 addr:$src)))), (!cast(OpcPrefix#WQYrm) addr:$src)>; } } @@ -5018,7 +5032,7 @@ multiclass SS41I_pmovx_patterns(OpcPrefix#BWrm) addr:$src)>; def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), (!cast(OpcPrefix#BWrm) addr:$src)>; - def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))), + def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))), (!cast(OpcPrefix#BWrm) addr:$src)>; def : Pat<(v8i16 (ExtOp (loadv16i8 addr:$src))), (!cast(OpcPrefix#BWrm) addr:$src)>; @@ -5026,7 +5040,7 @@ 
multiclass SS41I_pmovx_patterns(OpcPrefix#BDrm) addr:$src)>; - def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v4i32 addr:$src)))), + def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))), (!cast(OpcPrefix#BDrm) addr:$src)>; def : Pat<(v4i32 (ExtOp (loadv16i8 addr:$src))), (!cast(OpcPrefix#BDrm) addr:$src)>; @@ -5040,14 +5054,14 @@ multiclass SS41I_pmovx_patterns(OpcPrefix#WDrm) addr:$src)>; def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), (!cast(OpcPrefix#WDrm) addr:$src)>; - def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))), + def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))), (!cast(OpcPrefix#WDrm) addr:$src)>; def : Pat<(v4i32 (ExtOp (loadv8i16 addr:$src))), (!cast(OpcPrefix#WDrm) addr:$src)>; def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))), (!cast(OpcPrefix#WQrm) addr:$src)>; - def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v4i32 addr:$src)))), + def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))), (!cast(OpcPrefix#WQrm) addr:$src)>; def : Pat<(v2i64 (ExtOp (loadv8i16 addr:$src))), (!cast(OpcPrefix#WQrm) addr:$src)>; @@ -5056,7 +5070,7 @@ multiclass SS41I_pmovx_patterns(OpcPrefix#DQrm) addr:$src)>; def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))), (!cast(OpcPrefix#DQrm) addr:$src)>; - def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))), + def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), (!cast(OpcPrefix#DQrm) addr:$src)>; def : Pat<(v2i64 (ExtOp (loadv4i32 addr:$src))), (!cast(OpcPrefix#DQrm) addr:$src)>; @@ -7261,10 +7275,10 @@ let Predicates = [HasF16C, NoVLX] in { WriteCvtPS2PHYSt>, VEX_L; // Pattern match vcvtph2ps of a scalar i64 load. 
- def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))), + def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))), (VCVTPH2PSrm addr:$src)>; - def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert - (v2i64 (scalar_to_vector (loadi64 addr:$src))))))), + def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 + (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (VCVTPH2PSrm addr:$src)>; def : Pat<(store (f64 (extractelt @@ -7436,9 +7450,9 @@ defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, let Predicates = [HasAVX2, NoVLX] in { // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. - def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))), + def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), (VPBROADCASTQrm addr:$src)>; - def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))), + def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))), (VPBROADCASTQYrm addr:$src)>; def : Pat<(v4i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))), @@ -7550,7 +7564,7 @@ let Predicates = [HasAVX, NoVLX] in { (VMOVDDUPrr VR128:$src)>; def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))), (VMOVDDUPrm addr:$src)>; - def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))), + def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))), (VMOVDDUPrm addr:$src)>; } diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll index cc61875b916c7..9f51a94051330 100644 --- a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll @@ -1319,14 +1319,8 @@ define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) { ; X86-SSE-LABEL: test_mm_loadh_pi: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-SSE-NEXT: movss (%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x08] -; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero 
-; X86-SSE-NEXT: movss 4(%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x50,0x04] -; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero -; X86-SSE-NEXT: shufps $0, %xmm1, %xmm2 # encoding: [0x0f,0xc6,0xd1,0x00] -; X86-SSE-NEXT: # xmm2 = xmm2[0,0],xmm1[0,0] -; X86-SSE-NEXT: shufps $36, %xmm2, %xmm0 # encoding: [0x0f,0xc6,0xc2,0x24] -; X86-SSE-NEXT: # xmm0 = xmm0[0,1],xmm2[2,0] +; X86-SSE-NEXT: movhps (%eax), %xmm0 # encoding: [0x0f,0x16,0x00] +; X86-SSE-NEXT: # xmm0 = xmm0[0,1],mem[0,1] ; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX1-LABEL: test_mm_loadh_pi: @@ -1345,18 +1339,8 @@ define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) { ; ; X64-SSE-LABEL: test_mm_loadh_pi: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] -; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xf8] -; X64-SSE-NEXT: shrq $32, %rax # encoding: [0x48,0xc1,0xe8,0x20] -; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc] -; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0xf8] -; X64-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero -; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0xfc] -; X64-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero -; X64-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] -; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X64-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] -; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X64-SSE-NEXT: movhps (%rdi), %xmm0 # encoding: [0x0f,0x16,0x07] +; X64-SSE-NEXT: # xmm0 = xmm0[0,1],mem[0,1] ; X64-SSE-NEXT: retq # encoding: [0xc3] ; ; X64-AVX1-LABEL: test_mm_loadh_pi: @@ -1381,15 +1365,8 @@ define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) { ; X86-SSE-LABEL: test_mm_loadl_pi: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-SSE-NEXT: movss (%eax), %xmm2 # encoding: [0xf3,0x0f,0x10,0x10] 
-; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss 4(%eax), %xmm1 # encoding: [0xf3,0x0f,0x10,0x48,0x04] -; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: shufps $0, %xmm2, %xmm1 # encoding: [0x0f,0xc6,0xca,0x00] -; X86-SSE-NEXT: # xmm1 = xmm1[0,0],xmm2[0,0] -; X86-SSE-NEXT: shufps $226, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe2] -; X86-SSE-NEXT: # xmm1 = xmm1[2,0],xmm0[2,3] -; X86-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] +; X86-SSE-NEXT: movlps (%eax), %xmm0 # encoding: [0x0f,0x12,0x00] +; X86-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3] ; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX1-LABEL: test_mm_loadl_pi: @@ -1408,19 +1385,8 @@ define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) { ; ; X64-SSE-LABEL: test_mm_loadl_pi: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movq (%rdi), %rax # encoding: [0x48,0x8b,0x07] -; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xf8] -; X64-SSE-NEXT: shrq $32, %rax # encoding: [0x48,0xc1,0xe8,0x20] -; X64-SSE-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # encoding: [0x89,0x44,0x24,0xfc] -; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0xf8] -; X64-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero -; X64-SSE-NEXT: movss -{{[0-9]+}}(%rsp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0xfc] -; X64-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero -; X64-SSE-NEXT: unpcklps %xmm2, %xmm1 # encoding: [0x0f,0x14,0xca] -; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X64-SSE-NEXT: shufps $228, %xmm0, %xmm1 # encoding: [0x0f,0xc6,0xc8,0xe4] -; X64-SSE-NEXT: # xmm1 = xmm1[0,1],xmm0[2,3] -; X64-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] +; X64-SSE-NEXT: movlps (%rdi), %xmm0 # encoding: [0x0f,0x12,0x07] +; X64-SSE-NEXT: # xmm0 = mem[0,1],xmm0[2,3] ; X64-SSE-NEXT: retq # encoding: [0xc3] ; ; X64-AVX1-LABEL: test_mm_loadl_pi: @@ -2818,13 +2784,7 @@ define void @test_mm_storeh_pi2(x86_mmx *%a0, <4 x float> %a1) nounwind { ; 
X86-SSE-LABEL: test_mm_storeh_pi2: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-SSE-NEXT: movaps %xmm0, %xmm1 # encoding: [0x0f,0x28,0xc8] -; X86-SSE-NEXT: movhlps %xmm0, %xmm1 # encoding: [0x0f,0x12,0xc8] -; X86-SSE-NEXT: # xmm1 = xmm0[1],xmm1[1] -; X86-SSE-NEXT: shufps $231, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0xe7] -; X86-SSE-NEXT: # xmm0 = xmm0[3,1,2,3] -; X86-SSE-NEXT: movss %xmm0, 4(%eax) # encoding: [0xf3,0x0f,0x11,0x40,0x04] -; X86-SSE-NEXT: movss %xmm1, (%eax) # encoding: [0xf3,0x0f,0x11,0x08] +; X86-SSE-NEXT: movhps %xmm0, (%eax) # encoding: [0x0f,0x17,0x00] ; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX1-LABEL: test_mm_storeh_pi2: @@ -2841,11 +2801,7 @@ define void @test_mm_storeh_pi2(x86_mmx *%a0, <4 x float> %a1) nounwind { ; ; X64-SSE-LABEL: test_mm_storeh_pi2: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0] -; X64-SSE-NEXT: # xmm0 = xmm0[1,1] -; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] -; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xe8] -; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] +; X64-SSE-NEXT: movhps %xmm0, (%rdi) # encoding: [0x0f,0x17,0x07] ; X64-SSE-NEXT: retq # encoding: [0xc3] ; ; X64-AVX1-LABEL: test_mm_storeh_pi2: @@ -2922,10 +2878,7 @@ define void @test_mm_storel_pi2(x86_mmx *%a0, <4 x float> %a1) nounwind { ; X86-SSE-LABEL: test_mm_storel_pi2: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-SSE-NEXT: movss %xmm0, (%eax) # encoding: [0xf3,0x0f,0x11,0x00] -; X86-SSE-NEXT: shufps $229, %xmm0, %xmm0 # encoding: [0x0f,0xc6,0xc0,0xe5] -; X86-SSE-NEXT: # xmm0 = xmm0[1,1,2,3] -; X86-SSE-NEXT: movss %xmm0, 4(%eax) # encoding: [0xf3,0x0f,0x11,0x40,0x04] +; X86-SSE-NEXT: movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00] ; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX1-LABEL: 
test_mm_storel_pi2: @@ -2942,9 +2895,7 @@ define void @test_mm_storel_pi2(x86_mmx *%a0, <4 x float> %a1) nounwind { ; ; X64-SSE-LABEL: test_mm_storel_pi2: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x29,0x44,0x24,0xe8] -; X64-SSE-NEXT: movq -{{[0-9]+}}(%rsp), %rax # encoding: [0x48,0x8b,0x44,0x24,0xe8] -; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] +; X64-SSE-NEXT: movlps %xmm0, (%rdi) # encoding: [0x0f,0x13,0x07] ; X64-SSE-NEXT: retq # encoding: [0xc3] ; ; X64-AVX1-LABEL: test_mm_storel_pi2: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-sse1.ll b/llvm/test/CodeGen/X86/vector-shuffle-sse1.ll index eb0f0b043e24e..7a8c41519d654 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-sse1.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-sse1.ll @@ -230,15 +230,7 @@ define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) { define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) { ; SSE1-LABEL: insert_mem_lo_v4f32: ; SSE1: # %bb.0: -; SSE1-NEXT: movq (%rdi), %rax -; SSE1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) -; SSE1-NEXT: shrq $32, %rax -; SSE1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) -; SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3] -; SSE1-NEXT: movaps %xmm1, %xmm0 +; SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3] ; SSE1-NEXT: retq %a = load <2 x float>, <2 x float>* %ptr %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> @@ -249,14 +241,7 @@ define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) { define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) { ; SSE1-LABEL: insert_mem_hi_v4f32: ; SSE1: # %bb.0: -; SSE1-NEXT: movq (%rdi), %rax -; SSE1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) -; SSE1-NEXT: shrq $32, %rax -; SSE1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) -; 
SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE1-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; SSE1-NEXT: retq %a = load <2 x float>, <2 x float>* %ptr %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> From 2d63fbb7b1f7bbbd91f9f862946663a813e1780c Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 15 Jul 2019 03:24:35 +0000 Subject: [PATCH 072/451] [ValueTracking] Look through constant Int2Ptr/Ptr2Int expressions Summary: This is analogous to the int2ptr/ptr2int instruction handling introduced in D54956. Reviewers: fhahn, efriedma, spatel, nlopes, sanjoy, lebedev.ri Subscribers: hiraditya, bollu, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64708 llvm-svn: 366036 --- llvm/lib/Analysis/ValueTracking.cpp | 9 +++++++++ llvm/test/Transforms/FunctionAttrs/nonnull.ll | 10 ++++++++++ 2 files changed, 19 insertions(+) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index ad8034b2d7bc5..49a328bbc9ba3 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1998,6 +1998,15 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { // Must be non-zero due to null test above. return true; + if (auto *CE = dyn_cast(C)) { + // See the comment for IntToPtr/PtrToInt instructions below. + if (CE->getOpcode() == Instruction::IntToPtr || + CE->getOpcode() == Instruction::PtrToInt) + if (Q.DL.getTypeSizeInBits(CE->getOperand(0)->getType()) <= + Q.DL.getTypeSizeInBits(CE->getType())) + return isKnownNonZero(CE->getOperand(0), Depth, Q); + } + // For constant vectors, check that all elements are undefined or known // non-zero to determine that the whole vector is known non-zero. 
if (auto *VecTy = dyn_cast(C->getType())) { diff --git a/llvm/test/Transforms/FunctionAttrs/nonnull.ll b/llvm/test/Transforms/FunctionAttrs/nonnull.ll index 7029be9691d78..6c04e4907d8e8 100644 --- a/llvm/test/Transforms/FunctionAttrs/nonnull.ll +++ b/llvm/test/Transforms/FunctionAttrs/nonnull.ll @@ -237,4 +237,14 @@ define i32 addrspace(3)* @gep2(i32 addrspace(3)* %p) { ret i32 addrspace(3)* %q } +; CHECK: define internal nonnull i32* @f2() +define internal i32* @f2() { + ret i32* inttoptr (i64 4 to i32*) +} + +define i32* @f1() { + %c = call i32* @f2() + ret i32* %c +} + attributes #0 = { "null-pointer-is-valid"="true" } From 776ac79e88ddd630f2abf346f685a7ef53529502 Mon Sep 17 00:00:00 2001 From: Kang Zhang Date: Mon, 15 Jul 2019 03:55:10 +0000 Subject: [PATCH 073/451] [NFC][PowerPC] Add the test block-placement.mir llvm-svn: 366037 --- llvm/test/CodeGen/PowerPC/block-placement.mir | 217 ++++++++++++++++++ 1 file changed, 217 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/block-placement.mir diff --git a/llvm/test/CodeGen/PowerPC/block-placement.mir b/llvm/test/CodeGen/PowerPC/block-placement.mir new file mode 100644 index 0000000000000..3c6937cdc4a6d --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/block-placement.mir @@ -0,0 +1,217 @@ +# RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple powerpc64le-unknown-linux-gnu \ +# RUN: -run-pass=block-placement -o - %s | FileCheck %s +--- | + ; ModuleID = 'block-placement.ll' + source_filename = "block-placement.ll" + target datalayout = "e-m:e-i64:64-n32:64" + target triple = "powerpc64le-unknown-linux-gnu" + + %"class.xercesc_2_7::HashXMLCh" = type { %"class.xercesc_2_7::HashBase" } + %"class.xercesc_2_7::HashBase" = type { i32 (...)** } + + define dso_local zeroext i1 @_ZN11xercesc_2_79HashXMLCh6equalsEPKvS2_(%"class.xercesc_2_7::HashXMLCh"* nocapture readnone %this, i8* readonly %key1, i8* readonly %key2) unnamed_addr #0 { + entry: + %cmp.i = icmp eq i8* %key1, null + %cmp1.i = icmp eq i8* %key2, null + 
%or.cond.i = or i1 %cmp.i, %cmp1.i + br i1 %or.cond.i, label %if.then.i, label %while.cond.preheader.i + + while.cond.preheader.i: ; preds = %entry + %0 = bitcast i8* %key2 to i16* + %1 = bitcast i8* %key1 to i16* + %2 = load i16, i16* %1, align 2 + %3 = load i16, i16* %0, align 2 + %cmp926.i = icmp eq i16 %2, %3 + br i1 %cmp926.i, label %while.body.i.preheader, label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit + + while.body.i.preheader: ; preds = %while.cond.preheader.i + %scevgep = getelementptr i8, i8* %key2, i64 2 + %scevgep4 = getelementptr i8, i8* %key1, i64 2 + br label %while.body.i + + if.then.i: ; preds = %entry + br i1 %cmp.i, label %lor.lhs.false3.i, label %land.lhs.true.i + + land.lhs.true.i: ; preds = %if.then.i + %4 = bitcast i8* %key1 to i16* + %5 = load i16, i16* %4, align 2 + %tobool.i = icmp eq i16 %5, 0 + br i1 %tobool.i, label %lor.lhs.false3.i, label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit + + lor.lhs.false3.i: ; preds = %land.lhs.true.i, %if.then.i + br i1 %cmp1.i, label %if.else.i, label %land.lhs.true5.i + + land.lhs.true5.i: ; preds = %lor.lhs.false3.i + %6 = bitcast i8* %key2 to i16* + %7 = load i16, i16* %6, align 2 + %tobool6.i = icmp eq i16 %7, 0 + br i1 %tobool6.i, label %if.else.i, label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit + + if.else.i: ; preds = %land.lhs.true5.i, %lor.lhs.false3.i + br label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit + + while.body.i: ; preds = %while.body.i.preheader, %if.end12.i + %lsr.iv5 = phi i8* [ %scevgep4, %while.body.i.preheader ], [ %scevgep6, %if.end12.i ] + %lsr.iv = phi i8* [ %scevgep, %while.body.i.preheader ], [ %scevgep2, %if.end12.i ] + %8 = phi i16 [ %15, %if.end12.i ], [ %2, %while.body.i.preheader ] + %9 = phi i8* [ %key1, %while.body.i.preheader ], [ %13, %if.end12.i ] + %10 = phi i8* [ %key2, %while.body.i.preheader ], [ %11, %if.end12.i ] + %11 = getelementptr i8, i8* %10, i64 2 + %12 = bitcast i8* %11 to i16* + %13 = getelementptr i8, i8* %9, i64 2 + %14 = bitcast 
i8* %13 to i16* + %tobool10.i = icmp eq i16 %8, 0 + br i1 %tobool10.i, label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit, label %if.end12.i + + if.end12.i: ; preds = %while.body.i + %15 = load i16, i16* %14, align 2 + %16 = load i16, i16* %12, align 2 + %cmp9.i = icmp eq i16 %15, %16 + %scevgep2 = getelementptr i8, i8* %lsr.iv, i64 2 + %scevgep6 = getelementptr i8, i8* %lsr.iv5, i64 2 + br i1 %cmp9.i, label %while.body.i, label %_ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit + + _ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit: ; preds = %if.end12.i, %while.body.i, %if.else.i, %land.lhs.true5.i, %land.lhs.true.i, %while.cond.preheader.i + %retval.0.i1 = phi i64 [ 1, %if.else.i ], [ 0, %land.lhs.true.i ], [ 0, %land.lhs.true5.i ], [ 0, %while.cond.preheader.i ], [ 0, %if.end12.i ], [ 1, %while.body.i ] + %backToBool = trunc i64 %retval.0.i1 to i1 + ret i1 %backToBool + } + + attributes #0 = { "target-cpu"="pwr9" } + +... +--- +name: _ZN11xercesc_2_79HashXMLCh6equalsEPKvS2_ +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$x4', virtual-reg: '' } + - { reg: '$x5', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.5(0x40000000), %bb.1(0x40000000) + liveins: $x4, $x5 + + renamable $cr1 = CMPDI renamable $x4, 0 + renamable $cr0 = CMPDI renamable $x5, 0 + renamable $cr5lt = CROR renamable $cr1eq, renamable $cr0eq + 
BC killed renamable $cr5lt, %bb.5 + + bb.1.while.cond.preheader.i: + successors: %bb.2(0x40000000), %bb.11(0x40000000) + liveins: $x4, $x5 + + renamable $r8 = LHZ 0, renamable $x4 :: (load 2 from %ir.1) + renamable $r6 = LHZ 0, renamable $x5 :: (load 2 from %ir.0) + renamable $x3 = LI8 0 + renamable $cr0 = CMPLW renamable $r8, killed renamable $r6 + BCC 68, killed renamable $cr0, %bb.11 + + bb.2.while.body.i.preheader: + successors: %bb.3(0x80000000) + liveins: $r8, $x3, $x4, $x5 + + renamable $x6 = ADDI8 renamable $x5, 2 + renamable $x7 = ADDI8 renamable $x4, 2 + + bb.3.while.body.i: + successors: %bb.4(0x04000000), %bb.10(0x7c000000) + liveins: $r8, $x3, $x4, $x5, $x6, $x7 + + dead renamable $r8 = ANDIo killed renamable $r8, 65535, implicit-def $cr0 + BCC 68, killed renamable $cr0, %bb.10 + + bb.4: + renamable $x3 = LI8 1 + BLR8 implicit $lr8, implicit $rm, implicit killed $x3 + + bb.5.if.then.i: + successors: %bb.7(0x30000000), %bb.6(0x50000000) + liveins: $cr0, $cr1, $x4, $x5 + + BC killed renamable $cr1eq, %bb.7 + + bb.6.land.lhs.true.i: + successors: %bb.7(0x30000000), %bb.11(0x50000000) + liveins: $cr0, $x4, $x5 + + renamable $r4 = LHZ 0, killed renamable $x4 :: (load 2 from %ir.4) + renamable $x3 = LI8 0 + renamable $cr1 = CMPLWI killed renamable $r4, 0 + BCC 68, killed renamable $cr1, %bb.11 + + bb.7.lor.lhs.false3.i: + successors: %bb.9(0x30000000), %bb.8(0x50000000) + liveins: $cr0, $x5 + + BC killed renamable $cr0eq, %bb.9 + + bb.8.land.lhs.true5.i: + successors: %bb.9(0x80000000) + liveins: $x5 + + renamable $r4 = LHZ 0, killed renamable $x5 :: (load 2 from %ir.6) + renamable $x3 = LI8 0 + renamable $cr0 = CMPLWI killed renamable $r4, 0 + BCCLR 68, killed renamable $cr0, implicit $lr, implicit $rm, implicit killed $x3 + + bb.9.if.else.i: + renamable $x3 = LI8 1 + BLR8 implicit $lr8, implicit $rm, implicit killed $x3 + + bb.10.if.end12.i: + successors: %bb.3(0x7c000000), %bb.11(0x04000000) + liveins: $x3, $x4, $x5, $x6, $x7 + + renamable $x5 = ADDI8 
killed renamable $x5, 2 + renamable $x4 = ADDI8 killed renamable $x4, 2 + renamable $r8 = LHZ 0, renamable $x4 :: (load 2 from %ir.14) + renamable $r9 = LHZ 0, renamable $x5 :: (load 2 from %ir.12) + renamable $x6 = ADDI8 killed renamable $x6, 2 + renamable $x7 = ADDI8 killed renamable $x7, 2 + renamable $cr0 = CMPLW renamable $r8, killed renamable $r9 + BCC 76, killed renamable $cr0, %bb.3 + + bb.11._ZN11xercesc_2_79XMLString6equalsEPKtS2_.exit: + liveins: $x3 + + BLR8 implicit $lr8, implicit $rm, implicit killed $x3 + + ; CHECK: bb.5.if.else.i: + ; CHECK: B %bb.11 + + ; CHECK: bb.11: + ; CHECK: renamable $x3 = LI8 1 + ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm, implicit killed $x3 +... From c3805d761ea8be6f35212a29aec09f03f1be5612 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 15 Jul 2019 04:51:34 +0000 Subject: [PATCH 074/451] [BPF] add unit tests for preserve_{array,union,struct}_access_index intrinsics This is a followup patch for https://reviews.llvm.org/D61810/new/, which adds new intrinsics preserve_{array,union,struct}_access_index. Currently, only BPF backend utilizes preserve_{array,union,struct}_access_index intrinsics, so all tests are compiled with BPF target. https://reviews.llvm.org/D61524 already added some tests for these intrinsics, but some of them pretty complex. This patch added a few unit test cases focusing on individual intrinsic functions. Also made a few clarification on language reference for these intrinsics. 
Differential Revision: https://reviews.llvm.org/D64606 llvm-svn: 366038 --- llvm/docs/LangRef.rst | 12 ++- llvm/test/CodeGen/BPF/CORE/intrinsic-array.ll | 80 +++++++++++++++++++ .../test/CodeGen/BPF/CORE/intrinsic-struct.ll | 77 ++++++++++++++++++ llvm/test/CodeGen/BPF/CORE/intrinsic-union.ll | 76 ++++++++++++++++++ 4 files changed, 242 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-array.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-struct.ll create mode 100644 llvm/test/CodeGen/BPF/CORE/intrinsic-union.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 2ccc8bd591614..18f760d9b0500 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -17332,7 +17332,7 @@ Syntax: """"""" :: - declare + declare @llvm.preserve.array.access.index.p0s_union.anons.p0a10s_union.anons( base, i32 dim, i32 index) @@ -17342,7 +17342,9 @@ Overview: The '``llvm.preserve.array.access.index``' intrinsic returns the getelementptr address based on array base ``base``, array dimension ``dim`` and the last access index ``index`` -into the array. +into the array. The return type ``ret_type`` is a pointer type to the array element. +The array ``dim`` and ``index`` are preserved which is more robust than +getelementptr instruction which may be subject to compiler transformation. Arguments: """""""""" @@ -17375,6 +17377,8 @@ The '``llvm.preserve.union.access.index``' intrinsic carries the debuginfo field ``di_index`` and returns the ``base`` address. The ``llvm.preserve.access.index`` type of metadata is attached to this call instruction to provide union debuginfo type. +The metadata is a ``DICompositeType`` representing the debuginfo version of ``type``. +The return type ``type`` is the same as the ``base`` type. 
Arguments: """""""""" @@ -17393,7 +17397,7 @@ Syntax: """"""" :: - declare + declare @llvm.preserve.struct.access.index.p0i8.p0s_struct.anon.0s( base, i32 gep_index, i32 di_index) @@ -17405,6 +17409,8 @@ The '``llvm.preserve.struct.access.index``' intrinsic returns the getelementptr based on struct base ``base`` and IR struct member index ``gep_index``. The ``llvm.preserve.access.index`` type of metadata is attached to this call instruction to provide struct debuginfo type. +The metadata is a ``DICompositeType`` representing the debuginfo version of ``type``. +The return type ``ret_type`` is a pointer type to the structure member. Arguments: """""""""" diff --git a/llvm/test/CodeGen/BPF/CORE/intrinsic-array.ll b/llvm/test/CodeGen/BPF/CORE/intrinsic-array.ll new file mode 100644 index 0000000000000..adbcb9fd75ad2 --- /dev/null +++ b/llvm/test/CodeGen/BPF/CORE/intrinsic-array.ll @@ -0,0 +1,80 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck %s +; +; Source code: +; #define _(x) (__builtin_preserve_access_index(x)) +; struct s { int a; int b; }; +; int get_value(const void *addr); +; int test(struct s *arg) { return get_value(_(&arg[2].b)); } +; Compiler flag to generate IR: +; clang -target bpf -S -O2 -g -emit-llvm test.c + +%struct.s = type { i32, i32 } + +; Function Attrs: nounwind +define dso_local i32 @test(%struct.s* %arg) local_unnamed_addr #0 !dbg !7 { +entry: + call void @llvm.dbg.value(metadata %struct.s* %arg, metadata !17, metadata !DIExpression()), !dbg !18 + %0 = tail call %struct.s* @llvm.preserve.array.access.index.p0s_struct.ss.p0s_struct.ss(%struct.s* %arg, i32 0, i32 2), !dbg !19 + %1 = tail call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.ss(%struct.s* %0, i32 1, i32 1), !dbg !19, !llvm.preserve.access.index !12 + %2 = bitcast i32* %1 to i8*, !dbg !19 + %call = tail call i32 @get_value(i8* %2) #4, !dbg !20 + ret i32 %call, !dbg !21 +} +; CHECK-LABEL: test +; CHECK: [[RELOC:.Ltmp[0-9]+]] +; CHECK: r2 = 20 +; CHECK: r1 += r2 
+; CHECK: call get_value +; CHECK: exit +; +; CHECK: .section .BTF.ext,"",@progbits +; CHECK: .long 12 # OffsetReloc +; CHECK-NEXT: .long 20 # Offset reloc section string offset=20 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long [[RELOC]] +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 26 + +declare dso_local i32 @get_value(i8*) local_unnamed_addr #1 + +; Function Attrs: nounwind readnone +declare %struct.s* @llvm.preserve.array.access.index.p0s_struct.ss.p0s_struct.ss(%struct.s*, i32 immarg, i32 immarg) #2 + +; Function Attrs: nounwind readnone +declare i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.ss(%struct.s*, i32 immarg, i32 immarg) #2 + +; Function Attrs: nounwind readnone speculatable +declare void @llvm.dbg.value(metadata, metadata, metadata) #3 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind readnone speculatable } +attributes #4 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 9.0.0 (trunk 365789)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, 
nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/work/tests/core") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 9.0.0 (trunk 365789)"} +!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 4, type: !8, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !16) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !11} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64) +!12 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s", file: !1, line: 2, size: 64, elements: !13) +!13 = !{!14, !15} +!14 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !12, file: !1, line: 2, baseType: !10, size: 32) +!15 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !12, file: !1, line: 2, baseType: !10, size: 32, offset: 32) +!16 = !{!17} +!17 = !DILocalVariable(name: "arg", arg: 1, scope: !7, file: !1, line: 4, type: !11) +!18 = !DILocation(line: 0, scope: !7) +!19 = !DILocation(line: 4, column: 44, scope: !7) +!20 = !DILocation(line: 4, column: 34, scope: !7) +!21 = !DILocation(line: 4, column: 27, scope: !7) diff --git a/llvm/test/CodeGen/BPF/CORE/intrinsic-struct.ll b/llvm/test/CodeGen/BPF/CORE/intrinsic-struct.ll new file mode 100644 index 0000000000000..c07c16f522847 --- /dev/null +++ b/llvm/test/CodeGen/BPF/CORE/intrinsic-struct.ll @@ -0,0 +1,77 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck %s +; +; Source code: +; struct s { int a; int b; }; +; #define _(x) (__builtin_preserve_access_index(x)) +; int get_value(const void *addr); +; int test(struct s *arg) { return get_value(_(&arg->b)); } +; Compiler flag to generate IR: +; clang -target bpf -S -O2 -g -emit-llvm test.c + +%struct.s = type { i32, i32 } + +; Function Attrs: nounwind +define 
dso_local i32 @test(%struct.s* %arg) local_unnamed_addr #0 !dbg !7 { +entry: + call void @llvm.dbg.value(metadata %struct.s* %arg, metadata !17, metadata !DIExpression()), !dbg !18 + %0 = tail call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.ss(%struct.s* %arg, i32 1, i32 1), !dbg !19, !llvm.preserve.access.index !12 + %1 = bitcast i32* %0 to i8*, !dbg !19 + %call = tail call i32 @get_value(i8* %1) #4, !dbg !20 + ret i32 %call, !dbg !21 +} + +; CHECK-LABEL: test +; CHECK: [[RELOC:.Ltmp[0-9]+]] +; CHECK: r2 = 4 +; CHECK: r1 += r2 +; CHECK: call get_value +; CHECK: exit +; +; CHECK: .section .BTF.ext,"",@progbits +; CHECK: .long 12 # OffsetReloc +; CHECK-NEXT: .long 20 # Offset reloc section string offset=20 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long [[RELOC]] +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 26 + +declare dso_local i32 @get_value(i8*) local_unnamed_addr #1 + +; Function Attrs: nounwind readnone +declare i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.ss(%struct.s*, i32 immarg, i32 immarg) #2 + +; Function Attrs: nounwind readnone speculatable +declare void @llvm.dbg.value(metadata, metadata, metadata) #3 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } 
+attributes #2 = { nounwind readnone } +attributes #3 = { nounwind readnone speculatable } +attributes #4 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 9.0.0 (trunk 365789)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/work/tests/core") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 9.0.0 (trunk 365789)"} +!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 4, type: !8, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !16) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !11} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64) +!12 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s", file: !1, line: 1, size: 64, elements: !13) +!13 = !{!14, !15} +!14 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !12, file: !1, line: 1, baseType: !10, size: 32) +!15 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !12, file: !1, line: 1, baseType: !10, size: 32, offset: 32) +!16 = !{!17} +!17 = !DILocalVariable(name: "arg", arg: 1, scope: !7, file: !1, line: 4, type: !11) +!18 = !DILocation(line: 0, scope: !7) +!19 = !DILocation(line: 4, column: 44, scope: !7) +!20 = !DILocation(line: 4, column: 34, scope: !7) +!21 = !DILocation(line: 4, column: 27, scope: !7) diff --git a/llvm/test/CodeGen/BPF/CORE/intrinsic-union.ll b/llvm/test/CodeGen/BPF/CORE/intrinsic-union.ll new file mode 100644 index 0000000000000..19b459533be6d --- /dev/null +++ b/llvm/test/CodeGen/BPF/CORE/intrinsic-union.ll @@ -0,0 +1,76 @@ +; RUN: llc -march=bpfel 
-filetype=asm -o - %s | FileCheck %s +; +; Source code: +; union u { int a; int b; }; +; #define _(x) (__builtin_preserve_access_index(x)) +; int get_value(const void *addr); +; int test(union u *arg) { return get_value(_(&arg->b)); } +; Compiler flag to generate IR: +; clang -target bpf -S -O2 -g -emit-llvm test.c + +%union.u = type { i32 } + +; Function Attrs: nounwind +define dso_local i32 @test(%union.u* %arg) local_unnamed_addr #0 !dbg !7 { +entry: + call void @llvm.dbg.value(metadata %union.u* %arg, metadata !17, metadata !DIExpression()), !dbg !18 + %0 = tail call %union.u* @llvm.preserve.union.access.index.p0s_union.us.p0s_union.us(%union.u* %arg, i32 1), !dbg !19, !llvm.preserve.access.index !12 + %1 = bitcast %union.u* %0 to i8*, !dbg !19 + %call = tail call i32 @get_value(i8* %1) #4, !dbg !20 + ret i32 %call, !dbg !21 +} +; CHECK-LABEL: test +; CHECK: [[RELOC:.Ltmp[0-9]+]] +; CHECK: r2 = 0 +; CHECK: r1 += r2 +; CHECK: call get_value +; CHECK: exit + +; CHECK: .section .BTF.ext,"",@progbits +; CHECK: .long 12 # OffsetReloc +; CHECK-NEXT: .long 20 # Offset reloc section string offset=20 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long [[RELOC]] +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 26 + +declare dso_local i32 @get_value(i8*) local_unnamed_addr #1 + +; Function Attrs: nounwind readnone +declare %union.u* @llvm.preserve.union.access.index.p0s_union.us.p0s_union.us(%union.u*, i32 immarg) #2 + +; Function Attrs: nounwind readnone speculatable +declare void @llvm.dbg.value(metadata, metadata, metadata) #3 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } 
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind readnone speculatable } +attributes #4 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 9.0.0 (trunk 365789)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/work/tests/core") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 9.0.0 (trunk 365789)"} +!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 4, type: !8, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !16) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !11} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64) +!12 = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u", file: !1, line: 1, size: 32, elements: !13) +!13 = !{!14, !15} +!14 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !12, file: !1, line: 1, baseType: !10, size: 32) +!15 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !12, file: !1, line: 1, baseType: !10, size: 32) +!16 = !{!17} +!17 = !DILocalVariable(name: "arg", arg: 1, scope: !7, file: !1, line: 4, type: !11) +!18 = !DILocation(line: 0, scope: !7) +!19 = !DILocation(line: 4, column: 43, scope: !7) +!20 = 
!DILocation(line: 4, column: 33, scope: !7) +!21 = !DILocation(line: 4, column: 26, scope: !7) From 335f955dc4942d6956e759b8c2686c52914c36b6 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 15 Jul 2019 05:02:32 +0000 Subject: [PATCH 075/451] [PowerPC] Support fp128 libcalls On PowerPC, IEEE 754 quadruple-precision libcall names use "kf" instead of "tf". In libgcc, libgcc/config/rs6000/float128-sed converts TF names to KF names. This patch implements its 24 substitution rules. Reviewed By: hfinkel Differential Revision: https://reviews.llvm.org/D64282 llvm-svn: 366039 --- llvm/lib/CodeGen/TargetLoweringBase.cpp | 28 ++++ llvm/test/CodeGen/PowerPC/fp128-libcalls.ll | 164 ++++++++++++++++++++ 2 files changed, 192 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/fp128-libcalls.ll diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 1fd3d38255e51..7c135864766fe 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -123,6 +123,34 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC) setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C); + // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf". 
+ if (TT.getArch() == Triple::ppc || TT.isPPC64()) { + setLibcallName(RTLIB::ADD_F128, "__addkf3"); + setLibcallName(RTLIB::SUB_F128, "__subkf3"); + setLibcallName(RTLIB::MUL_F128, "__mulkf3"); + setLibcallName(RTLIB::DIV_F128, "__divkf3"); + setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2"); + setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2"); + setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2"); + setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2"); + setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi"); + setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi"); + setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi"); + setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi"); + setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf"); + setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf"); + setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf"); + setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf"); + setLibcallName(RTLIB::OEQ_F128, "__eqkf2"); + setLibcallName(RTLIB::UNE_F128, "__nekf2"); + setLibcallName(RTLIB::OGE_F128, "__gekf2"); + setLibcallName(RTLIB::OLT_F128, "__ltkf2"); + setLibcallName(RTLIB::OLE_F128, "__lekf2"); + setLibcallName(RTLIB::OGT_F128, "__gtkf2"); + setLibcallName(RTLIB::UO_F128, "__unordkf2"); + setLibcallName(RTLIB::O_F128, "__unordkf2"); + } + // A few names are different on particular architectures or environments. 
if (TT.isOSDarwin()) { // For f16/f32 conversions, Darwin uses the standard naming scheme, instead diff --git a/llvm/test/CodeGen/PowerPC/fp128-libcalls.ll b/llvm/test/CodeGen/PowerPC/fp128-libcalls.ll new file mode 100644 index 0000000000000..9d875c854e320 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fp128-libcalls.ll @@ -0,0 +1,164 @@ +; RUN: llc < %s -O2 -mtriple=powerpc-linux-musl | FileCheck %s +; RUN: llc < %s -O2 -mtriple=powerpc64-linux-musl | FileCheck %s +; RUN: llc < %s -O2 -mtriple=powerpc64le-linux-musl | FileCheck %s + +define fp128 @addkf3(fp128 %a, fp128 %b) { +; CHECK-LABEL: addkf3: +; CHECK: __addkf3 + %1 = fadd fp128 %a, %b + ret fp128 %1 +} + +define fp128 @subkf3(fp128 %a, fp128 %b) { +; CHECK-LABEL: subkf3: +; CHECK: __subkf3 + %1 = fsub fp128 %a, %b + ret fp128 %1 +} + +define fp128 @mulkf3(fp128 %a, fp128 %b) { +; CHECK-LABEL: mulkf3: +; CHECK: __mulkf3 + %1 = fmul fp128 %a, %b + ret fp128 %1 +} + +define fp128 @divkf3(fp128 %a, fp128 %b) { +; CHECK-LABEL: divkf3: +; CHECK: __divkf3 + %1 = fdiv fp128 %a, %b + ret fp128 %1 +} + +define fp128 @extendsfkf2(float %a) { +; CHECK-LABEL: extendsfkf2: +; CHECK: __extendsfkf2 + %1 = fpext float %a to fp128 + ret fp128 %1 +} + +define fp128 @extenddfkf2(double %a) { +; CHECK-LABEL: extenddfkf2: +; CHECK: __extenddfkf2 + %1 = fpext double %a to fp128 + ret fp128 %1 +} + +define float @trunckfsf2(fp128 %a) { +; CHECK-LABEL: trunckfsf2: +; CHECK: __trunckfsf2 + %1 = fptrunc fp128 %a to float + ret float %1 +} + +define double @trunckfdf2(fp128 %a) { +; CHECK-LABEL: trunckfdf2: +; CHECK: __trunckfdf2 + %1 = fptrunc fp128 %a to double + ret double %1 +} + +define i32 @fixkfsi(fp128 %a) { +; CHECK-LABEL: fixkfsi: +; CHECK: __fixkfsi + %1 = fptosi fp128 %a to i32 + ret i32 %1 +} + +define i64 @fixkfdi(fp128 %a) { +; CHECK-LABEL: fixkfdi: +; CHECK: __fixkfdi + %1 = fptosi fp128 %a to i64 + ret i64 %1 +} + +define i32 @fixunskfsi(fp128 %a) { +; CHECK-LABEL: fixunskfsi: +; CHECK: __fixunskfsi + %1 = fptoui 
fp128 %a to i32 + ret i32 %1 +} + +define i64 @fixunskfdi(fp128 %a) { +; CHECK-LABEL: fixunskfdi: +; CHECK: __fixunskfdi + %1 = fptoui fp128 %a to i64 + ret i64 %1 +} + +define fp128 @floatsikf(i32 %a) { +; CHECK-LABEL: floatsikf: +; CHECK: __floatsikf + %1 = sitofp i32 %a to fp128 + ret fp128 %1 +} + +define fp128 @floatdikf(i64 %a) { +; CHECK-LABEL: floatdikf: +; CHECK: __floatdikf + %1 = sitofp i64 %a to fp128 + ret fp128 %1 +} + +define fp128 @floatunsikf(i32 %a) { +; CHECK-LABEL: floatunsikf: +; CHECK: __floatunsikf + %1 = uitofp i32 %a to fp128 + ret fp128 %1 +} + +define fp128 @floatundikf(i64 %a) { +; CHECK-LABEL: floatundikf: +; CHECK: __floatundikf + %1 = uitofp i64 %a to fp128 + ret fp128 %1 +} + +define i1 @test_eqkf2(fp128 %a, fp128 %b) { +; CHECK-LABEL: test_eqkf2: +; CHECK: __eqkf2 + %1 = fcmp oeq fp128 %a, %b + ret i1 %1 +} + +define i1 @test_nekf2(fp128 %a, fp128 %b) { +; CHECK-LABEL: test_nekf2: +; CHECK: __nekf2 + %1 = fcmp une fp128 %a, %b + ret i1 %1 +} + +define i1 @test_gekf2(fp128 %a, fp128 %b) { +; CHECK-LABEL: test_gekf2: +; CHECK: __gekf2 + %1 = fcmp oge fp128 %a, %b + ret i1 %1 +} + +define i1 @test_ltkf2(fp128 %a, fp128 %b) { +; CHECK-LABEL: test_ltkf2: +; CHECK: __ltkf2 + %1 = fcmp olt fp128 %a, %b + ret i1 %1 +} + +define i1 @test_lekf2(fp128 %a, fp128 %b) { +; CHECK-LABEL: test_lekf2: +; CHECK: __lekf2 + %1 = fcmp ole fp128 %a, %b + ret i1 %1 +} + +define i1 @test_gtkf2(fp128 %a, fp128 %b) { +; CHECK-LABEL: test_gtkf2: +; CHECK: __gtkf2 + %1 = fcmp ogt fp128 %a, %b + ret i1 %1 +} + +define i1 @test_unordkf2(fp128 %a, fp128 %b) { +; CHECK-LABEL: test_unordkf2: +; CHECK: __unordkf2 + %1 = fcmp uno fp128 %a, %b + ret i1 %1 +} From f1ee04c42a431d88cd66b884570ca7928cffd0cd Mon Sep 17 00:00:00 2001 From: Serguei Katkov Date: Mon, 15 Jul 2019 05:51:10 +0000 Subject: [PATCH 076/451] [LoopInfo] Introduce getUniqueNonLatchExitBlocks utility function Extract the code from LoopUnrollRuntime into utility function to re-use it in D63923. 
Reviewers: reames, mkuper Reviewed By: reames Subscribers: fhahn, hiraditya, zzheng, dmgreen, llvm-commits Differential Revision: https://reviews.llvm.org/D64548 llvm-svn: 366040 --- llvm/include/llvm/Analysis/LoopInfo.h | 8 ++++ llvm/include/llvm/Analysis/LoopInfoImpl.h | 32 ++++++++++--- .../Transforms/Utils/LoopUnrollRuntime.cpp | 20 +++----- llvm/unittests/Analysis/LoopInfoTest.cpp | 46 +++++++++++++++++++ 4 files changed, 87 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index 98b31295ada08..f8f0a0c339d58 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -274,6 +274,14 @@ template class LoopBase { /// dedicated exits. void getUniqueExitBlocks(SmallVectorImpl &ExitBlocks) const; + /// Return all unique successor blocks of this loop except successors from + /// Latch block are not considered. If the exit comes from Latch has also + /// non Latch predecessor in a loop it will be added to ExitBlocks. + /// These are the blocks _outside of the current loop_ which are branched to. + /// This assumes that loop exits are in canonical form, i.e. all exits are + /// dedicated exits. + void getUniqueNonLatchExitBlocks(SmallVectorImpl &ExitBlocks) const; + /// If getUniqueExitBlocks would return exactly one block, return that block. /// Otherwise return null. BlockT *getUniqueExitBlock() const; diff --git a/llvm/include/llvm/Analysis/LoopInfoImpl.h b/llvm/include/llvm/Analysis/LoopInfoImpl.h index 6ff483770c4bc..c9f690dac65eb 100644 --- a/llvm/include/llvm/Analysis/LoopInfoImpl.h +++ b/llvm/include/llvm/Analysis/LoopInfoImpl.h @@ -95,21 +95,25 @@ bool LoopBase::hasDedicatedExits() const { return true; } -template -void LoopBase::getUniqueExitBlocks( - SmallVectorImpl &ExitBlocks) const { +// Helper function to get unique loop exits. Pred is a predicate pointing to +// BasicBlocks in a loop which should be considered to find loop exits. 
+template +void getUniqueExitBlocksHelper(const LoopT *L, + SmallVectorImpl &ExitBlocks, + PredicateT Pred) { typedef GraphTraits BlockTraits; typedef GraphTraits> InvBlockTraits; - assert(hasDedicatedExits() && + assert(L->hasDedicatedExits() && "getUniqueExitBlocks assumes the loop has canonical form exits!"); SmallVector SwitchExitBlocks; - for (BlockT *Block : this->blocks()) { + auto Filtered = make_filter_range(L->blocks(), Pred); + for (BlockT *Block : Filtered) { SwitchExitBlocks.clear(); for (BlockT *Successor : children(Block)) { // If block is inside the loop then it is not an exit block. - if (contains(Successor)) + if (L->contains(Successor)) continue; BlockT *FirstPred = *InvBlockTraits::child_begin(Successor); @@ -140,6 +144,22 @@ void LoopBase::getUniqueExitBlocks( } } +template +void LoopBase::getUniqueExitBlocks( + SmallVectorImpl &ExitBlocks) const { + getUniqueExitBlocksHelper(this, ExitBlocks, + [](const BlockT *BB) { return true; }); +} + +template +void LoopBase::getUniqueNonLatchExitBlocks( + SmallVectorImpl &ExitBlocks) const { + const BlockT *Latch = getLoopLatch(); + assert(Latch && "Latch block must exists"); + getUniqueExitBlocksHelper(this, ExitBlocks, + [Latch](const BlockT *BB) { return BB != Latch; }); +} + template BlockT *LoopBase::getUniqueExitBlock() const { SmallVector UniqueExitBlocks; diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index b39d78dde0013..d22fdb4d52dc1 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -424,10 +424,9 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, /// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits /// is populated with all the loop exit blocks other than the LatchExit block. 
-static bool -canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl &OtherExits, - BasicBlock *LatchExit, bool PreserveLCSSA, - bool UseEpilogRemainder) { +static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit, + bool PreserveLCSSA, + bool UseEpilogRemainder) { // We currently have some correctness constrains in unrolling a multi-exit // loop. Check for these below. @@ -435,11 +434,6 @@ canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl &OtherExits, // We rely on LCSSA form being preserved when the exit blocks are transformed. if (!PreserveLCSSA) return false; - SmallVector Exits; - L->getUniqueExitBlocks(Exits); - for (auto *BB : Exits) - if (BB != LatchExit) - OtherExits.push_back(BB); // TODO: Support multiple exiting blocks jumping to the `LatchExit` when // UnrollRuntimeMultiExit is true. This will need updating the logic in @@ -469,9 +463,8 @@ static bool canProfitablyUnrollMultiExitLoop( bool PreserveLCSSA, bool UseEpilogRemainder) { #if !defined(NDEBUG) - SmallVector OtherExitsDummyCheck; - assert(canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit, - PreserveLCSSA, UseEpilogRemainder) && + assert(canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA, + UseEpilogRemainder) && "Should be safe to unroll before checking profitability!"); #endif @@ -595,8 +588,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // These are exit blocks other than the target of the latch exiting block. 
SmallVector OtherExits; + L->getUniqueNonLatchExitBlocks(OtherExits); bool isMultiExitUnrollingEnabled = - canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, + canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA, UseEpilogRemainder) && canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder); diff --git a/llvm/unittests/Analysis/LoopInfoTest.cpp b/llvm/unittests/Analysis/LoopInfoTest.cpp index 005e1dc405b75..953a72aee8e36 100644 --- a/llvm/unittests/Analysis/LoopInfoTest.cpp +++ b/llvm/unittests/Analysis/LoopInfoTest.cpp @@ -1110,3 +1110,49 @@ TEST(LoopInfoTest, AuxiliaryIV) { L->isAuxiliaryInductionVariable(Instruction_mulopcode, SE)); }); } + +// Examine getUniqueExitBlocks/getUniqueNonLatchExitBlocks functions. +TEST(LoopInfoTest, LoopUniqueExitBlocks) { + const char *ModuleStr = + "target datalayout = \"e-m:o-i64:64-f80:128-n8:16:32:64-S128\"\n" + "define void @foo(i32 %n, i1 %cond) {\n" + "entry:\n" + " br label %for.cond\n" + "for.cond:\n" + " %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]\n" + " %cmp = icmp slt i32 %i.0, %n\n" + " br i1 %cond, label %for.inc, label %for.end1\n" + "for.inc:\n" + " %inc = add nsw i32 %i.0, 1\n" + " br i1 %cmp, label %for.cond, label %for.end2, !llvm.loop !0\n" + "for.end1:\n" + " br label %for.end\n" + "for.end2:\n" + " br label %for.end\n" + "for.end:\n" + " ret void\n" + "}\n" + "!0 = distinct !{!0, !1}\n" + "!1 = !{!\"llvm.loop.distribute.enable\", i1 true}\n"; + + // Parse the module. + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleStr); + + runWithLoopInfo(*M, "foo", [&](Function &F, LoopInfo &LI) { + Function::iterator FI = F.begin(); + // First basic block is entry - skip it. + BasicBlock *Header = &*(++FI); + assert(Header->getName() == "for.cond"); + Loop *L = LI.getLoopFor(Header); + + SmallVector Exits; + // This loop has 2 unique exits. 
+ L->getUniqueExitBlocks(Exits); + EXPECT_TRUE(Exits.size() == 2); + // And one unique non latch exit. + Exits.clear(); + L->getUniqueNonLatchExitBlocks(Exits); + EXPECT_TRUE(Exits.size() == 1); + }); +} From 796ed134ccc3b4d1a27a0c30c17e43db1ced6a89 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 15 Jul 2019 06:35:28 +0000 Subject: [PATCH 077/451] Remove set but unused variable. llvm-svn: 366041 --- llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 9a93e84d80e85..a82047473370a 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -250,7 +250,6 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *FuncInfo = MF.getInfo(); - bool AllSGPRSpilledToVGPRs = false; const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() && EnableSpillVGPRToAGPR; @@ -262,8 +261,6 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { // handled as SpilledToReg in regular PrologEpilogInserter. if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) || SpillVGPRToAGPR) { - AllSGPRSpilledToVGPRs = true; - // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs // are spilled to VGPRs, in which case we can eliminate the stack usage. 
// @@ -299,8 +296,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr); (void)Spilled; assert(Spilled && "failed to spill SGPR to VGPR when allocated"); - } else - AllSGPRSpilledToVGPRs = false; + } } } From 45c43e7d04d3582b6e48c909252e99342800e1c8 Mon Sep 17 00:00:00 2001 From: Serguei Katkov Date: Mon, 15 Jul 2019 06:42:39 +0000 Subject: [PATCH 078/451] [LoopUtils] Extend the scope of getLoopEstimatedTripCount With this patch the getLoopEstimatedTripCount function will accept also the loops where there are more than one exit but all exits except latch block should ends up with a call to deopt. This side exits should not impact the estimated trip count. Reviewers: reames, mkuper, danielcdh Reviewed By: reames Subscribers: fhahn, lebedev.ri, hiraditya, llvm-commits Differential Revision: https://reviews.llvm.org/D64553 llvm-svn: 366042 --- llvm/lib/Transforms/Utils/LoopUtils.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 29ae77c385dc2..ec226e65f6502 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -621,20 +621,28 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr, } Optional llvm::getLoopEstimatedTripCount(Loop *L) { - // Only support loops with a unique exiting block, and a latch. - if (!L->getExitingBlock()) - return None; + // Support loops with an exiting latch and other existing exists only + // deoptimize. // Get the branch weights for the loop's backedge. 
- BranchInst *LatchBR = - dyn_cast(L->getLoopLatch()->getTerminator()); - if (!LatchBR || LatchBR->getNumSuccessors() != 2) + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return None; + BranchInst *LatchBR = dyn_cast(Latch->getTerminator()); + if (!LatchBR || LatchBR->getNumSuccessors() != 2 || !L->isLoopExiting(Latch)) return None; assert((LatchBR->getSuccessor(0) == L->getHeader() || LatchBR->getSuccessor(1) == L->getHeader()) && "At least one edge out of the latch must go to the header"); + SmallVector ExitBlocks; + L->getUniqueNonLatchExitBlocks(ExitBlocks); + if (any_of(ExitBlocks, [](const BasicBlock *EB) { + return !EB->getTerminatingDeoptimizeCall(); + })) + return None; + // To estimate the number of times the loop body was executed, we want to // know the number of times the backedge was taken, vs. the number of times // we exited the loop. From 54869ec907f322e9fed8942636560c27f482caf1 Mon Sep 17 00:00:00 2001 From: Hideto Ueno Date: Mon, 15 Jul 2019 06:49:04 +0000 Subject: [PATCH 079/451] [Attributor] Deduce "nonnull" attribute Summary: Porting nonnull attribute to attributor. 
Reviewers: jdoerfert, sstefan1 Reviewed By: jdoerfert Subscribers: xbolva00, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D63604 llvm-svn: 366043 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 32 ++ llvm/lib/Transforms/IPO/Attributor.cpp | 284 ++++++++++++++++ llvm/test/Transforms/FunctionAttrs/nonnull.ll | 321 ++++++++++++++---- llvm/test/Transforms/FunctionAttrs/nosync.ll | 2 +- 4 files changed, 573 insertions(+), 66 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 9e8a680d82900..3f6a2b6a24e9d 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -263,6 +263,14 @@ struct Attributor { Function &F, InformationCache &InfoCache, DenseSet *Whitelist = nullptr); + /// Check \p Pred on all function call sites. + /// + /// This method will evaluate \p Pred on call sites and return + /// true if \p Pred holds in every call sites. However, this is only possible + /// all call sites are known, hence the function has internal linkage. + bool checkForAllCallSites(Function &F, std::function &Pred, + bool RequireAllCallSites); + private: /// The set of all abstract attributes. ///{ @@ -708,6 +716,30 @@ struct AANoSync : public AbstractAttribute { virtual bool isKnownNoSync() const = 0; }; +/// An abstract interface for all nonnull attributes. +struct AANonNull : public AbstractAttribute { + + /// See AbstractAttribute::AbstractAttribute(...). + AANonNull(Value &V, InformationCache &InfoCache) + : AbstractAttribute(V, InfoCache) {} + + /// See AbstractAttribute::AbstractAttribute(...). + AANonNull(Value *AssociatedVal, Value &AnchoredValue, + InformationCache &InfoCache) + : AbstractAttribute(AssociatedVal, AnchoredValue, InfoCache) {} + + /// Return true if we assume that the underlying value is nonnull. 
+ virtual bool isAssumedNonNull() const = 0; + + /// Return true if we know that underlying value is nonnull. + virtual bool isKnownNonNull() const = 0; + + /// See AbastractState::getAttrKind(). + Attribute::AttrKind getAttrKind() const override { return ID; } + + /// The identifier used by the Attributor for this class of attributes. + static constexpr Attribute::AttrKind ID = Attribute::NonNull; +}; } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 5a72865db9d0f..5d18e40b0b92c 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/InstIterator.h" @@ -51,6 +52,10 @@ STATISTIC(NumFnArgumentReturned, "Number of function arguments marked returned"); STATISTIC(NumFnNoSync, "Number of functions marked nosync"); STATISTIC(NumFnNoFree, "Number of functions marked nofree"); +STATISTIC(NumFnReturnedNonNull, + "Number of function return values marked nonnull"); +STATISTIC(NumFnArgumentNonNull, "Number of function arguments marked nonnull"); +STATISTIC(NumCSArgumentNonNull, "Number of call site arguments marked nonnull"); // TODO: Determine a good default value. 
// @@ -108,6 +113,21 @@ static void bookkeeping(AbstractAttribute::ManifestPosition MP, case Attribute::NoFree: NumFnNoFree++; break; + case Attribute::NonNull: + switch (MP) { + case AbstractAttribute::MP_RETURNED: + NumFnReturnedNonNull++; + break; + case AbstractAttribute::MP_ARGUMENT: + NumFnArgumentNonNull++; + break; + case AbstractAttribute::MP_CALL_SITE_ARGUMENT: + NumCSArgumentNonNull++; + break; + default: + break; + } + break; default: return; } @@ -970,10 +990,252 @@ ChangeStatus AANoFreeFunction::updateImpl(Attributor &A) { return ChangeStatus::UNCHANGED; } +/// ------------------------ NonNull Argument Attribute ------------------------ +struct AANonNullImpl : AANonNull, BooleanState { + + AANonNullImpl(Value &V, InformationCache &InfoCache) + : AANonNull(V, InfoCache) {} + + AANonNullImpl(Value *AssociatedVal, Value &AnchoredValue, + InformationCache &InfoCache) + : AANonNull(AssociatedVal, AnchoredValue, InfoCache) {} + + /// See AbstractAttribute::getState() + /// { + AbstractState &getState() override { return *this; } + const AbstractState &getState() const override { return *this; } + /// } + + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + return getAssumed() ? "nonnull" : "may-null"; + } + + /// See AANonNull::isAssumedNonNull(). + bool isAssumedNonNull() const override { return getAssumed(); } + + /// See AANonNull::isKnownNonNull(). + bool isKnownNonNull() const override { return getKnown(); } + + /// Generate a predicate that checks if a given value is assumed nonnull. + /// The generated function returns true if a value satisfies any of + /// following conditions. + /// (i) A value is known nonZero(=nonnull). + /// (ii) A value is associated with AANonNull and its isAssumedNonNull() is + /// true. 
+ std::function generatePredicate(Attributor &); +}; + +std::function AANonNullImpl::generatePredicate(Attributor &A) { + // FIXME: The `AAReturnedValues` should provide the predicate with the + // `ReturnInst` vector as well such that we can use the control flow sensitive + // version of `isKnownNonZero`. This should fix `test11` in + // `test/Transforms/FunctionAttrs/nonnull.ll` + + std::function Pred = [&](Value &RV) -> bool { + if (isKnownNonZero(&RV, getAnchorScope().getParent()->getDataLayout())) + return true; + + auto *NonNullAA = A.getAAFor(*this, RV); + + ImmutableCallSite ICS(&RV); + + if ((!NonNullAA || !NonNullAA->isAssumedNonNull()) && + (!ICS || !ICS.hasRetAttr(Attribute::NonNull))) + return false; + + return true; + }; + + return Pred; +} + +/// NonNull attribute for function return value. +struct AANonNullReturned : AANonNullImpl { + + AANonNullReturned(Function &F, InformationCache &InfoCache) + : AANonNullImpl(F, InfoCache) {} + + /// See AbstractAttribute::getManifestPosition(). + ManifestPosition getManifestPosition() const override { return MP_RETURNED; } + + /// See AbstractAttriubute::initialize(...). + void initialize(Attributor &A) override { + Function &F = getAnchorScope(); + + // Already nonnull. + if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex, + Attribute::NonNull)) + indicateOptimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override; +}; + +ChangeStatus AANonNullReturned::updateImpl(Attributor &A) { + Function &F = getAnchorScope(); + + auto *AARetVal = A.getAAFor(*this, F); + if (!AARetVal) { + indicatePessimisticFixpoint(); + return ChangeStatus::CHANGED; + } + + std::function Pred = this->generatePredicate(A); + if (!AARetVal->checkForallReturnedValues(Pred)) { + indicatePessimisticFixpoint(); + return ChangeStatus::CHANGED; + } + return ChangeStatus::UNCHANGED; +} + +/// NonNull attribute for function argument. 
+struct AANonNullArgument : AANonNullImpl { + + AANonNullArgument(Argument &A, InformationCache &InfoCache) + : AANonNullImpl(A, InfoCache) {} + + /// See AbstractAttribute::getManifestPosition(). + ManifestPosition getManifestPosition() const override { return MP_ARGUMENT; } + + /// See AbstractAttriubute::initialize(...). + void initialize(Attributor &A) override { + Argument *Arg = cast(getAssociatedValue()); + if (Arg->hasNonNullAttr()) + indicateOptimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override; +}; + +/// NonNull attribute for a call site argument. +struct AANonNullCallSiteArgument : AANonNullImpl { + + /// See AANonNullImpl::AANonNullImpl(...). + AANonNullCallSiteArgument(CallSite CS, unsigned ArgNo, + InformationCache &InfoCache) + : AANonNullImpl(CS.getArgOperand(ArgNo), *CS.getInstruction(), InfoCache), + ArgNo(ArgNo) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + CallSite CS(&getAnchoredValue()); + if (isKnownNonZero(getAssociatedValue(), + getAnchorScope().getParent()->getDataLayout()) || + CS.paramHasAttr(ArgNo, getAttrKind())) + indicateOptimisticFixpoint(); + } + + /// See AbstractAttribute::updateImpl(Attributor &A). + ChangeStatus updateImpl(Attributor &A) override; + + /// See AbstractAttribute::getManifestPosition(). + ManifestPosition getManifestPosition() const override { + return MP_CALL_SITE_ARGUMENT; + }; + + // Return argument index of associated value. 
+ int getArgNo() const { return ArgNo; } + +private: + unsigned ArgNo; +}; +ChangeStatus AANonNullArgument::updateImpl(Attributor &A) { + Function &F = getAnchorScope(); + Argument &Arg = cast(getAnchoredValue()); + + unsigned ArgNo = Arg.getArgNo(); + + // Callback function + std::function CallSiteCheck = [&](CallSite CS) { + assert(CS && "Sanity check: Call site was not initialized properly!"); + + auto *NonNullAA = A.getAAFor(*this, *CS.getInstruction(), ArgNo); + + // Check that NonNullAA is AANonNullCallSiteArgument. + if (NonNullAA) { + ImmutableCallSite ICS(&NonNullAA->getAnchoredValue()); + if (ICS && CS.getInstruction() == ICS.getInstruction()) + return NonNullAA->isAssumedNonNull(); + return false; + } + + if (CS.paramHasAttr(ArgNo, Attribute::NonNull)) + return true; + + Value *V = CS.getArgOperand(ArgNo); + if (isKnownNonZero(V, getAnchorScope().getParent()->getDataLayout())) + return true; + + return false; + }; + if (!A.checkForAllCallSites(F, CallSiteCheck, true)) { + indicatePessimisticFixpoint(); + return ChangeStatus::CHANGED; + } + return ChangeStatus::UNCHANGED; +} + +ChangeStatus AANonNullCallSiteArgument::updateImpl(Attributor &A) { + // NOTE: Never look at the argument of the callee in this method. + // If we do this, "nonnull" is always deduced because of the assumption. + + Value &V = *getAssociatedValue(); + + auto *NonNullAA = A.getAAFor(*this, V); + + if (!NonNullAA || !NonNullAA->isAssumedNonNull()) { + indicatePessimisticFixpoint(); + return ChangeStatus::CHANGED; + } + + return ChangeStatus::UNCHANGED; +} + /// ---------------------------------------------------------------------------- /// Attributor /// ---------------------------------------------------------------------------- +bool Attributor::checkForAllCallSites(Function &F, + std::function &Pred, + bool RequireAllCallSites) { + // We can try to determine information from + // the call sites. 
However, this is only possible all call sites are known, + // hence the function has internal linkage. + if (RequireAllCallSites && !F.hasInternalLinkage()) { + LLVM_DEBUG( + dbgs() + << "Attributor: Function " << F.getName() + << " has no internal linkage, hence not all call sites are known\n"); + return false; + } + + for (const Use &U : F.uses()) { + + CallSite CS(U.getUser()); + dbgs() << *CS.getInstruction() << "\n"; + if (!CS || !CS.isCallee(&U) || !CS.getCaller()->hasExactDefinition()) { + if (!RequireAllCallSites) + continue; + + LLVM_DEBUG(dbgs() << "Attributor: User " << *U.getUser() + << " is an invalid use of " << F.getName() << "\n"); + return false; + } + + if (Pred(CS)) + continue; + + LLVM_DEBUG(dbgs() << "Attributor: Call site callback failed for " + << *CS.getInstruction() << "\n"); + return false; + } + + return true; +} + ChangeStatus Attributor::run() { // Initialize all abstract attributes. for (AbstractAttribute *AA : AllAbstractAttributes) @@ -1128,6 +1390,17 @@ void Attributor::identifyDefaultAbstractAttributes( // though it is an argument attribute. if (!Whitelist || Whitelist->count(AAReturnedValues::ID)) registerAA(*new AAReturnedValuesImpl(F, InfoCache)); + + // Every function with pointer return type might be marked nonnull. + if (ReturnType->isPointerTy() && + (!Whitelist || Whitelist->count(AANonNullReturned::ID))) + registerAA(*new AANonNullReturned(F, InfoCache)); + } + + // Every argument with pointer type might be marked nonnull. 
+ for (Argument &Arg : F.args()) { + if (Arg.getType()->isPointerTy()) + registerAA(*new AANonNullArgument(Arg, InfoCache)); } // Walk all instructions to find more attribute opportunities and also @@ -1163,6 +1436,17 @@ void Attributor::identifyDefaultAbstractAttributes( InstOpcodeMap[I.getOpcode()].push_back(&I); if (I.mayReadOrWriteMemory()) ReadOrWriteInsts.push_back(&I); + + CallSite CS(&I); + if (CS && CS.getCalledFunction()) { + for (int i = 0, e = CS.getCalledFunction()->arg_size(); i < e; i++) { + if (!CS.getArgument(i)->getType()->isPointerTy()) + continue; + + // Call site argument attribute "non-null". + registerAA(*new AANonNullCallSiteArgument(CS, i, InfoCache), i); + } + } } } diff --git a/llvm/test/Transforms/FunctionAttrs/nonnull.ll b/llvm/test/Transforms/FunctionAttrs/nonnull.ll index 6c04e4907d8e8..c37defc205aaa 100644 --- a/llvm/test/Transforms/FunctionAttrs/nonnull.ll +++ b/llvm/test/Transforms/FunctionAttrs/nonnull.ll @@ -1,31 +1,34 @@ -; RUN: opt -S -functionattrs -enable-nonnull-arg-prop %s | FileCheck %s -; RUN: opt -S -passes=function-attrs -enable-nonnull-arg-prop %s | FileCheck %s +; RUN: opt -S -functionattrs -enable-nonnull-arg-prop %s | FileCheck %s --check-prefixes=BOTH,FNATTR +; RUN: opt -S -passes=function-attrs -enable-nonnull-arg-prop %s | FileCheck %s --check-prefixes=BOTH,FNATTR +; RUN: opt -attributor --attributor-disable=false -S < %s | FileCheck %s --check-prefixes=BOTH,ATTRIBUTOR + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" declare nonnull i8* @ret_nonnull() ; Return a pointer trivially nonnull (call return attribute) define i8* @test1() { -; CHECK: define nonnull i8* @test1 +; BOTH: define nonnull i8* @test1 %ret = call i8* @ret_nonnull() ret i8* %ret } ; Return a pointer trivially nonnull (argument attribute) define i8* @test2(i8* nonnull %p) { -; CHECK: define nonnull i8* @test2 +; BOTH: define nonnull i8* @test2 ret i8* %p } ; Given an SCC where one of the functions can not be marked nonnull, ; can we 
still mark the other one which is trivially nonnull define i8* @scc_binder() { -; CHECK: define i8* @scc_binder +; BOTH: define i8* @scc_binder call i8* @test3() ret i8* null } define i8* @test3() { -; CHECK: define nonnull i8* @test3 +; BOTH: define nonnull i8* @test3 call i8* @scc_binder() %ret = call i8* @ret_nonnull() ret i8* %ret @@ -35,13 +38,15 @@ define i8* @test3() { ; nonnull if neither can ever return null. (In this case, they ; just never return period.) define i8* @test4_helper() { -; CHECK: define noalias nonnull i8* @test4_helper +; FNATTR: define noalias nonnull i8* @test4_helper +; ATTRIBUTOR: define nonnull i8* @test4_helper %ret = call i8* @test4() ret i8* %ret } define i8* @test4() { -; CHECK: define noalias nonnull i8* @test4 +; FNATTR: define noalias nonnull i8* @test4 +; ATTRIBUTOR: define nonnull i8* @test4 %ret = call i8* @test4_helper() ret i8* %ret } @@ -49,13 +54,15 @@ define i8* @test4() { ; Given a mutual recursive set of functions which *can* return null ; make sure we haven't marked them as nonnull. 
define i8* @test5_helper() { -; CHECK: define noalias i8* @test5_helper +; FNATTR: define noalias i8* @test5_helper +; ATTRIBUTOR: define i8* @test5_helper %ret = call i8* @test5() ret i8* null } define i8* @test5() { -; CHECK: define noalias i8* @test5 +; FNATTR: define noalias i8* @test5 +; ATTRIBUTOR: define i8* @test5 %ret = call i8* @test5_helper() ret i8* %ret } @@ -63,7 +70,7 @@ define i8* @test5() { ; Local analysis, but going through a self recursive phi define i8* @test6() { entry: -; CHECK: define nonnull i8* @test6 +; BOTH: define nonnull i8* @test6 %ret = call i8* @ret_nonnull() br label %loop loop: @@ -73,6 +80,144 @@ exit: ret i8* %phi } +; BOTH: define i8* @test7 +define i8* @test7(i8* %a) { + %b = getelementptr inbounds i8, i8* %a, i64 0 + ret i8* %b +} + +; BOTH: define nonnull i8* @test8 +define i8* @test8(i8* %a) { + %b = getelementptr inbounds i8, i8* %a, i64 1 + ret i8* %b +} + +; BOTH: define i8* @test9 +define i8* @test9(i8* %a, i64 %n) { + %b = getelementptr inbounds i8, i8* %a, i64 %n + ret i8* %b +} + +declare void @llvm.assume(i1) +; FNATTR: define i8* @test10 +; FIXME: missing nonnull +; ATTRIBUTOR: define i8* @test10 +define i8* @test10(i8* %a, i64 %n) { + %cmp = icmp ne i64 %n, 0 + call void @llvm.assume(i1 %cmp) + %b = getelementptr inbounds i8, i8* %a, i64 %n + ret i8* %b +} + +; TEST 11 +; char* test11(char *p) { +; return p? p: nonnull(); +; } +; FNATTR: define i8* @test11 +; FIXME: missing nonnull +; ATTRIBUTOR: define i8* @test11 +define i8* @test11(i8*) local_unnamed_addr { + %2 = icmp eq i8* %0, null + br i1 %2, label %3, label %5 + +;

    ()); + // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_map'}} + } + { + // cannot deduce Key from just (Allocator) + std::unordered_map m(std::allocator

    {}); + // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_map'}} + } + { + // cannot deduce Key from just (Size, Allocator) + std::unordered_map m(42, std::allocator

    ()); + // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_map'}} + } + { + // cannot deduce Key from just (Size, Hash, Allocator) + std::unordered_map m(42, std::hash(), std::allocator

    ()); + // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_map'}} + } + + return 0; +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct.pass.cpp new file mode 100644 index 0000000000000..0923597dcc994 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct.pass.cpp @@ -0,0 +1,204 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// +// UNSUPPORTED: c++98, c++03, c++11, c++14 +// UNSUPPORTED: libcpp-no-deduction-guides + +// template>, +// class Pred = equal_to>, +// class Allocator = allocator>> +// unordered_map(InputIterator, InputIterator, typename see below::size_type = see below, +// Hash = Hash(), Pred = Pred(), Allocator = Allocator()) +// -> unordered_map, iter-mapped-type, Hash, Pred, +// Allocator>; +// +// template, +// class Pred = equal_to, class Allocator = allocator>> +// unordered_map(initializer_list>, +// typename see below::size_type = see below, Hash = Hash(), +// Pred = Pred(), Allocator = Allocator()) +// -> unordered_map; +// +// template +// unordered_map(InputIterator, InputIterator, typename see below::size_type, Allocator) +// -> unordered_map, iter-mapped-type, +// hash>, +// equal_to>, Allocator>; +// +// template +// unordered_map(InputIterator, InputIterator, Allocator) +// -> unordered_map, iter-mapped-type, +// hash>, +// equal_to>, Allocator>; +// +// template +// unordered_map(InputIterator, InputIterator, typename see below::size_type, Hash, Allocator) +// -> unordered_map, iter-mapped-type, Hash, +// 
equal_to>, Allocator>; +// +// template +// unordered_map(initializer_list>, typename see below::size_type, Allocator) +// -> unordered_map, equal_to, Allocator>; +// +// template +// unordered_map(initializer_list>, Allocator) +// -> unordered_map, equal_to, Allocator>; +// +// template +// unordered_map(initializer_list>, typename see below::size_type, Hash, +// Allocator) +// -> unordered_map, Allocator>; + +#include // is_permutation +#include +#include // INT_MAX +#include +#include + +#include "test_allocator.h" + +using P = std::pair; +using PC = std::pair; + +int main(int, char**) +{ + const PC expected_m[] = { {1,1}, {2,2}, {3,1}, {INT_MAX,1} }; + + { + const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_map m(std::begin(arr), std::end(arr)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_map m(std::begin(arr), std::end(arr), 42); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash(), std::equal_to<>()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to<>>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_map m(std::begin(arr), std::end(arr), 42, 
std::hash(), std::equal_to<>(), test_allocator(0, 41)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to<>, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 41); + } + + { + std::unordered_map source; + std::unordered_map m(source); + ASSERT_SAME_TYPE(decltype(m), decltype(source)); + assert(m.size() == 0); + } + + { + std::unordered_map source; + std::unordered_map m{source}; // braces instead of parens + ASSERT_SAME_TYPE(decltype(m), decltype(source)); + assert(m.size() == 0); + } + + { + std::unordered_map, std::equal_to<>, test_allocator> source; + test_allocator a(0, 42); + std::unordered_map m(source, a); + ASSERT_SAME_TYPE(decltype(m), decltype(source)); + assert(m.get_allocator().get_id() == 42); + assert(m.size() == 0); + } + + { + std::unordered_map, std::equal_to<>, test_allocator> source; + test_allocator a(0, 43); + std::unordered_map m{source, a}; // braces instead of parens + ASSERT_SAME_TYPE(decltype(m), decltype(source)); + assert(m.get_allocator().get_id() == 43); + assert(m.size() == 0); + } + + { + std::unordered_map m { P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }; + ASSERT_SAME_TYPE(decltype(m), std::unordered_map); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash(), 
std::equal_to<>()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to<>>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash(), std::equal_to<>(), test_allocator(0, 44)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to<>, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 44); + } + + { + const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_map m(std::begin(arr), std::end(arr), 42, test_allocator(0, 45)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 45); + } + + { + const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash(), test_allocator(0, 46)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 46); + } + + { + std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, test_allocator(0, 47)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 47); + } + + { + std::unordered_map m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash(), test_allocator(0, 48)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + 
assert(m.get_allocator().get_id() == 48); + } + + return 0; +} diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct_const.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct_const.pass.cpp new file mode 100644 index 0000000000000..1fb4d674d5fb0 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/deduct_const.pass.cpp @@ -0,0 +1,172 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// +// UNSUPPORTED: c++98, c++03, c++11, c++14 +// UNSUPPORTED: libcpp-no-deduction-guides + +// template>, +// class Pred = equal_to>, +// class Allocator = allocator>> +// unordered_map(InputIterator, InputIterator, typename see below::size_type = see below, +// Hash = Hash(), Pred = Pred(), Allocator = Allocator()) +// -> unordered_map, iter-mapped-type, Hash, Pred, +// Allocator>; +// +// template, +// class Pred = equal_to, class Allocator = allocator>> +// unordered_map(initializer_list>, +// typename see below::size_type = see below, Hash = Hash(), +// Pred = Pred(), Allocator = Allocator()) +// -> unordered_map; +// +// template +// unordered_map(InputIterator, InputIterator, typename see below::size_type, Allocator) +// -> unordered_map, iter-mapped-type, +// hash>, +// equal_to>, Allocator>; +// +// template +// unordered_map(InputIterator, InputIterator, Allocator) +// -> unordered_map, iter-mapped-type, +// hash>, +// equal_to>, Allocator>; +// +// template +// unordered_map(InputIterator, InputIterator, typename see below::size_type, Hash, Allocator) +// -> unordered_map, iter-mapped-type, Hash, +// equal_to>, Allocator>; +// +// template +// 
unordered_map(initializer_list>, typename see below::size_type, Allocator) +// -> unordered_map, equal_to, Allocator>; +// +// template +// unordered_map(initializer_list>, Allocator) +// -> unordered_map, equal_to, Allocator>; +// +// template +// unordered_map(initializer_list>, typename see below::size_type, Hash, +// Allocator) +// -> unordered_map, Allocator>; + +#include // std::is_permutation +#include +#include // INT_MAX +#include +#include + +#include "test_allocator.h" + +using P = std::pair; +using PC = std::pair; + +int main(int, char**) +{ + const PC expected_m[] = { {1,1L}, {2,2L}, {3,1L}, {INT_MAX,1L} }; + + { + const PC arr[] = { {1,1L}, {2,2L}, {1,1L}, {INT_MAX,1L}, {3,1L} }; + std::unordered_map m(std::begin(arr), std::end(arr)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + const PC arr[] = { {1,1L}, {2,2L}, {1,1L}, {INT_MAX,1L}, {3,1L} }; + std::unordered_map m(std::begin(arr), std::end(arr), 42); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + const PC arr[] = { {1,1L}, {2,2L}, {1,1L}, {INT_MAX,1L}, {3,1L} }; + std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + const PC arr[] = { {1,1L}, {2,2L}, {1,1L}, {INT_MAX,1L}, {3,1L} }; + std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash(), std::equal_to<>()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to<>>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + const PC arr[] = { {1,1L}, {2,2L}, {1,1L}, {INT_MAX,1L}, {3,1L} }; + std::unordered_map m(std::begin(arr), std::end(arr), 42, 
std::hash(), std::equal_to<>(), test_allocator(0, 41)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to<>, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 41); + } + + { + std::unordered_map m { PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }; + ASSERT_SAME_TYPE(decltype(m), std::unordered_map); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + std::unordered_map m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + std::unordered_map m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, std::hash()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + std::unordered_map m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, std::hash(), std::equal_to<>()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to<>>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + std::unordered_map m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, std::hash(), std::equal_to<>(), test_allocator(0, 44)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to<>, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 44); + } + + { + const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_map m(std::begin(arr), std::end(arr), 42, test_allocator(0, 45)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to, test_allocator>); + assert(std::is_permutation(m.begin(), 
m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 45); + } + + { + const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_map m(std::begin(arr), std::end(arr), 42, std::hash(), test_allocator(0, 46)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 46); + } + + { + std::unordered_map m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, test_allocator(0, 47)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 47); + } + + { + std::unordered_map m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, std::hash(), test_allocator(0, 48)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_map, std::equal_to, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 48); + } + + return 0; +} diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct.fail.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct.fail.cpp new file mode 100644 index 0000000000000..7f170472d7a52 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct.fail.cpp @@ -0,0 +1,106 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// +// UNSUPPORTED: c++98, c++03, c++11, c++14 +// UNSUPPORTED: libcpp-no-deduction-guides +// XFAIL: clang-6, apple-clang-9.0, apple-clang-9.1, apple-clang-10.0 + +// template>, +// class Pred = equal_to>, +// class Allocator = allocator>> +// unordered_multimap(InputIterator, InputIterator, typename see below::size_type = see below, +// Hash = Hash(), Pred = Pred(), Allocator = Allocator()) +// -> unordered_multimap, iter-mapped-type, Hash, Pred, +// Allocator>; +// +// template, +// class Pred = equal_to, class Allocator = allocator>> +// unordered_multimap(initializer_list>, +// typename see below::size_type = see below, Hash = Hash(), +// Pred = Pred(), Allocator = Allocator()) +// -> unordered_multimap; +// +// template +// unordered_multimap(InputIterator, InputIterator, typename see below::size_type, Allocator) +// -> unordered_multimap, iter-mapped-type, +// hash>, +// equal_to>, Allocator>; +// +// template +// unordered_multimap(InputIterator, InputIterator, Allocator) +// -> unordered_multimap, iter-mapped-type, +// hash>, +// equal_to>, Allocator>; +// +// template +// unordered_multimap(InputIterator, InputIterator, typename see below::size_type, Hash, Allocator) +// -> unordered_multimap, iter-mapped-type, Hash, +// equal_to>, Allocator>; +// +// template +// unordered_multimap(initializer_list>, typename see below::size_type, Allocator) +// -> unordered_multimap, equal_to, Allocator>; +// +// template +// unordered_multimap(initializer_list>, Allocator) +// -> unordered_multimap, equal_to, Allocator>; +// +// template +// unordered_multimap(initializer_list>, typename see below::size_type, Hash, +// Allocator) +// -> unordered_multimap, Allocator>; + +#include +#include + +int main(int, char**) +{ + using P = std::pair; + { + // cannot deduce Key from nothing + std::unordered_multimap m; // 
expected-error{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_multimap'}} + } + { + // cannot deduce Key from just (Size) + std::unordered_multimap m(42); // expected-error{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_multimap'}} + } + { + // cannot deduce Key from just (Size, Hash) + std::unordered_multimap m(42, std::hash()); + // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_multimap'}} + } + { + // cannot deduce Key from just (Size, Hash, Pred) + std::unordered_multimap m(42, std::hash(), std::equal_to()); + // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_multimap'}} + } + { + // cannot deduce Key from just (Size, Hash, Pred, Allocator) + std::unordered_multimap m(42, std::hash(), std::equal_to(), std::allocator

    ()); + // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_multimap'}} + } + { + // cannot deduce Key from just (Allocator) + std::unordered_multimap m(std::allocator

    {}); + // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_multimap'}} + } + { + // cannot deduce Key from just (Size, Allocator) + std::unordered_multimap m(42, std::allocator

    ()); + // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_multimap'}} + } + { + // cannot deduce Key from just (Size, Hash, Allocator) + std::unordered_multimap m(42, std::hash(), std::allocator

    ()); + // expected-error@-1{{no viable constructor or deduction guide for deduction of template arguments of 'unordered_multimap'}} + } + + return 0; +} diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct.pass.cpp new file mode 100644 index 0000000000000..f620f1e37112c --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct.pass.cpp @@ -0,0 +1,204 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// +// UNSUPPORTED: c++98, c++03, c++11, c++14 +// UNSUPPORTED: libcpp-no-deduction-guides + +// template>, +// class Pred = equal_to>, +// class Allocator = allocator>> +// unordered_multimap(InputIterator, InputIterator, typename see below::size_type = see below, +// Hash = Hash(), Pred = Pred(), Allocator = Allocator()) +// -> unordered_multimap, iter-mapped-type, Hash, Pred, +// Allocator>; +// +// template, +// class Pred = equal_to, class Allocator = allocator>> +// unordered_multimap(initializer_list>, +// typename see below::size_type = see below, Hash = Hash(), +// Pred = Pred(), Allocator = Allocator()) +// -> unordered_multimap; +// +// template +// unordered_multimap(InputIterator, InputIterator, typename see below::size_type, Allocator) +// -> unordered_multimap, iter-mapped-type, +// hash>, +// equal_to>, Allocator>; +// +// template +// unordered_multimap(InputIterator, InputIterator, Allocator) +// -> unordered_multimap, iter-mapped-type, +// hash>, +// equal_to>, Allocator>; +// +// template +// unordered_multimap(InputIterator, InputIterator, typename see 
below::size_type, Hash, Allocator) +// -> unordered_multimap, iter-mapped-type, Hash, +// equal_to>, Allocator>; +// +// template +// unordered_multimap(initializer_list>, typename see below::size_type, Allocator) +// -> unordered_multimap, equal_to, Allocator>; +// +// template +// unordered_multimap(initializer_list>, Allocator) +// -> unordered_multimap, equal_to, Allocator>; +// +// template +// unordered_multimap(initializer_list>, typename see below::size_type, Hash, +// Allocator) +// -> unordered_multimap, Allocator>; + +#include // is_permutation +#include +#include // INT_MAX +#include +#include + +#include "test_allocator.h" + +using P = std::pair; +using PC = std::pair; + +int main(int, char**) +{ + const PC expected_m[] = { {1,1}, {1,1}, {2,2}, {3,1}, {INT_MAX,1} }; + + { + const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_multimap m(std::begin(arr), std::end(arr)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_multimap m(std::begin(arr), std::end(arr), 42); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_multimap m(std::begin(arr), std::end(arr), 42, std::hash()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap, std::equal_to>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_multimap m(std::begin(arr), std::end(arr), 42, std::hash(), std::equal_to<>()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap, std::equal_to<>>); + assert(std::is_permutation(m.begin(), m.end(), 
std::begin(expected_m), std::end(expected_m))); + } + + { + const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_multimap m(std::begin(arr), std::end(arr), 42, std::hash(), std::equal_to<>(), test_allocator(0, 41)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap, std::equal_to<>, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 41); + } + + { + std::unordered_multimap source; + std::unordered_multimap m(source); + ASSERT_SAME_TYPE(decltype(m), decltype(source)); + assert(m.size() == 0); + } + + { + std::unordered_multimap source; + std::unordered_multimap m{source}; // braces instead of parens + ASSERT_SAME_TYPE(decltype(m), decltype(source)); + assert(m.size() == 0); + } + + { + std::unordered_multimap, std::equal_to<>, test_allocator> source; + test_allocator a(0, 42); + std::unordered_multimap m(source, a); + ASSERT_SAME_TYPE(decltype(m), decltype(source)); + assert(m.get_allocator().get_id() == 42); + assert(m.size() == 0); + } + + { + std::unordered_multimap, std::equal_to<>, test_allocator> source; + test_allocator a(0, 43); + std::unordered_multimap m{source, a}; // braces instead of parens + ASSERT_SAME_TYPE(decltype(m), decltype(source)); + assert(m.get_allocator().get_id() == 43); + assert(m.size() == 0); + } + + { + std::unordered_multimap m { P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }; + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + std::unordered_multimap m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + std::unordered_multimap m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash()); + 
ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + std::unordered_multimap m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash(), std::equal_to<>()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap, std::equal_to<>>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + std::unordered_multimap m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash(), std::equal_to<>(), test_allocator(0, 44)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap, std::equal_to<>, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 44); + } + + { + const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_multimap m(std::begin(arr), std::end(arr), 42, test_allocator(0, 45)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap, std::equal_to, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 45); + } + + { + const P arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_multimap m(std::begin(arr), std::end(arr), 42, std::hash(), test_allocator(0, 46)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap, std::equal_to, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 46); + } + + { + std::unordered_multimap m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, test_allocator(0, 47)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap, std::equal_to, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 47); + } + + 
{ + std::unordered_multimap m({ P{1,1L}, P{2,2L}, P{1,1L}, P{INT_MAX,1L}, P{3,1L} }, 42, std::hash(), test_allocator(0, 48)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap, std::equal_to, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 48); + } + + return 0; +} diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct_const.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct_const.pass.cpp new file mode 100644 index 0000000000000..8a4a383641173 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/deduct_const.pass.cpp @@ -0,0 +1,173 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// +// UNSUPPORTED: c++98, c++03, c++11, c++14 +// UNSUPPORTED: libcpp-no-deduction-guides + +// template>, +// class Pred = equal_to>, +// class Allocator = allocator>> +// unordered_multimap(InputIterator, InputIterator, typename see below::size_type = see below, +// Hash = Hash(), Pred = Pred(), Allocator = Allocator()) +// -> unordered_multimap, iter-mapped-type, Hash, Pred, +// Allocator>; +// +// template, +// class Pred = equal_to, class Allocator = allocator>> +// unordered_multimap(initializer_list>, +// typename see below::size_type = see below, Hash = Hash(), +// Pred = Pred(), Allocator = Allocator()) +// -> unordered_multimap; +// +// template +// unordered_multimap(InputIterator, InputIterator, typename see below::size_type, Allocator) +// -> unordered_multimap, iter-mapped-type, +// hash>, +// equal_to>, Allocator>; +// +// template 
+// unordered_multimap(InputIterator, InputIterator, Allocator) +// -> unordered_multimap, iter-mapped-type, +// hash>, +// equal_to>, Allocator>; +// +// template +// unordered_multimap(InputIterator, InputIterator, typename see below::size_type, Hash, Allocator) +// -> unordered_multimap, iter-mapped-type, Hash, +// equal_to>, Allocator>; +// +// template +// unordered_multimap(initializer_list>, typename see below::size_type, Allocator) +// -> unordered_multimap, equal_to, Allocator>; +// +// template +// unordered_multimap(initializer_list>, Allocator) +// -> unordered_multimap, equal_to, Allocator>; +// +// template +// unordered_multimap(initializer_list>, typename see below::size_type, Hash, +// Allocator) +// -> unordered_multimap, Allocator>; + +#include // is_permutation +#include +#include // INT_MAX +#include +#include +#include + +#include "test_allocator.h" + +using P = std::pair; +using PC = std::pair; + +int main(int, char**) +{ + const PC expected_m[] = { {1,1}, {1,1}, {2,2}, {3,1}, {INT_MAX,1} }; + + { + const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_multimap m(std::begin(arr), std::end(arr)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_multimap m(std::begin(arr), std::end(arr), 42); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_multimap m(std::begin(arr), std::end(arr), 42, std::hash()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap, std::equal_to>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, 
{3,1} }; + std::unordered_multimap m(std::begin(arr), std::end(arr), 42, std::hash(), std::equal_to<>()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap, std::equal_to<>>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_multimap m(std::begin(arr), std::end(arr), 42, std::hash(), std::equal_to<>(), test_allocator(0, 41)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap, std::equal_to<>, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 41); + } + + { + std::unordered_multimap m { PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }; + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + std::unordered_multimap m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + std::unordered_multimap m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, std::hash()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + std::unordered_multimap m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, std::hash(), std::equal_to<>()); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap, std::equal_to<>>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + } + + { + std::unordered_multimap m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, std::hash(), std::equal_to<>(), test_allocator(0, 44)); + ASSERT_SAME_TYPE(decltype(m), 
std::unordered_multimap, std::equal_to<>, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 44); + } + + { + const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_multimap m(std::begin(arr), std::end(arr), 42, test_allocator(0, 45)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap, std::equal_to, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 45); + } + + { + const PC arr[] = { {1,1}, {2,2}, {1,1}, {INT_MAX,1}, {3,1} }; + std::unordered_multimap m(std::begin(arr), std::end(arr), 42, std::hash(), test_allocator(0, 46)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap, std::equal_to, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 46); + } + + { + std::unordered_multimap m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, test_allocator(0, 47)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap, std::equal_to, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 47); + } + + { + std::unordered_multimap m({ PC{1,1L}, PC{2,2L}, PC{1,1L}, PC{INT_MAX,1L}, PC{3,1L} }, 42, std::hash(), test_allocator(0, 48)); + ASSERT_SAME_TYPE(decltype(m), std::unordered_multimap, std::equal_to, test_allocator>); + assert(std::is_permutation(m.begin(), m.end(), std::begin(expected_m), std::end(expected_m))); + assert(m.get_allocator().get_id() == 48); + } + + return 0; +} diff --git a/libcxx/www/cxx2a_status.html b/libcxx/www/cxx2a_status.html index b800ef07bfad9..2b19eb86ba332 100644 --- a/libcxx/www/cxx2a_status.html +++ b/libcxx/www/cxx2a_status.html @@ -283,7 +283,7 @@

    Library Working group Issues Status

    2996Missing rvalue overloads for shared_ptr operationsSan Diego 3008make_shared (sub)object destruction semantics are not specifiedSan Diego 3022is_convertible<derived*, base*> may lead to ODRSan DiegoResolved by 1285R0 - 3025Map-like container deduction guides should use pair<Key, T>, not pair<const Key, T>San Diego + 3025Map-like container deduction guides should use pair<Key, T>, not pair<const Key, T>San DiegoComplete 3031Algorithms and predicates with non-const reference argumentsSan Diego 3037polymorphic_allocator and incomplete typesSan Diego 3038polymorphic_allocator::allocate should not allow integer overflow to create vulnerabilitiesSan Diego From 66ee934440c21dc2bd6ff938c79dad4ce032990a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 15 Jul 2019 20:20:18 +0000 Subject: [PATCH 157/451] AMDGPU/GlobalISel: Allow scalar s1 and/or/xor If a 1-bit value is in a 32-bit VGPR, the scalar opcodes set SCC to whether the result is 0. If the inputs are SCC, these can be copied to a 32-bit SGPR to produce an SCC result. 
llvm-svn: 366125 --- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 97 +- .../GlobalISel/regbankselect-and-s1.mir | 527 +++++++ .../AMDGPU/GlobalISel/regbankselect-and.mir | 145 +- .../AMDGPU/GlobalISel/regbankselect-or.mir | 15 +- .../GlobalISel/regbankselect-phi-s1.mir | 1333 +++++++++++++++++ .../AMDGPU/GlobalISel/regbankselect-xor.mir | 15 +- 6 files changed, 1964 insertions(+), 168 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index be05d9cb0ec6b..815cbc5e26ee4 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -108,14 +108,22 @@ AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI) unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst, const RegisterBank &Src, unsigned Size) const { + // TODO: Should there be a UniformVGPRRegBank which can use readfirstlane? if (Dst.getID() == AMDGPU::SGPRRegBankID && Src.getID() == AMDGPU::VGPRRegBankID) { return std::numeric_limits::max(); } - // SGPRRegBank with size 1 is actually vcc or another 64-bit sgpr written by - // the valu. - if (Size == 1 && Dst.getID() == AMDGPU::SCCRegBankID && + // Bool values are tricky, because the meaning is based on context. The SCC + // and VCC banks are for the natural scalar and vector conditions produced by + // a compare. + // + // Legalization doesn't know about the necessary context, so an s1 use may + // have been a truncate from an arbitrary value, in which case a copy (lowered + // as a compare with 0) needs to be inserted. 
+ if (Size == 1 && + (Dst.getID() == AMDGPU::SCCRegBankID || + Dst.getID() == AMDGPU::SGPRRegBankID) && (Src.getID() == AMDGPU::SGPRRegBankID || Src.getID() == AMDGPU::VGPRRegBankID || Src.getID() == AMDGPU::VCCRegBankID)) @@ -333,6 +341,35 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( case TargetOpcode::G_OR: case TargetOpcode::G_XOR: { unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); + + if (Size == 1) { + // s_{and|or|xor}_b32 set scc when the result of the 32-bit op is not 0. + const InstructionMapping &SCCMapping = getInstructionMapping( + 1, 1, getOperandsMapping( + {AMDGPU::getValueMapping(AMDGPU::SCCRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}), + 3); // Num Operands + AltMappings.push_back(&SCCMapping); + + const InstructionMapping &SGPRMapping = getInstructionMapping( + 1, 1, getOperandsMapping( + {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}), + 3); // Num Operands + AltMappings.push_back(&SGPRMapping); + + const InstructionMapping &VCCMapping0 = getInstructionMapping( + 2, 10, getOperandsMapping( + {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size)}), + 3); // Num Operands + AltMappings.push_back(&VCCMapping0); + return AltMappings; + } + if (Size != 64) break; @@ -360,7 +397,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( 3); // Num Operands AltMappings.push_back(&SVMapping); - // SGPR in LHS is slightly preferrable, so make it VS more expnesive than + // SGPR in LHS is slightly preferable, so make it VS more expensive than // SV. 
const InstructionMapping &VSMapping = getInstructionMapping( 3, 4, getOperandsMapping( @@ -1551,8 +1588,56 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_XOR: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); if (Size == 1) { - OpdsMapping[0] = OpdsMapping[1] = - OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size); + const RegisterBank *DstBank + = getRegBank(MI.getOperand(0).getReg(), MRI, *TRI); + + unsigned TargetBankID = -1; + unsigned BankLHS = -1; + unsigned BankRHS = -1; + if (DstBank) { + TargetBankID = DstBank->getID(); + if (DstBank == &AMDGPU::VCCRegBank) { + TargetBankID = AMDGPU::VCCRegBankID; + BankLHS = AMDGPU::VCCRegBankID; + BankRHS = AMDGPU::VCCRegBankID; + } else if (DstBank == &AMDGPU::SCCRegBank) { + TargetBankID = AMDGPU::SCCRegBankID; + BankLHS = AMDGPU::SGPRRegBankID; + BankRHS = AMDGPU::SGPRRegBankID; + } else { + BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI, + AMDGPU::SGPRRegBankID); + BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI, + AMDGPU::SGPRRegBankID); + } + } else { + BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI, *TRI, + AMDGPU::VCCRegBankID); + BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI, + AMDGPU::VCCRegBankID); + + // Both inputs should be true booleans to produce a boolean result. + if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) { + TargetBankID = AMDGPU::VGPRRegBankID; + } else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) { + TargetBankID = AMDGPU::VCCRegBankID; + BankLHS = AMDGPU::VCCRegBankID; + BankRHS = AMDGPU::VCCRegBankID; + } else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) { + TargetBankID = AMDGPU::SGPRRegBankID; + } else if (BankLHS == AMDGPU::SCCRegBankID || BankRHS == AMDGPU::SCCRegBankID) { + // The operation must be done on a 32-bit register, but it will set + // scc. 
The result type could interchangeably be SCC or SGPR, since + // both values will be produced. + TargetBankID = AMDGPU::SCCRegBankID; + BankLHS = AMDGPU::SGPRRegBankID; + BankRHS = AMDGPU::SGPRRegBankID; + } + } + + OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID, Size); + OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS, Size); + OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS, Size); break; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir new file mode 100644 index 0000000000000..620a8e1e71bdc --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir @@ -0,0 +1,527 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck -check-prefix=FAST %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck -check-prefix=GREEDY %s + +--- +name: and_s1_sgpr_sgpr +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; FAST-LABEL: name: and_s1_sgpr_sgpr + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: [[AND:%[0-9]+]]:sgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]] + ; GREEDY-LABEL: name: and_s1_sgpr_sgpr + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: [[AND:%[0-9]+]]:sgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s1) = G_TRUNC %0 + %3:_(s1) = G_TRUNC %1 + %4:_(s1) = G_AND %2, %3 +... 
+ +--- +name: and_s1_scc_scc +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; FAST-LABEL: name: and_s1_scc_scc + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; FAST: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1) + ; FAST: [[COPY3:%[0-9]+]]:sgpr(s1) = COPY [[ICMP1]](s1) + ; FAST: [[AND:%[0-9]+]]:scc(s1) = G_AND [[COPY2]], [[COPY3]] + ; GREEDY-LABEL: name: and_s1_scc_scc + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; GREEDY: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1) + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s1) = COPY [[ICMP1]](s1) + ; GREEDY: [[AND:%[0-9]+]]:scc(s1) = G_AND [[COPY2]], [[COPY3]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s1) = G_ICMP intpred(eq), %0, %2 + %4:_(s1) = G_ICMP intpred(eq), %1, %2 + %5:_(s1) = G_AND %3, %4 +... 
+ +--- +name: and_s1_vgpr_vgpr +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; FAST-LABEL: name: and_s1_vgpr_vgpr + ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; FAST: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: [[AND:%[0-9]+]]:vgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]] + ; GREEDY-LABEL: name: and_s1_vgpr_vgpr + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: [[AND:%[0-9]+]]:vgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s1) = G_TRUNC %0 + %3:_(s1) = G_TRUNC %1 + %4:_(s1) = G_AND %2, %3 +... + +--- +name: and_s1_vcc_vcc +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; FAST-LABEL: name: and_s1_vcc_vcc + ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; FAST: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; FAST: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-LABEL: name: and_s1_vcc_vcc + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; GREEDY: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GREEDY: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s1) = G_ICMP intpred(eq), %0, %2 + %4:_(s1) = G_ICMP intpred(eq), %1, %2 + 
%5:_(s1) = G_AND %3, %4 +... + +--- +name: and_s1_sgpr_vgpr +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; FAST-LABEL: name: and_s1_sgpr_vgpr + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: [[AND:%[0-9]+]]:vgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]] + ; GREEDY-LABEL: name: and_s1_sgpr_vgpr + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: [[AND:%[0-9]+]]:vgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s1) = G_TRUNC %0 + %3:_(s1) = G_TRUNC %1 + %4:_(s1) = G_AND %2, %3 +... + +--- +name: and_s1_vgpr_sgpr +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr0 + ; FAST-LABEL: name: and_s1_vgpr_sgpr + ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: [[AND:%[0-9]+]]:vgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]] + ; GREEDY-LABEL: name: and_s1_vgpr_sgpr + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: [[AND:%[0-9]+]]:vgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s1) = G_TRUNC %0 + %3:_(s1) = G_TRUNC %1 + %4:_(s1) = G_AND %2, %3 +... + +# FIXME: Should just change the result bank of the scc compare. 
+--- +name: and_s1_scc_vcc +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; FAST-LABEL: name: and_s1_scc_vcc + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; FAST: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; FAST: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1) + ; FAST: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[ICMP1]] + ; GREEDY-LABEL: name: and_s1_scc_vcc + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; GREEDY: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GREEDY: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1) + ; GREEDY: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[ICMP1]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s1) = G_ICMP intpred(eq), %0, %2 + %4:_(s1) = G_ICMP intpred(eq), %1, %2 + %5:_(s1) = G_AND %3, %4 +... 
+ +--- +name: and_s1_vcc_scc +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; FAST-LABEL: name: and_s1_vcc_scc + ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; FAST: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; FAST: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; GREEDY-LABEL: name: and_s1_vcc_scc + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; GREEDY: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GREEDY: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s1) = G_ICMP intpred(eq), %0, %2 + %4:_(s1) = G_ICMP intpred(eq), %1, %2 + %5:_(s1) = G_AND %3, %4 +... 
+ + +# Test with a known result bank +--- +name: and_s1_vcc_sgpr_sgpr +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; FAST-LABEL: name: and_s1_vcc_sgpr_sgpr + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; FAST: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]] + ; GREEDY-LABEL: name: and_s1_vcc_sgpr_sgpr + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: [[AND:%[0-9]+]]:scc(s1) = G_AND [[TRUNC]], [[TRUNC1]] + ; GREEDY: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[AND]](s1) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s1) = G_TRUNC %0 + %3:_(s1) = G_TRUNC %1 + %4:vcc(s1) = G_AND %2, %3 +... 
+ +--- +name: and_s1_vcc_vgpr_vgpr +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; FAST-LABEL: name: and_s1_vcc_vgpr_vgpr + ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; FAST: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; FAST: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]] + ; GREEDY-LABEL: name: and_s1_vcc_vgpr_vgpr + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; GREEDY: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s1) = G_TRUNC %0 + %3:_(s1) = G_TRUNC %1 + %4:vcc(s1) = G_AND %2, %3 +... 
+ +--- +name: and_s1_vcc_vgpr_sgpr +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + ; FAST-LABEL: name: and_s1_vcc_vgpr_sgpr + ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; FAST: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]] + ; GREEDY-LABEL: name: and_s1_vcc_vgpr_sgpr + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; GREEDY: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s1) = G_TRUNC %0 + %3:_(s1) = G_TRUNC %1 + %4:vcc(s1) = G_AND %2, %3 +... 
+ +--- +name: and_s1_vcc_sgpr_vgpr +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + ; FAST-LABEL: name: and_s1_vcc_sgpr_vgpr + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; FAST: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]] + ; GREEDY-LABEL: name: and_s1_vcc_sgpr_vgpr + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) + ; GREEDY: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s1) = G_TRUNC %0 + %3:_(s1) = G_TRUNC %1 + %4:vcc(s1) = G_AND %2, %3 +... 
+ +--- +name: and_s1_vgpr_sgpr_sgpr +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; FAST-LABEL: name: and_s1_vgpr_sgpr_sgpr + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: [[AND:%[0-9]+]]:vgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]] + ; GREEDY-LABEL: name: and_s1_vgpr_sgpr_sgpr + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: [[AND:%[0-9]+]]:vgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s1) = G_TRUNC %0 + %3:_(s1) = G_TRUNC %1 + %4:vgpr(s1) = G_AND %2, %3 +... + +--- +name: and_s1_sgpr_sgpr_sgpr +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; FAST-LABEL: name: and_s1_sgpr_sgpr_sgpr + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: [[AND:%[0-9]+]]:sgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]] + ; GREEDY-LABEL: name: and_s1_sgpr_sgpr_sgpr + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: [[AND:%[0-9]+]]:sgpr(s1) = G_AND [[TRUNC]], [[TRUNC1]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s1) = G_TRUNC %0 + %3:_(s1) = G_TRUNC %1 + %4:sgpr(s1) = G_AND %2, %3 +... 
+ +--- +name: and_s1_scc_sgpr_sgpr +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; FAST-LABEL: name: and_s1_scc_sgpr_sgpr + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: [[AND:%[0-9]+]]:scc(s1) = G_AND [[TRUNC]], [[TRUNC1]] + ; GREEDY-LABEL: name: and_s1_scc_sgpr_sgpr + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: [[AND:%[0-9]+]]:scc(s1) = G_AND [[TRUNC]], [[TRUNC1]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s1) = G_TRUNC %0 + %3:_(s1) = G_TRUNC %1 + %4:scc(s1) = G_AND %2, %3 +... + +--- +name: and_s1_scc_scc_scc +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; FAST-LABEL: name: and_s1_scc_scc_scc + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] + ; FAST: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1) + ; FAST: [[COPY3:%[0-9]+]]:sgpr(s1) = COPY [[ICMP1]](s1) + ; FAST: [[AND:%[0-9]+]]:scc(s1) = G_AND [[COPY2]], [[COPY3]] + ; GREEDY-LABEL: name: and_s1_scc_scc_scc + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] + ; GREEDY: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1) + ; GREEDY: 
[[COPY3:%[0-9]+]]:sgpr(s1) = COPY [[ICMP1]](s1) + ; GREEDY: [[AND:%[0-9]+]]:scc(s1) = G_AND [[COPY2]], [[COPY3]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s1) = G_ICMP intpred(ne), %0, %2 + %4:_(s1) = G_ICMP intpred(ne), %1, %2 + %5:scc(s1) = G_AND %3, %4 + +... + +--- +name: and_s1_scc_sgpr_scc +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; FAST-LABEL: name: and_s1_scc_sgpr_scc + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1) + ; FAST: [[AND:%[0-9]+]]:scc(s1) = G_AND [[TRUNC]], [[COPY2]] + ; GREEDY-LABEL: name: and_s1_scc_sgpr_scc + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1) + ; GREEDY: [[AND:%[0-9]+]]:scc(s1) = G_AND [[TRUNC]], [[COPY2]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s1) = G_TRUNC %0 + %4:_(s1) = G_ICMP intpred(ne), %1, %2 + %5:scc(s1) = G_AND %3, %4 + +... 
+ +--- +name: and_s1_scc_scc_sgpr +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; FAST-LABEL: name: and_s1_scc_scc_sgpr + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] + ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1) + ; FAST: [[AND:%[0-9]+]]:scc(s1) = G_AND [[COPY2]], [[TRUNC]] + ; GREEDY-LABEL: name: and_s1_scc_scc_sgpr + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] + ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1) + ; GREEDY: [[AND:%[0-9]+]]:scc(s1) = G_AND [[COPY2]], [[TRUNC]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s1) = G_ICMP intpred(ne), %0, %2 + %4:_(s1) = G_TRUNC %1 + %5:scc(s1) = G_AND %3, %4 + +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir index 1d05930295c45..fbfadad6c55d2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and.mir @@ -67,150 +67,6 @@ body: | %2:_(s32) = G_AND %0, %1 ... 
---- -name: and_i1_scc_scc -legalized: true - -body: | - bb.0: - liveins: $sgpr0, $sgpr1 - ; CHECK-LABEL: name: and_i1_scc_scc - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] - ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1) - ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[ICMP1]](s1) - ; CHECK: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]] - ; CHECK: S_NOP 0, implicit [[AND]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(ne), %0, %2 - %5:_(s1) = G_ICMP intpred(ne), %1, %2 - %6:_(s1) = G_AND %4, %5 - S_NOP 0, implicit %6 -... - ---- -name: and_i1_vcc_vcc -legalized: true - -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - ; CHECK-LABEL: name: and_i1_vcc_vcc - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] - ; CHECK: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] - ; CHECK: S_NOP 0, implicit [[AND]](s1) - %0:_(s32) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(ne), %0, %2 - %5:_(s1) = G_ICMP intpred(ne), %1, %2 - %6:_(s1) = G_AND %4, %5 - S_NOP 0, implicit %6 -... 
- ---- -name: and_i1_scc_vcc -legalized: true - -body: | - bb.0: - liveins: $sgpr0, $vgpr0 - ; CHECK-LABEL: name: and_i1_scc_vcc - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] - ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1) - ; CHECK: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[ICMP1]] - ; CHECK: S_NOP 0, implicit [[AND]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s32) = G_CONSTANT i32 0 - %4:_(s1) = G_ICMP intpred(ne), %0, %2 - %5:_(s1) = G_ICMP intpred(ne), %1, %2 - %6:_(s1) = G_AND %4, %5 - S_NOP 0, implicit %6 -... - ---- -name: and_i1_sgpr_trunc_sgpr_trunc -legalized: true -body: | - bb.0.entry: - liveins: $sgpr0, $sgpr1 - ; CHECK-LABEL: name: and_i1_sgpr_trunc_sgpr_trunc - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) - ; CHECK: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]] - ; CHECK: S_NOP 0, implicit [[AND]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_TRUNC %0 - %3:_(s1) = G_TRUNC %1 - %4:_(s1) = G_AND %2, %3 - S_NOP 0, implicit %4 - -... 
- ---- -name: and_i1_trunc_scc -legalized: true -body: | - bb.0.entry: - liveins: $sgpr0, $sgpr1 - ; CHECK-LABEL: name: and_i1_trunc_scc - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1) - ; CHECK: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[COPY3]] - ; CHECK: S_NOP 0, implicit [[AND]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $sgpr1 - %2:_(s1) = G_TRUNC %0 - %3:_(s1) = G_ICMP intpred(ne), %0, %1 - %4:_(s1) = G_AND %2, %3 - S_NOP 0, implicit %4 -... - ---- -name: and_i1_s_trunc_vcc -legalized: true -body: | - bb.0.entry: - liveins: $sgpr0, $vgpr0 - ; CHECK-LABEL: name: and_i1_s_trunc_vcc - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[COPY2]], [[ICMP]] - ; CHECK: S_NOP 0, implicit [[AND]](s1) - %0:_(s32) = COPY $sgpr0 - %1:_(s32) = COPY $vgpr0 - %2:_(s1) = G_TRUNC %0 - %3:_(s1) = G_ICMP intpred(ne), %0, %1 - %4:_(s1) = G_AND %2, %3 - S_NOP 0, implicit %4 -... - --- name: and_s64_ss legalized: true @@ -737,3 +593,4 @@ body: | %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_AND %0, %1 ... 
+ diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir index aed682fc671e5..96f9e5fcaacd2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-or.mir @@ -80,9 +80,9 @@ body: | ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] ; CHECK: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] - ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1) - ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[ICMP1]](s1) - ; CHECK: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[COPY2]], [[COPY3]] + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1) + ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s1) = COPY [[ICMP1]](s1) + ; CHECK: [[OR:%[0-9]+]]:scc(s1) = G_OR [[COPY2]], [[COPY3]] ; CHECK: S_NOP 0, implicit [[OR]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -153,9 +153,7 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) - ; CHECK: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[COPY2]], [[COPY3]] + ; CHECK: [[OR:%[0-9]+]]:sgpr(s1) = G_OR [[TRUNC]], [[TRUNC1]] ; CHECK: S_NOP 0, implicit [[OR]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -177,9 +175,8 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) ; CHECK: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1) - ; CHECK: [[OR:%[0-9]+]]:vcc(s1) = G_OR [[COPY2]], [[COPY3]] + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1) + ; CHECK: [[OR:%[0-9]+]]:scc(s1) = 
G_OR [[TRUNC]], [[COPY2]] ; CHECK: S_NOP 0, implicit [[OR]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir new file mode 100644 index 0000000000000..d1aa28d445d01 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-phi-s1.mir @@ -0,0 +1,1333 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=FAST %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GREEDY %s + +--- +name: phi_s1_scc_scc_sbranch +legalized: true +tracksRegLiveness: true + +body: | + ; FAST-LABEL: name: phi_s1_scc_scc_sbranch + ; FAST: bb.0: + ; FAST: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2 + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; FAST: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; FAST: G_BRCOND [[ICMP1]](s1), %bb.1 + ; FAST: G_BR %bb.2 + ; FAST: bb.1: + ; FAST: successors: %bb.2(0x80000000) + ; FAST: [[ICMP2:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; FAST: G_BR %bb.2 + ; FAST: bb.2: + ; FAST: [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; FAST: [[COPY3:%[0-9]+]]:scc(s1) = COPY [[PHI]](s1) + ; FAST: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[COPY3]](s1), [[C]], [[COPY]] + ; FAST: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-LABEL: name: phi_s1_scc_scc_sbranch + ; GREEDY: bb.0: + ; GREEDY: 
successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; GREEDY: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; GREEDY: G_BRCOND [[ICMP1]](s1), %bb.1 + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.1: + ; GREEDY: successors: %bb.2(0x80000000) + ; GREEDY: [[ICMP2:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.2: + ; GREEDY: [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + bb.0: + successors: %bb.1, %bb.2 + liveins: $sgpr0, $sgpr1, $sgpr2 + + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = G_CONSTANT i32 0 + %4:_(s1) = G_ICMP intpred(eq), %0, %3 + %5:_(s1) = G_ICMP intpred(eq), %2, %3 + G_BRCOND %5, %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + %6:_(s1) = G_ICMP intpred(eq), %1, %3 + G_BR %bb.2 + + bb.2: + %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 + %8:_(s32) = G_SELECT %7, %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + +... 
+ +--- +name: phi_s1_scc_scc_scc_sbranch +legalized: true +tracksRegLiveness: true + +body: | + ; FAST-LABEL: name: phi_s1_scc_scc_scc_sbranch + ; FAST: bb.0: + ; FAST: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; FAST: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; FAST: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; FAST: G_BRCOND [[ICMP1]](s1), %bb.3 + ; FAST: G_BR %bb.1 + ; FAST: bb.1: + ; FAST: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; FAST: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 99 + ; FAST: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 888 + ; FAST: [[ICMP2:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; FAST: [[ICMP3:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] + ; FAST: G_BRCOND [[ICMP3]](s1), %bb.3 + ; FAST: G_BR %bb.2 + ; FAST: bb.2: + ; FAST: successors: %bb.3(0x80000000) + ; FAST: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 123 + ; FAST: [[ICMP4:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] + ; FAST: G_BR %bb.3 + ; FAST: bb.3: + ; FAST: [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1, [[ICMP4]](s1), %bb.2 + ; FAST: [[COPY4:%[0-9]+]]:scc(s1) = COPY [[PHI]](s1) + ; FAST: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY]], [[COPY1]] + ; FAST: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-LABEL: name: phi_s1_scc_scc_scc_sbranch + ; GREEDY: bb.0: + ; GREEDY: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 
+ ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; GREEDY: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[COPY3]] + ; GREEDY: G_BRCOND [[ICMP1]](s1), %bb.3 + ; GREEDY: G_BR %bb.1 + ; GREEDY: bb.1: + ; GREEDY: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 99 + ; GREEDY: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 888 + ; GREEDY: [[ICMP2:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C1]] + ; GREEDY: [[ICMP3:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C2]] + ; GREEDY: G_BRCOND [[ICMP3]](s1), %bb.3 + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.2: + ; GREEDY: successors: %bb.3(0x80000000) + ; GREEDY: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 123 + ; GREEDY: [[ICMP4:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C3]] + ; GREEDY: G_BR %bb.3 + ; GREEDY: bb.3: + ; GREEDY: [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1, [[ICMP4]](s1), %bb.2 + ; GREEDY: [[COPY4:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY4]](s1), [[COPY5]], [[COPY6]] + ; GREEDY: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + bb.0: + successors: %bb.1, %bb.3 + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = COPY $sgpr3 + %4:_(s32) = G_CONSTANT i32 0 + %5:_(s1) = G_ICMP intpred(eq), %0, %3 + %6:_(s1) = G_ICMP intpred(eq), %2, %3 + G_BRCOND %6, %bb.3 + G_BR %bb.1 + + bb.1: + successors: %bb.2, %bb.3 + + %7:_(s32) = G_CONSTANT i32 99 + %8:_(s32) = G_CONSTANT i32 888 + %9:_(s1) = G_ICMP intpred(eq), %1, %7 + %10:_(s1) = 
G_ICMP intpred(eq), %1, %8 + G_BRCOND %10, %bb.3 + G_BR %bb.2 + + bb.2: + successors: %bb.3 + + %11:_(s32) = G_CONSTANT i32 123 + %12:_(s1) = G_ICMP intpred(eq), %2, %11 + G_BR %bb.3 + + bb.3: + %13:_(s1) = G_PHI %5, %bb.0, %9, %bb.1, %12, %bb.2 + %14:_(s32) = G_SELECT %13, %0, %1 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %14 + +... + +--- +name: phi_s1_scc_vcc_sbranch +legalized: true +tracksRegLiveness: true + +body: | + ; FAST-LABEL: name: phi_s1_scc_vcc_sbranch + ; FAST: bb.0: + ; FAST: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; FAST: liveins: $sgpr0, $sgpr1, $vgpr0 + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; FAST: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; FAST: G_BRCOND [[ICMP1]](s1), %bb.1 + ; FAST: G_BR %bb.2 + ; FAST: bb.1: + ; FAST: successors: %bb.2(0x80000000) + ; FAST: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; FAST: G_BR %bb.2 + ; FAST: bb.2: + ; FAST: [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; FAST: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; FAST: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; FAST: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; FAST: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-LABEL: name: phi_s1_scc_vcc_sbranch + ; GREEDY: bb.0: + ; GREEDY: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GREEDY: liveins: $sgpr0, $sgpr1, $vgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: 
[[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; GREEDY: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; GREEDY: G_BRCOND [[ICMP1]](s1), %bb.1 + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.1: + ; GREEDY: successors: %bb.2(0x80000000) + ; GREEDY: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.2: + ; GREEDY: [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + bb.0: + successors: %bb.1, %bb.2 + liveins: $sgpr0, $sgpr1, $vgpr0 + + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $sgpr1 + %3:_(s32) = G_CONSTANT i32 0 + %4:_(s1) = G_ICMP intpred(eq), %0, %3 + %5:_(s1) = G_ICMP intpred(eq), %2, %3 + G_BRCOND %5, %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + %6:_(s1) = G_ICMP intpred(eq), %1, %3 + G_BR %bb.2 + + bb.2: + %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 + %8:_(s32) = G_SELECT %7, %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + +... 
+ +--- +name: phi_s1_vcc_scc_sbranch +legalized: true +tracksRegLiveness: true + +body: | + ; FAST-LABEL: name: phi_s1_vcc_scc_sbranch + ; FAST: bb.0: + ; FAST: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; FAST: liveins: $vgpr0, $sgpr0, $sgpr1 + ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; FAST: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; FAST: G_BRCOND [[ICMP1]](s1), %bb.1 + ; FAST: G_BR %bb.2 + ; FAST: bb.1: + ; FAST: successors: %bb.2(0x80000000) + ; FAST: [[ICMP2:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; FAST: G_BR %bb.2 + ; FAST: bb.2: + ; FAST: [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; FAST: [[COPY3:%[0-9]+]]:scc(s1) = COPY [[PHI]](s1) + ; FAST: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[COPY3]](s1), [[C]], [[COPY1]] + ; FAST: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-LABEL: name: phi_s1_vcc_scc_sbranch + ; GREEDY: bb.0: + ; GREEDY: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; GREEDY: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; GREEDY: G_BRCOND [[ICMP1]](s1), %bb.1 + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.1: + ; GREEDY: successors: %bb.2(0x80000000) + ; GREEDY: [[ICMP2:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.2: + ; GREEDY: [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), 
%bb.0, [[ICMP2]](s1), %bb.1 + ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + bb.0: + successors: %bb.1, %bb.2 + liveins: $vgpr0, $sgpr0, $sgpr1 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 + %3:_(s32) = G_CONSTANT i32 0 + %4:_(s1) = G_ICMP intpred(eq), %0, %3 + %5:_(s1) = G_ICMP intpred(eq), %2, %3 + G_BRCOND %5, %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + %6:_(s1) = G_ICMP intpred(eq), %1, %3 + G_BR %bb.2 + + bb.2: + %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 + %8:_(s32) = G_SELECT %7, %3, %1 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + +... + +--- +name: phi_s1_vcc_vcc_sbranch +legalized: true +tracksRegLiveness: true + +body: | + ; FAST-LABEL: name: phi_s1_vcc_vcc_sbranch + ; FAST: bb.0: + ; FAST: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; FAST: liveins: $vgpr0, $vgpr1, $sgpr0 + ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; FAST: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; FAST: G_BRCOND [[ICMP1]](s1), %bb.1 + ; FAST: G_BR %bb.2 + ; FAST: bb.1: + ; FAST: successors: %bb.2(0x80000000) + ; FAST: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; FAST: G_BR %bb.2 + ; FAST: bb.2: + ; FAST: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; FAST: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; FAST: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY3]], [[COPY]] + ; FAST: S_SETPC_B64 undef 
$sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-LABEL: name: phi_s1_vcc_vcc_sbranch + ; GREEDY: bb.0: + ; GREEDY: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GREEDY: liveins: $vgpr0, $vgpr1, $sgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; GREEDY: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; GREEDY: G_BRCOND [[ICMP1]](s1), %bb.1 + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.1: + ; GREEDY: successors: %bb.2(0x80000000) + ; GREEDY: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.2: + ; GREEDY: [[PHI:%[0-9]+]]:vcc(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[PHI]](s1), [[COPY3]], [[COPY]] + ; GREEDY: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + bb.0: + successors: %bb.1, %bb.2 + liveins: $vgpr0, $vgpr1, $sgpr0 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $sgpr0 + %3:_(s32) = G_CONSTANT i32 0 + %4:_(s1) = G_ICMP intpred(eq), %0, %3 + %5:_(s1) = G_ICMP intpred(eq), %2, %3 + G_BRCOND %5, %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + %6:_(s1) = G_ICMP intpred(eq), %1, %3 + G_BR %bb.2 + + bb.2: + %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 + %8:_(s32) = G_SELECT %7, %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + +... 
+ +--- +name: phi_s1_s_scc_sbranch +legalized: true +tracksRegLiveness: true + +body: | + ; FAST-LABEL: name: phi_s1_s_scc_sbranch + ; FAST: bb.0: + ; FAST: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2 + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; FAST: G_BRCOND [[ICMP]](s1), %bb.1 + ; FAST: G_BR %bb.2 + ; FAST: bb.1: + ; FAST: successors: %bb.2(0x80000000) + ; FAST: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; FAST: G_BR %bb.2 + ; FAST: bb.2: + ; FAST: [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[ICMP1]](s1), %bb.1 + ; FAST: [[COPY3:%[0-9]+]]:scc(s1) = COPY [[PHI]](s1) + ; FAST: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[COPY3]](s1), [[C]], [[COPY]] + ; FAST: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-LABEL: name: phi_s1_s_scc_sbranch + ; GREEDY: bb.0: + ; GREEDY: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; GREEDY: G_BRCOND [[ICMP]](s1), %bb.1 + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.1: + ; GREEDY: successors: %bb.2(0x80000000) + ; GREEDY: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.2: + ; GREEDY: [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[ICMP1]](s1), %bb.1 + ; GREEDY: 
[[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + bb.0: + successors: %bb.1, %bb.2 + liveins: $sgpr0, $sgpr1, $sgpr2 + + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = G_CONSTANT i32 0 + %4:_(s1) = G_TRUNC %0 + %5:_(s1) = G_ICMP intpred(eq), %2, %3 + G_BRCOND %5, %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + %6:_(s1) = G_ICMP intpred(eq), %1, %3 + G_BR %bb.2 + + bb.2: + %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 + %8:_(s32) = G_SELECT %7, %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + +... + +--- +name: phi_s1_scc_s_sbranch +legalized: true +tracksRegLiveness: true + +body: | + ; FAST-LABEL: name: phi_s1_scc_s_sbranch + ; FAST: bb.0: + ; FAST: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2 + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; FAST: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; FAST: G_BRCOND [[ICMP1]](s1), %bb.1 + ; FAST: G_BR %bb.2 + ; FAST: bb.1: + ; FAST: successors: %bb.2(0x80000000) + ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: G_BR %bb.2 + ; FAST: bb.2: + ; FAST: [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[TRUNC]](s1), %bb.1 + ; FAST: [[COPY3:%[0-9]+]]:scc(s1) = COPY [[PHI]](s1) + ; FAST: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[COPY3]](s1), [[C]], [[COPY]] + ; FAST: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-LABEL: name: phi_s1_scc_s_sbranch + ; 
GREEDY: bb.0: + ; GREEDY: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GREEDY: liveins: $sgpr0, $sgpr1, $sgpr2 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; GREEDY: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; GREEDY: G_BRCOND [[ICMP1]](s1), %bb.1 + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.1: + ; GREEDY: successors: %bb.2(0x80000000) + ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.2: + ; GREEDY: [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[TRUNC]](s1), %bb.1 + ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + bb.0: + successors: %bb.1, %bb.2 + liveins: $sgpr0, $sgpr1, $sgpr2 + + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = G_CONSTANT i32 0 + %4:_(s1) = G_ICMP intpred(eq), %0, %3 + %5:_(s1) = G_ICMP intpred(eq), %2, %3 + G_BRCOND %5, %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + %6:_(s1) = G_TRUNC %1 + G_BR %bb.2 + + bb.2: + %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 + %8:_(s32) = G_SELECT %7, %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + +... 
+ +--- +name: phi_s1_scc_v_sbranch +legalized: true +tracksRegLiveness: true + +body: | + ; FAST-LABEL: name: phi_s1_scc_v_sbranch + ; FAST: bb.0: + ; FAST: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; FAST: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2 + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; FAST: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; FAST: G_BRCOND [[ICMP1]](s1), %bb.1 + ; FAST: G_BR %bb.2 + ; FAST: bb.1: + ; FAST: successors: %bb.2(0x80000000) + ; FAST: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: G_BR %bb.2 + ; FAST: bb.2: + ; FAST: [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[TRUNC]](s1), %bb.1 + ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; FAST: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; FAST: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; FAST: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; FAST: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-LABEL: name: phi_s1_scc_v_sbranch + ; GREEDY: bb.0: + ; GREEDY: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; GREEDY: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; GREEDY: G_BRCOND [[ICMP1]](s1), %bb.1 + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.1: + ; GREEDY: successors: %bb.2(0x80000000) + ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) + ; 
GREEDY: G_BR %bb.2 + ; GREEDY: bb.2: + ; GREEDY: [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[TRUNC]](s1), %bb.1 + ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + bb.0: + successors: %bb.1, %bb.2 + liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2 + + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = G_CONSTANT i32 0 + %4:_(s1) = G_ICMP intpred(eq), %0, %3 + %5:_(s1) = G_ICMP intpred(eq), %2, %3 + G_BRCOND %5, %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + %6:_(s1) = G_TRUNC %1 + G_BR %bb.2 + + bb.2: + %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 + %8:_(s32) = G_SELECT %7, %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + +... + +--- +name: phi_s1_v_scc_sbranch +legalized: true +tracksRegLiveness: true + +body: | + ; FAST-LABEL: name: phi_s1_v_scc_sbranch + ; FAST: bb.0: + ; FAST: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; FAST: liveins: $vgpr0, $sgpr0, $sgpr1 + ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; FAST: G_BRCOND [[ICMP]](s1), %bb.1 + ; FAST: G_BR %bb.2 + ; FAST: bb.1: + ; FAST: successors: %bb.2(0x80000000) + ; FAST: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; FAST: G_BR %bb.2 + ; FAST: bb.2: + ; FAST: [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[ICMP1]](s1), %bb.1 + ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; FAST: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; 
FAST: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-LABEL: name: phi_s1_v_scc_sbranch + ; GREEDY: bb.0: + ; GREEDY: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; GREEDY: G_BRCOND [[ICMP]](s1), %bb.1 + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.1: + ; GREEDY: successors: %bb.2(0x80000000) + ; GREEDY: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.2: + ; GREEDY: [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[ICMP1]](s1), %bb.1 + ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + bb.0: + successors: %bb.1, %bb.2 + liveins: $vgpr0, $sgpr0, $sgpr1 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 + %3:_(s32) = G_CONSTANT i32 0 + %4:_(s1) = G_TRUNC %0 + %5:_(s1) = G_ICMP intpred(eq), %2, %3 + G_BRCOND %5, %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + %6:_(s1) = G_ICMP intpred(eq), %1, %3 + G_BR %bb.2 + + bb.2: + %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 + %8:_(s32) = G_SELECT %7, %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + +... 
+ +--- +name: phi_s1_vcc_s_sbranch +legalized: true +tracksRegLiveness: true + +body: | + ; FAST-LABEL: name: phi_s1_vcc_s_sbranch + ; FAST: bb.0: + ; FAST: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; FAST: liveins: $vgpr0, $sgpr0, $sgpr1 + ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; FAST: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; FAST: G_BRCOND [[ICMP1]](s1), %bb.1 + ; FAST: G_BR %bb.2 + ; FAST: bb.1: + ; FAST: successors: %bb.2(0x80000000) + ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: G_BR %bb.2 + ; FAST: bb.2: + ; FAST: [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[TRUNC]](s1), %bb.1 + ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; FAST: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; FAST: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-LABEL: name: phi_s1_vcc_s_sbranch + ; GREEDY: bb.0: + ; GREEDY: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; GREEDY: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; GREEDY: G_BRCOND [[ICMP1]](s1), %bb.1 + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.1: + ; GREEDY: successors: %bb.2(0x80000000) + ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.2: + ; GREEDY: [[PHI:%[0-9]+]]:sgpr(s1) = 
G_PHI [[ICMP]](s1), %bb.0, [[TRUNC]](s1), %bb.1 + ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + bb.0: + successors: %bb.1, %bb.2 + liveins: $vgpr0, $sgpr0, $sgpr1 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 + %3:_(s32) = G_CONSTANT i32 0 + %4:_(s1) = G_ICMP intpred(eq), %0, %3 + %5:_(s1) = G_ICMP intpred(eq), %2, %3 + G_BRCOND %5, %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + %6:_(s1) = G_TRUNC %1 + G_BR %bb.2 + + bb.2: + %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 + %8:_(s32) = G_SELECT %7, %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + +... + +--- +name: phi_s1_s_vcc_sbranch +legalized: true +tracksRegLiveness: true + +body: | + ; FAST-LABEL: name: phi_s1_s_vcc_sbranch + ; FAST: bb.0: + ; FAST: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; FAST: liveins: $vgpr0, $sgpr0, $sgpr1 + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; FAST: G_BRCOND [[ICMP]](s1), %bb.1 + ; FAST: G_BR %bb.2 + ; FAST: bb.1: + ; FAST: successors: %bb.2(0x80000000) + ; FAST: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; FAST: G_BR %bb.2 + ; FAST: bb.2: + ; FAST: [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[ICMP1]](s1), %bb.1 + ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; FAST: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; FAST: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; FAST: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; 
FAST: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-LABEL: name: phi_s1_s_vcc_sbranch + ; GREEDY: bb.0: + ; GREEDY: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; GREEDY: G_BRCOND [[ICMP]](s1), %bb.1 + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.1: + ; GREEDY: successors: %bb.2(0x80000000) + ; GREEDY: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.2: + ; GREEDY: [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[ICMP1]](s1), %bb.1 + ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + bb.0: + successors: %bb.1, %bb.2 + liveins: $vgpr0, $sgpr0, $sgpr1 + + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $sgpr1 + %3:_(s32) = G_CONSTANT i32 0 + %4:_(s1) = G_TRUNC %0 + %5:_(s1) = G_ICMP intpred(eq), %2, %3 + G_BRCOND %5, %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + %6:_(s1) = G_ICMP intpred(eq), %1, %3 + G_BR %bb.2 + + bb.2: + %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 + %8:_(s32) = G_SELECT %7, %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + +... 
+ +--- +name: phi_s1_vcc_v_sbranch +legalized: true +tracksRegLiveness: true + +body: | + ; FAST-LABEL: name: phi_s1_vcc_v_sbranch + ; FAST: bb.0: + ; FAST: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; FAST: liveins: $vgpr0, $vgpr1, $sgpr0 + ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; FAST: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; FAST: G_BRCOND [[ICMP1]](s1), %bb.1 + ; FAST: G_BR %bb.2 + ; FAST: bb.1: + ; FAST: successors: %bb.2(0x80000000) + ; FAST: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: G_BR %bb.2 + ; FAST: bb.2: + ; FAST: [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[ICMP]](s1), %bb.0, [[TRUNC]](s1), %bb.1 + ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; FAST: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; FAST: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-LABEL: name: phi_s1_vcc_v_sbranch + ; GREEDY: bb.0: + ; GREEDY: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GREEDY: liveins: $vgpr0, $vgpr1, $sgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; GREEDY: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; GREEDY: G_BRCOND [[ICMP1]](s1), %bb.1 + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.1: + ; GREEDY: successors: %bb.2(0x80000000) + ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.2: + ; GREEDY: [[PHI:%[0-9]+]]:vgpr(s1) = 
G_PHI [[ICMP]](s1), %bb.0, [[TRUNC]](s1), %bb.1 + ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + bb.0: + successors: %bb.1, %bb.2 + liveins: $vgpr0, $vgpr1, $sgpr0 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $sgpr0 + %3:_(s32) = G_CONSTANT i32 0 + %4:_(s1) = G_ICMP intpred(eq), %0, %3 + %5:_(s1) = G_ICMP intpred(eq), %2, %3 + G_BRCOND %5, %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + %6:_(s1) = G_TRUNC %1 + G_BR %bb.2 + + bb.2: + %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 + %8:_(s32) = G_SELECT %7, %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + +... + +--- +name: phi_s1_v_vcc_sbranch +legalized: true +tracksRegLiveness: true + +body: | + ; FAST-LABEL: name: phi_s1_v_vcc_sbranch + ; FAST: bb.0: + ; FAST: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; FAST: liveins: $vgpr0, $vgpr1, $sgpr0 + ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; FAST: G_BRCOND [[ICMP]](s1), %bb.1 + ; FAST: G_BR %bb.2 + ; FAST: bb.1: + ; FAST: successors: %bb.2(0x80000000) + ; FAST: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; FAST: G_BR %bb.2 + ; FAST: bb.2: + ; FAST: [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[ICMP1]](s1), %bb.1 + ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; FAST: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; FAST: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST: S_SETPC_B64 undef $sgpr30_sgpr31, implicit 
[[SELECT]](s32) + ; GREEDY-LABEL: name: phi_s1_v_vcc_sbranch + ; GREEDY: bb.0: + ; GREEDY: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GREEDY: liveins: $vgpr0, $vgpr1, $sgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; GREEDY: G_BRCOND [[ICMP]](s1), %bb.1 + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.1: + ; GREEDY: successors: %bb.2(0x80000000) + ; GREEDY: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.2: + ; GREEDY: [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[ICMP1]](s1), %bb.1 + ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + bb.0: + successors: %bb.1, %bb.2 + liveins: $vgpr0, $vgpr1, $sgpr0 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $sgpr0 + %3:_(s32) = G_CONSTANT i32 0 + %4:_(s1) = G_TRUNC %0 + %5:_(s1) = G_ICMP intpred(eq), %2, %3 + G_BRCOND %5, %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + %6:_(s1) = G_ICMP intpred(eq), %1, %3 + G_BR %bb.2 + + bb.2: + %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 + %8:_(s32) = G_SELECT %7, %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + +... 
+ +--- +name: phi_s1_v_s_sbranch +legalized: true +tracksRegLiveness: true + +body: | + ; FAST-LABEL: name: phi_s1_v_s_sbranch + ; FAST: bb.0: + ; FAST: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; FAST: liveins: $vgpr0, $sgpr0, $sgpr1 + ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; FAST: G_BRCOND [[ICMP]](s1), %bb.1 + ; FAST: G_BR %bb.2 + ; FAST: bb.1: + ; FAST: successors: %bb.2(0x80000000) + ; FAST: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: G_BR %bb.2 + ; FAST: bb.2: + ; FAST: [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[TRUNC1]](s1), %bb.1 + ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; FAST: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; FAST: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-LABEL: name: phi_s1_v_s_sbranch + ; GREEDY: bb.0: + ; GREEDY: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; GREEDY: G_BRCOND [[ICMP]](s1), %bb.1 + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.1: + ; GREEDY: successors: %bb.2(0x80000000) + ; GREEDY: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.2: + ; GREEDY: [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[TRUNC1]](s1), 
%bb.1 + ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + bb.0: + successors: %bb.1, %bb.2 + liveins: $vgpr0, $sgpr0, $sgpr1 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 + %3:_(s32) = G_CONSTANT i32 0 + %4:_(s1) = G_TRUNC %0 + %5:_(s1) = G_ICMP intpred(eq), %2, %3 + G_BRCOND %5, %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + %6:_(s1) = G_TRUNC %1 + G_BR %bb.2 + + bb.2: + %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 + %8:_(s32) = G_SELECT %7, %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + +... + +--- +name: phi_s1_s_v_sbranch +legalized: true +tracksRegLiveness: true + +body: | + ; FAST-LABEL: name: phi_s1_s_v_sbranch + ; FAST: bb.0: + ; FAST: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; FAST: liveins: $vgpr0, $sgpr0, $sgpr1 + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; FAST: G_BRCOND [[ICMP]](s1), %bb.1 + ; FAST: G_BR %bb.2 + ; FAST: bb.1: + ; FAST: successors: %bb.2(0x80000000) + ; FAST: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: G_BR %bb.2 + ; FAST: bb.2: + ; FAST: [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[TRUNC1]](s1), %bb.1 + ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; FAST: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; FAST: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; FAST: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; FAST: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; 
GREEDY-LABEL: name: phi_s1_s_v_sbranch + ; GREEDY: bb.0: + ; GREEDY: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GREEDY: liveins: $vgpr0, $sgpr0, $sgpr1 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; GREEDY: G_BRCOND [[ICMP]](s1), %bb.1 + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.1: + ; GREEDY: successors: %bb.2(0x80000000) + ; GREEDY: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.2: + ; GREEDY: [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[TRUNC1]](s1), %bb.1 + ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + bb.0: + successors: %bb.1, %bb.2 + liveins: $vgpr0, $sgpr0, $sgpr1 + + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $sgpr1 + %3:_(s32) = G_CONSTANT i32 0 + %4:_(s1) = G_TRUNC %0 + %5:_(s1) = G_ICMP intpred(eq), %2, %3 + G_BRCOND %5, %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + %6:_(s1) = G_TRUNC %1 + G_BR %bb.2 + + bb.2: + %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 + %8:_(s32) = G_SELECT %7, %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + +... 
+ +--- +name: phi_s1_v_v_sbranch +legalized: true +tracksRegLiveness: true + +body: | + ; FAST-LABEL: name: phi_s1_v_v_sbranch + ; FAST: bb.0: + ; FAST: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; FAST: liveins: $vgpr0, $vgpr1, $sgpr0 + ; FAST: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; FAST: G_BRCOND [[ICMP]](s1), %bb.1 + ; FAST: G_BR %bb.2 + ; FAST: bb.1: + ; FAST: successors: %bb.2(0x80000000) + ; FAST: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: G_BR %bb.2 + ; FAST: bb.2: + ; FAST: [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[TRUNC1]](s1), %bb.1 + ; FAST: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; FAST: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; FAST: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; FAST: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-LABEL: name: phi_s1_v_v_sbranch + ; GREEDY: bb.0: + ; GREEDY: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GREEDY: liveins: $vgpr0, $vgpr1, $sgpr0 + ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; GREEDY: G_BRCOND [[ICMP]](s1), %bb.1 + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.1: + ; GREEDY: successors: %bb.2(0x80000000) + ; GREEDY: [[TRUNC1:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.2: + ; GREEDY: [[PHI:%[0-9]+]]:vgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[TRUNC1]](s1), 
%bb.1 + ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY]] + ; GREEDY: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + bb.0: + successors: %bb.1, %bb.2 + liveins: $vgpr0, $vgpr1, $sgpr0 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $sgpr0 + %3:_(s32) = G_CONSTANT i32 0 + %4:_(s1) = G_TRUNC %0 + %5:_(s1) = G_ICMP intpred(eq), %2, %3 + G_BRCOND %5, %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + %6:_(s1) = G_TRUNC %1 + G_BR %bb.2 + + bb.2: + %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 + %8:_(s32) = G_SELECT %7, %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + +... + +--- +name: phi_s1_s_s_sbranch +legalized: true +tracksRegLiveness: true + +body: | + ; FAST-LABEL: name: phi_s1_s_s_sbranch + ; FAST: bb.0: + ; FAST: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; FAST: liveins: $sgpr0, $sgpr1, $sgpr2 + ; FAST: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; FAST: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; FAST: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; FAST: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; FAST: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; FAST: G_BRCOND [[ICMP]](s1), %bb.1 + ; FAST: G_BR %bb.2 + ; FAST: bb.1: + ; FAST: successors: %bb.2(0x80000000) + ; FAST: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; FAST: G_BR %bb.2 + ; FAST: bb.2: + ; FAST: [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[TRUNC1]](s1), %bb.1 + ; FAST: [[COPY3:%[0-9]+]]:scc(s1) = COPY [[PHI]](s1) + ; FAST: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[COPY3]](s1), [[C]], [[COPY]] + ; FAST: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + ; GREEDY-LABEL: name: phi_s1_s_s_sbranch + ; GREEDY: bb.0: + ; GREEDY: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GREEDY: 
liveins: $sgpr0, $sgpr1, $sgpr2 + ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) + ; GREEDY: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; GREEDY: G_BRCOND [[ICMP]](s1), %bb.1 + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.1: + ; GREEDY: successors: %bb.2(0x80000000) + ; GREEDY: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) + ; GREEDY: G_BR %bb.2 + ; GREEDY: bb.2: + ; GREEDY: [[PHI:%[0-9]+]]:sgpr(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[TRUNC1]](s1), %bb.1 + ; GREEDY: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[PHI]](s1) + ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; GREEDY: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY3]](s1), [[COPY4]], [[COPY5]] + ; GREEDY: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[SELECT]](s32) + bb.0: + successors: %bb.1, %bb.2 + liveins: $sgpr0, $sgpr1, $sgpr2 + + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s32) = G_CONSTANT i32 0 + %4:_(s1) = G_TRUNC %0 + %5:_(s1) = G_ICMP intpred(eq), %2, %3 + G_BRCOND %5, %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + %6:_(s1) = G_TRUNC %1 + G_BR %bb.2 + + bb.2: + %7:_(s1) = G_PHI %4, %bb.0, %6, %bb.1 + %8:_(s32) = G_SELECT %7, %3, %0 + S_SETPC_B64 undef $sgpr30_sgpr31, implicit %8 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir index c666ca1be04d8..389e42dcd6bc8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-xor.mir @@ -80,9 +80,9 @@ body: | ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] ; CHECK: [[ICMP1:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] - ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1) - ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[ICMP1]](s1) - ; CHECK: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[COPY2]], [[COPY3]] + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1) + ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s1) = COPY [[ICMP1]](s1) + ; CHECK: [[XOR:%[0-9]+]]:scc(s1) = G_XOR [[COPY2]], [[COPY3]] ; CHECK: S_NOP 0, implicit [[XOR]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -153,9 +153,7 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) ; CHECK: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[TRUNC1]](s1) - ; CHECK: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[COPY2]], [[COPY3]] + ; CHECK: [[XOR:%[0-9]+]]:sgpr(s1) = G_XOR [[TRUNC]], [[TRUNC1]] ; CHECK: S_NOP 0, implicit [[XOR]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 @@ -177,9 +175,8 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[COPY]](s32) ; CHECK: [[ICMP:%[0-9]+]]:scc(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; CHECK: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) - ; CHECK: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[ICMP]](s1) - ; CHECK: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[COPY2]], [[COPY3]] + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s1) = COPY [[ICMP]](s1) + ; CHECK: 
[[XOR:%[0-9]+]]:scc(s1) = G_XOR [[TRUNC]], [[COPY2]] ; CHECK: S_NOP 0, implicit [[XOR]](s1) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 From 39d888c1e42a62e02adfb5fe8da54cafe7e62b0b Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Mon, 15 Jul 2019 20:22:27 +0000 Subject: [PATCH 158/451] [TSan] Improve handling of stack pointer mangling in {set,long}jmp, pt.9 Switch over to computing the xor key in C, instead of assembly for Linux/AArch64. llvm-svn: 366126 --- .../lib/tsan/rtl/tsan_platform_linux.cc | 27 ++++--------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc index 0f23da0e877fe..db85d547b0588 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc @@ -302,26 +302,8 @@ void InitializePlatform() { CHECK_NE(personality(old_personality | ADDR_NO_RANDOMIZE), -1); reexec = true; } - // Initialize the guard pointer used in {sig}{set,long}jump. - longjmp_xor_key = InitializeGuardPtr(); - // uptr old_value = longjmp_xor_key; - // InitializeLongjmpXorKey(); - // CHECK_EQ(longjmp_xor_key, old_value); - // If the above check fails for you, please contact me (jlettner@apple.com) - // and let me know the values of the two differing keys. Please also set a - // breakpoint on `InitializeGuardPtr` and `InitializeLongjmpXorKey` and tell - // me the stack pointer (SP) values that go into the XOR operation (where we - // derive the key): - // - // InitializeLongjmpXorKey: - // uptr sp = (uptr)__builtin_frame_address(0); - // - // InitializeGuardPtr (in tsan_rtl_aarch64.S): - // mov x0, sp - // ... - // eor x0, x0, x1 - // - // Then feel free to comment out the call to `InitializeLongjmpXorKey`. + // Initialize the xor key used in {sig}{set,long}jump. 
+ InitializeLongjmpXorKey(); #endif if (reexec) ReExec(); @@ -437,9 +419,10 @@ static void InitializeLongjmpXorKey() { jmp_buf env; REAL(_setjmp)(env); - // 2. Retrieve mangled/vanilla SP. + // 2. Retrieve vanilla/mangled SP. + uptr sp; + asm("mov %0, %%sp" : "=r" (sp)); uptr mangled_sp = ((uptr *)&env)[LONG_JMP_SP_ENV_SLOT]; - uptr sp = (uptr)__builtin_frame_address(0); // 3. xor SPs to obtain key. longjmp_xor_key = mangled_sp ^ sp; From 794346460afa212690d0f006023a0e770884f0bb Mon Sep 17 00:00:00 2001 From: Bob Haarman Date: Mon, 15 Jul 2019 20:51:44 +0000 Subject: [PATCH 159/451] [clang] allow -fthinlto-index= without -x ir Summary: Previously, passing -fthinlto-index= to clang required that bitcode files be explicitly marked by -x ir. This change makes us detect files with object file extensions as bitcode files when -fthinlto-index= is present, so that explicitly marking them is no longer necessary. Explicitly specifying -x ir is still accepted and continues to be part of the test case to ensure we continue to support it. 
Reviewers: tejohnson, rnk, pcc Subscribers: mehdi_amini, steven_wu, dexonsmith, arphaman, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D64610 llvm-svn: 366127 --- clang/include/clang/Basic/DiagnosticDriverKinds.td | 2 ++ clang/lib/Driver/Driver.cpp | 6 ++++++ clang/lib/Driver/ToolChains/Clang.cpp | 3 +-- clang/test/Driver/thinlto_backend.c | 13 ++++++++++--- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index dd86ca49b7a28..eab453ee20ec9 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -159,6 +159,8 @@ def err_drv_cannot_read_config_file : Error< "cannot read configuration file '%0'">; def err_drv_nested_config_file: Error< "option '--config' is not allowed inside configuration file">; +def err_drv_arg_requires_bitcode_input: Error< + "option '%0' requires input to be LLVM bitcode">; def err_target_unsupported_arch : Error<"the target architecture '%0' is not supported by the target '%1'">; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 22f26d90bd7dd..087335562d0ae 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -2119,6 +2119,12 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args, Diag(clang::diag::warn_drv_treating_input_as_cxx) << getTypeName(OldTy) << getTypeName(Ty); } + + // If running with -fthinlto-index=, extensions that normally identify + // native object files actually identify LLVM bitcode files. 
+ if (Args.hasArgNoClaim(options::OPT_fthinlto_index_EQ) && + Ty == types::TY_Object) + Ty = types::TY_LLVM_BC; } // -ObjC and -ObjC++ override the default language, but only for "source diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 6a83e1a480a5d..6c3074b69e9f9 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3647,8 +3647,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (const Arg *A = Args.getLastArg(options::OPT_fthinlto_index_EQ)) { if (!types::isLLVMIR(Input.getType())) - D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args) - << "-x ir"; + D.Diag(diag::err_drv_arg_requires_bitcode_input) << A->getAsString(Args); Args.AddLastArg(CmdArgs, options::OPT_fthinlto_index_EQ); } diff --git a/clang/test/Driver/thinlto_backend.c b/clang/test/Driver/thinlto_backend.c index b2b45f57088ab..7a3d6ede7c0da 100644 --- a/clang/test/Driver/thinlto_backend.c +++ b/clang/test/Driver/thinlto_backend.c @@ -2,8 +2,14 @@ // RUN: llvm-lto -thinlto -o %t %t.o // -fthinlto_index should be passed to cc1 -// RUN: %clang -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -### 2>&1 | FileCheck %s -check-prefix=CHECK-THINLTOBE-ACTION +// RUN: %clang -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -### \ +// RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-THINLTOBE-ACTION // CHECK-THINLTOBE-ACTION: -fthinlto-index= +// CHECK-THINLTOBE-ACTION-SAME: {{"?-x"? "?ir"?}} + +// Check that this also works without -x ir. 
+// RUN: %clang -O2 -o %t1.o %t.o -c -fthinlto-index=%t.thinlto.bc -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=CHECK-THINLTOBE-ACTION // -save-temps should be passed to cc1 // RUN: %clang -O2 -o %t1.o -x ir %t.o -c -fthinlto-index=%t.thinlto.bc -save-temps -### 2>&1 | FileCheck %s -check-prefix=CHECK-SAVE-TEMPS -check-prefix=CHECK-SAVE-TEMPS-CWD @@ -15,5 +21,6 @@ // CHECK-SAVE-TEMPS-NOT: -emit-llvm-bc // Ensure clang driver gives the expected error for incorrect input type -// RUN: not %clang -O2 -o %t1.o %s -c -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s -check-prefix=CHECK-WARNING -// CHECK-WARNING: error: invalid argument '-fthinlto-index={{.*}}' only allowed with '-x ir' +// RUN: not %clang -O2 -o %t1.o %s -c -fthinlto-index=%t.thinlto.bc 2>&1 \ +// RUN: | FileCheck %s -check-prefix=CHECK-WARNING +// CHECK-WARNING: error: option '-fthinlto-index={{.*}}' requires input to be LLVM bitcode From d00d8578016520a4113c6930a2a6053785e66eac Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 15 Jul 2019 20:59:42 +0000 Subject: [PATCH 160/451] TableGen: Add address space to matchers Currently AMDGPU uses a CodePatPred to check address spaces from the MachineMemOperand. Introduce a new first class property so that the existing patterns can be easily modified to use the new generated predicate, which will also be handled for GlobalISel. I would prefer these to match against the pointer type of the instruction, but that would be difficult to get working with SelectionDAG compatibility. This is much easier for now and will avoid a painful tablegen rewrite for all the loads and stores. I'm also not sure if there's a better way to encode multiple address spaces in the table, rather than putting the number to expect.
llvm-svn: 366128 --- .../CodeGen/GlobalISel/InstructionSelector.h | 10 +++ .../GlobalISel/InstructionSelectorImpl.h | 39 +++++++++ .../include/llvm/Target/TargetSelectionDAG.td | 6 ++ llvm/test/TableGen/address-space-patfrags.td | 85 +++++++++++++++++++ llvm/utils/TableGen/CodeGenDAGPatterns.cpp | 34 +++++++- llvm/utils/TableGen/CodeGenDAGPatterns.h | 2 + llvm/utils/TableGen/GlobalISelEmitter.cpp | 81 +++++++++++++++++- 7 files changed, 253 insertions(+), 4 deletions(-) create mode 100644 llvm/test/TableGen/address-space-patfrags.td diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index e4d05a57bd365..e9b93be76754f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -138,6 +138,16 @@ enum { /// - MMOIdx - MMO index /// - Size - The size in bytes of the memory access GIM_CheckMemorySizeEqualTo, + + /// Check the address space of the memory access for the given machine memory + /// operand. + /// - InsnID - Instruction ID + /// - MMOIdx - MMO index + /// - NumAddrSpace - Number of valid address spaces + /// - AddrSpaceN - An allowed space of the memory access + /// - AddrSpaceN+1 ... + GIM_CheckMemoryAddressSpace, + /// Check the size of the memory access for the given machine memory operand /// against the size of an operand. 
/// - InsnID - Instruction ID diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h index e010180903d0e..e8ee4af0cb0b5 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -370,6 +370,45 @@ bool InstructionSelector::executeMatchTable( return false; break; } + case GIM_CheckMemoryAddressSpace: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t MMOIdx = MatchTable[CurrentIdx++]; + // This accepts a list of possible address spaces. + const int NumAddrSpace = MatchTable[CurrentIdx++]; + + if (State.MIs[InsnID]->getNumMemOperands() <= MMOIdx) { + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + + // Need to still jump to the end of the list of address spaces if we find + // a match earlier. + const uint64_t LastIdx = CurrentIdx + NumAddrSpace; + + const MachineMemOperand *MMO + = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx); + const unsigned MMOAddrSpace = MMO->getAddrSpace(); + + bool Success = false; + for (int I = 0; I != NumAddrSpace; ++I) { + unsigned AddrSpace = MatchTable[CurrentIdx++]; + DEBUG_WITH_TYPE( + TgtInstructionSelector::getName(), + dbgs() << "addrspace(" << MMOAddrSpace << ") vs " + << AddrSpace << '\n'); + + if (AddrSpace == MMOAddrSpace) { + Success = true; + break; + } + } + + CurrentIdx = LastIdx; + if (!Success && handleReject() == RejectAndGiveUp) + return false; + break; + } case GIM_CheckMemorySizeEqualTo: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t MMOIdx = MatchTable[CurrentIdx++]; diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 3b5c767fc8727..b913a054ac2cb 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -737,6 +737,10 @@ class PatFrags frags, code pred = [{}], // 
cast(N)->isTruncatingStore(); bit IsTruncStore = ?; + // cast(N)->getAddressSpace() == + // If this empty, accept any address space. + list AddressSpaces = ?; + // cast(N)->getOrdering() == AtomicOrdering::Monotonic bit IsAtomicOrderingMonotonic = ?; // cast(N)->getOrdering() == AtomicOrdering::Acquire @@ -762,6 +766,8 @@ class PatFrags frags, code pred = [{}], // cast(N)->getMemoryVT().getScalarType() == MVT::; // cast(N)->getMemoryVT().getScalarType() == MVT::; ValueType ScalarMemoryVT = ?; + + // TODO: Add alignment } // PatFrag - A version of PatFrags matching only a single fragment. diff --git a/llvm/test/TableGen/address-space-patfrags.td b/llvm/test/TableGen/address-space-patfrags.td new file mode 100644 index 0000000000000..f6c5d11449803 --- /dev/null +++ b/llvm/test/TableGen/address-space-patfrags.td @@ -0,0 +1,85 @@ +// RUN: llvm-tblgen -gen-dag-isel -I %p/../../include %s 2>&1 | FileCheck -check-prefix=SDAG %s +// RUN: llvm-tblgen -gen-global-isel -optimize-match-table=false -I %p/../../include %s -o - < %s | FileCheck -check-prefix=GISEL %s + +include "llvm/Target/Target.td" + +def TestTargetInstrInfo : InstrInfo; + + +def TestTarget : Target { + let InstructionSet = TestTargetInstrInfo; +} + +def R0 : Register<"r0"> { let Namespace = "MyTarget"; } +def GPR32 : RegisterClass<"MyTarget", [i32], 32, (add R0)>; + + +// With one address space +def pat_frag_a : PatFrag <(ops node:$ptr), (load node:$ptr), [{}]> { + let AddressSpaces = [ 999 ]; + let IsLoad = 1; // FIXME: Can this be inferred? + let MemoryVT = i32; +} + +// With multiple address spaces +def pat_frag_b : PatFrag <(ops node:$ptr), (load node:$ptr), [{}]> { + let AddressSpaces = [ 123, 455 ]; + let IsLoad = 1; // FIXME: Can this be inferred? 
+ let MemoryVT = i32; +} + +def inst_a : Instruction { + let OutOperandList = (outs GPR32:$dst); + let InOperandList = (ins GPR32:$src); +} + +def inst_b : Instruction { + let OutOperandList = (outs GPR32:$dst); + let InOperandList = (ins GPR32:$src); +} + +// SDAG: case 2: { +// SDAG: // Predicate_pat_frag_a +// SDAG-NEXT: SDNode *N = Node; +// SDAG-NEXT: (void)N; +// SDAG-NEXT: unsigned AddrSpace = cast(N)->getAddressSpace(); + +// SDAG-NEXT: if (AddrSpace != 999) +// SDAG-NEXT: return false; +// SDAG-NEXT: if (cast(N)->getMemoryVT() != MVT::i32) return false; +// SDAG-NEXT: return true; + +// GISEL: GIM_Try, /*On fail goto*//*Label 0*/ 47, // Rule ID 0 // +// GISEL-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2, +// GISEL-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_LOAD, +// GISEL-NEXT: GIM_CheckMemorySizeEqualToLLT, /*MI*/0, /*MMO*/0, /*OpIdx*/0, +// GISEL-NEXT: GIM_CheckMemoryAddressSpace, /*MI*/0, /*MMO*/0, /*NumAddrSpace*/1, /*AddrSpace*/999, +// GISEL-NEXT: GIM_CheckMemorySizeEqualTo, /*MI*/0, /*MMO*/0, /*Size*/4, +// GISEL-NEXT: GIM_CheckAtomicOrdering, /*MI*/0, /*Order*/(int64_t)AtomicOrdering::NotAtomic, +def : Pat < + (pat_frag_a GPR32:$src), + (inst_a GPR32:$src) +>; + +// SDAG: case 3: { +// SDAG-NEXT: // Predicate_pat_frag_b +// SDAG-NEXT: SDNode *N = Node; +// SDAG-NEXT: (void)N; +// SDAG-NEXT: unsigned AddrSpace = cast(N)->getAddressSpace(); +// SDAG-NEXT: if (AddrSpace != 123 && AddrSpace != 455) +// SDAG-NEXT: return false; +// SDAG-NEXT: if (cast(N)->getMemoryVT() != MVT::i32) return false; +// SDAG-NEXT: return true; + + +// GISEL: GIM_Try, /*On fail goto*//*Label 1*/ 95, // Rule ID 1 // +// GISEL-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2, +// GISEL-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_LOAD, +// GISEL-NEXT: GIM_CheckMemorySizeEqualToLLT, /*MI*/0, /*MMO*/0, /*OpIdx*/0, +// GISEL-NEXT: GIM_CheckMemoryAddressSpace, /*MI*/0, /*MMO*/0, /*NumAddrSpace*/2, /*AddrSpace*/123, /*AddrSpace*/455, +// GISEL-NEXT: 
GIM_CheckMemorySizeEqualTo, /*MI*/0, /*MMO*/0, /*Size*/4, +// GISEL-NEXT: GIM_CheckAtomicOrdering, /*MI*/0, /*Order*/(int64_t)AtomicOrdering::NotAtomic, +def : Pat < + (pat_frag_b GPR32:$src), + (inst_b GPR32:$src) +>; diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp index 9f87b3d591dc7..a0e8696001b0e 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp @@ -954,13 +954,33 @@ std::string TreePredicateFn::getPredCode() const { } if (isLoad() || isStore() || isAtomic()) { - StringRef SDNodeName = - isLoad() ? "LoadSDNode" : isStore() ? "StoreSDNode" : "AtomicSDNode"; + if (ListInit *AddressSpaces = getAddressSpaces()) { + Code += "unsigned AddrSpace = cast(N)->getAddressSpace();\n" + " if ("; + + bool First = true; + for (Init *Val : AddressSpaces->getValues()) { + if (First) + First = false; + else + Code += " && "; + + IntInit *IntVal = dyn_cast(Val); + if (!IntVal) { + PrintFatalError(getOrigPatFragRecord()->getRecord()->getLoc(), + "AddressSpaces element must be integer"); + } + + Code += "AddrSpace != " + utostr(IntVal->getValue()); + } + + Code += ")\nreturn false;\n"; + } Record *MemoryVT = getMemoryVT(); if (MemoryVT) - Code += ("if (cast<" + SDNodeName + ">(N)->getMemoryVT() != MVT::" + + Code += ("if (cast(N)->getMemoryVT() != MVT::" + MemoryVT->getName() + ") return false;\n") .str(); } @@ -1149,6 +1169,14 @@ Record *TreePredicateFn::getMemoryVT() const { return nullptr; return R->getValueAsDef("MemoryVT"); } + +ListInit *TreePredicateFn::getAddressSpaces() const { + Record *R = getOrigPatFragRecord()->getRecord(); + if (R->isValueUnset("AddressSpaces")) + return nullptr; + return R->getValueAsListInit("AddressSpaces"); +} + Record *TreePredicateFn::getScalarMemoryVT() const { Record *R = getOrigPatFragRecord()->getRecord(); if (R->isValueUnset("ScalarMemoryVT")) diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.h 
b/llvm/utils/TableGen/CodeGenDAGPatterns.h index bf36ebf26ccf3..2b49a64c3f1d6 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.h +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.h @@ -593,6 +593,8 @@ class TreePredicateFn { /// ValueType record for the memory VT. Record *getScalarMemoryVT() const; + ListInit *getAddressSpaces() const; + // If true, indicates that GlobalISel-based C++ code was supplied. bool hasGISelPredicateCode() const; std::string getGISelPredicateCode() const; diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index 4ec7a81927836..4940d911f6637 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -232,6 +232,23 @@ static std::string explainPredicates(const TreePatternNode *N) { if (Record *VT = P.getScalarMemoryVT()) Explanation += (" ScalarVT(MemVT)=" + VT->getName()).str(); + if (ListInit *AddrSpaces = P.getAddressSpaces()) { + raw_string_ostream OS(Explanation); + OS << " AddressSpaces=["; + + StringRef AddrSpaceSeparator; + for (Init *Val : AddrSpaces->getValues()) { + IntInit *IntVal = dyn_cast(Val); + if (!IntVal) + continue; + + OS << AddrSpaceSeparator << IntVal->getValue(); + AddrSpaceSeparator = ", "; + } + + OS << ']'; + } + if (P.isAtomicOrderingMonotonic()) Explanation += " monotonic"; if (P.isAtomicOrderingAcquire()) @@ -308,6 +325,12 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) { continue; } + if (Predicate.isLoad() || Predicate.isStore() || Predicate.isAtomic()) { + const ListInit *AddrSpaces = Predicate.getAddressSpaces(); + if (AddrSpaces && !AddrSpaces->empty()) + continue; + } + if (Predicate.isAtomic() && Predicate.getMemoryVT()) continue; @@ -1028,6 +1051,7 @@ class PredicateMatcher { IPM_AtomicOrderingMMO, IPM_MemoryLLTSize, IPM_MemoryVsLLTSize, + IPM_MemoryAddressSpace, IPM_GenericPredicate, OPM_SameOperand, OPM_ComplexPattern, @@ -1789,6 +1813,42 @@ class MemorySizePredicateMatcher : public 
InstructionPredicateMatcher { } }; +class MemoryAddressSpacePredicateMatcher : public InstructionPredicateMatcher { +protected: + unsigned MMOIdx; + SmallVector AddrSpaces; + +public: + MemoryAddressSpacePredicateMatcher(unsigned InsnVarID, unsigned MMOIdx, + ArrayRef AddrSpaces) + : InstructionPredicateMatcher(IPM_MemoryAddressSpace, InsnVarID), + MMOIdx(MMOIdx), AddrSpaces(AddrSpaces.begin(), AddrSpaces.end()) {} + + static bool classof(const PredicateMatcher *P) { + return P->getKind() == IPM_MemoryAddressSpace; + } + bool isIdentical(const PredicateMatcher &B) const override { + if (!InstructionPredicateMatcher::isIdentical(B)) + return false; + auto *Other = cast(&B); + return MMOIdx == Other->MMOIdx && AddrSpaces == Other->AddrSpaces; + } + + void emitPredicateOpcodes(MatchTable &Table, + RuleMatcher &Rule) const override { + Table << MatchTable::Opcode("GIM_CheckMemoryAddressSpace") + << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID) + << MatchTable::Comment("MMO") << MatchTable::IntValue(MMOIdx) + // Encode number of address spaces to expect. + << MatchTable::Comment("NumAddrSpace") + << MatchTable::IntValue(AddrSpaces.size()); + for (unsigned AS : AddrSpaces) + Table << MatchTable::Comment("AddrSpace") << MatchTable::IntValue(AS); + + Table << MatchTable::LineBreak; + } +}; + /// Generates code to check that the size of an MMO is less-than, equal-to, or /// greater than a given LLT. class MemoryVsLLTSizePredicateMatcher : public InstructionPredicateMatcher { @@ -3210,7 +3270,26 @@ Expected GlobalISelEmitter::createAndImportSelDAGMatcher( continue; } - // G_LOAD is used for both non-extending and any-extending loads. + // An address space check is needed in all contexts if there is one. 
+ if (Predicate.isLoad() || Predicate.isStore() || Predicate.isAtomic()) { + if (const ListInit *AddrSpaces = Predicate.getAddressSpaces()) { + SmallVector ParsedAddrSpaces; + + for (Init *Val : AddrSpaces->getValues()) { + IntInit *IntVal = dyn_cast(Val); + if (!IntVal) + return failedImport("Address space is not an integer"); + ParsedAddrSpaces.push_back(IntVal->getValue()); + } + + if (!ParsedAddrSpaces.empty()) { + InsnMatcher.addPredicate( + 0, ParsedAddrSpaces); + } + } + } + + // G_LOAD is used for both non-extending and any-extending loads. if (Predicate.isLoad() && Predicate.isNonExtLoad()) { InsnMatcher.addPredicate( 0, MemoryVsLLTSizePredicateMatcher::EqualTo, 0); From 02772499551ada7e72fa83515e563e770020ba55 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 15 Jul 2019 21:15:20 +0000 Subject: [PATCH 161/451] TableGen/GlobalISel: Fix handling of truncstore patterns This was failing to import the AMDGPU truncstore patterns. The truncating stores from 32-bit to 8/16 were then somehow being incorrectly selected to a 4-byte store. A separate check is emitted for the LLT size in comparison to the specific memory VT, which looks strange to me but makes sense based on the hierarchy of PatFrags used for the default truncstore PatFrags. 
llvm-svn: 366129 --- llvm/test/TableGen/address-space-patfrags.td | 38 ++++++++++++++++++++ llvm/utils/TableGen/GlobalISelEmitter.cpp | 9 ++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/llvm/test/TableGen/address-space-patfrags.td b/llvm/test/TableGen/address-space-patfrags.td index f6c5d11449803..029170e8414fa 100644 --- a/llvm/test/TableGen/address-space-patfrags.td +++ b/llvm/test/TableGen/address-space-patfrags.td @@ -38,6 +38,11 @@ def inst_b : Instruction { let InOperandList = (ins GPR32:$src); } +def inst_c : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins GPR32:$src0, GPR32:$src1); +} + // SDAG: case 2: { // SDAG: // Predicate_pat_frag_a // SDAG-NEXT: SDNode *N = Node; @@ -83,3 +88,36 @@ def : Pat < (pat_frag_b GPR32:$src), (inst_b GPR32:$src) >; + + +def truncstorei16_addrspace : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i16; + let AddressSpaces = [ 123, 455 ]; +} + +// Test truncstore without a specific MemoryVT +// GISEL: GIM_Try, /*On fail goto*//*Label 2*/ 133, // Rule ID 2 // +// GISEL-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2, +// GISEL-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_STORE, +// GISEL-NEXT: GIM_CheckMemorySizeLessThanLLT, /*MI*/0, /*MMO*/0, /*OpIdx*/0, +// GISEL-NEXT: GIM_CheckAtomicOrdering, /*MI*/0, /*Order*/(int64_t)AtomicOrdering::NotAtomic, +// GISEL-NEXT: // MIs[0] src0 +// GISEL-NEXT: GIM_CheckType, /*MI*/0, /*Op*/0, /*Type*/GILLT_s32, +def : Pat < + (truncstore GPR32:$src0, GPR32:$src1), + (inst_c GPR32:$src0, GPR32:$src1) +>; + +// Test truncstore with specific MemoryVT +// GISEL: GIM_Try, /*On fail goto*//*Label 3*/ 181, // Rule ID 3 // +// GISEL-NEXT: GIM_CheckNumOperands, /*MI*/0, /*Expected*/2, +// GISEL-NEXT: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_STORE, +// GISEL-NEXT: GIM_CheckMemorySizeLessThanLLT, /*MI*/0, /*MMO*/0, /*OpIdx*/0, +// GISEL-NEXT: GIM_CheckMemoryAddressSpace, /*MI*/0, /*MMO*/0, 
/*NumAddrSpace*/2, /*AddrSpace*/123, /*AddrSpace*/455, +// GISEL-NEXT: GIM_CheckMemorySizeEqualTo, /*MI*/0, /*MMO*/0, /*Size*/2, +def : Pat < + (truncstorei16_addrspace GPR32:$src0, GPR32:$src1), + (inst_c GPR32:$src0, GPR32:$src1) +>; diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index 4940d911f6637..f1c02134198bd 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -314,7 +314,7 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) { Predicate.isSignExtLoad() || Predicate.isZeroExtLoad()) continue; - if (Predicate.isNonTruncStore()) + if (Predicate.isNonTruncStore() || Predicate.isTruncStore()) continue; if (Predicate.isLoad() && Predicate.getMemoryVT()) @@ -3301,6 +3301,13 @@ Expected GlobalISelEmitter::createAndImportSelDAGMatcher( continue; } + if (Predicate.isStore() && Predicate.isTruncStore()) { + // FIXME: If MemoryVT is set, we end up with 2 checks for the MMO size. + InsnMatcher.addPredicate( + 0, MemoryVsLLTSizePredicateMatcher::LessThan, 0); + continue; + } + // No check required. We already did it by swapping the opcode. if (!SrcGIEquivOrNull->isValueUnset("IfSignExtend") && Predicate.isSignExtLoad()) From c4f245b40aad7e8627b37a8bf1bdcdbcd541e665 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 15 Jul 2019 21:16:29 +0000 Subject: [PATCH 162/451] [LoopUnroll+LoopUnswitch] do not transform loops containing callbr Summary: There is currently a correctness issue when unrolling loops containing callbr's where their indirect targets are being updated correctly to the newly created labels, but their operands are not. This manifests in unrolled loops where the second and subsequent copies of callbr instructions have blockaddresses of the label from the first instance of the unrolled loop, which would result in nonsensical runtime control flow. For now, conservatively do not unroll the loop. 
In the future, I think we can pursue unrolling such loops provided we transform the cloned callbr's operands correctly. Such a transform and its legalities are being discussed in: https://reviews.llvm.org/D64101 Link: https://bugs.llvm.org/show_bug.cgi?id=42489 Link: https://groups.google.com/forum/#!topic/clang-built-linux/z-hRWP9KqPI Reviewers: fhahn, hfinkel, efriedma Reviewed By: fhahn, hfinkel, efriedma Subscribers: efriedma, hiraditya, zzheng, dmgreen, llvm-commits, pirama, kees, nathanchance, E5ten, craig.topper, chandlerc, glider, void, srhines Tags: #llvm Differential Revision: https://reviews.llvm.org/D64368 llvm-svn: 366130 --- llvm/lib/Analysis/LoopInfo.cpp | 5 +- llvm/test/Transforms/LoopUnroll/callbr.ll | 51 ++++++++++++++++ llvm/test/Transforms/LoopUnswitch/callbr.ll | 66 +++++++++++++++++++++ 3 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/LoopUnroll/callbr.ll create mode 100644 llvm/test/Transforms/LoopUnswitch/callbr.ll diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index 00dbe30c2b3db..c59e48a7a98e1 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -432,8 +432,11 @@ bool Loop::isLoopSimplifyForm() const { bool Loop::isSafeToClone() const { // Return false if any loop blocks contain indirectbrs, or there are any calls // to noduplicate functions. + // FIXME: it should be ok to clone CallBrInst's if we correctly update the + // operand list to reflect the newly cloned labels. 
for (BasicBlock *BB : this->blocks()) { - if (isa(BB->getTerminator())) + if (isa(BB->getTerminator()) || + isa(BB->getTerminator())) return false; for (Instruction &I : *BB) diff --git a/llvm/test/Transforms/LoopUnroll/callbr.ll b/llvm/test/Transforms/LoopUnroll/callbr.ll new file mode 100644 index 0000000000000..22206b46662e3 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/callbr.ll @@ -0,0 +1,51 @@ +; RUN: opt -loop-unroll -S %s | FileCheck %s + +; Check that the loop body exists. +; CHECK: for.body +; CHECK: if.then +; CHECK: asm.fallthrough +; CHECK: l_yes +; CHECK: for.inc + +; Check that the loop body does not get unrolled. We could modify this test in +; the future to support loop unrolling callbr's IFF we checked that the callbr +; operands were unrolled/updated correctly, as today they are not. +; CHECK-NOT: if.then.1 +; CHECK-NOT: asm.fallthrough.1 +; CHECK-NOT: l_yes.1 +; CHECK-NOT: for.inc.1 +; CHECK-NOT: if.then.2 +; CHECK-NOT: asm.fallthrough.2 +; CHECK-NOT: l_yes.2 +; CHECK-NOT: for.inc.2 + +define dso_local void @d() { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.inc + ret void + +for.body: ; preds = %for.inc, %entry + %e.04 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %tobool = icmp eq i32 %e.04, 0 + br i1 %tobool, label %for.inc, label %if.then + +if.then: ; preds = %for.body + callbr void asm sideeffect "1: nop\0A\09.quad b, ${0:l}, $$5\0A\09", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@d, %l_yes)) + to label %asm.fallthrough [label %l_yes] + +asm.fallthrough: ; preds = %if.then + br label %l_yes + +l_yes: ; preds = %asm.fallthrough, %if.then + %call = tail call i32 (...) @g() + br label %for.inc + +for.inc: ; preds = %for.body, %l_yes + %inc = add nuw nsw i32 %e.04, 1 + %exitcond = icmp eq i32 %inc, 3 + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +declare dso_local i32 @g(...) 
local_unnamed_addr diff --git a/llvm/test/Transforms/LoopUnswitch/callbr.ll b/llvm/test/Transforms/LoopUnswitch/callbr.ll new file mode 100644 index 0000000000000..6e05374d3299f --- /dev/null +++ b/llvm/test/Transforms/LoopUnswitch/callbr.ll @@ -0,0 +1,66 @@ +; RUN: opt -loop-unswitch %s -S | FileCheck %s + +; We want to check that the loop does not get split (so only 2 callbr's not 4). +; It's ok to modify this test in the future should we allow the loop containing +; callbr to be unswitched and are able to do so correctly. + +; CHECK: callbr void asm sideeffect "# ${0:l}", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@foo, %10)) +; CHECK: to label %7 [label %10] +; CHECK: callbr void asm sideeffect "# ${0:l}", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@foo, %10)) +; CHECK: to label %9 [label %10] + +; CHECK-NOT: callbr void asm sideeffect "# ${0:l}", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@foo, %10)) +; CHECK-NOT: to label %7 [label %10] +; CHECK-NOT: callbr void asm sideeffect "# ${0:l}", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@foo, %10)) +; CHECK-NOT: to label %9 [label %10] +; CHECK-NOT: callbr void asm sideeffect "# ${0:l}", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@foo, %19)) +; CHECK-NOT: to label %16 [label %19] +; CHECK-NOT: callbr void asm sideeffect "# ${0:l}", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@foo, %19)) +; CHECK-NOT: to label %18 [label %19] + +; This test is essentially: +; void foo(int n) { +; for (int i = 0; i < 1000; ++i) +; if (n) { +; asm goto("# %l0"::::bar); +; bar:; +; } else { +; asm goto("# %l0"::::baz); +; baz:; +; } +;} + +define dso_local void @foo(i32) #0 { + br label %2 + +2: ; preds = %10, %1 + %.0 = phi i32 [ 0, %1 ], [ %11, %10 ] + %3 = icmp ult i32 %.0, 1000 + br i1 %3, label %4, label %12 + +4: ; preds = %2 + %5 = icmp eq i32 %0, 0 + br i1 %5, label %8, label %6 + +6: ; preds = %4 + callbr void asm sideeffect "# ${0:l}", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@foo, 
%10)) #0 + to label %7 [label %10] + +7: ; preds = %6 + br label %10 + +8: ; preds = %4 + callbr void asm sideeffect "# ${0:l}", "X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@foo, %10)) #0 + to label %9 [label %10] + +9: ; preds = %8 + br label %10 + +10: ; preds = %7, %6, %9, %8 + %11 = add nuw nsw i32 %.0, 1 + br label %2 + +12: ; preds = %2 + ret void +} + From 228f80d92dc22f3a6cd0b9b98566327f63debbe5 Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Mon, 15 Jul 2019 21:22:57 +0000 Subject: [PATCH 163/451] [TSan] Fix asm token error llvm-svn: 366131 --- compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc index db85d547b0588..8e4ddc969e058 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc @@ -421,7 +421,7 @@ static void InitializeLongjmpXorKey() { // 2. Retrieve vanilla/mangled SP. uptr sp; - asm("mov %0, %%sp" : "=r" (sp)); + asm("mov %0, %sp" : "=r" (sp)); uptr mangled_sp = ((uptr *)&env)[LONG_JMP_SP_ENV_SLOT]; // 3. xor SPs to obtain key. From b082f1055b0a5370d1902339ffe058b4abb6abc0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 15 Jul 2019 21:41:44 +0000 Subject: [PATCH 164/451] AMDGPU: Use standalone MUBUF load patterns We already do this for the flat and DS instructions, although it is certainly uglier and more verbose. This will allow using separate pattern definitions for extload and zextload. Currently we get away with using a single PatFrag with custom predicate code to check if the extension type is a zextload or anyextload. The generic mechanism the global isel emitter understands treats these as mutually exclusive. 
I was considering making the pattern emitter accept zextload or sextload extensions for anyextload patterns, but in global isel, the different extending loads have distinct opcodes, and there is currently no mechanism for an opcode matcher to try multiple (and there probably is very little need for one beyond this case). llvm-svn: 366132 --- llvm/lib/Target/AMDGPU/BUFInstructions.td | 57 +++++++++++++++-------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index bc70d138e4280..7d9ca59c6d08a 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -470,6 +470,24 @@ class MUBUF_Load_Pseudo .ret; } +class MUBUF_Offset_Load_Pat : Pat < + (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))), + (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)) +>; + +class MUBUF_Addr64_Load_Pat : Pat < + (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))), + (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)) +>; + +multiclass MUBUF_Pseudo_Load_Pats { + def : MUBUF_Offset_Load_Pat(BaseInst#"_OFFSET"), load_vt, ld>; + def : MUBUF_Addr64_Load_Pat(BaseInst#"_ADDR64"), load_vt, ld>; +} + + // FIXME: tfe can't be an operand because it requires a separate // opcode because it needs an N+1 register class dest register. multiclass MUBUF_Pseudo_Loads { - def _OFFSET : MUBUF_Load_Pseudo , + def _OFFSET : MUBUF_Load_Pseudo , MUBUFAddr64Table<0, NAME # !if(isLds, "_LDS", "")>; - def _ADDR64 : MUBUF_Load_Pseudo , + def _ADDR64 : MUBUF_Load_Pseudo , MUBUFAddr64Table<1, NAME # !if(isLds, "_LDS", "")>; def _OFFEN : MUBUF_Load_Pseudo ; @@ -819,30 +827,39 @@ let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in { } // End HasPackedD16VMem. 
defm BUFFER_LOAD_UBYTE : MUBUF_Pseudo_Loads_Lds < - "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global + "buffer_load_ubyte", VGPR_32, i32 >; defm BUFFER_LOAD_SBYTE : MUBUF_Pseudo_Loads_Lds < - "buffer_load_sbyte", VGPR_32, i32, sextloadi8_global + "buffer_load_sbyte", VGPR_32, i32 >; defm BUFFER_LOAD_USHORT : MUBUF_Pseudo_Loads_Lds < - "buffer_load_ushort", VGPR_32, i32, az_extloadi16_global + "buffer_load_ushort", VGPR_32, i32 >; defm BUFFER_LOAD_SSHORT : MUBUF_Pseudo_Loads_Lds < - "buffer_load_sshort", VGPR_32, i32, sextloadi16_global + "buffer_load_sshort", VGPR_32, i32 >; defm BUFFER_LOAD_DWORD : MUBUF_Pseudo_Loads_Lds < - "buffer_load_dword", VGPR_32, i32, load_global + "buffer_load_dword", VGPR_32, i32 >; defm BUFFER_LOAD_DWORDX2 : MUBUF_Pseudo_Loads < - "buffer_load_dwordx2", VReg_64, v2i32, load_global + "buffer_load_dwordx2", VReg_64, v2i32 >; defm BUFFER_LOAD_DWORDX3 : MUBUF_Pseudo_Loads < - "buffer_load_dwordx3", VReg_96, v3i32, load_global + "buffer_load_dwordx3", VReg_96, v3i32 >; defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads < - "buffer_load_dwordx4", VReg_128, v4i32, load_global + "buffer_load_dwordx4", VReg_128, v4i32 >; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, az_extloadi8_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, az_extloadi16_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, sextloadi16_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORD", i32, load_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX2", v2i32, load_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX3", v3i32, load_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>; + // This is not described in AMD documentation, // but 'lds' versions of these opcodes are available // in at least GFX8+ chips. See Bug 37653. 
From becae2b2324f15bbe0de46d58e7eb2fe5bd1cef4 Mon Sep 17 00:00:00 2001 From: Simon Atanasyan Date: Mon, 15 Jul 2019 21:46:38 +0000 Subject: [PATCH 165/451] [mips] Print BEQZL and BNEZL pseudo instructions One of the reasons - to be compatible with GNU tools. llvm-svn: 366133 --- llvm/lib/Target/Mips/MipsInstrInfo.td | 8 +++--- llvm/test/MC/Mips/branch-pseudos.s | 32 +++++++++++------------ llvm/test/MC/Mips/macro-bcc-imm.s | 16 ++++++------ llvm/test/MC/Mips/mips-jump-delay-slots.s | 4 +-- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.td b/llvm/lib/Target/Mips/MipsInstrInfo.td index ab353c92e27db..a4e85a38ab28d 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.td +++ b/llvm/lib/Target/Mips/MipsInstrInfo.td @@ -2801,14 +2801,14 @@ let AdditionalPredicates = [NotInMicroMips] in { def : MipsInstAlias<"bnez $rs,$offset", (BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>, ISA_MIPS1; - def : MipsInstAlias<"bnezl $rs,$offset", - (BNEL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>, + def : MipsInstAlias<"bnezl $rs, $offset", + (BNEL GPR32Opnd:$rs, ZERO, brtarget:$offset), 1>, ISA_MIPS2; def : MipsInstAlias<"beqz $rs,$offset", (BEQ GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>, ISA_MIPS1; - def : MipsInstAlias<"beqzl $rs,$offset", - (BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>, + def : MipsInstAlias<"beqzl $rs, $offset", + (BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 1>, ISA_MIPS2; def : MipsInstAlias<"syscall", (SYSCALL 0), 1>, ISA_MIPS1; diff --git a/llvm/test/MC/Mips/branch-pseudos.s b/llvm/test/MC/Mips/branch-pseudos.s index 56841e29f4259..9c4abdbbfad01 100644 --- a/llvm/test/MC/Mips/branch-pseudos.s +++ b/llvm/test/MC/Mips/branch-pseudos.s @@ -190,12 +190,12 @@ local_label: bltl $7,$8,local_label # CHECK: slt $1, $7, $8 # encoding: [0x00,0xe8,0x08,0x2a] -# CHECK: bnel $1, $zero, local_label # encoding: [0x54,0x20,A,A] +# CHECK: bnezl $1, local_label # encoding: [0x54,0x20,A,A] # CHECK: # fixup A - offset: 0, value: 
local_label-4, kind: fixup_Mips_PC16 # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] bltl $7,$8,global_label # CHECK: slt $1, $7, $8 # encoding: [0x00,0xe8,0x08,0x2a] -# CHECK: bnel $1, $zero, global_label # encoding: [0x54,0x20,A,A] +# CHECK: bnezl $1, global_label # encoding: [0x54,0x20,A,A] # CHECK: # fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16 # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] bltl $7,$0,local_label @@ -211,12 +211,12 @@ local_label: blel $7,$8,local_label # CHECK: slt $1, $8, $7 # encoding: [0x01,0x07,0x08,0x2a] -# CHECK: beql $1, $zero, local_label # encoding: [0x50,0x20,A,A] +# CHECK: beqzl $1, local_label # encoding: [0x50,0x20,A,A] # CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16 # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] blel $7,$8,global_label # CHECK: slt $1, $8, $7 # encoding: [0x01,0x07,0x08,0x2a] -# CHECK: beql $1, $zero, global_label # encoding: [0x50,0x20,A,A] +# CHECK: beqzl $1, global_label # encoding: [0x50,0x20,A,A] # CHECK: # fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16 # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] blel $7,$0,local_label @@ -235,12 +235,12 @@ local_label: bgel $7,$8,local_label # CHECK: slt $1, $7, $8 # encoding: [0x00,0xe8,0x08,0x2a] -# CHECK: beql $1, $zero, local_label # encoding: [0x50,0x20,A,A] +# CHECK: beqzl $1, local_label # encoding: [0x50,0x20,A,A] # CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16 # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] bgel $7,$8,global_label # CHECK: slt $1, $7, $8 # encoding: [0x00,0xe8,0x08,0x2a] -# CHECK: beql $1, $zero, global_label # encoding: [0x50,0x20,A,A] +# CHECK: beqzl $1, global_label # encoding: [0x50,0x20,A,A] # CHECK: # fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16 # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] bgel $7,$0,local_label @@ -259,12 +259,12 @@ local_label: bgtl $7,$8,local_label # CHECK: slt $1, $8, $7 # encoding: 
[0x01,0x07,0x08,0x2a] -# CHECK: bnel $1, $zero, local_label # encoding: [0x54,0x20,A,A] +# CHECK: bnezl $1, local_label # encoding: [0x54,0x20,A,A] # CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16 # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] bgtl $7,$8,global_label # CHECK: slt $1, $8, $7 # encoding: [0x01,0x07,0x08,0x2a] -# CHECK: bnel $1, $zero, global_label # encoding: [0x54,0x20,A,A] +# CHECK: bnezl $1, global_label # encoding: [0x54,0x20,A,A] # CHECK: # fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16 # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] bgtl $7,$0,local_label @@ -280,12 +280,12 @@ local_label: bltul $7,$8,local_label # CHECK: sltu $1, $7, $8 # encoding: [0x00,0xe8,0x08,0x2b] -# CHECK: bnel $1, $zero, local_label # encoding: [0x54,0x20,A,A] +# CHECK: bnezl $1, local_label # encoding: [0x54,0x20,A,A] # CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16 # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] bltul $7,$8,global_label # CHECK: sltu $1, $7, $8 # encoding: [0x00,0xe8,0x08,0x2b] -# CHECK: bnel $1, $zero, global_label # encoding: [0x54,0x20,A,A] +# CHECK: bnezl $1, global_label # encoding: [0x54,0x20,A,A] # CHECK: # fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16 # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] bltul $7,$0,local_label @@ -301,12 +301,12 @@ local_label: bleul $7,$8,local_label # CHECK: sltu $1, $8, $7 # encoding: [0x01,0x07,0x08,0x2b] -# CHECK: beql $1, $zero, local_label # encoding: [0x50,0x20,A,A] +# CHECK: beqzl $1, local_label # encoding: [0x50,0x20,A,A] # CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16 # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] bleul $7,$8,global_label # CHECK: sltu $1, $8, $7 # encoding: [0x01,0x07,0x08,0x2b] -# CHECK: beql $1, $zero, global_label # encoding: [0x50,0x20,A,A] +# CHECK: beqzl $1, global_label # encoding: [0x50,0x20,A,A] # CHECK: # fixup A - offset: 0, value: global_label-4, kind: 
fixup_Mips_PC16 # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] bleul $7,$0,local_label @@ -325,12 +325,12 @@ local_label: bgeul $7,$8,local_label # CHECK: sltu $1, $7, $8 # encoding: [0x00,0xe8,0x08,0x2b] -# CHECK: beql $1, $zero, local_label # encoding: [0x50,0x20,A,A] +# CHECK: beqzl $1, local_label # encoding: [0x50,0x20,A,A] # CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16 # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] bgeul $7,$8,global_label # CHECK: sltu $1, $7, $8 # encoding: [0x00,0xe8,0x08,0x2b] -# CHECK: beql $1, $zero, global_label # encoding: [0x50,0x20,A,A] +# CHECK: beqzl $1, global_label # encoding: [0x50,0x20,A,A] # CHECK: # fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16 # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] bgeul $7,$0,local_label @@ -349,12 +349,12 @@ local_label: bgtul $7,$8,local_label # CHECK: sltu $1, $8, $7 # encoding: [0x01,0x07,0x08,0x2b] -# CHECK: bnel $1, $zero, local_label # encoding: [0x54,0x20,A,A] +# CHECK: bnezl $1, local_label # encoding: [0x54,0x20,A,A] # CHECK: # fixup A - offset: 0, value: local_label-4, kind: fixup_Mips_PC16 # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] bgtul $7,$8,global_label # CHECK: sltu $1, $8, $7 # encoding: [0x01,0x07,0x08,0x2b] -# CHECK: bnel $1, $zero, global_label # encoding: [0x54,0x20,A,A] +# CHECK: bnezl $1, global_label # encoding: [0x54,0x20,A,A] # CHECK: # fixup A - offset: 0, value: global_label-4, kind: fixup_Mips_PC16 # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] bgtul $7,$0,local_label diff --git a/llvm/test/MC/Mips/macro-bcc-imm.s b/llvm/test/MC/Mips/macro-bcc-imm.s index ebc4cd2ce1898..fdee6ec3670e6 100644 --- a/llvm/test/MC/Mips/macro-bcc-imm.s +++ b/llvm/test/MC/Mips/macro-bcc-imm.s @@ -75,33 +75,33 @@ foo: # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16 bltl $a2, 16, foo # ALL: addiu $1, $zero, 16 # ALL: slt $1, $6, $1 - # ALL: bnel $1, $zero, foo + # ALL: bnezl $1, foo # ALL: # fixup A - offset: 0, value: foo-4, 
kind: fixup_Mips_PC16 blel $a2, 16, foo # ALL: addiu $1, $zero, 16 # ALL: slt $1, $1, $6 - # ALL: beql $1, $zero, foo + # ALL: beqzl $1, foo # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16 bgel $a2, 32767, foo # ALL: addiu $1, $zero, 32767 # ALL: slt $1, $6, $1 - # ALL: beql $1, $zero, foo + # ALL: beqzl $1, foo # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16 bgtl $a2, 32768, foo # ALL: ori $1, $zero, 32768 # ALL: slt $1, $1, $6 - # ALL: bnel $1, $zero, foo + # ALL: bnezl $1, foo # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16 bltul $a2, 16, foo # ALL: addiu $1, $zero, 16 # ALL: sltu $1, $6, $1 - # ALL: bnel $1, $zero, foo + # ALL: bnezl $1, foo # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16 bleul $a2, 16, foo # ALL: addiu $1, $zero, 16 # ALL: sltu $1, $1, $6 - # ALL: beql $1, $zero, foo + # ALL: beqzl $1, foo # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16 bgeul $a2, 32767, foo # ALL: addiu $1, $zero, 32767 # ALL: sltu $1, $6, $1 - # ALL: beql $1, $zero, foo + # ALL: beqzl $1, foo # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16 bgtul $a2, 65536, foo # ALL: lui $1, 1 # ALL: sltu $1, $1, $6 - # ALL: bnel $1, $zero, foo + # ALL: bnezl $1, foo # ALL: # fixup A - offset: 0, value: foo-4, kind: fixup_Mips_PC16 diff --git a/llvm/test/MC/Mips/mips-jump-delay-slots.s b/llvm/test/MC/Mips/mips-jump-delay-slots.s index 8a0781103e9aa..f61f710b4accc 100644 --- a/llvm/test/MC/Mips/mips-jump-delay-slots.s +++ b/llvm/test/MC/Mips/mips-jump-delay-slots.s @@ -67,13 +67,13 @@ # CHECK: beql $9, $6, 1332 # CHECK: nop beql $9,$6,1332 - # CHECK: beql $9, $zero, 1332 + # CHECK: beqzl $9, 1332 # CHECK: nop beqzl $9,1332 # CHECK: bnel $9, $6, 1332 # CHECK: nop bnel $9,$6,1332 - # CHECK: bnel $9, $zero, 1332 + # CHECK: bnezl $9, 1332 # CHECK: nop bnezl $9,1332 # CHECK: bgezl $6, 1332 From 46b84fa231c7c1b55e0271e085779d883d103657 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: 
Mon, 15 Jul 2019 22:01:55 +0000 Subject: [PATCH 166/451] [ScopInfo][NFC] Add dot at the end of comment statement. llvm-svn: 366134 --- polly/include/polly/ScopInfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index 00c69cd24e104..aeed4a7e3d4f3 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -51,7 +51,7 @@ extern bool UseInstructionNames; // The maximal number of basic sets we allow during domain construction to // be created. More complex scops will result in very high compile time and -// are also unlikely to result in good code +// are also unlikely to result in good code. extern int const MaxDisjunctsInDomain; /// Enumeration of assumptions Polly can take. From 423b4a18a49cdf61efd0c45a7e133218db5a8547 Mon Sep 17 00:00:00 2001 From: Austin Kerbow Date: Mon, 15 Jul 2019 22:07:05 +0000 Subject: [PATCH 167/451] [AMDGPU] Enable merging m0 initializations. Summary: Enable hoisting and merging m0 defs that are initialized with the same immediate value. Fixes bug where removed instructions are not considered to interfere with other inits, and make sure to not hoist inits before block prologues. 
Reviewers: rampitec, arsenm Reviewed By: rampitec Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64766 llvm-svn: 366135 --- llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 47 ++++++--- llvm/test/CodeGen/AMDGPU/merge-m0.mir | 108 +++++++++++++++++++-- 2 files changed, 133 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 18598d6cef450..624953963cf4c 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -103,7 +103,7 @@ using namespace llvm; static cl::opt EnableM0Merge( "amdgpu-enable-merge-m0", cl::desc("Merge and hoist M0 initializations"), - cl::init(false)); + cl::init(true)); namespace { @@ -452,18 +452,32 @@ static bool isReachable(const MachineInstr *From, (const MachineBasicBlock *MBB) { return MBB == MBBFrom; }); } +// Return the first non-prologue instruction in the block. +static MachineBasicBlock::iterator +getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII) { + MachineBasicBlock::iterator I = MBB->getFirstNonPHI(); + while (I != MBB->end() && TII->isBasicBlockPrologue(*I)) + ++I; + + return I; +} + // Hoist and merge identical SGPR initializations into a common predecessor. // This is intended to combine M0 initializations, but can work with any // SGPR. A VGPR cannot be processed since we cannot guarantee vector // executioon. static bool hoistAndMergeSGPRInits(unsigned Reg, const MachineRegisterInfo &MRI, - MachineDominatorTree &MDT) { + MachineDominatorTree &MDT, + const TargetInstrInfo *TII) { // List of inits by immediate value. using InitListMap = std::map>; InitListMap Inits; // List of clobbering instructions. SmallVector Clobbers; + // List of instructions marked for deletion. 
+ SmallSet MergedInstrs; + bool Changed = false; for (auto &MI : MRI.def_instructions(Reg)) { @@ -492,8 +506,8 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, MachineInstr *MI2 = *I2; // Check any possible interference - auto intereferes = [&](MachineBasicBlock::iterator From, - MachineBasicBlock::iterator To) -> bool { + auto interferes = [&](MachineBasicBlock::iterator From, + MachineBasicBlock::iterator To) -> bool { assert(MDT.dominates(&*To, &*From)); @@ -525,23 +539,23 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, }; if (MDT.dominates(MI1, MI2)) { - if (!intereferes(MI2, MI1)) { + if (!interferes(MI2, MI1)) { LLVM_DEBUG(dbgs() << "Erasing from " << printMBBReference(*MI2->getParent()) << " " << *MI2); - MI2->eraseFromParent(); - Defs.erase(I2++); + MergedInstrs.insert(MI2); Changed = true; + ++I2; continue; } } else if (MDT.dominates(MI2, MI1)) { - if (!intereferes(MI1, MI2)) { + if (!interferes(MI1, MI2)) { LLVM_DEBUG(dbgs() << "Erasing from " << printMBBReference(*MI1->getParent()) << " " << *MI1); - MI1->eraseFromParent(); - Defs.erase(I1++); + MergedInstrs.insert(MI1); Changed = true; + ++I1; break; } } else { @@ -552,8 +566,8 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, continue; } - MachineBasicBlock::iterator I = MBB->getFirstNonPHI(); - if (!intereferes(MI1, I) && !intereferes(MI2, I)) { + MachineBasicBlock::iterator I = getFirstNonPrologue(MBB, TII); + if (!interferes(MI1, I) && !interferes(MI2, I)) { LLVM_DEBUG(dbgs() << "Erasing from " << printMBBReference(*MI1->getParent()) << " " << *MI1 @@ -561,9 +575,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, << printMBBReference(*MI2->getParent()) << " to " << printMBBReference(*I->getParent()) << " " << *MI2); I->getParent()->splice(I, MI2->getParent(), MI2); - MI1->eraseFromParent(); - Defs.erase(I1++); + MergedInstrs.insert(MI1); Changed = true; + ++I1; break; } } @@ -573,6 +587,9 @@ static bool hoistAndMergeSGPRInits(unsigned Reg, } } + for (auto MI : MergedInstrs) + 
MI->removeFromParent(); + if (Changed) MRI.clearKillFlags(Reg); @@ -723,7 +740,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { } if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge) - hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT); + hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT, TII); return true; } diff --git a/llvm/test/CodeGen/AMDGPU/merge-m0.mir b/llvm/test/CodeGen/AMDGPU/merge-m0.mir index 73a6b13685020..bafbce05a8876 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-m0.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-m0.mir @@ -47,13 +47,7 @@ # GCN-NEXT: DS_WRITE_B32 --- -name: test -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true +name: merge-m0-many-init registers: - { id: 0, class: vgpr_32 } - { id: 1, class: vgpr_32 } @@ -129,3 +123,103 @@ body: | S_BRANCH %bb.0.entry ... + +# GCN: bb.0.entry: +# GCN: SI_INIT_M0 65536 +# GCN-NEXT: DS_WRITE_B32 + +#GCN: bb.1: +#GCN-NOT: SI_INIT_M0 65536 +#GCN-NOT: SI_INIT_M0 -1 + +#GCN: bb.2: +#GCN: SI_INIT_M0 -1 + +#GCN: bb.3: +#GCN: SI_INIT_M0 -1 + +--- +name: merge-m0-dont-hoist-past-init-with-different-initializer +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } +body: | + bb.0.entry: + successors: %bb.1 + + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + SI_INIT_M0 65536, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2, %bb.3 + + SI_INIT_M0 65536, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_CBRANCH_VCCZ %bb.2, implicit undef $vcc + S_BRANCH %bb.3 + + bb.2: + successors: %bb.4 + + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + successors: %bb.4 + + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.4 + + bb.4: + S_ENDPGM 0 +... 
+ +# GCN: bb.0.entry: +# GCN-NOT: SI_INIT_M0 +# GCN: S_OR_B64 +# GCN-NEXT: SI_INIT_M0 + +#GCN: bb.1: +#GCN-NOT: SI_INIT_M0 -1 + +#GCN: bb.2: +#GCN-NOT: SI_INIT_MO -1 + +--- +name: merge-m0-after-prologue +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } +body: | + bb.0.entry: + successors: %bb.1, %bb.2 + liveins: $sgpr0_sgpr1 + + $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + S_CBRANCH_VCCZ %bb.1, implicit undef $vcc + S_BRANCH %bb.2 + + bb.1: + successors: %bb.3 + + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.3 + + bb.2: + successors: %bb.3 + + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.3 + + bb.3: + S_ENDPGM 0 +... From 8528b1951cc6fda2c7bb09b259cf81f222550f82 Mon Sep 17 00:00:00 2001 From: Matt Morehouse Date: Mon, 15 Jul 2019 22:07:56 +0000 Subject: [PATCH 168/451] [ASan] Fix >80 character line. 
llvm-svn: 366136 --- compiler-rt/lib/asan/asan_malloc_win.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/asan/asan_malloc_win.cc b/compiler-rt/lib/asan/asan_malloc_win.cc index ccbce061daf60..5fad55d6e2840 100644 --- a/compiler-rt/lib/asan/asan_malloc_win.cc +++ b/compiler-rt/lib/asan/asan_malloc_win.cc @@ -342,7 +342,8 @@ void *SharedReAlloc(ReAllocFunction reallocFunc, SizeFunction heapSizeFunc, size_t old_usable_size = 0; if (replacement_alloc) { old_usable_size = asan_malloc_usable_size(lpMem, pc, bp); - REAL(memcpy)(replacement_alloc, lpMem, Min(dwBytes, old_usable_size)); + REAL(memcpy)(replacement_alloc, lpMem, + Min(dwBytes, old_usable_size)); asan_free(lpMem, &stack, FROM_MALLOC); } return replacement_alloc; From 5076038bb00dd5643cb4c0a881629126b359c828 Mon Sep 17 00:00:00 2001 From: Jan Korous Date: Mon, 15 Jul 2019 22:11:28 +0000 Subject: [PATCH 169/451] [DirectoryWatcher][NFC][test] Add typedef for enum llvm-svn: 366137 --- .../DirectoryWatcher/DirectoryWatcherTest.cpp | 45 ++++++++++--------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp index 72bc86d4493cf..9b0e1077abce0 100644 --- a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp +++ b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp @@ -32,6 +32,8 @@ static bool operator==(const DirectoryWatcher::Event &lhs, namespace { +typedef DirectoryWatcher::Event::EventKind EventKind; + struct DirectoryWatcherTestFixture { std::string TestRootDir; std::string TestWatchedDir; @@ -81,15 +83,15 @@ struct DirectoryWatcherTestFixture { } }; -std::string eventKindToString(const DirectoryWatcher::Event::EventKind K) { +std::string eventKindToString(const EventKind K) { switch (K) { - case DirectoryWatcher::Event::EventKind::Removed: + case EventKind::Removed: return "Removed"; - case 
DirectoryWatcher::Event::EventKind::Modified: + case EventKind::Modified: return "Modified"; - case DirectoryWatcher::Event::EventKind::WatchedDirRemoved: + case EventKind::WatchedDirRemoved: return "WatchedDirRemoved"; - case DirectoryWatcher::Event::EventKind::WatcherGotInvalidated: + case EventKind::WatcherGotInvalidated: return "WatcherGotInvalidated"; } llvm_unreachable("unknown event kind"); @@ -249,7 +251,6 @@ void checkEventualResultWithTimeout(VerifyingConsumer &TestConsumer) { !TestConsumer.result().hasValue()) TestConsumer.printUnmetExpectations(llvm::outs()); } - } // namespace TEST(DirectoryWatcherTest, InitialScanSync) { @@ -260,9 +261,9 @@ TEST(DirectoryWatcherTest, InitialScanSync) { fixture.addFile("c"); VerifyingConsumer TestConsumer{ - {{DirectoryWatcher::Event::EventKind::Modified, "a"}, - {DirectoryWatcher::Event::EventKind::Modified, "b"}, - {DirectoryWatcher::Event::EventKind::Modified, "c"}}, + {{EventKind::Modified, "a"}, + {EventKind::Modified, "b"}, + {EventKind::Modified, "c"}}, {}}; auto DW = DirectoryWatcher::create( @@ -284,9 +285,9 @@ TEST(DirectoryWatcherTest, InitialScanAsync) { fixture.addFile("c"); VerifyingConsumer TestConsumer{ - {{DirectoryWatcher::Event::EventKind::Modified, "a"}, - {DirectoryWatcher::Event::EventKind::Modified, "b"}, - {DirectoryWatcher::Event::EventKind::Modified, "c"}}, + {{EventKind::Modified, "a"}, + {EventKind::Modified, "b"}, + {EventKind::Modified, "c"}}, {}}; auto DW = DirectoryWatcher::create( @@ -305,9 +306,9 @@ TEST(DirectoryWatcherTest, AddFiles) { VerifyingConsumer TestConsumer{ {}, - {{DirectoryWatcher::Event::EventKind::Modified, "a"}, - {DirectoryWatcher::Event::EventKind::Modified, "b"}, - {DirectoryWatcher::Event::EventKind::Modified, "c"}}}; + {{EventKind::Modified, "a"}, + {EventKind::Modified, "b"}, + {EventKind::Modified, "c"}}}; auto DW = DirectoryWatcher::create( fixture.TestWatchedDir, @@ -330,8 +331,8 @@ TEST(DirectoryWatcherTest, ModifyFile) { fixture.addFile("a"); 
VerifyingConsumer TestConsumer{ - {{DirectoryWatcher::Event::EventKind::Modified, "a"}}, - {{DirectoryWatcher::Event::EventKind::Modified, "a"}}}; + {{EventKind::Modified, "a"}}, + {{EventKind::Modified, "a"}}}; auto DW = DirectoryWatcher::create( fixture.TestWatchedDir, @@ -359,8 +360,8 @@ TEST(DirectoryWatcherTest, DeleteFile) { fixture.addFile("a"); VerifyingConsumer TestConsumer{ - {{DirectoryWatcher::Event::EventKind::Modified, "a"}}, - {{DirectoryWatcher::Event::EventKind::Removed, "a"}}}; + {{EventKind::Modified, "a"}}, + {{EventKind::Removed, "a"}}}; auto DW = DirectoryWatcher::create( fixture.TestWatchedDir, @@ -380,8 +381,8 @@ TEST(DirectoryWatcherTest, DeleteWatchedDir) { VerifyingConsumer TestConsumer{ {}, - {{DirectoryWatcher::Event::EventKind::WatchedDirRemoved, ""}, - {DirectoryWatcher::Event::EventKind::WatcherGotInvalidated, ""}}}; + {{EventKind::WatchedDirRemoved, ""}, + {EventKind::WatcherGotInvalidated, ""}}}; auto DW = DirectoryWatcher::create( fixture.TestWatchedDir, @@ -400,7 +401,7 @@ TEST(DirectoryWatcherTest, InvalidatedWatcher) { DirectoryWatcherTestFixture fixture; VerifyingConsumer TestConsumer{ - {}, {{DirectoryWatcher::Event::EventKind::WatcherGotInvalidated, ""}}}; + {}, {{EventKind::WatcherGotInvalidated, ""}}}; { auto DW = DirectoryWatcher::create( From c5e7a3d710097efbe86e73c6422d76744288b105 Mon Sep 17 00:00:00 2001 From: Jan Korous Date: Mon, 15 Jul 2019 22:11:51 +0000 Subject: [PATCH 170/451] [DirectoryWatcher][test] Relax test assumptions Workaround for FSEvents sometimes sending notifications for events that happened before DirectoryWatcher was created. This caused tests to be flaky on green dragon. 
llvm-svn: 366138 --- .../DirectoryWatcher/DirectoryWatcherTest.cpp | 64 ++++++------------- 1 file changed, 20 insertions(+), 44 deletions(-) diff --git a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp index 9b0e1077abce0..a6b48e5623675 100644 --- a/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp +++ b/clang/unittests/DirectoryWatcher/DirectoryWatcherTest.cpp @@ -264,7 +264,14 @@ TEST(DirectoryWatcherTest, InitialScanSync) { {{EventKind::Modified, "a"}, {EventKind::Modified, "b"}, {EventKind::Modified, "c"}}, - {}}; + {}, + // We have to ignore these as it's a race between the test process + // which is scanning the directory and kernel which is sending + // notification. + {{EventKind::Modified, "a"}, + {EventKind::Modified, "b"}, + {EventKind::Modified, "c"}} + }; auto DW = DirectoryWatcher::create( fixture.TestWatchedDir, @@ -288,7 +295,14 @@ TEST(DirectoryWatcherTest, InitialScanAsync) { {{EventKind::Modified, "a"}, {EventKind::Modified, "b"}, {EventKind::Modified, "c"}}, - {}}; + {}, + // We have to ignore these as it's a race between the test process + // which is scanning the directory and kernel which is sending + // notification. + {{EventKind::Modified, "a"}, + {EventKind::Modified, "b"}, + {EventKind::Modified, "c"}} + }; auto DW = DirectoryWatcher::create( fixture.TestWatchedDir, @@ -331,6 +345,7 @@ TEST(DirectoryWatcherTest, ModifyFile) { fixture.addFile("a"); VerifyingConsumer TestConsumer{ + {{EventKind::Modified, "a"}}, {{EventKind::Modified, "a"}}, {{EventKind::Modified, "a"}}}; @@ -361,7 +376,8 @@ TEST(DirectoryWatcherTest, DeleteFile) { VerifyingConsumer TestConsumer{ {{EventKind::Modified, "a"}}, - {{EventKind::Removed, "a"}}}; + {{EventKind::Removed, "a"}}, + {{EventKind::Modified, "a"}}}; auto DW = DirectoryWatcher::create( fixture.TestWatchedDir, @@ -414,44 +430,4 @@ TEST(DirectoryWatcherTest, InvalidatedWatcher) { } // DW is destructed here. 
checkEventualResultWithTimeout(TestConsumer); -} - -TEST(DirectoryWatcherTest, ChangeMetadata) { - DirectoryWatcherTestFixture fixture; - fixture.addFile("a"); - - VerifyingConsumer TestConsumer{ - {{DirectoryWatcher::Event::EventKind::Modified, "a"}}, - // We don't expect any notification for file having access file changed. - {}, - // Given the timing we are ok with receiving the duplicate event. - {{DirectoryWatcher::Event::EventKind::Modified, "a"}}}; - - auto DW = DirectoryWatcher::create( - fixture.TestWatchedDir, - [&TestConsumer](llvm::ArrayRef Events, - bool IsInitial) { - TestConsumer.consume(Events, IsInitial); - }, - /*waitForInitialSync=*/true); - - { // Change access and modification time of file a. - Expected HopefullyTheFD = llvm::sys::fs::openNativeFileForWrite( - fixture.getPathInWatched("a"), CD_OpenExisting, OF_None); - if (!HopefullyTheFD) { - llvm::outs() << HopefullyTheFD.takeError(); - } - - const int FD = HopefullyTheFD.get(); - const TimePoint<> NewTimePt = - std::chrono::system_clock::now() - std::chrono::minutes(1); -#ifndef NDEBUG - std::error_code setTimeRes = -#endif - llvm::sys::fs::setLastAccessAndModificationTime(FD, NewTimePt, - NewTimePt); - assert(!setTimeRes); - } - - checkEventualResultWithTimeout(TestConsumer); -} +} \ No newline at end of file From 292e21d8bce463fb8bb833810f05d4382f61bdd0 Mon Sep 17 00:00:00 2001 From: Wouter van Oortmerssen Date: Mon, 15 Jul 2019 22:13:39 +0000 Subject: [PATCH 171/451] [WebAssembly] Assembler: support special floats: infinity / nan Summary: These are emitted as identifiers by the InstPrinter, so we should parse them as such. These could potentially clash with symbols of the same name, but that is out of our (the WebAssembly backend) control. 
Reviewers: dschuff Subscribers: sbc100, jgravelle-google, aheejin, sunfish, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64770 llvm-svn: 366139 --- .../AsmParser/WebAssemblyAsmParser.cpp | 25 +++++++++++++++++++ llvm/test/MC/WebAssembly/basic-assembly.s | 4 +++ 2 files changed, 29 insertions(+) diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp index e82923519f4c1..e9a7f6977c2d3 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -363,6 +363,28 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser { return false; } + bool parseSpecialFloatMaybe(bool IsNegative, OperandVector &Operands) { + if (Lexer.isNot(AsmToken::Identifier)) + return true; + auto &Flt = Lexer.getTok(); + auto S = Flt.getString(); + double Val; + if (S.compare_lower("infinity") == 0) { + Val = std::numeric_limits::infinity(); + } else if (S.compare_lower("nan") == 0) { + Val = std::numeric_limits::quiet_NaN(); + } else { + return true; + } + if (IsNegative) + Val = -Val; + Operands.push_back(make_unique( + WebAssemblyOperand::Float, Flt.getLoc(), Flt.getEndLoc(), + WebAssemblyOperand::FltOp{Val})); + Parser.Lex(); + return false; + } + bool checkForP2AlignIfLoadStore(OperandVector &Operands, StringRef InstName) { // FIXME: there is probably a cleaner way to do this. auto IsLoadStore = InstName.find(".load") != StringRef::npos || @@ -476,6 +498,8 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser { auto &Tok = Lexer.getTok(); switch (Tok.getKind()) { case AsmToken::Identifier: { + if (!parseSpecialFloatMaybe(false, Operands)) + break; auto &Id = Lexer.getTok(); if (ExpectBlockType) { // Assume this identifier is a block_type. 
@@ -507,6 +531,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser { } else if(Lexer.is(AsmToken::Real)) { if (parseSingleFloat(true, Operands)) return true; + } else if (!parseSpecialFloatMaybe(true, Operands)) { } else { return error("Expected numeric constant instead got: ", Lexer.getTok()); diff --git a/llvm/test/MC/WebAssembly/basic-assembly.s b/llvm/test/MC/WebAssembly/basic-assembly.s index 81d6001175b65..c3b7e9da25de4 100644 --- a/llvm/test/MC/WebAssembly/basic-assembly.s +++ b/llvm/test/MC/WebAssembly/basic-assembly.s @@ -14,6 +14,8 @@ test0: i32.const -1 f64.const 0x1.999999999999ap1 f32.const -1.0 + f32.const -infinity + f32.const nan v128.const 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 v128.const 0, 1, 2, 3, 4, 5, 6, 7 # Indirect addressing: @@ -118,6 +120,8 @@ test0: # CHECK-NEXT: i32.const -1 # CHECK-NEXT: f64.const 0x1.999999999999ap1 # CHECK-NEXT: f32.const -0x1p0 +# CHECK-NEXT: f32.const -infinity +# CHECK-NEXT: f32.const nan # CHECK-NEXT: v128.const 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 # CHECK-NEXT: v128.const 0, 1, 2, 3, 4, 5, 6, 7 # CHECK-NEXT: local.get 0 From 474009eaea76ad18d66bd620a0044afbcf812dd9 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Mon, 15 Jul 2019 22:22:10 +0000 Subject: [PATCH 172/451] [WebAssembly] Simplify regcopy.mir Summary: This deletes the ll templates from the functions because they don't need them (mir files need ll templates only when they have function calls or BB names that are not numbers). This also renames the filename to `reg-copy.mir`, because I'm planning to add some more `reg-*.mir` soon. 
Reviewers: tlively Subscribers: dschuff, sbc100, jgravelle-google, sunfish, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64704 llvm-svn: 366140 --- .../WebAssembly/{regcopy.mir => reg-copy.mir} | 33 +++---------------- 1 file changed, 5 insertions(+), 28 deletions(-) rename llvm/test/CodeGen/WebAssembly/{regcopy.mir => reg-copy.mir} (69%) diff --git a/llvm/test/CodeGen/WebAssembly/regcopy.mir b/llvm/test/CodeGen/WebAssembly/reg-copy.mir similarity index 69% rename from llvm/test/CodeGen/WebAssembly/regcopy.mir rename to llvm/test/CodeGen/WebAssembly/reg-copy.mir index 5115cde6d240b..0a362699b8143 100644 --- a/llvm/test/CodeGen/WebAssembly/regcopy.mir +++ b/llvm/test/CodeGen/WebAssembly/reg-copy.mir @@ -1,32 +1,9 @@ -# RUN: llc %s -o - -run-pass=postrapseudos | FileCheck %s ---- | - target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" - target triple = "wasm32-unknown-unknown" +# RUN: llc -mtriple=wasm32-unknown-unknown %s -o - -run-pass=postrapseudos | FileCheck %s - define void @copy_i32() { - ret void - } - - define void @copy_i64() { - ret void - } - - define void @copy_f32() { - ret void - } - - define void @copy_f64() { - ret void - } - - define void @copy_v128() { - ret void - } -... 
--- name: copy_i32 # CHECK-LABEL: copy_i32 -body: | +body: | ; CHECK-LABEL: bb.0: ; CHECK-NEXT: %0:i32 = COPY_I32 %1:i32 ; CHECK-NEXT: RETURN_VOID @@ -48,7 +25,7 @@ body: | --- name: copy_f32 # CHECK-LABEL: copy_f32 -body: | +body: | ; CHECK-LABEL: bb.0: ; CHECK-NEXT: %0:f32 = COPY_F32 %1:f32 ; CHECK-NEXT: RETURN_VOID @@ -59,7 +36,7 @@ body: | --- name: copy_f64 # CHECK-LABEL: copy_f64 -body: | +body: | ; CHECK-LABEL: bb.0: ; CHECK-NEXT: %0:f64 = COPY_F64 %1:f64 ; CHECK-NEXT: RETURN_VOID @@ -70,7 +47,7 @@ body: | --- name: copy_v128 # CHECK-LABEL: copy_v128 -body: | +body: | ; CHECK-LABEL: bb.0: ; CHECK-NEXT: %0:v128 = COPY_V128 %1:v128 ; CHECK-NEXT: RETURN_VOID From 853813293aceafbca6a71ded2e95e83bd3a9027d Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 15 Jul 2019 22:27:57 +0000 Subject: [PATCH 173/451] [clang-fuzzer] Remove 'setUseOrcMCJITReplacement(false)' call. The default value for this option (UseMCJITReplacement) is already false, and OrcMCJITReplacement is going to have deprecation warnings attached in LLVM 9.0. Removing this call removes a spurious warning. 
llvm-svn: 366141 --- clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp b/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp index d0d35d51e80e5..ba2dff0af8a93 100644 --- a/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp +++ b/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp @@ -159,7 +159,6 @@ static void CreateAndRunJITFunc(const std::string &IR, CodeGenOpt::Level OLvl) { builder.setMAttrs(getFeatureList()); builder.setErrorStr(&ErrorMsg); builder.setEngineKind(EngineKind::JIT); - builder.setUseOrcMCJITReplacement(false); builder.setMCJITMemoryManager(make_unique()); builder.setOptLevel(OLvl); builder.setTargetOptions(InitTargetOptionsFromCodeGenFlags()); From 67cee1dc7ee285b03372eb818a3894d35efa7394 Mon Sep 17 00:00:00 2001 From: Shoaib Meenai Date: Mon, 15 Jul 2019 22:29:30 +0000 Subject: [PATCH 174/451] [llvm-lipo] Implement -create (with hardcoded alignments) Creates universal binary output file from input files. Currently uses hard coded value for alignment. Want to get the create functionality approved before implementing the alignment function. 
Patch by Anusha Basana Differential Revision: https://reviews.llvm.org/D64102 llvm-svn: 366142 --- .../tools/llvm-lipo/Inputs/arm64-slice.yaml | 101 ++++++++ .../tools/llvm-lipo/Inputs/armv7-slice.yaml | 76 ++++++ .../tools/llvm-lipo/Inputs/x86_64-slice.yaml | 89 +++++++ .../tools/llvm-lipo/create-executable.test | 11 + .../tools/llvm-lipo/create-invalid-input.test | 8 + .../llvm-lipo/create-without-alignment.test | 32 +++ .../thin-executable-universal-binary.test | 2 +- llvm/tools/llvm-lipo/LipoOpts.td | 5 + llvm/tools/llvm-lipo/llvm-lipo.cpp | 222 ++++++++++++++++-- 9 files changed, 527 insertions(+), 19 deletions(-) create mode 100644 llvm/test/tools/llvm-lipo/Inputs/arm64-slice.yaml create mode 100644 llvm/test/tools/llvm-lipo/Inputs/armv7-slice.yaml create mode 100644 llvm/test/tools/llvm-lipo/Inputs/x86_64-slice.yaml create mode 100644 llvm/test/tools/llvm-lipo/create-executable.test create mode 100644 llvm/test/tools/llvm-lipo/create-invalid-input.test create mode 100644 llvm/test/tools/llvm-lipo/create-without-alignment.test diff --git a/llvm/test/tools/llvm-lipo/Inputs/arm64-slice.yaml b/llvm/test/tools/llvm-lipo/Inputs/arm64-slice.yaml new file mode 100644 index 0000000000000..5dfd45027381e --- /dev/null +++ b/llvm/test/tools/llvm-lipo/Inputs/arm64-slice.yaml @@ -0,0 +1,101 @@ +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x0100000C + cpusubtype: 0x00000000 + filetype: 0x00000001 + ncmds: 4 + sizeofcmds: 352 + flags: 0x00002000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 232 + segname: '' + vmaddr: 0 + vmsize: 56 + fileoff: 384 + filesize: 56 + maxprot: 7 + initprot: 7 + nsects: 2 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 20 + offset: 0x00000180 + align: 2 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - sectname: __compact_unwind + segname: __LD + addr: 0x0000000000000018 + size: 32 
+ offset: 0x00000198 + align: 3 + reloff: 0x000001B8 + nreloc: 1 + flags: 0x02000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - cmd: LC_VERSION_MIN_IPHONEOS + cmdsize: 16 + version: 327680 + sdk: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 448 + nsyms: 3 + stroff: 496 + strsize: 20 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 2 + iextdefsym: 2 + nextdefsym: 1 + iundefsym: 3 + nundefsym: 0 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 +LinkEditData: + NameList: + - n_strx: 13 + n_type: 0x0E + n_sect: 1 + n_desc: 0 + n_value: 0 + - n_strx: 7 + n_type: 0x0E + n_sect: 2 + n_desc: 0 + n_value: 24 + - n_strx: 1 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + n_value: 0 + StringTable: + - '' + - _main + - ltmp1 + - ltmp0 + - '' +... diff --git a/llvm/test/tools/llvm-lipo/Inputs/armv7-slice.yaml b/llvm/test/tools/llvm-lipo/Inputs/armv7-slice.yaml new file mode 100644 index 0000000000000..b26062931458c --- /dev/null +++ b/llvm/test/tools/llvm-lipo/Inputs/armv7-slice.yaml @@ -0,0 +1,76 @@ +--- !mach-o +FileHeader: + magic: 0xFEEDFACE + cputype: 0x0000000C + cpusubtype: 0x00000009 + filetype: 0x00000001 + ncmds: 4 + sizeofcmds: 244 + flags: 0x00002000 +LoadCommands: + - cmd: LC_SEGMENT + cmdsize: 124 + segname: '' + vmaddr: 0 + vmsize: 10 + fileoff: 272 + filesize: 10 + maxprot: 7 + initprot: 7 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 10 + offset: 0x00000110 + align: 1 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - cmd: LC_VERSION_MIN_IPHONEOS + cmdsize: 16 + version: 327680 + sdk: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 284 + nsyms: 1 + stroff: 296 + strsize: 8 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 0 + iextdefsym: 0 + 
nextdefsym: 1 + iundefsym: 1 + nundefsym: 0 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 +LinkEditData: + NameList: + - n_strx: 1 + n_type: 0x0F + n_sect: 1 + n_desc: 8 + n_value: 0 + StringTable: + - '' + - _main + - '' +... diff --git a/llvm/test/tools/llvm-lipo/Inputs/x86_64-slice.yaml b/llvm/test/tools/llvm-lipo/Inputs/x86_64-slice.yaml new file mode 100644 index 0000000000000..27db6d7a13157 --- /dev/null +++ b/llvm/test/tools/llvm-lipo/Inputs/x86_64-slice.yaml @@ -0,0 +1,89 @@ +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x00000001 + ncmds: 4 + sizeofcmds: 352 + flags: 0x00002000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 232 + segname: '' + vmaddr: 0 + vmsize: 80 + fileoff: 384 + filesize: 80 + maxprot: 7 + initprot: 7 + nsects: 2 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 15 + offset: 0x00000180 + align: 4 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - sectname: __eh_frame + segname: __TEXT + addr: 0x0000000000000010 + size: 64 + offset: 0x00000190 + align: 3 + reloff: 0x00000000 + nreloc: 0 + flags: 0x6800000B + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + - cmd: LC_VERSION_MIN_MACOSX + cmdsize: 16 + version: 656384 + sdk: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 464 + nsyms: 1 + stroff: 480 + strsize: 8 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 0 + iextdefsym: 0 + nextdefsym: 1 + iundefsym: 1 + nundefsym: 0 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 +LinkEditData: + NameList: + - n_strx: 1 + n_type: 0x0F + n_sect: 1 + 
n_desc: 0 + n_value: 0 + StringTable: + - '' + - _main + - '' +... diff --git a/llvm/test/tools/llvm-lipo/create-executable.test b/llvm/test/tools/llvm-lipo/create-executable.test new file mode 100644 index 0000000000000..82aa69cfec521 --- /dev/null +++ b/llvm/test/tools/llvm-lipo/create-executable.test @@ -0,0 +1,11 @@ +# RUN: yaml2obj %p/Inputs/i386-slice.yaml > %t-i386.o +# RUN: yaml2obj %p/Inputs/x86_64-slice.yaml > %t-x86_64.o + +# RUN: chmod -x %t-i386.o +# RUN: chmod -x %t-x86_64.o +# RUN: llvm-lipo %t-i386.o %t-x86_64.o -create -output %t-universal.o +# RUN: ! test -x %t-universal.o + +# RUN: chmod +x %t-i386.o +# RUN: llvm-lipo %t-i386.o %t-x86_64.o -create -output %t-universal.o +# RUN: test -x %t-universal.o diff --git a/llvm/test/tools/llvm-lipo/create-invalid-input.test b/llvm/test/tools/llvm-lipo/create-invalid-input.test new file mode 100644 index 0000000000000..4bb2e1a566679 --- /dev/null +++ b/llvm/test/tools/llvm-lipo/create-invalid-input.test @@ -0,0 +1,8 @@ +# RUN: yaml2obj %p/Inputs/i386-slice.yaml > %t-32.o +# RUN: yaml2obj %p/Inputs/i386-x86_64-universal.yaml > %t-universal.o + +# RUN: not llvm-lipo %t-32.o -create 2>&1 | FileCheck --check-prefix=NO_OUTPUT %s +# NO_OUTPUT: error: create expects a single output file to be specified + +# RUN: not llvm-lipo %t-universal.o %t-32.o -create -output %t.o 2>&1 | FileCheck --check-prefix=DUPLICATE_ARCHS %s +# DUPLICATE_ARCHS: have the same architecture i386 and therefore cannot be in the same universal binary diff --git a/llvm/test/tools/llvm-lipo/create-without-alignment.test b/llvm/test/tools/llvm-lipo/create-without-alignment.test new file mode 100644 index 0000000000000..813230a7e3c4a --- /dev/null +++ b/llvm/test/tools/llvm-lipo/create-without-alignment.test @@ -0,0 +1,32 @@ +# RUN: yaml2obj %p/Inputs/i386-slice.yaml > %t-i386.o +# RUN: yaml2obj %p/Inputs/x86_64-slice.yaml > %t-x86_64.o + +# RUN: llvm-lipo %t-i386.o %t-x86_64.o -create -output %t-universal-llvm.o + +# RUN: yaml2obj 
%p/Inputs/i386-x86_64-universal.yaml > %t-universal.o +# RUN: cmp %t-universal-llvm.o %t-universal.o + +# RUN: yaml2obj %p/Inputs/armv7-slice.yaml > %t-armv7.o +# RUN: yaml2obj %p/Inputs/arm64-slice.yaml > %t-arm64.o + +# RUN: llvm-lipo %t-arm64.o %t-armv7.o %t-universal.o -create -output %t-universal-2.o +# RUN: llvm-lipo %t-universal-2.o -thin x86_64 -output %t-x86_64_extracted.o +# RUN: cmp %t-x86_64_extracted.o %t-x86_64.o +# RUN: llvm-lipo %t-universal-2.o -thin armv7 -output %t-armv7-extracted.o +# RUN: cmp %t-armv7-extracted.o %t-armv7.o + +# RUN: llvm-objdump %t-universal-2.o -m --universal-headers | FileCheck %s +# CHECK: fat_magic FAT_MAGIC +# CHECK: nfat_arch 4 +# CHECK: architecture i386 +# CHECK: offset 4096 +# CHECK: align 2^12 (4096) +# CHECK: architecture x86_64 +# CHECK: offset 8192 +# CHECK: align 2^12 (4096) +# CHECK: architecture armv7 +# CHECK: offset 16384 +# CHECK: align 2^14 (16384) +# CHECK: architecture arm64 +# CHECK: offset 32768 +# CHECK: align 2^14 (16384) diff --git a/llvm/test/tools/llvm-lipo/thin-executable-universal-binary.test b/llvm/test/tools/llvm-lipo/thin-executable-universal-binary.test index 3992373d9e46f..870252cd690ff 100644 --- a/llvm/test/tools/llvm-lipo/thin-executable-universal-binary.test +++ b/llvm/test/tools/llvm-lipo/thin-executable-universal-binary.test @@ -4,7 +4,7 @@ # RUN: chmod -x %t-universal.o # RUN: llvm-lipo %t-universal.o -thin i386 -output %t32.o -# RUN: test ! -x %t32.o +# RUN: ! test -x %t32.o # RUN: chmod +x %t-universal.o # RUN: llvm-lipo %t-universal.o -thin i386 -output %t32-ex.o diff --git a/llvm/tools/llvm-lipo/LipoOpts.td b/llvm/tools/llvm-lipo/LipoOpts.td index e3cbe2dfa8e45..e2a73768733eb 100644 --- a/llvm/tools/llvm-lipo/LipoOpts.td +++ b/llvm/tools/llvm-lipo/LipoOpts.td @@ -23,6 +23,11 @@ def thin : Option<["-", "--"], "thin", KIND_SEPARATE>, HelpText<"Create a thin output file of specified arch_type from the " "fat input file. 
Requires -output option">; +def create : Option<["-", "--"], "create", KIND_FLAG>, + Group, + HelpText<"Create a universal binary output file from the input " + "files. Requires -output option">; + def output : Option<["-", "--"], "output", KIND_SEPARATE>, HelpText<"Create output file with specified name">; def o : JoinedOrSeparate<["-"], "o">, Alias; diff --git a/llvm/tools/llvm-lipo/llvm-lipo.cpp b/llvm/tools/llvm-lipo/llvm-lipo.cpp index ea0d427e01997..65135bec951c3 100644 --- a/llvm/tools/llvm-lipo/llvm-lipo.cpp +++ b/llvm/tools/llvm-lipo/llvm-lipo.cpp @@ -80,6 +80,7 @@ enum class LipoAction { PrintArchs, VerifyArch, ThinArch, + CreateUniversal, }; struct Config { @@ -90,6 +91,14 @@ struct Config { LipoAction ActionToPerform; }; +struct Slice { + const MachOObjectFile *ObjectFile; + // Requires Alignment field to store slice alignment values from universal + // binaries. Also needed to order the slices using compareSlices, so the total + // file size can be calculated before creating the output buffer. 
+ uint32_t Alignment; +}; + } // end namespace static void validateArchitectureName(StringRef ArchitectureName) { @@ -108,7 +117,7 @@ static Config parseLipoOptions(ArrayRef ArgsArr) { Config C; LipoOptTable T; unsigned MissingArgumentIndex, MissingArgumentCount; - llvm::opt::InputArgList InputArgs = + opt::InputArgList InputArgs = T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); if (MissingArgumentCount) @@ -186,6 +195,12 @@ static Config parseLipoOptions(ArrayRef ArgsArr) { C.ActionToPerform = LipoAction::ThinArch; return C; + case LIPO_create: + if (C.OutputFile.empty()) + reportError("create expects a single output file to be specified"); + C.ActionToPerform = LipoAction::CreateUniversal; + return C; + default: reportError("llvm-lipo action unspecified"); } @@ -195,8 +210,7 @@ static SmallVector, 1> readInputBinaries(ArrayRef InputFiles) { SmallVector, 1> InputBinaries; for (StringRef InputFile : InputFiles) { - Expected> BinaryOrErr = - createBinary(InputFile); + Expected> BinaryOrErr = createBinary(InputFile); if (!BinaryOrErr) reportError(InputFile, BinaryOrErr.takeError()); // TODO: Add compatibility for archive files @@ -241,33 +255,35 @@ static void verifyArch(ArrayRef> InputBinaries, exit(EXIT_SUCCESS); } -static void printArchOrUnknown(const MachOObjectFile *ObjectFile) { - // Prints trailing space and unknown in this format for compatibility with - // cctools lipo. 
- const std::string ObjectArch = ObjectFile->getArchTriple().getArchName(); - if (ObjectArch.empty()) - outs() << "unknown(" << ObjectFile->getHeader().cputype << "," - << ObjectFile->getHeader().cpusubtype << ") "; - else - outs() << ObjectArch + " "; +// Returns a string of the given Object file's architecture type +// Unknown architectures formatted unknown(CPUType,CPUSubType) for compatibility +// with cctools lipo +static std::string getArchString(const MachOObjectFile &ObjectFile) { + const Triple T = ObjectFile.getArchTriple(); + const StringRef ObjectArch = T.getArchName(); + if (!ObjectArch.empty()) + return ObjectArch; + return ("unknown(" + Twine(ObjectFile.getHeader().cputype) + "," + + Twine(ObjectFile.getHeader().cpusubtype & ~MachO::CPU_SUBTYPE_MASK) + + ")") + .str(); } LLVM_ATTRIBUTE_NORETURN static void printArchs(ArrayRef> InputBinaries) { + // Prints trailing space for compatibility with cctools lipo. assert(InputBinaries.size() == 1 && "Incorrect number of input binaries"); const Binary *InputBinary = InputBinaries.front().getBinary(); if (auto UO = dyn_cast(InputBinary)) { - for (MachOUniversalBinary::object_iterator I = UO->begin_objects(), - E = UO->end_objects(); - I != E; ++I) { + for (const auto &O : UO->objects()) { Expected> BinaryOrError = - I->getAsObjectFile(); + O.getAsObjectFile(); if (!BinaryOrError) reportError(InputBinary->getFileName(), BinaryOrError.takeError()); - printArchOrUnknown(BinaryOrError.get().get()); + outs() << getArchString(*BinaryOrError.get().get()) << " "; } } else if (auto O = dyn_cast(InputBinary)) { - printArchOrUnknown(O); + outs() << getArchString(*O) << " "; } else { llvm_unreachable("Unexpected binary format"); } @@ -314,6 +330,173 @@ static void extractSlice(ArrayRef> InputBinaries, exit(EXIT_SUCCESS); } +static void checkArchDuplicates(const ArrayRef &Slices) { + DenseMap CPUIds; + auto CPUIDForSlice = [](const Slice &S) { + return static_cast(S.ObjectFile->getHeader().cputype) << 32 | + 
S.ObjectFile->getHeader().cpusubtype; + }; + for (const auto &S : Slices) { + auto Entry = CPUIds.try_emplace(CPUIDForSlice(S), S.ObjectFile); + if (!Entry.second) + reportError(Entry.first->second->getFileName() + " and " + + S.ObjectFile->getFileName() + " have the same architecture " + + getArchString(*S.ObjectFile) + + " and therefore cannot be in the same universal binary"); + } +} + +static uint32_t calculateAlignment(const MachOObjectFile *ObjectFile) { + // TODO: Implement getAlign() and remove hard coding + // Will be implemented in a follow-up. + + switch (ObjectFile->getHeader().cputype) { + case MachO::CPU_TYPE_I386: + case MachO::CPU_TYPE_X86_64: + case MachO::CPU_TYPE_POWERPC: + case MachO::CPU_TYPE_POWERPC64: + return 12; // log2 value of page size(4k) for x86 and PPC + case MachO::CPU_TYPE_ARM: + case MachO::CPU_TYPE_ARM64: + case MachO::CPU_TYPE_ARM64_32: + return 14; // log2 value of page size(16k) for Darwin ARM + default: + return 12; + } +} + +// This function replicates ordering from cctools lipo for consistency +static bool compareSlices(const Slice &Lhs, const Slice &Rhs) { + if (Lhs.ObjectFile->getHeader().cputype == + Rhs.ObjectFile->getHeader().cputype) + return Lhs.ObjectFile->getHeader().cpusubtype < + Rhs.ObjectFile->getHeader().cpusubtype; + + // force arm64-family to follow after all other slices for compatibility + // with cctools lipo + if (Lhs.ObjectFile->getHeader().cputype == MachO::CPU_TYPE_ARM64) + return false; + if (Rhs.ObjectFile->getHeader().cputype == MachO::CPU_TYPE_ARM64) + return true; + + // Sort by alignment to minimize file size + return Lhs.Alignment < Rhs.Alignment; +} + +// Updates vector ExtractedObjects with the MachOObjectFiles extracted from +// Universal Binary files to transfer ownership. 
+static SmallVector buildSlices( + ArrayRef> InputBinaries, + SmallVectorImpl> &ExtractedObjects) { + SmallVector Slices; + for (auto &IB : InputBinaries) { + const Binary *InputBinary = IB.getBinary(); + if (auto UO = dyn_cast(InputBinary)) { + for (const auto &O : UO->objects()) { + Expected> BinaryOrError = + O.getAsObjectFile(); + if (!BinaryOrError) + reportError(InputBinary->getFileName(), BinaryOrError.takeError()); + ExtractedObjects.push_back(std::move(BinaryOrError.get())); + Slices.push_back(Slice{ExtractedObjects.back().get(), O.getAlign()}); + } + } else if (auto O = dyn_cast(InputBinary)) { + Slices.push_back(Slice{O, calculateAlignment(O)}); + } else { + llvm_unreachable("Unexpected binary format"); + } + } + return Slices; +} + +static SmallVector +buildFatArchList(ArrayRef Slices) { + SmallVector FatArchList; + uint64_t Offset = + sizeof(MachO::fat_header) + Slices.size() * sizeof(MachO::fat_arch); + + for (size_t Index = 0, Size = Slices.size(); Index < Size; ++Index) { + Offset = alignTo(Offset, 1 << Slices[Index].Alignment); + const MachOObjectFile *ObjectFile = Slices[Index].ObjectFile; + if (Offset > UINT32_MAX) + reportError("fat file too large to be created because the offset " + "field in struct fat_arch is only 32-bits and the offset " + + Twine(Offset) + " for " + ObjectFile->getFileName() + + " for architecture " + getArchString(*ObjectFile) + + "exceeds that."); + + MachO::fat_arch FatArch; + FatArch.cputype = ObjectFile->getHeader().cputype; + FatArch.cpusubtype = ObjectFile->getHeader().cpusubtype; + FatArch.offset = Offset; + FatArch.size = ObjectFile->getMemoryBufferRef().getBufferSize(); + FatArch.align = Slices[Index].Alignment; + Offset += FatArch.size; + FatArchList.push_back(FatArch); + } + return FatArchList; +} + +static void createUniversalBinary(SmallVectorImpl &Slices, + StringRef OutputFileName) { + MachO::fat_header FatHeader; + FatHeader.magic = MachO::FAT_MAGIC; + FatHeader.nfat_arch = Slices.size(); + + 
stable_sort(Slices, compareSlices); + SmallVector FatArchList = buildFatArchList(Slices); + + const bool IsExecutable = any_of(Slices, [](Slice S) { + return sys::fs::can_execute(S.ObjectFile->getFileName()); + }); + const uint64_t OutputFileSize = + FatArchList.back().offset + FatArchList.back().size; + Expected> OutFileOrError = + FileOutputBuffer::create(OutputFileName, OutputFileSize, + IsExecutable ? FileOutputBuffer::F_executable + : 0); + if (!OutFileOrError) + reportError(OutputFileName, OutFileOrError.takeError()); + std::unique_ptr OutFile = std::move(OutFileOrError.get()); + std::memset(OutFile->getBufferStart(), 0, OutputFileSize); + + if (sys::IsLittleEndianHost) + MachO::swapStruct(FatHeader); + std::memcpy(OutFile->getBufferStart(), &FatHeader, sizeof(MachO::fat_header)); + + for (size_t Index = 0, Size = Slices.size(); Index < Size; ++Index) { + MemoryBufferRef BufferRef = Slices[Index].ObjectFile->getMemoryBufferRef(); + std::copy(BufferRef.getBufferStart(), BufferRef.getBufferEnd(), + OutFile->getBufferStart() + FatArchList[Index].offset); + } + + // FatArchs written after Slices in order reduce the number of swaps for the + // LittleEndian case + if (sys::IsLittleEndianHost) + for (MachO::fat_arch &FA : FatArchList) + MachO::swapStruct(FA); + std::memcpy(OutFile->getBufferStart() + sizeof(MachO::fat_header), + FatArchList.begin(), + sizeof(MachO::fat_arch) * FatArchList.size()); + + if (Error E = OutFile->commit()) + reportError(OutputFileName, std::move(E)); +} + +LLVM_ATTRIBUTE_NORETURN +static void createUniversalBinary(ArrayRef> InputBinaries, + StringRef OutputFileName) { + assert(InputBinaries.size() >= 1 && "Incorrect number of input binaries"); + assert(!OutputFileName.empty() && "Create expects a single output file"); + + SmallVector, 1> ExtractedObjects; + SmallVector Slices = buildSlices(InputBinaries, ExtractedObjects); + checkArchDuplicates(Slices); + createUniversalBinary(Slices, OutputFileName); + + exit(EXIT_SUCCESS); +} + int 
main(int argc, char **argv) { InitLLVM X(argc, argv); Config C = parseLipoOptions(makeArrayRef(argv + 1, argc)); @@ -330,6 +513,9 @@ int main(int argc, char **argv) { case LipoAction::ThinArch: extractSlice(InputBinaries, C.ThinArchType, C.OutputFile); break; + case LipoAction::CreateUniversal: + createUniversalBinary(InputBinaries, C.OutputFile); + break; } return EXIT_SUCCESS; } From 4422cc4f1a910f790349d678a1c83bae5cd07ccb Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 15 Jul 2019 22:34:19 +0000 Subject: [PATCH 175/451] [OpenCL] Make TableGen'd builtin tables and helper functions static Reviewers: Pierre, Anastasia Reviewed By: Anastasia Subscribers: yaxunl, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D64608 llvm-svn: 366143 --- clang/lib/Sema/SemaLookup.cpp | 2 +- clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index c0b946a98d1dc..8a24dd884a76a 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -688,7 +688,7 @@ static void InsertOCLBuiltinDeclarations(Sema &S, LookupResult &LR, unsigned Len) { for (unsigned i = 0; i < Len; ++i) { - OpenCLBuiltinDecl &Decl = OpenCLBuiltins[Index - 1 + i]; + const OpenCLBuiltinDecl &Decl = OpenCLBuiltins[Index - 1 + i]; ASTContext &Context = S.Context; // Ignore this BIF if the version is incorrect. 
diff --git a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp index 1e495039c494c..8d83b1c7fa6b9 100644 --- a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp +++ b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp @@ -207,7 +207,7 @@ void BuiltinNameEmitter::GetOverloads() { } void BuiltinNameEmitter::EmitSignatureTable() { - OS << "OpenCLType OpenCLSignature[] = {\n"; + OS << "static const OpenCLType OpenCLSignature[] = {\n"; for (auto &P : SignatureSet) { OS << "// " << P.second << "\n"; for (Record *R : P.first) { @@ -222,7 +222,7 @@ void BuiltinNameEmitter::EmitSignatureTable() { } void BuiltinNameEmitter::EmitBuiltinTable() { - OS << "OpenCLBuiltinDecl OpenCLBuiltins[] = {\n"; + OS << "static const OpenCLBuiltinDecl OpenCLBuiltins[] = {\n"; for (auto &i : OverloadInfo) { StringRef Name = i.first; OS << "// " << Name << "\n"; @@ -255,7 +255,7 @@ void BuiltinNameEmitter::EmitStringMatcher() { OS << R"( // Return 0 if name is not a recognized OpenCL builtin, or an index // into a table of declarations if it is an OpenCL builtin. -std::pair isOpenCLBuiltin(llvm::StringRef name) { +static std::pair isOpenCLBuiltin(llvm::StringRef name) { )"; From c9e3c8301446f20efef6721dd3a05f2f9da217d8 Mon Sep 17 00:00:00 2001 From: Shoaib Meenai Date: Mon, 15 Jul 2019 22:44:08 +0000 Subject: [PATCH 176/451] Revert [llvm-lipo] Implement -create (with hardcoded alignments) This reverts r366142 (git commit 67cee1dc7ee285b03372eb818a3894d35efa7394) The test is failing on the Windows buildbots. Reverting while I investigate. 
llvm-svn: 366144 --- .../tools/llvm-lipo/Inputs/arm64-slice.yaml | 101 -------- .../tools/llvm-lipo/Inputs/armv7-slice.yaml | 76 ------ .../tools/llvm-lipo/Inputs/x86_64-slice.yaml | 89 ------- .../tools/llvm-lipo/create-executable.test | 11 - .../tools/llvm-lipo/create-invalid-input.test | 8 - .../llvm-lipo/create-without-alignment.test | 32 --- .../thin-executable-universal-binary.test | 2 +- llvm/tools/llvm-lipo/LipoOpts.td | 5 - llvm/tools/llvm-lipo/llvm-lipo.cpp | 222 ++---------------- 9 files changed, 19 insertions(+), 527 deletions(-) delete mode 100644 llvm/test/tools/llvm-lipo/Inputs/arm64-slice.yaml delete mode 100644 llvm/test/tools/llvm-lipo/Inputs/armv7-slice.yaml delete mode 100644 llvm/test/tools/llvm-lipo/Inputs/x86_64-slice.yaml delete mode 100644 llvm/test/tools/llvm-lipo/create-executable.test delete mode 100644 llvm/test/tools/llvm-lipo/create-invalid-input.test delete mode 100644 llvm/test/tools/llvm-lipo/create-without-alignment.test diff --git a/llvm/test/tools/llvm-lipo/Inputs/arm64-slice.yaml b/llvm/test/tools/llvm-lipo/Inputs/arm64-slice.yaml deleted file mode 100644 index 5dfd45027381e..0000000000000 --- a/llvm/test/tools/llvm-lipo/Inputs/arm64-slice.yaml +++ /dev/null @@ -1,101 +0,0 @@ ---- !mach-o -FileHeader: - magic: 0xFEEDFACF - cputype: 0x0100000C - cpusubtype: 0x00000000 - filetype: 0x00000001 - ncmds: 4 - sizeofcmds: 352 - flags: 0x00002000 - reserved: 0x00000000 -LoadCommands: - - cmd: LC_SEGMENT_64 - cmdsize: 232 - segname: '' - vmaddr: 0 - vmsize: 56 - fileoff: 384 - filesize: 56 - maxprot: 7 - initprot: 7 - nsects: 2 - flags: 0 - Sections: - - sectname: __text - segname: __TEXT - addr: 0x0000000000000000 - size: 20 - offset: 0x00000180 - align: 2 - reloff: 0x00000000 - nreloc: 0 - flags: 0x80000400 - reserved1: 0x00000000 - reserved2: 0x00000000 - reserved3: 0x00000000 - - sectname: __compact_unwind - segname: __LD - addr: 0x0000000000000018 - size: 32 - offset: 0x00000198 - align: 3 - reloff: 0x000001B8 - nreloc: 1 - flags: 
0x02000000 - reserved1: 0x00000000 - reserved2: 0x00000000 - reserved3: 0x00000000 - - cmd: LC_VERSION_MIN_IPHONEOS - cmdsize: 16 - version: 327680 - sdk: 0 - - cmd: LC_SYMTAB - cmdsize: 24 - symoff: 448 - nsyms: 3 - stroff: 496 - strsize: 20 - - cmd: LC_DYSYMTAB - cmdsize: 80 - ilocalsym: 0 - nlocalsym: 2 - iextdefsym: 2 - nextdefsym: 1 - iundefsym: 3 - nundefsym: 0 - tocoff: 0 - ntoc: 0 - modtaboff: 0 - nmodtab: 0 - extrefsymoff: 0 - nextrefsyms: 0 - indirectsymoff: 0 - nindirectsyms: 0 - extreloff: 0 - nextrel: 0 - locreloff: 0 - nlocrel: 0 -LinkEditData: - NameList: - - n_strx: 13 - n_type: 0x0E - n_sect: 1 - n_desc: 0 - n_value: 0 - - n_strx: 7 - n_type: 0x0E - n_sect: 2 - n_desc: 0 - n_value: 24 - - n_strx: 1 - n_type: 0x0F - n_sect: 1 - n_desc: 0 - n_value: 0 - StringTable: - - '' - - _main - - ltmp1 - - ltmp0 - - '' -... diff --git a/llvm/test/tools/llvm-lipo/Inputs/armv7-slice.yaml b/llvm/test/tools/llvm-lipo/Inputs/armv7-slice.yaml deleted file mode 100644 index b26062931458c..0000000000000 --- a/llvm/test/tools/llvm-lipo/Inputs/armv7-slice.yaml +++ /dev/null @@ -1,76 +0,0 @@ ---- !mach-o -FileHeader: - magic: 0xFEEDFACE - cputype: 0x0000000C - cpusubtype: 0x00000009 - filetype: 0x00000001 - ncmds: 4 - sizeofcmds: 244 - flags: 0x00002000 -LoadCommands: - - cmd: LC_SEGMENT - cmdsize: 124 - segname: '' - vmaddr: 0 - vmsize: 10 - fileoff: 272 - filesize: 10 - maxprot: 7 - initprot: 7 - nsects: 1 - flags: 0 - Sections: - - sectname: __text - segname: __TEXT - addr: 0x0000000000000000 - size: 10 - offset: 0x00000110 - align: 1 - reloff: 0x00000000 - nreloc: 0 - flags: 0x80000400 - reserved1: 0x00000000 - reserved2: 0x00000000 - reserved3: 0x00000000 - - cmd: LC_VERSION_MIN_IPHONEOS - cmdsize: 16 - version: 327680 - sdk: 0 - - cmd: LC_SYMTAB - cmdsize: 24 - symoff: 284 - nsyms: 1 - stroff: 296 - strsize: 8 - - cmd: LC_DYSYMTAB - cmdsize: 80 - ilocalsym: 0 - nlocalsym: 0 - iextdefsym: 0 - nextdefsym: 1 - iundefsym: 1 - nundefsym: 0 - tocoff: 0 - ntoc: 0 - 
modtaboff: 0 - nmodtab: 0 - extrefsymoff: 0 - nextrefsyms: 0 - indirectsymoff: 0 - nindirectsyms: 0 - extreloff: 0 - nextrel: 0 - locreloff: 0 - nlocrel: 0 -LinkEditData: - NameList: - - n_strx: 1 - n_type: 0x0F - n_sect: 1 - n_desc: 8 - n_value: 0 - StringTable: - - '' - - _main - - '' -... diff --git a/llvm/test/tools/llvm-lipo/Inputs/x86_64-slice.yaml b/llvm/test/tools/llvm-lipo/Inputs/x86_64-slice.yaml deleted file mode 100644 index 27db6d7a13157..0000000000000 --- a/llvm/test/tools/llvm-lipo/Inputs/x86_64-slice.yaml +++ /dev/null @@ -1,89 +0,0 @@ ---- !mach-o -FileHeader: - magic: 0xFEEDFACF - cputype: 0x01000007 - cpusubtype: 0x00000003 - filetype: 0x00000001 - ncmds: 4 - sizeofcmds: 352 - flags: 0x00002000 - reserved: 0x00000000 -LoadCommands: - - cmd: LC_SEGMENT_64 - cmdsize: 232 - segname: '' - vmaddr: 0 - vmsize: 80 - fileoff: 384 - filesize: 80 - maxprot: 7 - initprot: 7 - nsects: 2 - flags: 0 - Sections: - - sectname: __text - segname: __TEXT - addr: 0x0000000000000000 - size: 15 - offset: 0x00000180 - align: 4 - reloff: 0x00000000 - nreloc: 0 - flags: 0x80000400 - reserved1: 0x00000000 - reserved2: 0x00000000 - reserved3: 0x00000000 - - sectname: __eh_frame - segname: __TEXT - addr: 0x0000000000000010 - size: 64 - offset: 0x00000190 - align: 3 - reloff: 0x00000000 - nreloc: 0 - flags: 0x6800000B - reserved1: 0x00000000 - reserved2: 0x00000000 - reserved3: 0x00000000 - - cmd: LC_VERSION_MIN_MACOSX - cmdsize: 16 - version: 656384 - sdk: 0 - - cmd: LC_SYMTAB - cmdsize: 24 - symoff: 464 - nsyms: 1 - stroff: 480 - strsize: 8 - - cmd: LC_DYSYMTAB - cmdsize: 80 - ilocalsym: 0 - nlocalsym: 0 - iextdefsym: 0 - nextdefsym: 1 - iundefsym: 1 - nundefsym: 0 - tocoff: 0 - ntoc: 0 - modtaboff: 0 - nmodtab: 0 - extrefsymoff: 0 - nextrefsyms: 0 - indirectsymoff: 0 - nindirectsyms: 0 - extreloff: 0 - nextrel: 0 - locreloff: 0 - nlocrel: 0 -LinkEditData: - NameList: - - n_strx: 1 - n_type: 0x0F - n_sect: 1 - n_desc: 0 - n_value: 0 - StringTable: - - '' - - _main - - '' 
-... diff --git a/llvm/test/tools/llvm-lipo/create-executable.test b/llvm/test/tools/llvm-lipo/create-executable.test deleted file mode 100644 index 82aa69cfec521..0000000000000 --- a/llvm/test/tools/llvm-lipo/create-executable.test +++ /dev/null @@ -1,11 +0,0 @@ -# RUN: yaml2obj %p/Inputs/i386-slice.yaml > %t-i386.o -# RUN: yaml2obj %p/Inputs/x86_64-slice.yaml > %t-x86_64.o - -# RUN: chmod -x %t-i386.o -# RUN: chmod -x %t-x86_64.o -# RUN: llvm-lipo %t-i386.o %t-x86_64.o -create -output %t-universal.o -# RUN: ! test -x %t-universal.o - -# RUN: chmod +x %t-i386.o -# RUN: llvm-lipo %t-i386.o %t-x86_64.o -create -output %t-universal.o -# RUN: test -x %t-universal.o diff --git a/llvm/test/tools/llvm-lipo/create-invalid-input.test b/llvm/test/tools/llvm-lipo/create-invalid-input.test deleted file mode 100644 index 4bb2e1a566679..0000000000000 --- a/llvm/test/tools/llvm-lipo/create-invalid-input.test +++ /dev/null @@ -1,8 +0,0 @@ -# RUN: yaml2obj %p/Inputs/i386-slice.yaml > %t-32.o -# RUN: yaml2obj %p/Inputs/i386-x86_64-universal.yaml > %t-universal.o - -# RUN: not llvm-lipo %t-32.o -create 2>&1 | FileCheck --check-prefix=NO_OUTPUT %s -# NO_OUTPUT: error: create expects a single output file to be specified - -# RUN: not llvm-lipo %t-universal.o %t-32.o -create -output %t.o 2>&1 | FileCheck --check-prefix=DUPLICATE_ARCHS %s -# DUPLICATE_ARCHS: have the same architecture i386 and therefore cannot be in the same universal binary diff --git a/llvm/test/tools/llvm-lipo/create-without-alignment.test b/llvm/test/tools/llvm-lipo/create-without-alignment.test deleted file mode 100644 index 813230a7e3c4a..0000000000000 --- a/llvm/test/tools/llvm-lipo/create-without-alignment.test +++ /dev/null @@ -1,32 +0,0 @@ -# RUN: yaml2obj %p/Inputs/i386-slice.yaml > %t-i386.o -# RUN: yaml2obj %p/Inputs/x86_64-slice.yaml > %t-x86_64.o - -# RUN: llvm-lipo %t-i386.o %t-x86_64.o -create -output %t-universal-llvm.o - -# RUN: yaml2obj %p/Inputs/i386-x86_64-universal.yaml > %t-universal.o -# RUN: 
cmp %t-universal-llvm.o %t-universal.o - -# RUN: yaml2obj %p/Inputs/armv7-slice.yaml > %t-armv7.o -# RUN: yaml2obj %p/Inputs/arm64-slice.yaml > %t-arm64.o - -# RUN: llvm-lipo %t-arm64.o %t-armv7.o %t-universal.o -create -output %t-universal-2.o -# RUN: llvm-lipo %t-universal-2.o -thin x86_64 -output %t-x86_64_extracted.o -# RUN: cmp %t-x86_64_extracted.o %t-x86_64.o -# RUN: llvm-lipo %t-universal-2.o -thin armv7 -output %t-armv7-extracted.o -# RUN: cmp %t-armv7-extracted.o %t-armv7.o - -# RUN: llvm-objdump %t-universal-2.o -m --universal-headers | FileCheck %s -# CHECK: fat_magic FAT_MAGIC -# CHECK: nfat_arch 4 -# CHECK: architecture i386 -# CHECK: offset 4096 -# CHECK: align 2^12 (4096) -# CHECK: architecture x86_64 -# CHECK: offset 8192 -# CHECK: align 2^12 (4096) -# CHECK: architecture armv7 -# CHECK: offset 16384 -# CHECK: align 2^14 (16384) -# CHECK: architecture arm64 -# CHECK: offset 32768 -# CHECK: align 2^14 (16384) diff --git a/llvm/test/tools/llvm-lipo/thin-executable-universal-binary.test b/llvm/test/tools/llvm-lipo/thin-executable-universal-binary.test index 870252cd690ff..3992373d9e46f 100644 --- a/llvm/test/tools/llvm-lipo/thin-executable-universal-binary.test +++ b/llvm/test/tools/llvm-lipo/thin-executable-universal-binary.test @@ -4,7 +4,7 @@ # RUN: chmod -x %t-universal.o # RUN: llvm-lipo %t-universal.o -thin i386 -output %t32.o -# RUN: ! test -x %t32.o +# RUN: test ! -x %t32.o # RUN: chmod +x %t-universal.o # RUN: llvm-lipo %t-universal.o -thin i386 -output %t32-ex.o diff --git a/llvm/tools/llvm-lipo/LipoOpts.td b/llvm/tools/llvm-lipo/LipoOpts.td index e2a73768733eb..e3cbe2dfa8e45 100644 --- a/llvm/tools/llvm-lipo/LipoOpts.td +++ b/llvm/tools/llvm-lipo/LipoOpts.td @@ -23,11 +23,6 @@ def thin : Option<["-", "--"], "thin", KIND_SEPARATE>, HelpText<"Create a thin output file of specified arch_type from the " "fat input file. 
Requires -output option">; -def create : Option<["-", "--"], "create", KIND_FLAG>, - Group, - HelpText<"Create a universal binary output file from the input " - "files. Requires -output option">; - def output : Option<["-", "--"], "output", KIND_SEPARATE>, HelpText<"Create output file with specified name">; def o : JoinedOrSeparate<["-"], "o">, Alias; diff --git a/llvm/tools/llvm-lipo/llvm-lipo.cpp b/llvm/tools/llvm-lipo/llvm-lipo.cpp index 65135bec951c3..ea0d427e01997 100644 --- a/llvm/tools/llvm-lipo/llvm-lipo.cpp +++ b/llvm/tools/llvm-lipo/llvm-lipo.cpp @@ -80,7 +80,6 @@ enum class LipoAction { PrintArchs, VerifyArch, ThinArch, - CreateUniversal, }; struct Config { @@ -91,14 +90,6 @@ struct Config { LipoAction ActionToPerform; }; -struct Slice { - const MachOObjectFile *ObjectFile; - // Requires Alignment field to store slice alignment values from universal - // binaries. Also needed to order the slices using compareSlices, so the total - // file size can be calculated before creating the output buffer. 
- uint32_t Alignment; -}; - } // end namespace static void validateArchitectureName(StringRef ArchitectureName) { @@ -117,7 +108,7 @@ static Config parseLipoOptions(ArrayRef ArgsArr) { Config C; LipoOptTable T; unsigned MissingArgumentIndex, MissingArgumentCount; - opt::InputArgList InputArgs = + llvm::opt::InputArgList InputArgs = T.ParseArgs(ArgsArr, MissingArgumentIndex, MissingArgumentCount); if (MissingArgumentCount) @@ -195,12 +186,6 @@ static Config parseLipoOptions(ArrayRef ArgsArr) { C.ActionToPerform = LipoAction::ThinArch; return C; - case LIPO_create: - if (C.OutputFile.empty()) - reportError("create expects a single output file to be specified"); - C.ActionToPerform = LipoAction::CreateUniversal; - return C; - default: reportError("llvm-lipo action unspecified"); } @@ -210,7 +195,8 @@ static SmallVector, 1> readInputBinaries(ArrayRef InputFiles) { SmallVector, 1> InputBinaries; for (StringRef InputFile : InputFiles) { - Expected> BinaryOrErr = createBinary(InputFile); + Expected> BinaryOrErr = + createBinary(InputFile); if (!BinaryOrErr) reportError(InputFile, BinaryOrErr.takeError()); // TODO: Add compatibility for archive files @@ -255,35 +241,33 @@ static void verifyArch(ArrayRef> InputBinaries, exit(EXIT_SUCCESS); } -// Returns a string of the given Object file's architecture type -// Unknown architectures formatted unknown(CPUType,CPUSubType) for compatibility -// with cctools lipo -static std::string getArchString(const MachOObjectFile &ObjectFile) { - const Triple T = ObjectFile.getArchTriple(); - const StringRef ObjectArch = T.getArchName(); - if (!ObjectArch.empty()) - return ObjectArch; - return ("unknown(" + Twine(ObjectFile.getHeader().cputype) + "," + - Twine(ObjectFile.getHeader().cpusubtype & ~MachO::CPU_SUBTYPE_MASK) + - ")") - .str(); +static void printArchOrUnknown(const MachOObjectFile *ObjectFile) { + // Prints trailing space and unknown in this format for compatibility with + // cctools lipo. 
+ const std::string ObjectArch = ObjectFile->getArchTriple().getArchName(); + if (ObjectArch.empty()) + outs() << "unknown(" << ObjectFile->getHeader().cputype << "," + << ObjectFile->getHeader().cpusubtype << ") "; + else + outs() << ObjectArch + " "; } LLVM_ATTRIBUTE_NORETURN static void printArchs(ArrayRef> InputBinaries) { - // Prints trailing space for compatibility with cctools lipo. assert(InputBinaries.size() == 1 && "Incorrect number of input binaries"); const Binary *InputBinary = InputBinaries.front().getBinary(); if (auto UO = dyn_cast(InputBinary)) { - for (const auto &O : UO->objects()) { + for (MachOUniversalBinary::object_iterator I = UO->begin_objects(), + E = UO->end_objects(); + I != E; ++I) { Expected> BinaryOrError = - O.getAsObjectFile(); + I->getAsObjectFile(); if (!BinaryOrError) reportError(InputBinary->getFileName(), BinaryOrError.takeError()); - outs() << getArchString(*BinaryOrError.get().get()) << " "; + printArchOrUnknown(BinaryOrError.get().get()); } } else if (auto O = dyn_cast(InputBinary)) { - outs() << getArchString(*O) << " "; + printArchOrUnknown(O); } else { llvm_unreachable("Unexpected binary format"); } @@ -330,173 +314,6 @@ static void extractSlice(ArrayRef> InputBinaries, exit(EXIT_SUCCESS); } -static void checkArchDuplicates(const ArrayRef &Slices) { - DenseMap CPUIds; - auto CPUIDForSlice = [](const Slice &S) { - return static_cast(S.ObjectFile->getHeader().cputype) << 32 | - S.ObjectFile->getHeader().cpusubtype; - }; - for (const auto &S : Slices) { - auto Entry = CPUIds.try_emplace(CPUIDForSlice(S), S.ObjectFile); - if (!Entry.second) - reportError(Entry.first->second->getFileName() + " and " + - S.ObjectFile->getFileName() + " have the same architecture " + - getArchString(*S.ObjectFile) + - " and therefore cannot be in the same universal binary"); - } -} - -static uint32_t calculateAlignment(const MachOObjectFile *ObjectFile) { - // TODO: Implement getAlign() and remove hard coding - // Will be implemented in a 
follow-up. - - switch (ObjectFile->getHeader().cputype) { - case MachO::CPU_TYPE_I386: - case MachO::CPU_TYPE_X86_64: - case MachO::CPU_TYPE_POWERPC: - case MachO::CPU_TYPE_POWERPC64: - return 12; // log2 value of page size(4k) for x86 and PPC - case MachO::CPU_TYPE_ARM: - case MachO::CPU_TYPE_ARM64: - case MachO::CPU_TYPE_ARM64_32: - return 14; // log2 value of page size(16k) for Darwin ARM - default: - return 12; - } -} - -// This function replicates ordering from cctools lipo for consistency -static bool compareSlices(const Slice &Lhs, const Slice &Rhs) { - if (Lhs.ObjectFile->getHeader().cputype == - Rhs.ObjectFile->getHeader().cputype) - return Lhs.ObjectFile->getHeader().cpusubtype < - Rhs.ObjectFile->getHeader().cpusubtype; - - // force arm64-family to follow after all other slices for compatibility - // with cctools lipo - if (Lhs.ObjectFile->getHeader().cputype == MachO::CPU_TYPE_ARM64) - return false; - if (Rhs.ObjectFile->getHeader().cputype == MachO::CPU_TYPE_ARM64) - return true; - - // Sort by alignment to minimize file size - return Lhs.Alignment < Rhs.Alignment; -} - -// Updates vector ExtractedObjects with the MachOObjectFiles extracted from -// Universal Binary files to transfer ownership. 
-static SmallVector buildSlices( - ArrayRef> InputBinaries, - SmallVectorImpl> &ExtractedObjects) { - SmallVector Slices; - for (auto &IB : InputBinaries) { - const Binary *InputBinary = IB.getBinary(); - if (auto UO = dyn_cast(InputBinary)) { - for (const auto &O : UO->objects()) { - Expected> BinaryOrError = - O.getAsObjectFile(); - if (!BinaryOrError) - reportError(InputBinary->getFileName(), BinaryOrError.takeError()); - ExtractedObjects.push_back(std::move(BinaryOrError.get())); - Slices.push_back(Slice{ExtractedObjects.back().get(), O.getAlign()}); - } - } else if (auto O = dyn_cast(InputBinary)) { - Slices.push_back(Slice{O, calculateAlignment(O)}); - } else { - llvm_unreachable("Unexpected binary format"); - } - } - return Slices; -} - -static SmallVector -buildFatArchList(ArrayRef Slices) { - SmallVector FatArchList; - uint64_t Offset = - sizeof(MachO::fat_header) + Slices.size() * sizeof(MachO::fat_arch); - - for (size_t Index = 0, Size = Slices.size(); Index < Size; ++Index) { - Offset = alignTo(Offset, 1 << Slices[Index].Alignment); - const MachOObjectFile *ObjectFile = Slices[Index].ObjectFile; - if (Offset > UINT32_MAX) - reportError("fat file too large to be created because the offset " - "field in struct fat_arch is only 32-bits and the offset " + - Twine(Offset) + " for " + ObjectFile->getFileName() + - " for architecture " + getArchString(*ObjectFile) + - "exceeds that."); - - MachO::fat_arch FatArch; - FatArch.cputype = ObjectFile->getHeader().cputype; - FatArch.cpusubtype = ObjectFile->getHeader().cpusubtype; - FatArch.offset = Offset; - FatArch.size = ObjectFile->getMemoryBufferRef().getBufferSize(); - FatArch.align = Slices[Index].Alignment; - Offset += FatArch.size; - FatArchList.push_back(FatArch); - } - return FatArchList; -} - -static void createUniversalBinary(SmallVectorImpl &Slices, - StringRef OutputFileName) { - MachO::fat_header FatHeader; - FatHeader.magic = MachO::FAT_MAGIC; - FatHeader.nfat_arch = Slices.size(); - - 
stable_sort(Slices, compareSlices); - SmallVector FatArchList = buildFatArchList(Slices); - - const bool IsExecutable = any_of(Slices, [](Slice S) { - return sys::fs::can_execute(S.ObjectFile->getFileName()); - }); - const uint64_t OutputFileSize = - FatArchList.back().offset + FatArchList.back().size; - Expected> OutFileOrError = - FileOutputBuffer::create(OutputFileName, OutputFileSize, - IsExecutable ? FileOutputBuffer::F_executable - : 0); - if (!OutFileOrError) - reportError(OutputFileName, OutFileOrError.takeError()); - std::unique_ptr OutFile = std::move(OutFileOrError.get()); - std::memset(OutFile->getBufferStart(), 0, OutputFileSize); - - if (sys::IsLittleEndianHost) - MachO::swapStruct(FatHeader); - std::memcpy(OutFile->getBufferStart(), &FatHeader, sizeof(MachO::fat_header)); - - for (size_t Index = 0, Size = Slices.size(); Index < Size; ++Index) { - MemoryBufferRef BufferRef = Slices[Index].ObjectFile->getMemoryBufferRef(); - std::copy(BufferRef.getBufferStart(), BufferRef.getBufferEnd(), - OutFile->getBufferStart() + FatArchList[Index].offset); - } - - // FatArchs written after Slices in order reduce the number of swaps for the - // LittleEndian case - if (sys::IsLittleEndianHost) - for (MachO::fat_arch &FA : FatArchList) - MachO::swapStruct(FA); - std::memcpy(OutFile->getBufferStart() + sizeof(MachO::fat_header), - FatArchList.begin(), - sizeof(MachO::fat_arch) * FatArchList.size()); - - if (Error E = OutFile->commit()) - reportError(OutputFileName, std::move(E)); -} - -LLVM_ATTRIBUTE_NORETURN -static void createUniversalBinary(ArrayRef> InputBinaries, - StringRef OutputFileName) { - assert(InputBinaries.size() >= 1 && "Incorrect number of input binaries"); - assert(!OutputFileName.empty() && "Create expects a single output file"); - - SmallVector, 1> ExtractedObjects; - SmallVector Slices = buildSlices(InputBinaries, ExtractedObjects); - checkArchDuplicates(Slices); - createUniversalBinary(Slices, OutputFileName); - - exit(EXIT_SUCCESS); -} - int 
main(int argc, char **argv) { InitLLVM X(argc, argv); Config C = parseLipoOptions(makeArrayRef(argv + 1, argc)); @@ -513,9 +330,6 @@ int main(int argc, char **argv) { case LipoAction::ThinArch: extractSlice(InputBinaries, C.ThinArchType, C.OutputFile); break; - case LipoAction::CreateUniversal: - createUniversalBinary(InputBinaries, C.OutputFile); - break; } return EXIT_SUCCESS; } From 9f96a58cccb63110ca9515644c454620c86c566d Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Mon, 15 Jul 2019 22:49:25 +0000 Subject: [PATCH 177/451] [WebAssembly] Rename except_ref type to exnref Summary: We agreed to rename `except_ref` to `exnref` for consistency with other reference types in https://github.com/WebAssembly/exception-handling/issues/79. This also renames WebAssemblyInstrExceptRef.td to WebAssemblyInstrRef.td in order to use the file for other reference types in future. Reviewers: dschuff Subscribers: sbc100, jgravelle-google, hiraditya, sunfish, jfb, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64703 llvm-svn: 366145 --- lld/wasm/WriterUtils.cpp | 4 +-- llvm/include/llvm/BinaryFormat/Wasm.h | 4 +-- llvm/include/llvm/CodeGen/ValueTypes.td | 2 +- llvm/include/llvm/Support/MachineValueType.h | 4 +-- llvm/lib/CodeGen/ValueTypes.cpp | 2 +- .../AsmParser/WebAssemblyAsmParser.cpp | 6 ++--- .../MCTargetDesc/WebAssemblyInstPrinter.cpp | 4 +-- .../MCTargetDesc/WebAssemblyMCTargetDesc.cpp | 4 +-- .../MCTargetDesc/WebAssemblyMCTargetDesc.h | 22 +++++++-------- .../WebAssembly/WebAssemblyCFGStackify.cpp | 21 +++++++-------- .../WebAssemblyCallIndirectFixup.cpp | 4 +-- .../WebAssembly/WebAssemblyExplicitLocals.cpp | 20 +++++++------- .../WebAssembly/WebAssemblyFastISel.cpp | 26 +++++++++--------- .../WebAssembly/WebAssemblyInstrCall.td | 12 ++++----- .../WebAssembly/WebAssemblyInstrControl.td | 14 +++++----- .../WebAssembly/WebAssemblyInstrExceptRef.td | 26 ------------------ .../WebAssembly/WebAssemblyInstrInfo.cpp | 4 +-- 
.../WebAssembly/WebAssemblyInstrInfo.td | 6 ++--- .../Target/WebAssembly/WebAssemblyInstrRef.td | 25 +++++++++++++++++ .../WebAssembly/WebAssemblyLateEHPrepare.cpp | 27 +++++++++---------- .../WebAssembly/WebAssemblyRegStackify.cpp | 6 ++--- .../WebAssembly/WebAssemblyRegisterInfo.td | 4 +-- llvm/test/CodeGen/WebAssembly/exception.ll | 12 ++++----- .../test/MC/Disassembler/WebAssembly/wasm.txt | 2 +- llvm/test/MC/WebAssembly/basic-assembly.s | 4 +-- llvm/test/MC/WebAssembly/objdump.s | 4 +-- .../WebAssemblyExceptionInfoTest.cpp | 16 +++++------ llvm/utils/TableGen/CodeGenTarget.cpp | 2 +- 28 files changed, 141 insertions(+), 146 deletions(-) delete mode 100644 llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td create mode 100644 llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td diff --git a/lld/wasm/WriterUtils.cpp b/lld/wasm/WriterUtils.cpp index d45f6a4c77f6b..913723c4b6703 100644 --- a/lld/wasm/WriterUtils.cpp +++ b/lld/wasm/WriterUtils.cpp @@ -182,8 +182,8 @@ std::string lld::toString(ValType type) { return "f64"; case ValType::V128: return "v128"; - case ValType::EXCEPT_REF: - return "except_ref"; + case ValType::EXNREF: + return "exnref"; } llvm_unreachable("Invalid wasm::ValType"); } diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h index 0ff52cbdb3375..4f6c24bbc68df 100644 --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -224,7 +224,7 @@ enum : unsigned { WASM_TYPE_F64 = 0x7C, WASM_TYPE_V128 = 0x7B, WASM_TYPE_FUNCREF = 0x70, - WASM_TYPE_EXCEPT_REF = 0x68, + WASM_TYPE_EXNREF = 0x68, WASM_TYPE_FUNC = 0x60, WASM_TYPE_NORESULT = 0x40, // for blocks with no result values }; @@ -332,7 +332,7 @@ enum class ValType { F32 = WASM_TYPE_F32, F64 = WASM_TYPE_F64, V128 = WASM_TYPE_V128, - EXCEPT_REF = WASM_TYPE_EXCEPT_REF, + EXNREF = WASM_TYPE_EXNREF, }; struct WasmSignature { diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td index 
feea7e5efe007..5818ac183fcc4 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -160,7 +160,7 @@ def x86mmx : ValueType<64 , 125>; // X86 MMX value def FlagVT : ValueType<0 , 126>; // Pre-RA sched glue def isVoid : ValueType<0 , 127>; // Produces no value def untyped: ValueType<8 , 128>; // Produces an untyped value -def ExceptRef: ValueType<0, 129>; // WebAssembly's except_ref type +def exnref: ValueType<0, 129>; // WebAssembly's exnref type def token : ValueType<0 , 248>; // TokenTy def MetadataVT: ValueType<0, 249>; // Metadata diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h index a9b130f436502..b94d2c4836cc2 100644 --- a/llvm/include/llvm/Support/MachineValueType.h +++ b/llvm/include/llvm/Support/MachineValueType.h @@ -206,7 +206,7 @@ namespace llvm { // unspecified type. The register class // will be determined by the opcode. - ExceptRef = 129, // WebAssembly's except_ref type + exnref = 129, // WebAssembly's exnref type FIRST_VALUETYPE = 1, // This is always the beginning of the list. LAST_VALUETYPE = 130, // This always remains at the end of the list. 
@@ -811,7 +811,7 @@ namespace llvm { case v1024f32: return 32768; case v2048i32: case v2048f32: return 65536; - case ExceptRef: return 0; // opaque type + case exnref: return 0; // opaque type } } diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index ac08877eb6648..a911cdcbec9db 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -207,7 +207,7 @@ std::string EVT::getEVTString() const { case MVT::v8f64: return "v8f64"; case MVT::Metadata:return "Metadata"; case MVT::Untyped: return "Untyped"; - case MVT::ExceptRef: return "ExceptRef"; + case MVT::exnref : return "exnref"; } } diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp index e9a7f6977c2d3..09628e872dd56 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -308,8 +308,8 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser { Type == "i32x4" || Type == "i64x2" || Type == "f32x4" || Type == "f64x2") return wasm::ValType::V128; - if (Type == "except_ref") - return wasm::ValType::EXCEPT_REF; + if (Type == "exnref") + return wasm::ValType::EXNREF; return Optional(); } @@ -320,7 +320,7 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser { .Case("f32", WebAssembly::ExprType::F32) .Case("f64", WebAssembly::ExprType::F64) .Case("v128", WebAssembly::ExprType::V128) - .Case("except_ref", WebAssembly::ExprType::ExceptRef) + .Case("exnref", WebAssembly::ExprType::Exnref) .Case("void", WebAssembly::ExprType::Void) .Default(WebAssembly::ExprType::Invalid); } diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp index cfa808b750add..a439b724d9674 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp +++ 
b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp @@ -282,8 +282,8 @@ const char *llvm::WebAssembly::anyTypeToString(unsigned Ty) { return "funcref"; case wasm::WASM_TYPE_FUNC: return "func"; - case wasm::WASM_TYPE_EXCEPT_REF: - return "except_ref"; + case wasm::WASM_TYPE_EXNREF: + return "exnref"; case wasm::WASM_TYPE_NORESULT: return "void"; default: diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index 21545c39164e9..9c8ca1f13b184 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -146,8 +146,8 @@ wasm::ValType WebAssembly::toValType(const MVT &Ty) { case MVT::v4f32: case MVT::v2f64: return wasm::ValType::V128; - case MVT::ExceptRef: - return wasm::ValType::EXCEPT_REF; + case MVT::exnref: + return wasm::ValType::EXNREF; default: llvm_unreachable("unexpected type"); } diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index a0d526b8a2e03..31ad88b3549c7 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -130,7 +130,7 @@ enum class ExprType : unsigned { F32 = 0x7D, F64 = 0x7C, V128 = 0x7B, - ExceptRef = 0x68, + Exnref = 0x68, Invalid = 0x00 }; @@ -403,8 +403,8 @@ inline bool isCopy(unsigned Opc) { case WebAssembly::COPY_F64_S: case WebAssembly::COPY_V128: case WebAssembly::COPY_V128_S: - case WebAssembly::COPY_EXCEPT_REF: - case WebAssembly::COPY_EXCEPT_REF_S: + case WebAssembly::COPY_EXNREF: + case WebAssembly::COPY_EXNREF_S: return true; default: return false; @@ -453,8 +453,8 @@ inline bool isCallDirect(unsigned Opc) { case WebAssembly::CALL_v4f32_S: case WebAssembly::CALL_v2f64: case WebAssembly::CALL_v2f64_S: - case 
WebAssembly::CALL_ExceptRef: - case WebAssembly::CALL_ExceptRef_S: + case WebAssembly::CALL_exnref: + case WebAssembly::CALL_exnref_S: case WebAssembly::RET_CALL: case WebAssembly::RET_CALL_S: return true; @@ -487,8 +487,8 @@ inline bool isCallIndirect(unsigned Opc) { case WebAssembly::CALL_INDIRECT_v4f32_S: case WebAssembly::CALL_INDIRECT_v2f64: case WebAssembly::CALL_INDIRECT_v2f64_S: - case WebAssembly::CALL_INDIRECT_ExceptRef: - case WebAssembly::CALL_INDIRECT_ExceptRef_S: + case WebAssembly::CALL_INDIRECT_exnref: + case WebAssembly::CALL_INDIRECT_exnref_S: case WebAssembly::RET_CALL_INDIRECT: case WebAssembly::RET_CALL_INDIRECT_S: return true; @@ -530,8 +530,8 @@ inline unsigned getCalleeOpNo(unsigned Opc) { case WebAssembly::CALL_v4f32_S: case WebAssembly::CALL_v2f64: case WebAssembly::CALL_v2f64_S: - case WebAssembly::CALL_ExceptRef: - case WebAssembly::CALL_ExceptRef_S: + case WebAssembly::CALL_exnref: + case WebAssembly::CALL_exnref_S: case WebAssembly::CALL_INDIRECT_i32: case WebAssembly::CALL_INDIRECT_i32_S: case WebAssembly::CALL_INDIRECT_i64: @@ -552,8 +552,8 @@ inline unsigned getCalleeOpNo(unsigned Opc) { case WebAssembly::CALL_INDIRECT_v4f32_S: case WebAssembly::CALL_INDIRECT_v2f64: case WebAssembly::CALL_INDIRECT_v2f64_S: - case WebAssembly::CALL_INDIRECT_ExceptRef: - case WebAssembly::CALL_INDIRECT_ExceptRef_S: + case WebAssembly::CALL_INDIRECT_exnref: + case WebAssembly::CALL_INDIRECT_exnref_S: return 1; default: llvm_unreachable("Not a call instruction"); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index a23a47d2e89ad..e6bfc5226e2eb 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -308,7 +308,7 @@ void WebAssemblyCFGStackify::placeBlockMarker(MachineBasicBlock &MBB) { // Add the BLOCK. 
- // 'br_on_exn' extracts except_ref object and pushes variable number of values + // 'br_on_exn' extracts exnref object and pushes variable number of values // depending on its tag. For C++ exception, its a single i32 value, and the // generated code will be in the form of: // block i32 @@ -766,11 +766,11 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { // Note that the new wrapping block/end_block will be generated later in // placeBlockMarker. // - // TODO Currently local.set and local.gets are generated to move except_ref - // value created by catches. That's because we don't support yielding values - // from a block in LLVM machine IR yet, even though it is supported by wasm. - // Delete unnecessary local.get/local.sets once yielding values from a block - // is supported. The full EH spec requires multi-value support to do this, but + // TODO Currently local.set and local.gets are generated to move exnref value + // created by catches. That's because we don't support yielding values from a + // block in LLVM machine IR yet, even though it is supported by wasm. Delete + // unnecessary local.get/local.sets once yielding values from a block is + // supported. The full EH spec requires multi-value support to do this, but // for C++ we don't yet need it because we only throw a single i32. // // --- @@ -834,7 +834,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { DenseMap> UnwindDestToTryRanges; // In new CFG, DenseMap> BrDestToTryRanges; - // In new CFG, + // In new CFG, DenseMap BrDestToExnReg; // Gather possibly throwing calls (i.e., previously invokes) whose current @@ -936,8 +936,7 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { // of the function with a local.get and a rethrow instruction. 
if (NeedAppendixBlock) { auto *AppendixBB = getAppendixBlock(MF); - unsigned ExnReg = - MRI.createVirtualRegister(&WebAssembly::EXCEPT_REFRegClass); + unsigned ExnReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass); BuildMI(AppendixBB, DebugLoc(), TII.get(WebAssembly::RETHROW)) .addReg(ExnReg); // These instruction ranges should branch to this appendix BB. @@ -1225,8 +1224,8 @@ void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) { case MVT::v2f64: RetType = WebAssembly::ExprType::V128; break; - case MVT::ExceptRef: - RetType = WebAssembly::ExprType::ExceptRef; + case MVT::exnref: + RetType = WebAssembly::ExprType::Exnref; break; default: llvm_unreachable("unexpected return type"); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp index 313147c943141..2537e6042b1e3 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp @@ -85,8 +85,8 @@ static unsigned getNonPseudoCallIndirectOpcode(const MachineInstr &MI) { return CALL_INDIRECT_v4f32; case PCALL_INDIRECT_v2f64: return CALL_INDIRECT_v2f64; - case PCALL_INDIRECT_ExceptRef: - return CALL_INDIRECT_ExceptRef; + case PCALL_INDIRECT_exnref: + return CALL_INDIRECT_exnref; case PRET_CALL_INDIRECT: return RET_CALL_INDIRECT; default: diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp index de7e912129fb6..dbd62179f055f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp @@ -90,8 +90,8 @@ static unsigned getDropOpcode(const TargetRegisterClass *RC) { return WebAssembly::DROP_F64; if (RC == &WebAssembly::V128RegClass) return WebAssembly::DROP_V128; - if (RC == &WebAssembly::EXCEPT_REFRegClass) - return WebAssembly::DROP_EXCEPT_REF; + if (RC == 
&WebAssembly::EXNREFRegClass) + return WebAssembly::DROP_EXNREF; llvm_unreachable("Unexpected register class"); } @@ -107,8 +107,8 @@ static unsigned getLocalGetOpcode(const TargetRegisterClass *RC) { return WebAssembly::LOCAL_GET_F64; if (RC == &WebAssembly::V128RegClass) return WebAssembly::LOCAL_GET_V128; - if (RC == &WebAssembly::EXCEPT_REFRegClass) - return WebAssembly::LOCAL_GET_EXCEPT_REF; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::LOCAL_GET_EXNREF; llvm_unreachable("Unexpected register class"); } @@ -124,8 +124,8 @@ static unsigned getLocalSetOpcode(const TargetRegisterClass *RC) { return WebAssembly::LOCAL_SET_F64; if (RC == &WebAssembly::V128RegClass) return WebAssembly::LOCAL_SET_V128; - if (RC == &WebAssembly::EXCEPT_REFRegClass) - return WebAssembly::LOCAL_SET_EXCEPT_REF; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::LOCAL_SET_EXNREF; llvm_unreachable("Unexpected register class"); } @@ -141,8 +141,8 @@ static unsigned getLocalTeeOpcode(const TargetRegisterClass *RC) { return WebAssembly::LOCAL_TEE_F64; if (RC == &WebAssembly::V128RegClass) return WebAssembly::LOCAL_TEE_V128; - if (RC == &WebAssembly::EXCEPT_REFRegClass) - return WebAssembly::LOCAL_TEE_EXCEPT_REF; + if (RC == &WebAssembly::EXNREFRegClass) + return WebAssembly::LOCAL_TEE_EXNREF; llvm_unreachable("Unexpected register class"); } @@ -158,8 +158,8 @@ static MVT typeForRegClass(const TargetRegisterClass *RC) { return MVT::f64; if (RC == &WebAssembly::V128RegClass) return MVT::v16i8; - if (RC == &WebAssembly::EXCEPT_REFRegClass) - return MVT::ExceptRef; + if (RC == &WebAssembly::EXNREFRegClass) + return MVT::exnref; llvm_unreachable("unrecognized register class"); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 4fff49f54d765..1a24f749b5644 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -129,7 +129,7 
@@ class WebAssemblyFastISel final : public FastISel { case MVT::i64: case MVT::f32: case MVT::f64: - case MVT::ExceptRef: + case MVT::exnref: return VT; case MVT::f16: return MVT::f32; @@ -698,9 +698,9 @@ bool WebAssemblyFastISel::fastLowerArguments() { Opc = WebAssembly::ARGUMENT_v2f64; RC = &WebAssembly::V128RegClass; break; - case MVT::ExceptRef: - Opc = WebAssembly::ARGUMENT_ExceptRef; - RC = &WebAssembly::EXCEPT_REFRegClass; + case MVT::exnref: + Opc = WebAssembly::ARGUMENT_exnref; + RC = &WebAssembly::EXNREFRegClass; break; default: return false; @@ -815,10 +815,10 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { : WebAssembly::PCALL_INDIRECT_v2f64; ResultReg = createResultReg(&WebAssembly::V128RegClass); break; - case MVT::ExceptRef: - Opc = IsDirect ? WebAssembly::CALL_ExceptRef - : WebAssembly::PCALL_INDIRECT_ExceptRef; - ResultReg = createResultReg(&WebAssembly::EXCEPT_REFRegClass); + case MVT::exnref: + Opc = IsDirect ? WebAssembly::CALL_exnref + : WebAssembly::PCALL_INDIRECT_exnref; + ResultReg = createResultReg(&WebAssembly::EXNREFRegClass); break; default: return false; @@ -921,9 +921,9 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) { Opc = WebAssembly::SELECT_F64; RC = &WebAssembly::F64RegClass; break; - case MVT::ExceptRef: - Opc = WebAssembly::SELECT_EXCEPT_REF; - RC = &WebAssembly::EXCEPT_REFRegClass; + case MVT::exnref: + Opc = WebAssembly::SELECT_EXNREF; + RC = &WebAssembly::EXNREFRegClass; break; default: return false; @@ -1341,8 +1341,8 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) { case MVT::v2f64: Opc = WebAssembly::RETURN_v2f64; break; - case MVT::ExceptRef: - Opc = WebAssembly::RETURN_EXCEPT_REF; + case MVT::exnref: + Opc = WebAssembly::RETURN_EXNREF; break; default: return false; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td index bcff9f23608bb..703c15d58c93a 100644 --- 
a/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td @@ -59,7 +59,7 @@ defm "" : CALL; defm "" : CALL; defm "" : CALL; defm "" : CALL; -defm "" : CALL; +defm "" : CALL; defm "" : CALL; defm "" : CALL; defm "" : CALL; @@ -139,9 +139,8 @@ def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), (CALL_v4f32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), (CALL_v2f64 tglobaladdr:$callee)>, Requires<[HasSIMD128]>; -def : Pat<(ExceptRef - (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), - (CALL_ExceptRef tglobaladdr:$callee)>, +def : Pat<(exnref (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_exnref tglobaladdr:$callee)>, Requires<[HasExceptionHandling]>; def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)), (CALL_VOID tglobaladdr:$callee)>; @@ -169,9 +168,8 @@ def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), (CALL_v4f32 texternalsym:$callee)>, Requires<[HasSIMD128]>; def : Pat<(v2f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), (CALL_v2f64 texternalsym:$callee)>, Requires<[HasSIMD128]>; -def : Pat<(ExceptRef - (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), - (CALL_ExceptRef texternalsym:$callee)>, +def : Pat<(exnref (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_exnref texternalsym:$callee)>, Requires<[HasExceptionHandling]>; def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)), (CALL_VOID texternalsym:$callee)>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td index 574cb09ff336e..1870c5bc34b06 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -114,7 +114,7 @@ let isReturn = 1 in { defm "": RETURN; 
defm "": RETURN; defm "": RETURN; - defm "": RETURN; + defm "": RETURN; defm "": SIMD_RETURN; defm "": SIMD_RETURN; defm "": SIMD_RETURN; @@ -144,8 +144,8 @@ defm THROW : I<(outs), (ins event_op:$tag, variable_ops), (outs), (ins event_op:$tag), [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag))], "throw \t$tag", "throw \t$tag", 0x08>; -defm RETHROW : I<(outs), (ins EXCEPT_REF:$exn), (outs), (ins), - [], "rethrow \t$exn", "rethrow", 0x09>; +defm RETHROW : I<(outs), (ins EXNREF:$exn), (outs), (ins), [], + "rethrow \t$exn", "rethrow", 0x09>; // Pseudo instruction to be the lowering target of int_wasm_rethrow_in_catch // intrinsic. Will be converted to the real rethrow instruction later. let isPseudo = 1 in @@ -161,15 +161,15 @@ defm END_TRY : NRI<(outs), (ins), [], "end_try", 0x0b>; // Catching an exception: catch / extract_exception let hasCtrlDep = 1, hasSideEffects = 1 in -defm CATCH : I<(outs EXCEPT_REF:$dst), (ins), (outs), (ins), [], +defm CATCH : I<(outs EXNREF:$dst), (ins), (outs), (ins), [], "catch \t$dst", "catch", 0x07>; // Querying / extracing exception: br_on_exn -// br_on_exn queries an except_ref to see if it matches the corresponding -// exception tag index. If true it branches to the given label and pushes the +// br_on_exn queries an exnref to see if it matches the corresponding exception +// tag index. If true it branches to the given label and pushes the // corresponding argument values of the exception onto the stack. 
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in -defm BR_ON_EXN : I<(outs), (ins bb_op:$dst, event_op:$tag, EXCEPT_REF:$exn), +defm BR_ON_EXN : I<(outs), (ins bb_op:$dst, event_op:$tag, EXNREF:$exn), (outs), (ins bb_op:$dst, event_op:$tag), [], "br_on_exn \t$dst, $tag, $exn", "br_on_exn \t$dst, $tag", 0x0a>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td deleted file mode 100644 index 33a4f2519545d..0000000000000 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrExceptRef.td +++ /dev/null @@ -1,26 +0,0 @@ -// WebAssemblyInstrExceptRef.td-WebAssembly except_ref codegen --*- tablegen -*- -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// WebAssembly except_ref operand code-gen constructs. 
-/// -//===----------------------------------------------------------------------===// - -defm SELECT_EXCEPT_REF : I<(outs EXCEPT_REF:$dst), - (ins EXCEPT_REF:$lhs, EXCEPT_REF:$rhs, I32:$cond), - (outs), (ins), - [(set EXCEPT_REF:$dst, - (select I32:$cond, EXCEPT_REF:$lhs, - EXCEPT_REF:$rhs))], - "except_ref.select\t$dst, $lhs, $rhs, $cond", - "except_ref.select", 0x1b>; - -def : Pat<(select (i32 (setne I32:$cond, 0)), EXCEPT_REF:$lhs, EXCEPT_REF:$rhs), - (SELECT_EXCEPT_REF EXCEPT_REF:$lhs, EXCEPT_REF:$rhs, I32:$cond)>; -def : Pat<(select (i32 (seteq I32:$cond, 0)), EXCEPT_REF:$lhs, EXCEPT_REF:$rhs), - (SELECT_EXCEPT_REF EXCEPT_REF:$rhs, EXCEPT_REF:$lhs, I32:$cond)>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index ee6981135526b..d7022ce0bfba3 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -192,7 +192,7 @@ unsigned WebAssemblyInstrInfo::insertBranch( MachineFunction &MF = *MBB.getParent(); auto &MRI = MF.getRegInfo(); bool IsBrOnExn = Cond[1].isReg() && MRI.getRegClass(Cond[1].getReg()) == - &WebAssembly::EXCEPT_REFRegClass; + &WebAssembly::EXNREFRegClass; if (Cond[0].getImm()) { if (IsBrOnExn) { @@ -222,7 +222,7 @@ bool WebAssemblyInstrInfo::reverseBranchCondition( MachineFunction &MF = *Cond[1].getParent()->getParent()->getParent(); auto &MRI = MF.getRegInfo(); if (Cond[1].isReg() && - MRI.getRegClass(Cond[1].getReg()) == &WebAssembly::EXCEPT_REFRegClass) + MRI.getRegClass(Cond[1].getReg()) == &WebAssembly::EXNREFRegClass) return true; Cond.front() = MachineOperand::CreateImm(!Cond.front().getImm()); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index 859cb9bcdfd97..73ddbe85d5511 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -224,7 +224,7 @@ defm "": 
ARGUMENT; defm "": ARGUMENT; defm "": ARGUMENT; defm "": ARGUMENT; -defm "": ARGUMENT; +defm "": ARGUMENT; // local.get and local.set are not generated by instruction selection; they // are implied by virtual register uses and defs. @@ -294,7 +294,7 @@ defm "" : LOCAL; defm "" : LOCAL; defm "" : LOCAL; defm "" : LOCAL, Requires<[HasSIMD128]>; -defm "" : LOCAL, Requires<[HasExceptionHandling]>; +defm "" : LOCAL, Requires<[HasExceptionHandling]>; let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in { defm CONST_I32 : I<(outs I32:$res), (ins i32imm_op:$imm), @@ -345,5 +345,5 @@ include "WebAssemblyInstrConv.td" include "WebAssemblyInstrFloat.td" include "WebAssemblyInstrAtomics.td" include "WebAssemblyInstrSIMD.td" -include "WebAssemblyInstrExceptRef.td" +include "WebAssemblyInstrRef.td" include "WebAssemblyInstrBulkMemory.td" diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td new file mode 100644 index 0000000000000..afe89de60b361 --- /dev/null +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrRef.td @@ -0,0 +1,25 @@ +// WebAssemblyInstrRef.td - WebAssembly reference type codegen --*- tablegen -*- +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// WebAssembly reference type operand codegen constructs.
+/// +//===----------------------------------------------------------------------===// + +defm SELECT_EXNREF : I<(outs EXNREF:$dst), + (ins EXNREF:$lhs, EXNREF:$rhs, I32:$cond), + (outs), (ins), + [(set EXNREF:$dst, + (select I32:$cond, EXNREF:$lhs, EXNREF:$rhs))], + "exnref.select\t$dst, $lhs, $rhs, $cond", + "exnref.select", 0x1b>; + +def : Pat<(select (i32 (setne I32:$cond, 0)), EXNREF:$lhs, EXNREF:$rhs), + (SELECT_EXNREF EXNREF:$lhs, EXNREF:$rhs, I32:$cond)>; +def : Pat<(select (i32 (seteq I32:$cond, 0)), EXNREF:$lhs, EXNREF:$rhs), + (SELECT_EXNREF EXNREF:$rhs, EXNREF:$lhs, I32:$cond)>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp index 49258ded76622..e92b344302725 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp @@ -131,8 +131,7 @@ bool WebAssemblyLateEHPrepare::addCatches(MachineFunction &MF) { auto InsertPos = MBB.begin(); if (InsertPos->isEHLabel()) // EH pad starts with an EH label ++InsertPos; - unsigned DstReg = - MRI.createVirtualRegister(&WebAssembly::EXCEPT_REFRegClass); + unsigned DstReg = MRI.createVirtualRegister(&WebAssembly::EXNREFRegClass); BuildMI(MBB, InsertPos, MBB.begin()->getDebugLoc(), TII.get(WebAssembly::CATCH), DstReg); } @@ -209,23 +208,23 @@ bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables( } // Wasm uses 'br_on_exn' instruction to check the tag of an exception. It takes -// except_ref type object returned by 'catch', and branches to the destination -// if it matches a given tag. We currently use __cpp_exception symbol to -// represent the tag for all C++ exceptions. +// exnref type object returned by 'catch', and branches to the destination if it +// matches a given tag. We currently use __cpp_exception symbol to represent the +// tag for all C++ exceptions. // // block $l (result i32) // ... 
-// ;; except_ref $e is on the stack at this point +// ;; exnref $e is on the stack at this point // br_on_exn $l $e ;; branch to $l with $e's arguments // ... // end // ;; Here we expect the extracted values are on top of the wasm value stack // ... Handle exception using values ... // -// br_on_exn takes an except_ref object and branches if it matches the given -// tag. There can be multiple br_on_exn instructions if we want to match for -// another tag, but for now we only test for __cpp_exception tag, and if it does -// not match, i.e., it is a foreign exception, we rethrow it. +// br_on_exn takes an exnref object and branches if it matches the given tag. +// There can be multiple br_on_exn instructions if we want to match for another +// tag, but for now we only test for __cpp_exception tag, and if it does not +// match, i.e., it is a foreign exception, we rethrow it. // // In the destination BB that's the target of br_on_exn, extracted exception // values (in C++'s case a single i32, which represents an exception pointer) @@ -279,13 +278,13 @@ bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) { // - Before: // ehpad: - // %exnref:except_ref = catch + // %exnref:exnref = catch // %exn:i32 = extract_exception // ... use exn ... 
// // - After: // ehpad: - // %exnref:except_ref = catch + // %exnref:exnref = catch // br_on_exn %thenbb, $__cpp_exception, %exnref // br %elsebb // elsebb: @@ -317,14 +316,14 @@ bool WebAssemblyLateEHPrepare::addExceptionExtraction(MachineFunction &MF) { // // - Before: // ehpad: - // %exnref:except_ref = catch + // %exnref:exnref = catch // %exn:i32 = extract_exception // call @__clang_call_terminate(%exn) // unreachable // // - After: // ehpad: - // %exnref:except_ref = catch + // %exnref:exnref = catch // br_on_exn %thenbb, $__cpp_exception, %exnref // br %elsebb // elsebb: diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 31ba6f0e4c237..a120a6471014c 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -834,9 +834,9 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { // entering blocks, which is a part of multi-value proposal. // // Once we support live-in values of wasm blocks, this can be: - // catch ; push except_ref value onto stack - // block except_ref -> i32 - // br_on_exn $__cpp_exception ; pop the except_ref value + // catch ; push exnref value onto stack + // block exnref -> i32 + // br_on_exn $__cpp_exception ; pop the exnref value // end_block // // But because we don't support it yet, the catch instruction's dst diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td index 4d202f70caad7..6d3d6c723277d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td @@ -43,7 +43,7 @@ def F64_0 : WebAssemblyReg<"%f64.0">; def V128_0: WebAssemblyReg<"%v128">; -def EXCEPT_REF_0 : WebAssemblyReg<"%except_ref.0">; +def EXNREF_0 : WebAssemblyReg<"%exnref.0">; // The value stack "register". 
This is an opaque entity which serves to order // uses and defs that must remain in LIFO order. @@ -64,4 +64,4 @@ def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>; def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>; def V128 : WebAssemblyRegClass<[v4f32, v2f64, v2i64, v4i32, v16i8, v8i16], 128, (add V128_0)>; -def EXCEPT_REF : WebAssemblyRegClass<[ExceptRef], 0, (add EXCEPT_REF_0)>; +def EXNREF : WebAssemblyRegClass<[exnref], 0, (add EXNREF_0)>; diff --git a/llvm/test/CodeGen/WebAssembly/exception.ll b/llvm/test/CodeGen/WebAssembly/exception.ll index 8f7687e2da485..fc61f4099e0ed 100644 --- a/llvm/test/CodeGen/WebAssembly/exception.ll +++ b/llvm/test/CodeGen/WebAssembly/exception.ll @@ -31,11 +31,11 @@ define void @test_throw(i8* %p) { ; CHECK: global.get ${{.+}}=, __stack_pointer ; CHECK: try ; CHECK: call foo -; CHECK: catch $[[EXCEPT_REF:[0-9]+]]= +; CHECK: catch $[[EXNREF:[0-9]+]]= ; CHECK: global.set __stack_pointer ; CHECK: block i32 -; CHECK: br_on_exn 0, __cpp_exception, $[[EXCEPT_REF]] -; CHECK: rethrow $[[EXCEPT_REF]] +; CHECK: br_on_exn 0, __cpp_exception, $[[EXNREF]] +; CHECK: rethrow $[[EXNREF]] ; CHECK: end_block ; CHECK: extract_exception $[[EXN:[0-9]+]]= ; CHECK-DAG: i32.store __wasm_lpad_context @@ -47,7 +47,7 @@ define void @test_throw(i8* %p) { ; CHECK: call __cxa_end_catch ; CHECK: br 1 ; CHECK: end_block -; CHECK: rethrow $[[EXCEPT_REF]] +; CHECK: rethrow $[[EXNREF]] ; CHECK: end_try define void @test_catch() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { entry: @@ -92,10 +92,10 @@ try.cont: ; preds = %entry, %catch ; CHECK-LABEL: test_cleanup: ; CHECK: try ; CHECK: call foo -; CHECK: catch $[[EXCEPT_REF:[0-9]+]]= +; CHECK: catch $[[EXNREF:[0-9]+]]= ; CHECK: global.set __stack_pointer ; CHECK: i32.call $drop=, _ZN4TempD2Ev -; CHECK: rethrow $[[EXCEPT_REF]] +; CHECK: rethrow $[[EXNREF]] ; CHECK: end_try define void @test_cleanup() personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { entry: 
diff --git a/llvm/test/MC/Disassembler/WebAssembly/wasm.txt b/llvm/test/MC/Disassembler/WebAssembly/wasm.txt index e979bac69128c..08cc95434c16d 100644 --- a/llvm/test/MC/Disassembler/WebAssembly/wasm.txt +++ b/llvm/test/MC/Disassembler/WebAssembly/wasm.txt @@ -23,7 +23,7 @@ 0x11 0x80 0x01 0x00 # CHECK: call 0 -# CHECK-NOT: except_ref.call 0 +# CHECK-NOT: exnref.call 0 0x10 0x00 # CHECK: local.get 128 diff --git a/llvm/test/MC/WebAssembly/basic-assembly.s b/llvm/test/MC/WebAssembly/basic-assembly.s index c3b7e9da25de4..23b1a0940f637 100644 --- a/llvm/test/MC/WebAssembly/basic-assembly.s +++ b/llvm/test/MC/WebAssembly/basic-assembly.s @@ -70,7 +70,7 @@ test0: # TODO: enable once instruction has been added. #i32x4.trunc_sat_f32x4_s i32.trunc_f32_s - try except_ref + try exnref i32.atomic.load 0 atomic.notify 0 .LBB0_3: @@ -172,7 +172,7 @@ test0: # CHECK-NEXT: end_if # CHECK-NEXT: f32x4.add # CHECK-NEXT: i32.trunc_f32_s -# CHECK-NEXT: try except_ref +# CHECK-NEXT: try exnref # CHECK-NEXT: i32.atomic.load 0 # CHECK-NEXT: atomic.notify 0 # CHECK-NEXT: .LBB0_3: diff --git a/llvm/test/MC/WebAssembly/objdump.s b/llvm/test/MC/WebAssembly/objdump.s index f1cedc7db86e9..4030ba9c2c76f 100644 --- a/llvm/test/MC/WebAssembly/objdump.s +++ b/llvm/test/MC/WebAssembly/objdump.s @@ -9,7 +9,7 @@ test0: test1: .functype test1 (i32, i64) -> (i32) - .local i32, i64, except_ref + .local i32, i64, exnref local.get 3 end_function @@ -21,6 +21,6 @@ test1: # CHECK-NEXT: 9: 20 02 local.get 2 # CHECK-NEXT: b: 0b end # CHECK-LABEL: test1: -# CHECK-NEXT: .local i32, i64, except_ref +# CHECK-NEXT: .local i32, i64, exnref # CHECK-NEXT: 14: 20 03 local.get 3 # CHECK-NEXT: 16: 0b end diff --git a/llvm/unittests/Target/WebAssembly/WebAssemblyExceptionInfoTest.cpp b/llvm/unittests/Target/WebAssembly/WebAssemblyExceptionInfoTest.cpp index ec946379b1e9e..49a469bdef789 100644 --- a/llvm/unittests/Target/WebAssembly/WebAssemblyExceptionInfoTest.cpp +++ 
b/llvm/unittests/Target/WebAssembly/WebAssemblyExceptionInfoTest.cpp @@ -100,14 +100,14 @@ body: | ; predecessors: %bb.0 successors: %bb.3, %bb.9 liveins: $value_stack - %0:except_ref = CATCH implicit-def $arguments + %0:exnref = CATCH implicit-def $arguments CLEANUPRET implicit-def dead $arguments bb.3 (landing-pad): ; predecessors: %bb.2 successors: %bb.4, %bb.6 liveins: $value_stack - %1:except_ref = CATCH implicit-def $arguments + %1:exnref = CATCH implicit-def $arguments BR_IF %bb.4, %58:i32, implicit-def $arguments, implicit-def $value_stack, implicit $value_stack BR %bb.6, implicit-def $arguments @@ -138,13 +138,13 @@ body: | ; predecessors: %bb.4 successors: %bb.9 liveins: $value_stack - %2:except_ref = CATCH implicit-def $arguments + %2:exnref = CATCH implicit-def $arguments CLEANUPRET implicit-def dead $arguments bb.9 (landing-pad): ; predecessors: %bb.2, %bb.6, %bb.8 liveins: $value_stack - %3:except_ref = CATCH implicit-def $arguments + %3:exnref = CATCH implicit-def $arguments CLEANUPRET implicit-def dead $arguments bb.10: @@ -257,7 +257,7 @@ body: | ; predecessors: %bb.0 successors: %bb.2, %bb.8 liveins: $value_stack - %0:except_ref = CATCH implicit-def $arguments + %0:exnref = CATCH implicit-def $arguments BR_IF %bb.2, %32:i32, implicit-def $arguments, implicit-def $value_stack, implicit $value_stack BR %bb.8, implicit-def $arguments @@ -271,7 +271,7 @@ body: | ; predecessors: %bb.2 successors: %bb.4, %bb.6 liveins: $value_stack - %1:except_ref = CATCH implicit-def $arguments + %1:exnref = CATCH implicit-def $arguments BR_IF %bb.4, %43:i32, implicit-def $arguments, implicit-def $value_stack, implicit $value_stack BR %bb.6, implicit-def $arguments @@ -313,13 +313,13 @@ body: | ; predecessors: %bb.4 successors: %bb.11 liveins: $value_stack - %2:except_ref = CATCH implicit-def $arguments + %2:exnref = CATCH implicit-def $arguments CLEANUPRET implicit-def dead $arguments bb.11 (landing-pad): ; predecessors: %bb.2, %bb.6, %bb.10 liveins: $value_stack - 
%3:except_ref = CATCH implicit-def $arguments + %3:exnref = CATCH implicit-def $arguments CLEANUPRET implicit-def dead $arguments bb.12: diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp index dc45b1d5d35b8..702317283f908 100644 --- a/llvm/utils/TableGen/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/CodeGenTarget.cpp @@ -191,7 +191,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) { case MVT::iPTR: return "MVT::iPTR"; case MVT::iPTRAny: return "MVT::iPTRAny"; case MVT::Untyped: return "MVT::Untyped"; - case MVT::ExceptRef: return "MVT::ExceptRef"; + case MVT::exnref: return "MVT::exnref"; default: llvm_unreachable("ILLEGAL VALUE TYPE!"); } } From 199f8721e6a17242ded2f7a0e11211f2d6a69f75 Mon Sep 17 00:00:00 2001 From: Bob Haarman Date: Mon, 15 Jul 2019 22:50:04 +0000 Subject: [PATCH 178/451] add -fthinlto-index= option to clang-cl Summary: This adds a -fthinlto-index= option to clang-cl, which allows it to be used to drive ThinLTO backend passes. This allows clang-cl to be used for distributed ThinLTO. 
Reviewers: tejohnson, pcc, rnk Subscribers: mehdi_amini, steven_wu, dexonsmith, arphaman, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D64458 llvm-svn: 366146 --- clang/include/clang/Driver/Options.td | 2 +- clang/test/Driver/cl-thinlto-backend.c | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 clang/test/Driver/cl-thinlto-backend.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 957483c318647..dfd27fab796e3 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1270,7 +1270,7 @@ def flto_jobs_EQ : Joined<["-"], "flto-jobs=">, "of 0 means the number of threads will be derived from " "the number of CPUs detected)">; def fthinlto_index_EQ : Joined<["-"], "fthinlto-index=">, - Flags<[CC1Option]>, Group, + Flags<[CoreOption, CC1Option]>, Group, HelpText<"Perform ThinLTO importing using provided function summary index">; def fmacro_backtrace_limit_EQ : Joined<["-"], "fmacro-backtrace-limit=">, Group, Flags<[DriverOption, CoreOption]>; diff --git a/clang/test/Driver/cl-thinlto-backend.c b/clang/test/Driver/cl-thinlto-backend.c new file mode 100644 index 0000000000000..a948c4ea33d9c --- /dev/null +++ b/clang/test/Driver/cl-thinlto-backend.c @@ -0,0 +1,9 @@ +// RUN: %clang_cl -c -flto=thin -Fo%t.obj %s +// RUN: llvm-lto2 run -thinlto-distributed-indexes -o %t.exe %t.obj + +// -fthinlto-index= should be passed to cc1 +// RUN: %clang_cl -### -c -fthinlto-index=%t.thinlto.bc -Fo%t1.obj \ +// RUN: %t.obj 2>&1 | FileCheck %s + +// CHECK: -fthinlto-index= +// CHECK: "-x" "ir" From a28dcf693d15119cf8be96ce66f97bdf8d373eb6 Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Mon, 15 Jul 2019 22:52:01 +0000 Subject: [PATCH 179/451] [llvm-readelf] Print "File: lib.a(file.o)" info when dumping archive files. Match GNU readelf.
https://bugs.llvm.org/show_bug.cgi?id=35351 Reviewers: jhenderson, grimar, MaskRay, rupprecht Reviewed by: jhenderson, MaskRay, grimar Differential Revision: https://reviews.llvm.org/D64361 llvm-svn: 366147 --- llvm/test/tools/llvm-readobj/archive.test | 25 +++++++++++++++---- .../llvm-readobj/elf-dynamic-malformed.test | 1 + .../macho-universal-x86_64.i386.test | 18 ++++++++----- .../llvm-readobj/thin-archive-paths.test | 8 +++--- llvm/tools/llvm-readobj/llvm-readobj.cpp | 17 +++++++++---- 5 files changed, 49 insertions(+), 20 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/archive.test b/llvm/test/tools/llvm-readobj/archive.test index 08b04fa4f03a0..1bb70d36270f3 100644 --- a/llvm/test/tools/llvm-readobj/archive.test +++ b/llvm/test/tools/llvm-readobj/archive.test @@ -2,39 +2,54 @@ # RUN: rm -f %t.a # RUN: llvm-ar rc %t.a %p/Inputs/trivial.obj.elf-x86-64 %p/Inputs/trivial.obj.elf-i386 %p/Inputs/trivial.obj.coff-arm -# RUN: llvm-readobj --all %t.a | FileCheck %s --check-prefixes=LLVM,COFF -# RUN: llvm-readelf --all %t.a | FileCheck %s --check-prefixes=GNU,COFF +# RUN: llvm-readobj --all %t.a | FileCheck %s -DARFILE="%t.a" --check-prefixes=HEADER,LLVM,COFF +# RUN: llvm-readelf --all %t.a | FileCheck %s -DARFILE="%t.a" --check-prefixes=HEADER,GNU,COFF -# LLVM: File: trivial.obj.elf-x86-64 +# LLVM: {{^$}} +# LLVM-NEXT: File: [[ARFILE]](trivial.obj.elf-x86-64) # LLVM: Format: ELF64-x86-64 # LLVM: ElfHeader { # LLVM: Machine: EM_X86_64 # LLVM: Sections [ # LLVM: Relocations [ # LLVM: Symbols [ +# LLVM: Notes [ +# LLVM: ] +# GNU: {{^$}} +# GNU-NEXT: File: [[ARFILE]](trivial.obj.elf-x86-64) # GNU: ELF Header: # GNU: Machine: Advanced Micro Devices X86-64 # GNU: Section Headers: # GNU: Relocation section '.rela.text' # GNU: Symbol table '.symtab' +# GNU: There are no section groups in this file. 
-# LLVM: File: trivial.obj.elf-i386 +# LLVM-EMPTY: +# LLVM-NEXT: File: [[ARFILE]](trivial.obj.elf-i386) # LLVM: Format: ELF32-i386 # LLVM: ElfHeader { # LLVM: Machine: EM_386 # LLVM: Sections [ # LLVM: Relocations [ # LLVM: Symbols [ +# LLVM: Notes [ +# LLVM: ] +# GNU-EMPTY: +# GNU-NEXT: File: [[ARFILE]](trivial.obj.elf-i386) # GNU: ELF Header: # GNU: Machine: Intel 80386 # GNU: Section Headers: # GNU: Relocation section '.rel.text' # GNU: Symbol table '.symtab' +# GNU: There are no section groups in this file. -# LLVM: File: trivial.obj.coff-arm +# LLVM-EMPTY: +# LLVM-NEXT: File: [[ARFILE]](trivial.obj.coff-arm) # LLVM: Format: COFF-ARM +# GNU-EMPTY: +# GNU-NEXT: File: [[ARFILE]](trivial.obj.coff-arm) # COFF: ImageFileHeader { # COFF: Machine: IMAGE_FILE_MACHINE_ARMNT # COFF: Sections [ diff --git a/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test b/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test index c8aa8d58eec92..e78e1affd5558 100644 --- a/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test +++ b/llvm/test/tools/llvm-readobj/elf-dynamic-malformed.test @@ -20,6 +20,7 @@ # WARN-GNU-NOT: warning # WARN-GNU: warning: invalid section size (4) or entity size (16) +# WARN-GNU-EMPTY: # WARN-GNU-NEXT: ELF Header: # WARN-GNU: Symbol table '.symtab' contains 1 entries: # WARN-GNU: 0: diff --git a/llvm/test/tools/llvm-readobj/macho-universal-x86_64.i386.test b/llvm/test/tools/llvm-readobj/macho-universal-x86_64.i386.test index b13bd455b175f..dc03ab367b14e 100644 --- a/llvm/test/tools/llvm-readobj/macho-universal-x86_64.i386.test +++ b/llvm/test/tools/llvm-readobj/macho-universal-x86_64.i386.test @@ -5,10 +5,12 @@ RUN: llvm-readobj --sections %p/Inputs/macho-universal.x86_64.i386 \ RUN: | FileCheck %s -check-prefix MULTISECTIONS RUN: llvm-readobj -h %p/Inputs/macho-universal-archive.x86_64.i386 \ -RUN: | FileCheck %s -check-prefix MULTIHEADER-ARCHIVE +RUN: | FileCheck %s -check-prefix MULTIHEADER-ARCHIVE \ +RUN: 
-DARFILE="%p/Inputs/macho-universal-archive.x86_64.i386" RUN: llvm-readobj --sections %p/Inputs/macho-universal-archive.x86_64.i386 \ -RUN: | FileCheck %s -check-prefix MULTISECTIONS-ARCHIVE +RUN: | FileCheck %s -check-prefix MULTISECTIONS-ARCHIVE \ +RUN: -DARFILE="%p/Inputs/macho-universal-archive.x86_64.i386" MULTIHEADER: Format: Mach-O 64-bit x86-64 MULTIHEADER: Arch: x86_64 @@ -146,7 +148,8 @@ MULTISECTIONS: Reserved2: 0x0 MULTISECTIONS: } MULTISECTIONS: ] -MULTIHEADER-ARCHIVE: File: hello.o +MULTIHEADER-ARCHIVE: {{^$}} +MULTIHEADER-ARCHIVE-NEXT: File: [[ARFILE]](hello.o) MULTIHEADER-ARCHIVE: Format: Mach-O 64-bit x86-64 MULTIHEADER-ARCHIVE: Arch: x86_64 MULTIHEADER-ARCHIVE: AddressSize: 64bit @@ -162,7 +165,8 @@ MULTIHEADER-ARCHIVE: MH_SUBSECTIONS_VIA_SYMBOLS (0x2000) MULTIHEADER-ARCHIVE: ] MULTIHEADER-ARCHIVE: Reserved: 0x0 MULTIHEADER-ARCHIVE: } -MULTIHEADER-ARCHIVE: File: foo.o +MULTIHEADER-ARCHIVE-EMPTY: +MULTIHEADER-ARCHIVE-NEXT: File: [[ARFILE]](foo.o) MULTIHEADER-ARCHIVE: Format: Mach-O 32-bit i386 MULTIHEADER-ARCHIVE: Arch: i386 MULTIHEADER-ARCHIVE: AddressSize: 32bit @@ -178,7 +182,8 @@ MULTIHEADER-ARCHIVE: MH_SUBSECTIONS_VIA_SYMBOLS (0x2000) MULTIHEADER-ARCHIVE: ] MULTIHEADER-ARCHIVE: } -MULTISECTIONS-ARCHIVE: File: hello.o +MULTISECTIONS-ARCHIVE: {{^$}} +MULTISECTIONS-ARCHIVE-NEXT: File: [[ARFILE]](hello.o) MULTISECTIONS-ARCHIVE: Format: Mach-O 64-bit x86-64 MULTISECTIONS-ARCHIVE: Arch: x86_64 MULTISECTIONS-ARCHIVE: AddressSize: 64bit @@ -254,7 +259,8 @@ MULTISECTIONS-ARCHIVE: Reserved1: 0x0 MULTISECTIONS-ARCHIVE: Reserved2: 0x0 MULTISECTIONS-ARCHIVE: } MULTISECTIONS-ARCHIVE: ] -MULTISECTIONS-ARCHIVE: File: foo.o +MULTISECTIONS-ARCHIVE-EMPTY: +MULTISECTIONS-ARCHIVE-NEXT: File: [[ARFILE]](foo.o) MULTISECTIONS-ARCHIVE: Format: Mach-O 32-bit i386 MULTISECTIONS-ARCHIVE: Arch: i386 MULTISECTIONS-ARCHIVE: AddressSize: 32bit diff --git a/llvm/test/tools/llvm-readobj/thin-archive-paths.test b/llvm/test/tools/llvm-readobj/thin-archive-paths.test index 
d7a971eb303d8..2e2ec56b99da5 100644 --- a/llvm/test/tools/llvm-readobj/thin-archive-paths.test +++ b/llvm/test/tools/llvm-readobj/thin-archive-paths.test @@ -8,8 +8,8 @@ # RUN: llvm-ar rcT a/relative.a a/b/1.o # Show that relative paths in the file header printing look sensible. -# RUN: llvm-readobj --file-headers a/relative.a | FileCheck %s --check-prefix=REL -# REL: File: b/1.o +# RUN: llvm-readobj --file-headers a/relative.a | FileCheck %s -DARFILE="a/relative.a" --check-prefix=REL +# REL: File: [[ARFILE]](b/1.o) # Show that relative paths in an error message for both archive and member look # sensible. @@ -23,8 +23,8 @@ # RUN: llvm-ar rcT c/absolute.a %t/a/b/1.o # Show that absolute paths in the file header printing are correct. -# RUN: llvm-readobj --file-headers c/absolute.a | FileCheck %s --check-prefix=ABS -DDIR=%/t -# ABS: File: [[DIR]]/a/b/1.o +# RUN: llvm-readobj --file-headers c/absolute.a | FileCheck %s --check-prefix=ABS -DARFILE="c/absolute.a" -DDIR=%/t +# ABS: File: [[ARFILE]]([[DIR]]/a/b/1.o) # Show that absolute paths in an error message for both archive and member are correct. # RUN: rm a/b/1.o diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp index f00d94ee5c42c..b6d0493af700d 100644 --- a/llvm/tools/llvm-readobj/llvm-readobj.cpp +++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp @@ -462,20 +462,27 @@ static std::error_code createDumper(const ObjectFile *Obj, } /// Dumps the specified object file. -static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer) { +static void dumpObject(const ObjectFile *Obj, ScopedPrinter &Writer, + const Archive *A = nullptr) { + std::string FileStr = + A ? 
Twine(A->getFileName() + "(" + Obj->getFileName() + ")").str() + : Obj->getFileName().str(); + std::unique_ptr Dumper; if (std::error_code EC = createDumper(Obj, Writer, Dumper)) - reportError(Obj->getFileName(), EC); + reportError(FileStr, EC); + Writer.startLine() << "\n"; if (opts::Output == opts::LLVM) { - Writer.startLine() << "\n"; - Writer.printString("File", Obj->getFileName()); + Writer.printString("File", FileStr); Writer.printString("Format", Obj->getFileFormatName()); Writer.printString("Arch", Triple::getArchTypeName( (llvm::Triple::ArchType)Obj->getArch())); Writer.printString("AddressSize", formatv("{0}bit", 8 * Obj->getBytesInAddress())); Dumper->printLoadName(); + } else if (opts::Output == opts::GNU && A) { + Writer.printString("File", FileStr); } if (opts::FileHeaders) @@ -589,7 +596,7 @@ static void dumpArchive(const Archive *Arc, ScopedPrinter &Writer) { continue; } if (ObjectFile *Obj = dyn_cast(&*ChildOrErr.get())) - dumpObject(Obj, Writer); + dumpObject(Obj, Writer, Arc); else if (COFFImportFile *Imp = dyn_cast(&*ChildOrErr.get())) dumpCOFFImportFile(Imp, Writer); else From b5701710a4297040b8d80eaf444d560aeba0867c Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Mon, 15 Jul 2019 22:56:12 +0000 Subject: [PATCH 180/451] [LanguageRuntime] Move ObjCLanguageRuntime into a plugin Summary: Following up to my CPPLanguageRuntime change, I'm moving ObjCLanguageRuntime into a plugin as well. 
Reviewers: JDevlieghere, compnerd, jingham, clayborg Subscribers: mgorny, arphaman, lldb-commits Differential Revision: https://reviews.llvm.org/D64763 llvm-svn: 366148 --- .../DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp | 3 ++- .../MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp | 3 ++- .../Plugins/ExpressionParser/Clang/ClangASTSource.cpp | 3 ++- .../ExpressionParser/Clang/ClangExpressionDeclMap.cpp | 2 +- .../ExpressionParser/Clang/ClangExpressionParser.cpp | 3 ++- .../Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp | 3 ++- lldb/source/Plugins/Language/ObjC/CF.cpp | 3 ++- lldb/source/Plugins/Language/ObjC/Cocoa.cpp | 1 - lldb/source/Plugins/Language/ObjC/Cocoa.h | 3 ++- lldb/source/Plugins/Language/ObjC/NSArray.cpp | 2 +- lldb/source/Plugins/Language/ObjC/NSDictionary.cpp | 1 - lldb/source/Plugins/Language/ObjC/NSError.cpp | 2 +- lldb/source/Plugins/Language/ObjC/NSException.cpp | 2 +- lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp | 2 +- lldb/source/Plugins/Language/ObjC/NSSet.cpp | 1 - lldb/source/Plugins/Language/ObjC/NSString.h | 3 ++- lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp | 3 ++- .../ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h | 3 ++- .../ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp | 3 ++- .../ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h | 3 ++- .../ObjC/AppleObjCRuntime/AppleObjCRuntime.h | 3 ++- .../ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.h | 3 ++- .../ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp | 3 ++- .../ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.h | 3 ++- .../ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp | 3 ++- .../ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.h | 3 ++- .../AppleThreadPlanStepThroughObjCTrampoline.cpp | 3 ++- lldb/source/Plugins/LanguageRuntime/ObjC/CMakeLists.txt | 9 +++++++++ .../LanguageRuntime/ObjC}/ObjCLanguageRuntime.cpp | 3 ++- .../Plugins/LanguageRuntime/ObjC}/ObjCLanguageRuntime.h | 0 lldb/source/Symbol/CMakeLists.txt | 1 + lldb/source/Symbol/ClangASTContext.cpp | 2 +- 
lldb/source/Target/CMakeLists.txt | 1 - 33 files changed, 56 insertions(+), 30 deletions(-) rename lldb/source/{Target => Plugins/LanguageRuntime/ObjC}/ObjCLanguageRuntime.cpp (99%) rename lldb/{include/lldb/Target => source/Plugins/LanguageRuntime/ObjC}/ObjCLanguageRuntime.h (100%) diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp index ce12157688361..57d87eb145eb9 100644 --- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp +++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.cpp @@ -20,7 +20,6 @@ #include "lldb/Symbol/Function.h" #include "lldb/Symbol/ObjectFile.h" #include "lldb/Target/ABI.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/RegisterContext.h" #include "lldb/Target/StackFrame.h" #include "lldb/Target/Target.h" @@ -32,6 +31,8 @@ #include "lldb/Utility/Log.h" #include "lldb/Utility/State.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + //#define ENABLE_DEBUG_PRINTF // COMMENT THIS LINE OUT PRIOR TO CHECKIN #ifdef ENABLE_DEBUG_PRINTF #include diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp index 7d00380bfcd45..53424f018c52f 100644 --- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp +++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp @@ -16,7 +16,6 @@ #include "lldb/Symbol/Function.h" #include "lldb/Symbol/ObjectFile.h" #include "lldb/Target/ABI.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/RegisterContext.h" #include "lldb/Target/StackFrame.h" #include "lldb/Target/Target.h" @@ -30,6 +29,8 @@ #include "DynamicLoaderDarwin.h" #include "DynamicLoaderMacOSXDYLD.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + //#define ENABLE_DEBUG_PRINTF // COMMENT THIS LINE 
OUT PRIOR TO CHECKIN #ifdef ENABLE_DEBUG_PRINTF #include diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp index a616a1de0c6da..8d29df9dde2db 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp @@ -20,12 +20,13 @@ #include "lldb/Symbol/SymbolFile.h" #include "lldb/Symbol/SymbolVendor.h" #include "lldb/Symbol/TaggedASTType.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/Target.h" #include "lldb/Utility/Log.h" #include "clang/AST/ASTContext.h" #include "clang/AST/RecordLayout.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + #include #include diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp index a1b8f4f0011fc..a49a7029e0d28 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp @@ -33,7 +33,6 @@ #include "lldb/Symbol/Variable.h" #include "lldb/Symbol/VariableList.h" #include "lldb/Target/ExecutionContext.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/Process.h" #include "lldb/Target/RegisterContext.h" #include "lldb/Target/StackFrame.h" @@ -53,6 +52,7 @@ #include "Plugins/Language/CPlusPlus/CPlusPlusLanguage.h" #include "Plugins/LanguageRuntime/CPlusPlus/CPPLanguageRuntime.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" using namespace lldb; using namespace lldb_private; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp index 1c7f931898666..7d13891ded8d2 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp +++ 
b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp @@ -78,7 +78,6 @@ #include "lldb/Symbol/SymbolVendor.h" #include "lldb/Target/ExecutionContext.h" #include "lldb/Target/Language.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/Process.h" #include "lldb/Target/Target.h" #include "lldb/Target/ThreadPlanCallFunction.h" @@ -90,6 +89,8 @@ #include "lldb/Utility/StreamString.h" #include "lldb/Utility/StringList.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + #include #include diff --git a/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp b/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp index 6d34a35ba2bc1..f8e004fe7d4ad 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp @@ -18,13 +18,14 @@ #include "lldb/Expression/UtilityFunction.h" #include "lldb/Target/ExecutionContext.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/Process.h" #include "lldb/Target/StackFrame.h" #include "lldb/Target/Target.h" #include "lldb/Utility/ConstString.h" #include "lldb/Utility/Log.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + using namespace llvm; using namespace lldb_private; diff --git a/lldb/source/Plugins/Language/ObjC/CF.cpp b/lldb/source/Plugins/Language/ObjC/CF.cpp index d9b6881565374..5bca260616ea8 100644 --- a/lldb/source/Plugins/Language/ObjC/CF.cpp +++ b/lldb/source/Plugins/Language/ObjC/CF.cpp @@ -14,7 +14,6 @@ #include "lldb/DataFormatters/FormattersHelpers.h" #include "lldb/Symbol/ClangASTContext.h" #include "lldb/Target/Language.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/StackFrame.h" #include "lldb/Target/Target.h" #include "lldb/Utility/DataBufferHeap.h" @@ -22,6 +21,8 @@ #include "lldb/Utility/Status.h" #include "lldb/Utility/Stream.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + using namespace 
lldb; using namespace lldb_private; using namespace lldb_private::formatters; diff --git a/lldb/source/Plugins/Language/ObjC/Cocoa.cpp b/lldb/source/Plugins/Language/ObjC/Cocoa.cpp index 6c9d024a13213..ddf3953bb512a 100644 --- a/lldb/source/Plugins/Language/ObjC/Cocoa.cpp +++ b/lldb/source/Plugins/Language/ObjC/Cocoa.cpp @@ -17,7 +17,6 @@ #include "lldb/Host/Time.h" #include "lldb/Symbol/ClangASTContext.h" #include "lldb/Target/Language.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/Process.h" #include "lldb/Target/ProcessStructReader.h" #include "lldb/Target/Target.h" diff --git a/lldb/source/Plugins/Language/ObjC/Cocoa.h b/lldb/source/Plugins/Language/ObjC/Cocoa.h index 819b0ceb0e0f1..388e6f03aa0f9 100644 --- a/lldb/source/Plugins/Language/ObjC/Cocoa.h +++ b/lldb/source/Plugins/Language/ObjC/Cocoa.h @@ -13,9 +13,10 @@ #include "lldb/Core/ValueObject.h" #include "lldb/DataFormatters/TypeSummary.h" #include "lldb/DataFormatters/TypeSynthetic.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Utility/Stream.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + namespace lldb_private { namespace formatters { bool NSIndexSetSummaryProvider(ValueObject &valobj, Stream &stream, diff --git a/lldb/source/Plugins/Language/ObjC/NSArray.cpp b/lldb/source/Plugins/Language/ObjC/NSArray.cpp index b981b373cf272..404dabf2870c0 100644 --- a/lldb/source/Plugins/Language/ObjC/NSArray.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSArray.cpp @@ -11,13 +11,13 @@ #include "Cocoa.h" #include "Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.h" + #include "lldb/Core/ValueObject.h" #include "lldb/Core/ValueObjectConstResult.h" #include "lldb/DataFormatters/FormattersHelpers.h" #include "lldb/Expression/FunctionCaller.h" #include "lldb/Symbol/ClangASTContext.h" #include "lldb/Target/Language.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/Target.h" #include "lldb/Utility/DataBufferHeap.h" #include 
"lldb/Utility/Endian.h" diff --git a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp index 601d777b13711..10f66c4a37f80 100644 --- a/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSDictionary.cpp @@ -19,7 +19,6 @@ #include "lldb/DataFormatters/FormattersHelpers.h" #include "lldb/Symbol/ClangASTContext.h" #include "lldb/Target/Language.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/StackFrame.h" #include "lldb/Target/Target.h" #include "lldb/Utility/DataBufferHeap.h" diff --git a/lldb/source/Plugins/Language/ObjC/NSError.cpp b/lldb/source/Plugins/Language/ObjC/NSError.cpp index 3804a71e40dce..97df3be72c843 100644 --- a/lldb/source/Plugins/Language/ObjC/NSError.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSError.cpp @@ -14,7 +14,6 @@ #include "lldb/Core/ValueObjectConstResult.h" #include "lldb/DataFormatters/FormattersHelpers.h" #include "lldb/Symbol/ClangASTContext.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/ProcessStructReader.h" #include "lldb/Target/Target.h" #include "lldb/Utility/DataBufferHeap.h" @@ -23,6 +22,7 @@ #include "lldb/Utility/Stream.h" #include "Plugins/Language/ObjC/NSString.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" using namespace lldb; using namespace lldb_private; diff --git a/lldb/source/Plugins/Language/ObjC/NSException.cpp b/lldb/source/Plugins/Language/ObjC/NSException.cpp index eea34e61d47b0..931794a12ab17 100644 --- a/lldb/source/Plugins/Language/ObjC/NSException.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSException.cpp @@ -14,7 +14,6 @@ #include "lldb/Core/ValueObjectConstResult.h" #include "lldb/DataFormatters/FormattersHelpers.h" #include "lldb/Symbol/ClangASTContext.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/ProcessStructReader.h" #include "lldb/Target/Target.h" #include "lldb/Utility/DataBufferHeap.h" @@ -23,6 +22,7 @@ #include 
"lldb/Utility/Stream.h" #include "Plugins/Language/ObjC/NSString.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" using namespace lldb; using namespace lldb_private; diff --git a/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp b/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp index a15650fdb08b2..9ee6021ae56bd 100644 --- a/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp @@ -13,10 +13,10 @@ #include "lldb/DataFormatters/FormattersHelpers.h" #include "lldb/DataFormatters/TypeSynthetic.h" #include "lldb/Symbol/ClangASTContext.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/Process.h" #include "lldb/Target/Target.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" using namespace lldb; using namespace lldb_private; using namespace lldb_private::formatters; diff --git a/lldb/source/Plugins/Language/ObjC/NSSet.cpp b/lldb/source/Plugins/Language/ObjC/NSSet.cpp index f201526deef1a..ebaa990fb74b2 100644 --- a/lldb/source/Plugins/Language/ObjC/NSSet.cpp +++ b/lldb/source/Plugins/Language/ObjC/NSSet.cpp @@ -14,7 +14,6 @@ #include "lldb/DataFormatters/FormattersHelpers.h" #include "lldb/Symbol/ClangASTContext.h" #include "lldb/Target/Language.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/Target.h" #include "lldb/Utility/DataBufferHeap.h" #include "lldb/Utility/Endian.h" diff --git a/lldb/source/Plugins/Language/ObjC/NSString.h b/lldb/source/Plugins/Language/ObjC/NSString.h index 33052d47d56e9..699d8eb36f882 100644 --- a/lldb/source/Plugins/Language/ObjC/NSString.h +++ b/lldb/source/Plugins/Language/ObjC/NSString.h @@ -12,9 +12,10 @@ #include "lldb/Core/ValueObject.h" #include "lldb/DataFormatters/TypeSummary.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Utility/Stream.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + namespace lldb_private { namespace formatters { bool 
NSStringSummaryProvider(ValueObject &valobj, Stream &stream, diff --git a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp index fc0c933e13f74..f9ab18688de72 100644 --- a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp +++ b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp @@ -16,13 +16,14 @@ #include "lldb/DataFormatters/FormattersHelpers.h" #include "lldb/Symbol/ClangASTContext.h" #include "lldb/Symbol/CompilerType.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/Target.h" #include "lldb/Utility/ConstString.h" #include "lldb/Utility/StreamString.h" #include "llvm/Support/Threading.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + #include "CF.h" #include "Cocoa.h" #include "CoreMedia.h" diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h index 7738531c71a14..b8ba9dbb65f45 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h @@ -12,9 +12,10 @@ #include #include "AppleObjCRuntimeV2.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/lldb-private.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + namespace lldb_private { class ClassDescriptorV2 : public ObjCLanguageRuntime::ClassDescriptor { diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp index 501114ad02810..18f2a1829a419 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp @@ -9,10 +9,10 @@ #include "AppleObjCDeclVendor.h" #include 
"Plugins/ExpressionParser/Clang/ASTDumper.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" #include "lldb/Core/Module.h" #include "lldb/Symbol/ClangExternalASTSourceCommon.h" #include "lldb/Symbol/ClangUtil.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/Process.h" #include "lldb/Target/Target.h" #include "lldb/Utility/Log.h" @@ -20,6 +20,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" + using namespace lldb_private; class lldb_private::AppleObjCExternalASTSource diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h index 41e211f3bb984..77b30b7fde791 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h @@ -11,9 +11,10 @@ #include "lldb/Symbol/ClangASTContext.h" #include "lldb/Symbol/DeclVendor.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/lldb-private.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + namespace lldb_private { class AppleObjCExternalASTSource; diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.h index 694230e52d3c6..79ac53e1e440a 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.h +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntime.h @@ -14,9 +14,10 @@ #include "AppleObjCTrampolineHandler.h" #include "AppleThreadPlanStepThroughObjCTrampoline.h" #include "lldb/Target/LanguageRuntime.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/lldb-private.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + namespace lldb_private { class AppleObjCRuntime : public lldb_private::ObjCLanguageRuntime { diff --git 
a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.h index f012439364c1f..6fdae63d4126e 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.h +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV1.h @@ -10,9 +10,10 @@ #define liblldb_AppleObjCRuntimeV1_h_ #include "AppleObjCRuntime.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/lldb-private.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + namespace lldb_private { class AppleObjCRuntimeV1 : public AppleObjCRuntime { diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp index 38a4f9e4094e5..635eaff637bcb 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp @@ -42,7 +42,6 @@ #include "lldb/Symbol/VariableList.h" #include "lldb/Target/ABI.h" #include "lldb/Target/ExecutionContext.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/Platform.h" #include "lldb/Target/Process.h" #include "lldb/Target/RegisterContext.h" @@ -66,6 +65,8 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + #include using namespace lldb; diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.h index 358f9a9181fab..a0fd39dc03b20 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.h +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.h @@ -14,9 +14,10 @@ #include #include "AppleObjCRuntime.h" -#include 
"lldb/Target/ObjCLanguageRuntime.h" #include "lldb/lldb-private.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + class RemoteNXMapTable; namespace lldb_private { diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp index 654dbf0e2409d..b3eb09caa86dc 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp @@ -23,7 +23,6 @@ #include "lldb/Symbol/Symbol.h" #include "lldb/Target/ABI.h" #include "lldb/Target/ExecutionContext.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/Process.h" #include "lldb/Target/RegisterContext.h" #include "lldb/Target/Target.h" @@ -35,6 +34,8 @@ #include "llvm/ADT/STLExtras.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + #include using namespace lldb; diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.h b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.h index ade96dc4170d4..e576e8f283f20 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.h +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.h @@ -11,9 +11,10 @@ #include "clang/AST/ASTContext.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/lldb-private.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + namespace lldb_utility { class StringLexer; } diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleThreadPlanStepThroughObjCTrampoline.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleThreadPlanStepThroughObjCTrampoline.cpp index 12b637bd9d0c2..d18435c9c6dbc 100644 --- 
a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleThreadPlanStepThroughObjCTrampoline.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleThreadPlanStepThroughObjCTrampoline.cpp @@ -13,13 +13,14 @@ #include "lldb/Expression/FunctionCaller.h" #include "lldb/Expression/UtilityFunction.h" #include "lldb/Target/ExecutionContext.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/Process.h" #include "lldb/Target/Thread.h" #include "lldb/Target/ThreadPlanRunToAddress.h" #include "lldb/Target/ThreadPlanStepOut.h" #include "lldb/Utility/Log.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" + #include using namespace lldb; diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/CMakeLists.txt b/lldb/source/Plugins/LanguageRuntime/ObjC/CMakeLists.txt index af13dc6a144de..5b3ea2ff27fad 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/CMakeLists.txt +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/CMakeLists.txt @@ -1 +1,10 @@ +add_lldb_library(lldbPluginObjCRuntime PLUGIN + ObjCLanguageRuntime.cpp + + LINK_LIBS + lldbCore + lldbSymbol + lldbTarget + lldbUtility +) add_subdirectory(AppleObjCRuntime) diff --git a/lldb/source/Target/ObjCLanguageRuntime.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.cpp similarity index 99% rename from lldb/source/Target/ObjCLanguageRuntime.cpp rename to lldb/source/Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.cpp index 8e5d144e04192..631c15c46ce85 100644 --- a/lldb/source/Target/ObjCLanguageRuntime.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "clang/AST/Type.h" +#include "ObjCLanguageRuntime.h" + #include "lldb/Core/MappedHash.h" #include "lldb/Core/Module.h" #include "lldb/Core/PluginManager.h" @@ -17,7 +19,6 @@ #include "lldb/Symbol/Type.h" #include "lldb/Symbol/TypeList.h" #include "lldb/Symbol/Variable.h" 
-#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/Target.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/Timer.h" diff --git a/lldb/include/lldb/Target/ObjCLanguageRuntime.h b/lldb/source/Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h similarity index 100% rename from lldb/include/lldb/Target/ObjCLanguageRuntime.h rename to lldb/source/Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h diff --git a/lldb/source/Symbol/CMakeLists.txt b/lldb/source/Symbol/CMakeLists.txt index 96ccc25220acb..4b922c2178661 100644 --- a/lldb/source/Symbol/CMakeLists.txt +++ b/lldb/source/Symbol/CMakeLists.txt @@ -59,6 +59,7 @@ add_lldb_library(lldbSymbol lldbPluginSymbolFileDWARF lldbPluginSymbolFilePDB lldbPluginObjCLanguage + lldbPluginObjCRuntime LINK_COMPONENTS Support diff --git a/lldb/source/Symbol/ClangASTContext.cpp b/lldb/source/Symbol/ClangASTContext.cpp index 2d400476548e1..f85c5d2b9e05d 100644 --- a/lldb/source/Symbol/ClangASTContext.cpp +++ b/lldb/source/Symbol/ClangASTContext.cpp @@ -86,7 +86,6 @@ #include "lldb/Symbol/VerifyDecl.h" #include "lldb/Target/ExecutionContext.h" #include "lldb/Target/Language.h" -#include "lldb/Target/ObjCLanguageRuntime.h" #include "lldb/Target/Process.h" #include "lldb/Target/Target.h" #include "lldb/Utility/DataExtractor.h" @@ -95,6 +94,7 @@ #include "lldb/Utility/RegularExpression.h" #include "lldb/Utility/Scalar.h" +#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h" #include "Plugins/SymbolFile/DWARF/DWARFASTParserClang.h" #include "Plugins/SymbolFile/PDB/PDBASTParser.h" diff --git a/lldb/source/Target/CMakeLists.txt b/lldb/source/Target/CMakeLists.txt index af3d05b98b65b..2f59a4851b8fb 100644 --- a/lldb/source/Target/CMakeLists.txt +++ b/lldb/source/Target/CMakeLists.txt @@ -10,7 +10,6 @@ add_lldb_library(lldbTarget Memory.cpp MemoryHistory.cpp ModuleCache.cpp - ObjCLanguageRuntime.cpp OperatingSystem.cpp PathMappingList.cpp Platform.cpp From 1cf6922660187c93f9203ea63e90b02514f08e79 Mon Sep 17 
00:00:00 2001 From: Heejin Ahn Date: Mon, 15 Jul 2019 23:04:00 +0000 Subject: [PATCH 181/451] [WebAssembly] Add missing utility methods for exnref type Summary: This adds missing utility methods and copy instruction handling for `exnref` type and also adds tests. `tee` instruction tests are missing because `isTee` is currently only used in ExplicitLocals pass and testing that pass in mir requires serialization of stackified registers in mir files, which is a bit nontrivial because `MachineFunctionInfo` only has info of vreg numbers (which are large integers) but not the mir's register numbers. But this change is quite trivial anyway. Reviewers: tlively Subscribers: dschuff, sbc100, jgravelle-google, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64705 llvm-svn: 366149 --- .../MCTargetDesc/WebAssemblyMCTargetDesc.h | 4 ++ .../WebAssembly/WebAssemblyInstrInfo.cpp | 2 + .../test/CodeGen/WebAssembly/reg-argument.mir | 59 +++++++++++++++++++ llvm/test/CodeGen/WebAssembly/reg-copy.mir | 11 ++++ 4 files changed, 76 insertions(+) create mode 100644 llvm/test/CodeGen/WebAssembly/reg-argument.mir diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 31ad88b3549c7..7a9f59b1a4f2c 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -385,6 +385,8 @@ inline bool isArgument(unsigned Opc) { case WebAssembly::ARGUMENT_v4f32_S: case WebAssembly::ARGUMENT_v2f64: case WebAssembly::ARGUMENT_v2f64_S: + case WebAssembly::ARGUMENT_exnref: + case WebAssembly::ARGUMENT_exnref_S: return true; default: return false; @@ -423,6 +425,8 @@ inline bool isTee(unsigned Opc) { case WebAssembly::TEE_F64_S: case WebAssembly::TEE_V128: case WebAssembly::TEE_V128_S: + case WebAssembly::TEE_EXNREF: + case WebAssembly::TEE_EXNREF_S: return true; default: return false; 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index d7022ce0bfba3..a86c9af28f0d7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -75,6 +75,8 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB, CopyOpcode = WebAssembly::COPY_F64; else if (RC == &WebAssembly::V128RegClass) CopyOpcode = WebAssembly::COPY_V128; + else if (RC == &WebAssembly::EXNREFRegClass) + CopyOpcode = WebAssembly::COPY_EXNREF; else llvm_unreachable("Unexpected register class"); diff --git a/llvm/test/CodeGen/WebAssembly/reg-argument.mir b/llvm/test/CodeGen/WebAssembly/reg-argument.mir new file mode 100644 index 0000000000000..70c033f7f8f0f --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/reg-argument.mir @@ -0,0 +1,59 @@ +# RUN: llc -mtriple=wasm32-unknown-unknown %s -o - -run-pass wasm-argument-move | FileCheck %s + +# wasm-argument-move pass moves all ARGUMENT instructions to the top of the +# entry BB. +--- +name: argument_i32 +# CHECK-LABEL: argument_i32 +body: | + ; CHECK-LABEL: bb.0: + ; CHECK-NEXT: %1:i32 = ARGUMENT_i32 0 + bb.0: + %0:i32 = CONST_I32 0, implicit-def $arguments + %1:i32 = ARGUMENT_i32 0, implicit $arguments + RETURN_VOID implicit-def $arguments +... +--- +name: argument_i64 +# CHECK-LABEL: argument_i64 +body: | + ; CHECK-LABEL: bb.0: + ; CHECK-NEXT: %1:i64 = ARGUMENT_i64 0 + bb.0: + %0:i32 = CONST_I32 0, implicit-def $arguments + %1:i64 = ARGUMENT_i64 0, implicit $arguments + RETURN_VOID implicit-def $arguments +... +--- +name: argument_f32 +# CHECK-LABEL: argument_f32 +body: | + ; CHECK-LABEL: bb.0: + ; CHECK-NEXT: %1:f32 = ARGUMENT_f32 0 + bb.0: + %0:i32 = CONST_I32 0, implicit-def $arguments + %1:f32 = ARGUMENT_f32 0, implicit $arguments + RETURN_VOID implicit-def $arguments +... 
+--- +name: argument_f64 +# CHECK-LABEL: argument_f64 +body: | + ; CHECK-LABEL: bb.0: + ; CHECK-NEXT: %1:f64 = ARGUMENT_f64 0 + bb.0: + %0:i32 = CONST_I32 0, implicit-def $arguments + %1:f64 = ARGUMENT_f64 0, implicit $arguments + RETURN_VOID implicit-def $arguments +... +--- +name: argument_exnref +# CHECK-LABEL: argument_exnref +body: | + ; CHECK-LABEL: bb.0: + ; CHECK-NEXT: %1:exnref = ARGUMENT_exnref 0 + bb.0: + %0:i32 = CONST_I32 0, implicit-def $arguments + %1:exnref = ARGUMENT_exnref 0, implicit $arguments + RETURN_VOID implicit-def $arguments +... diff --git a/llvm/test/CodeGen/WebAssembly/reg-copy.mir b/llvm/test/CodeGen/WebAssembly/reg-copy.mir index 0a362699b8143..a077c347efdaa 100644 --- a/llvm/test/CodeGen/WebAssembly/reg-copy.mir +++ b/llvm/test/CodeGen/WebAssembly/reg-copy.mir @@ -55,3 +55,14 @@ body: | %0:v128 = COPY %1:v128 RETURN_VOID implicit-def $arguments ... +--- +name: copy_exnref +# CHECK-LABEL: copy_exnref +body: | + ; CHECK-LABEL: bb.0: + ; CHECK-NEXT: %0:exnref = COPY_EXNREF %1:exnref + ; CHECK-NEXT: RETURN_VOID + bb.0: + %0:exnref = COPY %1:exnref + RETURN_VOID implicit-def $arguments +... From c48162db994ab6040c45d468ea95772b574ab3ef Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Mon, 15 Jul 2019 23:05:14 +0000 Subject: [PATCH 182/451] [TSan] Fix asm token error (again) llvm-svn: 366150 --- compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc index 8e4ddc969e058..c387416c20dd7 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc @@ -421,7 +421,7 @@ static void InitializeLongjmpXorKey() { // 2. Retrieve vanilla/mangled SP. uptr sp; - asm("mov %0, %sp" : "=r" (sp)); + asm("mov %0, sp" : "=r" (sp)); uptr mangled_sp = ((uptr *)&env)[LONG_JMP_SP_ENV_SLOT]; // 3. xor SPs to obtain key. 
From 51193871dafd99e79d7d19f62cffbdcdda238530 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 15 Jul 2019 23:07:56 +0000 Subject: [PATCH 183/451] [X86] Teach convertToThreeAddress to handle SUB with immediate We mostly avoid sub with immediate but there are a couple cases that can create them. One is the add 128, %rax -> sub -128, %rax trick in isel. The other is when a SUB immediate gets created for a compare where both the flags and the subtract value are used. If we are unable to linearize the SelectionDAG to satisfy the flag user and the sub result user from the same instruction, we will clone the sub immediate for the two uses. The one that produces flags will eventually become a compare. The other will have its flag output dead, and could then be considered for LEA creation. I added additional test cases to add.ll to show that the sub -128 trick gets converted to LEA and a case where we don't need to convert it. This showed up in the current codegen for PR42571. Differential Revision: https://reviews.llvm.org/D64574 llvm-svn: 366151 --- llvm/lib/Target/X86/X86InstrArithmetic.td | 15 ++++--- llvm/lib/Target/X86/X86InstrInfo.cpp | 45 +++++++++++++++++++ llvm/test/CodeGen/X86/add.ll | 14 +++--- .../X86/bmi-intrinsics-fast-isel-x86_64.ll | 12 ++--- .../CodeGen/X86/bmi-intrinsics-fast-isel.ll | 28 +++++------- llvm/test/CodeGen/X86/cgp-usubo.ll | 4 +- .../X86/tbm-intrinsics-fast-isel-x86_64.ll | 3 +- .../CodeGen/X86/tbm-intrinsics-fast-isel.ll | 7 ++- 8 files changed, 81 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index fbdc55cb02556..e52635f8d48b9 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -924,11 +924,12 @@ class BinOpAI_F opcode, string mnemonic, X86TypeInfo typeinfo, multiclass ArithBinOp_RF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, string mnemonic, Format RegMRM, Format MemMRM, SDNode opnodeflag, SDNode
opnode, - bit CommutableRR, bit ConvertibleToThreeAddress> { + bit CommutableRR, bit ConvertibleToThreeAddress, + bit ConvertibleToThreeAddressRR> { let Defs = [EFLAGS] in { let Constraints = "$src1 = $dst" in { let isCommutable = CommutableRR in { - let isConvertibleToThreeAddress = ConvertibleToThreeAddress in { + let isConvertibleToThreeAddress = ConvertibleToThreeAddressRR in { def NAME#8rr : BinOpRR_RF; def NAME#16rr : BinOpRR_RF; def NAME#32rr : BinOpRR_RF; @@ -1169,16 +1170,16 @@ multiclass ArithBinOp_F BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, defm AND : ArithBinOp_RF<0x20, 0x22, 0x24, "and", MRM4r, MRM4m, - X86and_flag, and, 1, 0>; + X86and_flag, and, 1, 0, 0>; defm OR : ArithBinOp_RF<0x08, 0x0A, 0x0C, "or", MRM1r, MRM1m, - X86or_flag, or, 1, 0>; + X86or_flag, or, 1, 0, 0>; defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m, - X86xor_flag, xor, 1, 0>; + X86xor_flag, xor, 1, 0, 0>; defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m, - X86add_flag, add, 1, 1>; + X86add_flag, add, 1, 1, 1>; let isCompare = 1 in { defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m, - X86sub_flag, sub, 0, 0>; + X86sub_flag, sub, 0, 1, 0>; } // Arithmetic. diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index e5d3a09c291b9..dbe45356c42bf 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -1078,6 +1078,51 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD16ri_DB: case X86::ADD16ri8_DB: return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp); + case X86::SUB8ri: + case X86::SUB16ri8: + case X86::SUB16ri: + /// FIXME: Support these similar to ADD8ri/ADD16ri*. + return nullptr; + case X86::SUB32ri8: + case X86::SUB32ri: { + int64_t Imm = MI.getOperand(2).getImm(); + if (!isInt<32>(-Imm)) + return nullptr; + + assert(MI.getNumOperands() >= 3 && "Unknown add instruction!"); + unsigned Opc = Is64Bit ? 
X86::LEA64_32r : X86::LEA32r; + + bool isKill; + unsigned SrcReg; + MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); + if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, + SrcReg, isKill, ImplicitOp, LV)) + return nullptr; + + MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)) + .add(Dest) + .addReg(SrcReg, getKillRegState(isKill)); + if (ImplicitOp.getReg() != 0) + MIB.add(ImplicitOp); + + NewMI = addOffset(MIB, -Imm); + break; + } + + case X86::SUB64ri8: + case X86::SUB64ri32: { + int64_t Imm = MI.getOperand(2).getImm(); + if (!isInt<32>(-Imm)) + return nullptr; + + assert(MI.getNumOperands() >= 3 && "Unknown sub instruction!"); + + MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), + get(X86::LEA64r)).add(Dest).add(Src); + NewMI = addOffset(MIB, -Imm); + break; + } + case X86::VMOVDQU8Z128rmk: case X86::VMOVDQU8Z256rmk: case X86::VMOVDQU8Zrmk: diff --git a/llvm/test/CodeGen/X86/add.ll b/llvm/test/CodeGen/X86/add.ll index e9516b871048d..1662562bd013f 100644 --- a/llvm/test/CodeGen/X86/add.ll +++ b/llvm/test/CodeGen/X86/add.ll @@ -16,14 +16,14 @@ define i32 @test1(i32 inreg %a) nounwind { ; ; X64-LINUX-LABEL: test1: ; X64-LINUX: # %bb.0: # %entry -; X64-LINUX-NEXT: movl %edi, %eax -; X64-LINUX-NEXT: subl $-128, %eax +; X64-LINUX-NEXT: # kill: def $edi killed $edi def $rdi +; X64-LINUX-NEXT: leal 128(%rdi), %eax ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test1: ; X64-WIN32: # %bb.0: # %entry -; X64-WIN32-NEXT: movl %ecx, %eax -; X64-WIN32-NEXT: subl $-128, %eax +; X64-WIN32-NEXT: # kill: def $ecx killed $ecx def $rcx +; X64-WIN32-NEXT: leal 128(%rcx), %eax ; X64-WIN32-NEXT: retq entry: %b = add i32 %a, 128 @@ -86,14 +86,12 @@ define i64 @test3(i64 inreg %a) nounwind { ; ; X64-LINUX-LABEL: test3: ; X64-LINUX: # %bb.0: # %entry -; X64-LINUX-NEXT: movq %rdi, %rax -; X64-LINUX-NEXT: subq $-128, %rax +; X64-LINUX-NEXT: leaq 128(%rdi), %rax ; X64-LINUX-NEXT: retq ; ; X64-WIN32-LABEL: test3: ; X64-WIN32: # %bb.0: # %entry -; 
X64-WIN32-NEXT: movq %rcx, %rax -; X64-WIN32-NEXT: subq $-128, %rax +; X64-WIN32-NEXT: leaq 128(%rcx), %rax ; X64-WIN32-NEXT: retq entry: %b = add i64 %a, 128 diff --git a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll index 872233f51ad48..d704f38307fcb 100644 --- a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll +++ b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll @@ -43,8 +43,7 @@ define i64 @test__blsi_u64(i64 %a0) { define i64 @test__blsmsk_u64(i64 %a0) { ; X64-LABEL: test__blsmsk_u64: ; X64: # %bb.0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: subq $1, %rax +; X64-NEXT: leaq -1(%rdi), %rax ; X64-NEXT: xorq %rdi, %rax ; X64-NEXT: retq %dec = sub i64 %a0, 1 @@ -55,8 +54,7 @@ define i64 @test__blsmsk_u64(i64 %a0) { define i64 @test__blsr_u64(i64 %a0) { ; X64-LABEL: test__blsr_u64: ; X64: # %bb.0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: subq $1, %rax +; X64-NEXT: leaq -1(%rdi), %rax ; X64-NEXT: andq %rdi, %rax ; X64-NEXT: retq %dec = sub i64 %a0, 1 @@ -124,8 +122,7 @@ define i64 @test_blsi_u64(i64 %a0) { define i64 @test_blsmsk_u64(i64 %a0) { ; X64-LABEL: test_blsmsk_u64: ; X64: # %bb.0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: subq $1, %rax +; X64-NEXT: leaq -1(%rdi), %rax ; X64-NEXT: xorq %rdi, %rax ; X64-NEXT: retq %dec = sub i64 %a0, 1 @@ -136,8 +133,7 @@ define i64 @test_blsmsk_u64(i64 %a0) { define i64 @test_blsr_u64(i64 %a0) { ; X64-LABEL: test_blsr_u64: ; X64: # %bb.0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: subq $1, %rax +; X64-NEXT: leaq -1(%rdi), %rax ; X64-NEXT: andq %rdi, %rax ; X64-NEXT: retq %dec = sub i64 %a0, 1 diff --git a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll index ced1585bd71b4..c6950da4064d1 100644 --- a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll @@ -82,15 +82,14 @@ define i32 @test__blsmsk_u32(i32 %a0) { ; X32-LABEL: 
test__blsmsk_u32: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: subl $1, %eax +; X32-NEXT: leal -1(%ecx), %eax ; X32-NEXT: xorl %ecx, %eax ; X32-NEXT: retl ; ; X64-LABEL: test__blsmsk_u32: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: subl $1, %eax +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal -1(%rdi), %eax ; X64-NEXT: xorl %edi, %eax ; X64-NEXT: retq %dec = sub i32 %a0, 1 @@ -102,15 +101,14 @@ define i32 @test__blsr_u32(i32 %a0) { ; X32-LABEL: test__blsr_u32: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: subl $1, %eax +; X32-NEXT: leal -1(%ecx), %eax ; X32-NEXT: andl %ecx, %eax ; X32-NEXT: retl ; ; X64-LABEL: test__blsr_u32: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: subl $1, %eax +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal -1(%rdi), %eax ; X64-NEXT: andl %edi, %eax ; X64-NEXT: retq %dec = sub i32 %a0, 1 @@ -224,15 +222,14 @@ define i32 @test_blsmsk_u32(i32 %a0) { ; X32-LABEL: test_blsmsk_u32: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: subl $1, %eax +; X32-NEXT: leal -1(%ecx), %eax ; X32-NEXT: xorl %ecx, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_blsmsk_u32: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: subl $1, %eax +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal -1(%rdi), %eax ; X64-NEXT: xorl %edi, %eax ; X64-NEXT: retq %dec = sub i32 %a0, 1 @@ -244,15 +241,14 @@ define i32 @test_blsr_u32(i32 %a0) { ; X32-LABEL: test_blsr_u32: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: subl $1, %eax +; X32-NEXT: leal -1(%ecx), %eax ; X32-NEXT: andl %ecx, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_blsr_u32: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: subl $1, %eax +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal -1(%rdi), %eax ; 
X64-NEXT: andl %edi, %eax ; X64-NEXT: retq %dec = sub i32 %a0, 1 diff --git a/llvm/test/CodeGen/X86/cgp-usubo.ll b/llvm/test/CodeGen/X86/cgp-usubo.ll index 6733a8258f8d6..ab82d9809724a 100644 --- a/llvm/test/CodeGen/X86/cgp-usubo.ll +++ b/llvm/test/CodeGen/X86/cgp-usubo.ll @@ -246,8 +246,8 @@ exit: define i32 @PR42571(i32 %x, i32 %y) { ; CHECK-LABEL: PR42571: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: subl $1, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal -1(%rdi), %eax ; CHECK-NEXT: andl %edi, %eax ; CHECK-NEXT: cmpl $1, %edi ; CHECK-NEXT: cmovbl %esi, %eax diff --git a/llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll index 82b3b4c3624b8..35c14697cf967 100644 --- a/llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll +++ b/llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel-x86_64.ll @@ -76,8 +76,7 @@ define i64 @test__blcs_u64(i64 %a0) { define i64 @test__blsfill_u64(i64 %a0) { ; X64-LABEL: test__blsfill_u64: ; X64: # %bb.0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: subq $1, %rax +; X64-NEXT: leaq -1(%rdi), %rax ; X64-NEXT: orq %rdi, %rax ; X64-NEXT: retq %1 = sub i64 %a0, 1 diff --git a/llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll index 0664d043e1163..55fe9b8b3c0c1 100644 --- a/llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/tbm-intrinsics-fast-isel.ll @@ -125,15 +125,14 @@ define i32 @test__blsfill_u32(i32 %a0) { ; X32-LABEL: test__blsfill_u32: ; X32: # %bb.0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: subl $1, %eax +; X32-NEXT: leal -1(%ecx), %eax ; X32-NEXT: orl %ecx, %eax ; X32-NEXT: retl ; ; X64-LABEL: test__blsfill_u32: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: subl $1, %eax +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal -1(%rdi), %eax ; X64-NEXT: orl %edi, %eax ; X64-NEXT: retq 
%1 = sub i32 %a0, 1 From 60a0d49e77cf6583b749ad6189751cd5d31bf3ee Mon Sep 17 00:00:00 2001 From: Jan Korous Date: Mon, 15 Jul 2019 23:14:00 +0000 Subject: [PATCH 184/451] [DirectoryWatcher][linux] Fix for older kernels IN_EXCL_UNLINK exists since Linux 2.6.36 Differential Revision: https://reviews.llvm.org/D64764 llvm-svn: 366152 --- .../DirectoryWatcher/linux/DirectoryWatcher-linux.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp b/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp index 0c9f799b638d6..6d7d69da4db5a 100644 --- a/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp +++ b/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -332,8 +333,12 @@ std::unique_ptr clang::DirectoryWatcher::create( const int InotifyWD = inotify_add_watch( InotifyFD, Path.str().c_str(), - IN_CREATE | IN_DELETE | IN_DELETE_SELF | IN_EXCL_UNLINK | IN_MODIFY | - IN_MOVED_FROM | IN_MOVE_SELF | IN_MOVED_TO | IN_ONLYDIR | IN_IGNORED); + IN_CREATE | IN_DELETE | IN_DELETE_SELF | IN_MODIFY | + IN_MOVED_FROM | IN_MOVE_SELF | IN_MOVED_TO | IN_ONLYDIR | IN_IGNORED +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36) + | IN_EXCL_UNLINK +#endif + ); if (InotifyWD == -1) return nullptr; From bb147aabc68c366cff4ac5f1713b7b138a3b0fe0 Mon Sep 17 00:00:00 2001 From: Leonard Chan Date: Mon, 15 Jul 2019 23:18:31 +0000 Subject: [PATCH 185/451] Revert "[NewPM] Port Sancov" This reverts commit 5652f35817f07b16f8b3856d594cc42f4d7ee29c. 
llvm-svn: 366153 --- clang/lib/CodeGen/BackendUtil.cpp | 47 +-- clang/test/CodeGen/sancov-new-pm.c | 41 -- llvm/include/llvm/InitializePasses.h | 4 +- .../include/llvm/Transforms/Instrumentation.h | 4 + .../Instrumentation/SanitizerCoverage.h | 62 --- llvm/lib/Passes/PassBuilder.cpp | 3 +- llvm/lib/Passes/PassRegistry.def | 2 - .../Instrumentation/Instrumentation.cpp | 2 +- .../Instrumentation/SanitizerCoverage.cpp | 359 ++++++------------ .../SanitizerCoverage/abort-in-entry-block.ll | 1 - .../SanitizerCoverage/backedge-pruning.ll | 2 - .../SanitizerCoverage/chains.ll | 1 - .../cmp-tracing-api-x86_32.ll | 25 +- .../cmp-tracing-api-x86_64.ll | 25 +- .../SanitizerCoverage/cmp-tracing.ll | 1 - .../SanitizerCoverage/coff-comdat.ll | 1 - .../coff-pc-table-inline-8bit-counters.ll | 1 - .../SanitizerCoverage/coff-used-ctor.ll | 3 +- .../SanitizerCoverage/const-cmp-tracing.ll | 1 - .../SanitizerCoverage/coverage-dbg.ll | 1 - .../SanitizerCoverage/coverage.ll | 4 +- .../SanitizerCoverage/coverage2-dbg.ll | 1 - .../SanitizerCoverage/div-tracing.ll | 1 - .../SanitizerCoverage/gep-tracing.ll | 1 - .../SanitizerCoverage/inline-8bit-counters.ll | 1 - .../interposable-symbol-nocomdat.ll | 2 - .../SanitizerCoverage/no-func.ll | 1 - .../SanitizerCoverage/pc-table.ll | 2 - .../SanitizerCoverage/postdominator_check.ll | 2 - .../Instrumentation/SanitizerCoverage/seh.ll | 3 - .../stack-depth-variable-declared-by-user.ll | 2 - .../SanitizerCoverage/stack-depth.ll | 5 - .../SanitizerCoverage/switch-tracing.ll | 1 - .../trace-pc-guard-comdat.ll | 1 - .../trace-pc-guard-inline-8bit-counters.ll | 1 - .../trace-pc-guard-nocomdat.ll | 1 - .../SanitizerCoverage/tracing-comdat.ll | 3 - .../SanitizerCoverage/tracing.ll | 4 - .../SanitizerCoverage/unreachable-critedge.ll | 1 - .../SanitizerCoverage/wineh.ll | 1 - 40 files changed, 156 insertions(+), 468 deletions(-) delete mode 100644 clang/test/CodeGen/sancov-new-pm.c delete mode 100644 
llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 8499af07dbb7f..40a529c319f4a 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -60,7 +60,6 @@ #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" #include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" -#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" @@ -196,8 +195,11 @@ static void addBoundsCheckingPass(const PassManagerBuilder &Builder, PM.add(createBoundsCheckingLegacyPass()); } -static SanitizerCoverageOptions -getSancovOptsFromCGOpts(const CodeGenOptions &CGOpts) { +static void addSanitizerCoveragePass(const PassManagerBuilder &Builder, + legacy::PassManagerBase &PM) { + const PassManagerBuilderWrapper &BuilderWrapper = + static_cast(Builder); + const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); SanitizerCoverageOptions Opts; Opts.CoverageType = static_cast(CGOpts.SanitizeCoverageType); @@ -213,17 +215,7 @@ getSancovOptsFromCGOpts(const CodeGenOptions &CGOpts) { Opts.Inline8bitCounters = CGOpts.SanitizeCoverageInline8bitCounters; Opts.PCTable = CGOpts.SanitizeCoveragePCTable; Opts.StackDepth = CGOpts.SanitizeCoverageStackDepth; - return Opts; -} - -static void addSanitizerCoveragePass(const PassManagerBuilder &Builder, - legacy::PassManagerBase &PM) { - const PassManagerBuilderWrapper &BuilderWrapper = - static_cast(Builder); - const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts(); - auto Opts = getSancovOptsFromCGOpts(CGOpts); - PM.add(createModuleSanitizerCoverageLegacyPassPass(Opts)); - PM.add(createSanitizerCoverageLegacyPassPass(Opts)); + PM.add(createSanitizerCoverageModulePass(Opts)); } // Check if ASan should use GC-friendly 
instrumentation for globals. @@ -1143,21 +1135,6 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( EntryExitInstrumenterPass(/*PostInlining=*/false))); }); - if (CodeGenOpts.SanitizeCoverageType || - CodeGenOpts.SanitizeCoverageIndirectCalls || - CodeGenOpts.SanitizeCoverageTraceCmp) { - auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts); - PB.registerPipelineStartEPCallback( - [SancovOpts](ModulePassManager &MPM) { - MPM.addPass(ModuleSanitizerCoveragePass(SancovOpts)); - }); - PB.registerOptimizerLastEPCallback( - [SancovOpts](FunctionPassManager &FPM, - PassBuilder::OptimizationLevel Level) { - FPM.addPass(SanitizerCoveragePass(SancovOpts)); - }); - } - // Register callbacks to schedule sanitizer passes at the appropriate part of // the pipeline. // FIXME: either handle asan/the remaining sanitizers or error out @@ -1242,18 +1219,8 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( } } - if (CodeGenOpts.OptimizationLevel == 0) { - if (CodeGenOpts.SanitizeCoverageType || - CodeGenOpts.SanitizeCoverageIndirectCalls || - CodeGenOpts.SanitizeCoverageTraceCmp) { - auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts); - MPM.addPass(ModuleSanitizerCoveragePass(SancovOpts)); - MPM.addPass(createModuleToFunctionPassAdaptor( - SanitizerCoveragePass(SancovOpts))); - } - + if (CodeGenOpts.OptimizationLevel == 0) addSanitizersAtO0(MPM, TargetTriple, LangOpts, CodeGenOpts); - } } // FIXME: We still use the legacy pass manager to do code generation. We diff --git a/clang/test/CodeGen/sancov-new-pm.c b/clang/test/CodeGen/sancov-new-pm.c deleted file mode 100644 index 06d9042bc70a8..0000000000000 --- a/clang/test/CodeGen/sancov-new-pm.c +++ /dev/null @@ -1,41 +0,0 @@ -// Test that SanitizerCoverage works under the new pass manager. 
-// RUN: %clang -target x86_64-linux-gnu -fsanitize=fuzzer %s -fexperimental-new-pass-manager -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O0 -// RUN: %clang -target x86_64-linux-gnu -fsanitize=fuzzer %s -fexperimental-new-pass-manager -O2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O2 -// RUN: %clang -target x86_64-linux-gnu -fsanitize=fuzzer %s -fexperimental-new-pass-manager -flto -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O0 -// RUN: %clang -target x86_64-linux-gnu -fsanitize=fuzzer %s -fexperimental-new-pass-manager -flto -O2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O2 -// RUN: %clang -target x86_64-linux-gnu -fsanitize=fuzzer %s -fexperimental-new-pass-manager -flto=thin -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O0 -// RUN: %clang -target x86_64-linux-gnu -fsanitize=fuzzer %s -fexperimental-new-pass-manager -flto=thin -O2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O2,CHECK-O2-THINLTO - -extern void *memcpy(void *, const void *, unsigned long); -extern int printf(const char *restrict, ...); - -int LLVMFuzzerTestOneInput(const unsigned char *data, unsigned long size) { - unsigned char buf[4]; - - if (size < 8) - return 0; - - if (data[0] == 'h' && data[1] == 'i' && data[2] == '!') { - memcpy(buf, data, size); - printf("test: %.2X\n", buf[0]); - } - - return 0; -} - -// CHECK-DAG: declare void @__sanitizer_cov_pcs_init(i64*, i64*) -// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_pc_indir(i64) -// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_cmp1(i8 zeroext, i8 zeroext) -// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_cmp2(i16 zeroext, i16 zeroext) -// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_cmp4(i32 zeroext, i32 zeroext) -// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_cmp8(i64, i64) -// CHECK-O2-THINLTO-NOT: declare void @__sanitizer_cov_trace_const_cmp1(i8 zeroext, i8 zeroext) -// CHECK-O0-DAG: declare void 
@__sanitizer_cov_trace_const_cmp2(i16 zeroext, i16 zeroext) -// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_const_cmp4(i32 zeroext, i32 zeroext) -// CHECK-O2-THINLTO-NOT: declare void @__sanitizer_cov_trace_const_cmp8(i64, i64) -// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_div4(i32 zeroext) -// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_div8(i64) -// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_gep(i64) -// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_switch(i64, i64*) -// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_pc() -// CHECK-O0-DAG: declare void @__sanitizer_cov_trace_pc_guard(i32*) diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 031df1f00e879..164d0be2855ad 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -360,9 +360,7 @@ void initializeSROALegacyPassPass(PassRegistry&); void initializeSafeStackLegacyPassPass(PassRegistry&); void initializeSafepointIRVerifierPass(PassRegistry&); void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&); -void initializeSanitizerCoverageFunctionCheckLegacyPassPass(PassRegistry &); -void initializeSanitizerCoverageLegacyPassPass(PassRegistry &); -void initializeModuleSanitizerCoverageLegacyPassPass(PassRegistry &); +void initializeSanitizerCoverageModulePass(PassRegistry&); void initializeScalarEvolutionWrapperPassPass(PassRegistry&); void initializeScalarizeMaskedMemIntrinPass(PassRegistry&); void initializeScalarizerLegacyPassPass(PassRegistry&); diff --git a/llvm/include/llvm/Transforms/Instrumentation.h b/llvm/include/llvm/Transforms/Instrumentation.h index fcad1e11895fe..8b70d2926ae9e 100644 --- a/llvm/include/llvm/Transforms/Instrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation.h @@ -181,6 +181,10 @@ struct SanitizerCoverageOptions { SanitizerCoverageOptions() = default; }; +// Insert SanitizerCoverage instrumentation. 
+ModulePass *createSanitizerCoverageModulePass( + const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()); + /// Calculate what to divide by to scale counts. /// /// Given the maximum count, calculate a divisor that will scale all the diff --git a/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h b/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h deleted file mode 100644 index bdc79b1a45244..0000000000000 --- a/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h +++ /dev/null @@ -1,62 +0,0 @@ -//===--------- Definition of the SanitizerCoverage class --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file declares the SanitizerCoverage class which is a port of the legacy -// SanitizerCoverage pass to use the new PassManager infrastructure. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERCOVERAGE_H -#define LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERCOVERAGE_H - -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Instrumentation.h" - -namespace llvm { - -/// This is the SanitizerCoverage pass used in the new pass manager. The -/// pass instruments functions for coverage. 
-class SanitizerCoveragePass : public PassInfoMixin { -public: - explicit SanitizerCoveragePass( - SanitizerCoverageOptions Options = SanitizerCoverageOptions()) - : Options(Options) {} - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); - -private: - SanitizerCoverageOptions Options; -}; - -/// This is the ModuleSanitizerCoverage pass used in the new pass manager. This -/// adds initialization calls to the module for trace PC guards and 8bit -/// counters if they are requested. -class ModuleSanitizerCoveragePass - : public PassInfoMixin { -public: - explicit ModuleSanitizerCoveragePass( - SanitizerCoverageOptions Options = SanitizerCoverageOptions()) - : Options(Options) {} - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); - -private: - SanitizerCoverageOptions Options; -}; - -// Insert SanitizerCoverage instrumentation. -FunctionPass *createSanitizerCoverageLegacyPassPass( - const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()); -ModulePass *createModuleSanitizerCoverageLegacyPassPass( - const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()); - -} // namespace llvm - -#endif diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index eef94bf9012a2..e2b2a2b252684 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -101,7 +101,6 @@ #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Instrumentation/PoisonChecking.h" -#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/Scalar/ADCE.h" #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" @@ -144,8 +143,8 @@ #include "llvm/Transforms/Scalar/LowerWidenableCondition.h" #include "llvm/Transforms/Scalar/MakeGuardsExplicit.h" #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" -#include 
"llvm/Transforms/Scalar/MergeICmps.h" #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" +#include "llvm/Transforms/Scalar/MergeICmps.h" #include "llvm/Transforms/Scalar/NaryReassociate.h" #include "llvm/Transforms/Scalar/NewGVN.h" #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index b1b2304af8d6b..e785558d5a732 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -86,7 +86,6 @@ MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr)) MODULE_PASS("verify", VerifierPass()) MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false)) MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false)) -MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass()) MODULE_PASS("poison-checking", PoisonCheckingPass()) #undef MODULE_PASS @@ -246,7 +245,6 @@ FUNCTION_PASS("khwasan", HWAddressSanitizerPass(true, true)) FUNCTION_PASS("msan", MemorySanitizerPass({})) FUNCTION_PASS("kmsan", MemorySanitizerPass({0, false, /*Kernel=*/true})) FUNCTION_PASS("tsan", ThreadSanitizerPass()) -FUNCTION_PASS("sancov-func", SanitizerCoveragePass()) #undef FUNCTION_PASS #ifndef FUNCTION_PASS_WITH_PARAMS diff --git a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp index 64bf51476612a..f56a1bd91b898 100644 --- a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -116,7 +116,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializeMemorySanitizerLegacyPassPass(Registry); initializeHWAddressSanitizerLegacyPassPass(Registry); initializeThreadSanitizerLegacyPassPass(Registry); - initializeSanitizerCoverageLegacyPassPass(Registry); + initializeSanitizerCoverageModulePass(Registry); 
initializeDataFlowSanitizerPass(Registry); } diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index b7c29d6d28b41..ca0cb4bdbe844 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -10,7 +10,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/EHPersonalities.h" @@ -177,158 +176,24 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) { return Options; } -bool canInstrumentWithSancov(const Function &F) { - if (F.empty()) - return false; - if (F.getName().find(".module_ctor") != std::string::npos) - return false; // Should not instrument sanitizer init functions. - if (F.getName().startswith("__sanitizer_")) - return false; // Don't instrument __sanitizer_* callbacks. - // Don't touch available_externally functions, their actual body is elewhere. - if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) - return false; - // Don't instrument MSVC CRT configuration helpers. They may run before normal - // initialization. - if (F.getName() == "__local_stdio_printf_options" || - F.getName() == "__local_stdio_scanf_options") - return false; - if (isa(F.getEntryBlock().getTerminator())) - return false; - // Don't instrument functions using SEH for now. Splitting basic blocks like - // we do for coverage breaks WinEHPrepare. - // FIXME: Remove this when SEH no longer uses landingpad pattern matching. - if (F.hasPersonalityFn() && - isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) - return false; - return true; -} - -/// This is a class for instrumenting the module to add calls to initializing -/// the trace PC guards and 8bit counter globals. 
This should only be done -/// though if there is at least one function that can be instrumented with -/// Sancov. -class ModuleSanitizerCoverage { +class SanitizerCoverageModule : public ModulePass { public: - ModuleSanitizerCoverage(const SanitizerCoverageOptions &Options) - : Options(OverrideFromCL(Options)) {} - - bool instrumentModule(Module &M) { - if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) - return false; - - Function *Ctor = nullptr; - LLVMContext *C = &(M.getContext()); - const DataLayout *DL = &M.getDataLayout(); - TargetTriple = Triple(M.getTargetTriple()); - IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits()); - Type *IntptrPtrTy = PointerType::getUnqual(IntptrTy); - IRBuilder<> IRB(*C); - Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); - Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); - Int8Ty = IRB.getInt8Ty(); - - // Check that the __sancov_lowest_stack marker does not already exist. - Constant *SanCovLowestStackConstant = - M.getOrInsertGlobal(SanCovLowestStackName, IntptrTy); - GlobalVariable *SanCovLowestStack = - dyn_cast(SanCovLowestStackConstant); - if (!SanCovLowestStack) { - C->emitError(StringRef("'") + SanCovLowestStackName + - "' should not be declared by the user"); - return true; - } - - // We want to emit guard init calls if the module contains a function that - // we can instrument with SanitizerCoverage. We ignore any functions that - // were inserted by SanitizerCoverage and get the result from the analysis - // that checks for a valid function that the analysis may have run over. - if (!llvm::any_of( - M, [](const Function &F) { return canInstrumentWithSancov(F); })) - return false; - - // Emit the init calls. 
- if (Options.TracePCGuard) - Ctor = CreateInitCallsForSections(M, SanCovModuleCtorTracePcGuardName, - SanCovTracePCGuardInitName, Int32PtrTy, - SanCovGuardsSectionName); - if (Options.Inline8bitCounters) - Ctor = CreateInitCallsForSections(M, SanCovModuleCtor8bitCountersName, - SanCov8bitCountersInitName, Int8PtrTy, - SanCovCountersSectionName); - if (Ctor && Options.PCTable) { - auto SecStartEnd = - CreateSecStartEnd(M, SanCovPCsSectionName, IntptrPtrTy); - FunctionCallee InitFunction = declareSanitizerInitFunction( - M, SanCovPCsInitName, {IntptrPtrTy, IntptrPtrTy}); - IRBuilder<> IRBCtor(Ctor->getEntryBlock().getTerminator()); - IRBCtor.CreateCall(InitFunction, {SecStartEnd.first, SecStartEnd.second}); - } - return Ctor; + SanitizerCoverageModule( + const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()) + : ModulePass(ID), Options(OverrideFromCL(Options)) { + initializeSanitizerCoverageModulePass(*PassRegistry::getPassRegistry()); } + bool runOnModule(Module &M) override; + bool runOnFunction(Function &F); + static char ID; // Pass identification, replacement for typeid + StringRef getPassName() const override { return "SanitizerCoverageModule"; } -private: - Function *CreateInitCallsForSections(Module &M, const char *CtorName, - const char *InitFunctionName, Type *Ty, - const char *Section); - std::pair CreateSecStartEnd(Module &M, const char *Section, - Type *Ty); - std::string getSectionStart(const std::string &Section) const { - if (TargetTriple.isOSBinFormatMachO()) - return "\1section$start$__DATA$__" + Section; - return "__start___" + Section; - } - std::string getSectionEnd(const std::string &Section) const { - if (TargetTriple.isOSBinFormatMachO()) - return "\1section$end$__DATA$__" + Section; - return "__stop___" + Section; - } - - SanitizerCoverageOptions Options; - Triple TargetTriple; - Type *IntptrTy, *Int8PtrTy, *Int8Ty; -}; - -class ModuleSanitizerCoverageLegacyPass : public ModulePass { -public: - static char ID; - - 
ModuleSanitizerCoverageLegacyPass( - SanitizerCoverageOptions Options = SanitizerCoverageOptions()) - : ModulePass(ID), Options(Options) { - initializeModuleSanitizerCoverageLegacyPassPass( - *PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override { - ModuleSanitizerCoverage ModuleSancov(Options); - return ModuleSancov.instrumentModule(M); - }; - - StringRef getPassName() const override { - return "ModuleSanitizerCoverageLegacyPass"; - } - -private: - SanitizerCoverageOptions Options; -}; - -char ModuleSanitizerCoverageLegacyPass::ID = 0; - -class SanitizerCoverage { -public: - SanitizerCoverage(Function &F, const SanitizerCoverageOptions &Options) - : CurModule(F.getParent()), Options(OverrideFromCL(Options)) { - initializeModule(*F.getParent()); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); } - ~SanitizerCoverage() { finalizeModule(*CurModule); } - - bool instrumentFunction(Function &F, const DominatorTree *DT, - const PostDominatorTree *PDT); - private: - void initializeModule(Module &M); - void finalizeModule(Module &M); void InjectCoverageForIndirectCalls(Function &F, ArrayRef IndirCalls); void InjectTraceForCmp(Function &F, ArrayRef CmpTraceTargets); @@ -347,6 +212,11 @@ class SanitizerCoverage { void CreateFunctionLocalArrays(Function &F, ArrayRef AllBlocks); void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx, bool IsLeafFunc = true); + Function *CreateInitCallsForSections(Module &M, const char *CtorName, + const char *InitFunctionName, Type *Ty, + const char *Section); + std::pair CreateSecStartEnd(Module &M, const char *Section, + Type *Ty); void SetNoSanitizeMetadata(Instruction *I) { I->setMetadata(I->getModule()->getMDKindID("nosanitize"), @@ -354,6 +224,8 @@ class SanitizerCoverage { } std::string getSectionName(const std::string &Section) const; + std::string getSectionStart(const std::string &Section) const; + std::string getSectionEnd(const std::string 
&Section) const; FunctionCallee SanCovTracePCIndir; FunctionCallee SanCovTracePC, SanCovTracePCGuard; FunctionCallee SanCovTraceCmpFunction[4]; @@ -380,63 +252,10 @@ class SanitizerCoverage { SanitizerCoverageOptions Options; }; -class SanitizerCoverageLegacyPass : public FunctionPass { -public: - static char ID; // Pass identification, replacement for typeid - - SanitizerCoverageLegacyPass( - SanitizerCoverageOptions Options = SanitizerCoverageOptions()) - : FunctionPass(ID), Options(Options) { - initializeSanitizerCoverageLegacyPassPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override { - const DominatorTree *DT = - &getAnalysis().getDomTree(); - const PostDominatorTree *PDT = - &getAnalysis().getPostDomTree(); - SanitizerCoverage Sancov(F, Options); - return Sancov.instrumentFunction(F, DT, PDT); - } - - StringRef getPassName() const override { - return "SanitizerCoverageLegacyPass"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - // Make the module sancov pass required by this pass so that it runs when - // -sancov is passed. 
- AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - } - -private: - SanitizerCoverageOptions Options; -}; - } // namespace -PreservedAnalyses SanitizerCoveragePass::run(Function &F, - FunctionAnalysisManager &AM) { - const DominatorTree *DT = &AM.getResult(F); - const PostDominatorTree *PDT = &AM.getResult(F); - SanitizerCoverage Sancov(F, Options); - if (Sancov.instrumentFunction(F, DT, PDT)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); -} - -PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &M, - ModuleAnalysisManager &AM) { - ModuleSanitizerCoverage ModuleSancov(Options); - if (ModuleSancov.instrumentModule(M)) - return PreservedAnalyses::none(); - return PreservedAnalyses::all(); -} - std::pair -ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section, +SanitizerCoverageModule::CreateSecStartEnd(Module &M, const char *Section, Type *Ty) { GlobalVariable *SecStart = new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr, @@ -446,7 +265,6 @@ ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section, new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr, getSectionEnd(Section)); SecEnd->setVisibility(GlobalValue::HiddenVisibility); - IRBuilder<> IRB(M.getContext()); Value *SecEndPtr = IRB.CreatePointerCast(SecEnd, Ty); if (!TargetTriple.isOSBinFormatCOFF()) @@ -460,7 +278,7 @@ ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section, return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEndPtr); } -Function *ModuleSanitizerCoverage::CreateInitCallsForSections( +Function *SanitizerCoverageModule::CreateInitCallsForSections( Module &M, const char *CtorName, const char *InitFunctionName, Type *Ty, const char *Section) { auto SecStartEnd = CreateSecStartEnd(M, Section, Ty); @@ -492,11 +310,12 @@ Function *ModuleSanitizerCoverage::CreateInitCallsForSections( return CtorFunc; } -void SanitizerCoverage::initializeModule(Module &M) { 
+bool SanitizerCoverageModule::runOnModule(Module &M) { if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) - return; + return false; C = &(M.getContext()); DL = &M.getDataLayout(); + CurModule = &M; CurModuleUniqueId = getUniqueModuleId(CurModule); TargetTriple = Triple(M.getTargetTriple()); FunctionGuardArray = nullptr; @@ -564,6 +383,11 @@ void SanitizerCoverage::initializeModule(Module &M) { Constant *SanCovLowestStackConstant = M.getOrInsertGlobal(SanCovLowestStackName, IntptrTy); SanCovLowestStack = dyn_cast(SanCovLowestStackConstant); + if (!SanCovLowestStack) { + C->emitError(StringRef("'") + SanCovLowestStackName + + "' should not be declared by the user"); + return true; + } SanCovLowestStack->setThreadLocalMode( GlobalValue::ThreadLocalMode::InitialExecTLSModel); if (Options.StackDepth && !SanCovLowestStack->isDeclaration()) @@ -577,14 +401,33 @@ void SanitizerCoverage::initializeModule(Module &M) { SanCovTracePC = M.getOrInsertFunction(SanCovTracePCName, VoidTy); SanCovTracePCGuard = M.getOrInsertFunction(SanCovTracePCGuardName, VoidTy, Int32PtrTy); -} -void SanitizerCoverage::finalizeModule(Module &M) { + for (auto &F : M) + runOnFunction(F); + + Function *Ctor = nullptr; + + if (FunctionGuardArray) + Ctor = CreateInitCallsForSections(M, SanCovModuleCtorTracePcGuardName, + SanCovTracePCGuardInitName, Int32PtrTy, + SanCovGuardsSectionName); + if (Function8bitCounterArray) + Ctor = CreateInitCallsForSections(M, SanCovModuleCtor8bitCountersName, + SanCov8bitCountersInitName, Int8PtrTy, + SanCovCountersSectionName); + if (Ctor && Options.PCTable) { + auto SecStartEnd = CreateSecStartEnd(M, SanCovPCsSectionName, IntptrPtrTy); + FunctionCallee InitFunction = declareSanitizerInitFunction( + M, SanCovPCsInitName, {IntptrPtrTy, IntptrPtrTy}); + IRBuilder<> IRBCtor(Ctor->getEntryBlock().getTerminator()); + IRBCtor.CreateCall(InitFunction, {SecStartEnd.first, SecStartEnd.second}); + } // We don't reference these arrays directly in any of our runtime 
functions, // so we need to prevent them from being dead stripped. if (TargetTriple.isOSBinFormatMachO()) appendToUsed(M, GlobalsToAppendToUsed); appendToCompilerUsed(M, GlobalsToAppendToCompilerUsed); + return true; } // True if block has successors and it dominates all of them. @@ -675,11 +518,28 @@ static bool IsInterestingCmp(ICmpInst *CMP, const DominatorTree *DT, return true; } -bool SanitizerCoverage::instrumentFunction(Function &F, const DominatorTree *DT, - const PostDominatorTree *PDT) { - if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) +bool SanitizerCoverageModule::runOnFunction(Function &F) { + if (F.empty()) + return false; + if (F.getName().find(".module_ctor") != std::string::npos) + return false; // Should not instrument sanitizer init functions. + if (F.getName().startswith("__sanitizer_")) + return false; // Don't instrument __sanitizer_* callbacks. + // Don't touch available_externally functions, their actual body is elewhere. + if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) + return false; + // Don't instrument MSVC CRT configuration helpers. They may run before normal + // initialization. + if (F.getName() == "__local_stdio_printf_options" || + F.getName() == "__local_stdio_scanf_options") + return false; + if (isa(F.getEntryBlock().getTerminator())) return false; - if (!canInstrumentWithSancov(F)) + // Don't instrument functions using SEH for now. Splitting basic blocks like + // we do for coverage breaks WinEHPrepare. + // FIXME: Remove this when SEH no longer uses landingpad pattern matching. 
+ if (F.hasPersonalityFn() && + isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) return false; if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge) SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests()); @@ -690,6 +550,10 @@ bool SanitizerCoverage::instrumentFunction(Function &F, const DominatorTree *DT, SmallVector DivTraceTargets; SmallVector GepTraceTargets; + const DominatorTree *DT = + &getAnalysis(F).getDomTree(); + const PostDominatorTree *PDT = + &getAnalysis(F).getPostDomTree(); bool IsLeafFunc = true; for (auto &BB : F) { @@ -732,7 +596,7 @@ bool SanitizerCoverage::instrumentFunction(Function &F, const DominatorTree *DT, return true; } -GlobalVariable *SanitizerCoverage::CreateFunctionLocalArrayInSection( +GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection( size_t NumElements, Function &F, Type *Ty, const char *Section) { ArrayType *ArrayTy = ArrayType::get(Ty, NumElements); auto Array = new GlobalVariable( @@ -755,8 +619,8 @@ GlobalVariable *SanitizerCoverage::CreateFunctionLocalArrayInSection( } GlobalVariable * -SanitizerCoverage::CreatePCArray(Function &F, - ArrayRef AllBlocks) { +SanitizerCoverageModule::CreatePCArray(Function &F, + ArrayRef AllBlocks) { size_t N = AllBlocks.size(); assert(N); SmallVector PCs; @@ -782,7 +646,7 @@ SanitizerCoverage::CreatePCArray(Function &F, return PCArray; } -void SanitizerCoverage::CreateFunctionLocalArrays( +void SanitizerCoverageModule::CreateFunctionLocalArrays( Function &F, ArrayRef AllBlocks) { if (Options.TracePCGuard) FunctionGuardArray = CreateFunctionLocalArrayInSection( @@ -796,9 +660,9 @@ void SanitizerCoverage::CreateFunctionLocalArrays( FunctionPCsArray = CreatePCArray(F, AllBlocks); } -bool SanitizerCoverage::InjectCoverage(Function &F, - ArrayRef AllBlocks, - bool IsLeafFunc) { +bool SanitizerCoverageModule::InjectCoverage(Function &F, + ArrayRef AllBlocks, + bool IsLeafFunc) { if (AllBlocks.empty()) return 
false; CreateFunctionLocalArrays(F, AllBlocks); for (size_t i = 0, N = AllBlocks.size(); i < N; i++) @@ -813,7 +677,7 @@ bool SanitizerCoverage::InjectCoverage(Function &F, // The cache is used to speed up recording the caller-callee pairs. // The address of the caller is passed implicitly via caller PC. // CacheSize is encoded in the name of the run-time function. -void SanitizerCoverage::InjectCoverageForIndirectCalls( +void SanitizerCoverageModule::InjectCoverageForIndirectCalls( Function &F, ArrayRef IndirCalls) { if (IndirCalls.empty()) return; @@ -832,7 +696,7 @@ void SanitizerCoverage::InjectCoverageForIndirectCalls( // __sanitizer_cov_trace_switch(CondValue, // {NumCases, ValueSizeInBits, Case0Value, Case1Value, Case2Value, ... }) -void SanitizerCoverage::InjectTraceForSwitch( +void SanitizerCoverageModule::InjectTraceForSwitch( Function &, ArrayRef SwitchTraceTargets) { for (auto I : SwitchTraceTargets) { if (SwitchInst *SI = dyn_cast(I)) { @@ -871,7 +735,7 @@ void SanitizerCoverage::InjectTraceForSwitch( } } -void SanitizerCoverage::InjectTraceForDiv( +void SanitizerCoverageModule::InjectTraceForDiv( Function &, ArrayRef DivTraceTargets) { for (auto BO : DivTraceTargets) { IRBuilder<> IRB(BO); @@ -889,7 +753,7 @@ void SanitizerCoverage::InjectTraceForDiv( } } -void SanitizerCoverage::InjectTraceForGep( +void SanitizerCoverageModule::InjectTraceForGep( Function &, ArrayRef GepTraceTargets) { for (auto GEP : GepTraceTargets) { IRBuilder<> IRB(GEP); @@ -900,7 +764,7 @@ void SanitizerCoverage::InjectTraceForGep( } } -void SanitizerCoverage::InjectTraceForCmp( +void SanitizerCoverageModule::InjectTraceForCmp( Function &, ArrayRef CmpTraceTargets) { for (auto I : CmpTraceTargets) { if (ICmpInst *ICMP = dyn_cast(I)) { @@ -935,8 +799,9 @@ void SanitizerCoverage::InjectTraceForCmp( } } -void SanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, - size_t Idx, bool IsLeafFunc) { +void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, 
BasicBlock &BB, + size_t Idx, + bool IsLeafFunc) { BasicBlock::iterator IP = BB.getFirstInsertionPt(); bool IsEntryBB = &BB == &F.getEntryBlock(); DebugLoc EntryLoc; @@ -993,7 +858,7 @@ void SanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } std::string -SanitizerCoverage::getSectionName(const std::string &Section) const { +SanitizerCoverageModule::getSectionName(const std::string &Section) const { if (TargetTriple.isOSBinFormatCOFF()) { if (Section == SanCovCountersSectionName) return ".SCOV$CM"; @@ -1006,25 +871,33 @@ SanitizerCoverage::getSectionName(const std::string &Section) const { return "__" + Section; } -INITIALIZE_PASS(ModuleSanitizerCoverageLegacyPass, "module-sancov", - "Pass for inserting sancov top-level initialization calls", - false, false) +std::string +SanitizerCoverageModule::getSectionStart(const std::string &Section) const { + if (TargetTriple.isOSBinFormatMachO()) + return "\1section$start$__DATA$__" + Section; + return "__start___" + Section; +} + +std::string +SanitizerCoverageModule::getSectionEnd(const std::string &Section) const { + if (TargetTriple.isOSBinFormatMachO()) + return "\1section$end$__DATA$__" + Section; + return "__stop___" + Section; +} + -char SanitizerCoverageLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(SanitizerCoverageLegacyPass, "sancov", - "Pass for instrumenting coverage on functions", false, - false) -INITIALIZE_PASS_DEPENDENCY(ModuleSanitizerCoverageLegacyPass) +char SanitizerCoverageModule::ID = 0; +INITIALIZE_PASS_BEGIN(SanitizerCoverageModule, "sancov", + "SanitizerCoverage: TODO." 
+ "ModulePass", + false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) -INITIALIZE_PASS_END(SanitizerCoverageLegacyPass, "sancov", - "Pass for instrumenting coverage on functions", false, - false) -FunctionPass *llvm::createSanitizerCoverageLegacyPassPass( - const SanitizerCoverageOptions &Options) { - return new SanitizerCoverageLegacyPass(Options); -} -ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass( +INITIALIZE_PASS_END(SanitizerCoverageModule, "sancov", + "SanitizerCoverage: TODO." + "ModulePass", + false, false) +ModulePass *llvm::createSanitizerCoverageModulePass( const SanitizerCoverageOptions &Options) { - return new ModuleSanitizerCoverageLegacyPass(Options); + return new SanitizerCoverageModule(Options); } diff --git a/llvm/test/Instrumentation/SanitizerCoverage/abort-in-entry-block.ll b/llvm/test/Instrumentation/SanitizerCoverage/abort-in-entry-block.ll index 5711669240c6f..9bc8acef481d7 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/abort-in-entry-block.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/abort-in-entry-block.ll @@ -1,6 +1,5 @@ ; Checks that a function with no-return in the entry block is not instrumented. 
; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s ; CHECK-NOT: call void @__sanitizer_cov_trace_pc_guard target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Instrumentation/SanitizerCoverage/backedge-pruning.ll b/llvm/test/Instrumentation/SanitizerCoverage/backedge-pruning.ll index 5e9e579e17120..103198311279b 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/backedge-pruning.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/backedge-pruning.ll @@ -1,8 +1,6 @@ ; Test -sanitizer-coverage-trace-compares=1 and how it prunes backedge compares. ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1 -sanitizer-coverage-prune-blocks=1 -S | FileCheck %s --check-prefix=PRUNE ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1 -sanitizer-coverage-prune-blocks=0 -S | FileCheck %s --check-prefix=NOPRUNE -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1 -sanitizer-coverage-prune-blocks=1 -S | FileCheck %s --check-prefix=PRUNE -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1 -sanitizer-coverage-prune-blocks=0 -S | FileCheck %s --check-prefix=NOPRUNE target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/SanitizerCoverage/chains.ll b/llvm/test/Instrumentation/SanitizerCoverage/chains.ll index 7618267069ada..86b109165ee5c 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/chains.ll +++ 
b/llvm/test/Instrumentation/SanitizerCoverage/chains.ll @@ -1,5 +1,4 @@ ; RUN: opt < %s -sancov -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc -sanitizer-coverage-prune-blocks=1 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc -sanitizer-coverage-prune-blocks=1 -S | FileCheck %s define i32 @blah(i32) #0 { %2 = icmp sgt i32 %0, 1 diff --git a/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_32.ll b/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_32.ll index 976bc77beb6f2..0f42756fdcb90 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_32.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_32.ll @@ -1,6 +1,5 @@ ; Test -sanitizer-coverage-trace-compares=1 API declarations on a non-x86_64 arch ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1 -S | FileCheck %s target triple = "i386-unknown-linux-gnu" define i32 @foo() #0 { @@ -8,16 +7,16 @@ entry: ret i32 0 } -; CHECK-DAG: declare void @__sanitizer_cov_trace_pc_indir(i64) -; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp1(i8, i8) -; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp2(i16, i16) -; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp4(i32, i32) -; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp8(i64, i64) -; CHECK-DAG: declare void @__sanitizer_cov_trace_div4(i32) -; CHECK-DAG: declare void @__sanitizer_cov_trace_div8(i64) -; CHECK-DAG: declare void @__sanitizer_cov_trace_gep(i64) -; CHECK-DAG: declare void @__sanitizer_cov_trace_switch(i64, i64*) -; CHECK-DAG: declare void @__sanitizer_cov_trace_pc() -; CHECK-DAG: declare void @__sanitizer_cov_trace_pc_guard(i32*) -; CHECK-DAG: declare void 
@__sanitizer_cov_trace_pc_guard_init(i32*, i32*) +; CHECK: declare void @__sanitizer_cov_trace_pc_indir(i64) +; CHECK: declare void @__sanitizer_cov_trace_cmp1(i8, i8) +; CHECK: declare void @__sanitizer_cov_trace_cmp2(i16, i16) +; CHECK: declare void @__sanitizer_cov_trace_cmp4(i32, i32) +; CHECK: declare void @__sanitizer_cov_trace_cmp8(i64, i64) +; CHECK: declare void @__sanitizer_cov_trace_div4(i32) +; CHECK: declare void @__sanitizer_cov_trace_div8(i64) +; CHECK: declare void @__sanitizer_cov_trace_gep(i64) +; CHECK: declare void @__sanitizer_cov_trace_switch(i64, i64*) +; CHECK: declare void @__sanitizer_cov_trace_pc() +; CHECK: declare void @__sanitizer_cov_trace_pc_guard(i32*) +; CHECK: declare void @__sanitizer_cov_trace_pc_guard_init(i32*, i32*) ; CHECK-NOT: declare diff --git a/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_64.ll b/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_64.ll index 165bf744432b5..16689f9831d8e 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_64.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_64.ll @@ -1,6 +1,5 @@ ; Test -sanitizer-coverage-trace-compares=1 API declarations on x86_64 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1 -S | FileCheck %s target triple = "x86_64-unknown-linux-gnu" define i32 @foo() #0 { @@ -8,16 +7,16 @@ entry: ret i32 0 } -; CHECK-DAG: declare void @__sanitizer_cov_trace_pc_indir(i64) -; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp1(i8 zeroext, i8 zeroext) -; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp2(i16 zeroext, i16 zeroext) -; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp4(i32 zeroext, i32 zeroext) -; CHECK-DAG: declare void @__sanitizer_cov_trace_cmp8(i64, i64) -; CHECK-DAG: declare void 
@__sanitizer_cov_trace_div4(i32 zeroext) -; CHECK-DAG: declare void @__sanitizer_cov_trace_div8(i64) -; CHECK-DAG: declare void @__sanitizer_cov_trace_gep(i64) -; CHECK-DAG: declare void @__sanitizer_cov_trace_switch(i64, i64*) -; CHECK-DAG: declare void @__sanitizer_cov_trace_pc() -; CHECK-DAG: declare void @__sanitizer_cov_trace_pc_guard(i32*) -; CHECK-DAG: declare void @__sanitizer_cov_trace_pc_guard_init(i32*, i32*) +; CHECK: declare void @__sanitizer_cov_trace_pc_indir(i64) +; CHECK: declare void @__sanitizer_cov_trace_cmp1(i8 zeroext, i8 zeroext) +; CHECK: declare void @__sanitizer_cov_trace_cmp2(i16 zeroext, i16 zeroext) +; CHECK: declare void @__sanitizer_cov_trace_cmp4(i32 zeroext, i32 zeroext) +; CHECK: declare void @__sanitizer_cov_trace_cmp8(i64, i64) +; CHECK: declare void @__sanitizer_cov_trace_div4(i32 zeroext) +; CHECK: declare void @__sanitizer_cov_trace_div8(i64) +; CHECK: declare void @__sanitizer_cov_trace_gep(i64) +; CHECK: declare void @__sanitizer_cov_trace_switch(i64, i64*) +; CHECK: declare void @__sanitizer_cov_trace_pc() +; CHECK: declare void @__sanitizer_cov_trace_pc_guard(i32*) +; CHECK: declare void @__sanitizer_cov_trace_pc_guard_init(i32*, i32*) ; CHECK-NOT: declare diff --git a/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing.ll b/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing.ll index c599806454d82..fda6f251bc847 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing.ll @@ -1,6 +1,5 @@ ; Test -sanitizer-coverage-trace-compares=1 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1 -S | FileCheck %s target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/SanitizerCoverage/coff-comdat.ll b/llvm/test/Instrumentation/SanitizerCoverage/coff-comdat.ll index d6019cb50a4f5..61a9dcd92de21 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/coff-comdat.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/coff-comdat.ll @@ -1,5 +1,4 @@ ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1 -sanitizer-coverage-pc-table=1 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1 -sanitizer-coverage-pc-table=1 -S | FileCheck %s ; Make sure we use the right comdat groups for COFF to avoid relocations ; against discarded sections. Internal linkage functions are also different from diff --git a/llvm/test/Instrumentation/SanitizerCoverage/coff-pc-table-inline-8bit-counters.ll b/llvm/test/Instrumentation/SanitizerCoverage/coff-pc-table-inline-8bit-counters.ll index 31a2dd39c6ddc..d81d480009be4 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/coff-pc-table-inline-8bit-counters.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/coff-pc-table-inline-8bit-counters.ll @@ -1,6 +1,5 @@ ; Checks that the PC and 8-bit Counter Arrays are placed in their own sections in COFF binaries. 
; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1 -sanitizer-coverage-pc-table=1 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1 -sanitizer-coverage-pc-table=1 -S | FileCheck %s target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc19.14.26433" diff --git a/llvm/test/Instrumentation/SanitizerCoverage/coff-used-ctor.ll b/llvm/test/Instrumentation/SanitizerCoverage/coff-used-ctor.ll index f412d2f237d21..fd12eed8e3669 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/coff-used-ctor.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/coff-used-ctor.ll @@ -1,6 +1,5 @@ ; Checks that sancov.module_ctor is marked used. ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1 -sanitizer-coverage-pc-table=1 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1 -sanitizer-coverage-pc-table=1 -S | FileCheck %s target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc19.14.26433" @@ -9,4 +8,4 @@ entry: ret void } -; CHECK: @llvm.used = appending global {{.*}} @sancov.module_ctor +; CHECK: @llvm.used = appending global {{.*}} @sancov.module_ctor \ No newline at end of file diff --git a/llvm/test/Instrumentation/SanitizerCoverage/const-cmp-tracing.ll b/llvm/test/Instrumentation/SanitizerCoverage/const-cmp-tracing.ll index 0ee1a339ee521..b61b4eef5df1d 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/const-cmp-tracing.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/const-cmp-tracing.ll @@ -1,6 +1,5 @@ ; Test -sanitizer-coverage-trace-compares=1 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1 -S | FileCheck %s -; RUN: opt < %s 
-passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll b/llvm/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll index 03be088bfd82d..09e23372533f6 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll @@ -1,7 +1,6 @@ ; Test that coverage instrumentation does not lose debug location. ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -S | FileCheck %s ; C++ source: ; 1: struct A { diff --git a/llvm/test/Instrumentation/SanitizerCoverage/coverage.ll b/llvm/test/Instrumentation/SanitizerCoverage/coverage.ll index 1e8c69827539b..7b6b5f00442fe 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/coverage.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/coverage.ll @@ -1,8 +1,6 @@ ; RUN: opt < %s -sancov -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc -S | FileCheck %s --check-prefix=CHECK_TRACE_PC -; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-prune-blocks=1 -S | FileCheck %s --check-prefix=CHECKPRUNE -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc -S | FileCheck %s --check-prefix=CHECK_TRACE_PC -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-prune-blocks=1 -S | FileCheck %s --check-prefix=CHECKPRUNE +; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-prune-blocks=1 -S | FileCheck %s 
--check-prefix=CHECKPRUNE target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/SanitizerCoverage/coverage2-dbg.ll b/llvm/test/Instrumentation/SanitizerCoverage/coverage2-dbg.ll index 428a7d735c5ec..508657a597645 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/coverage2-dbg.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/coverage2-dbg.ll @@ -1,7 +1,6 @@ ; Test that coverage instrumentation does not lose debug location. ; RUN: opt < %s -sancov -sanitizer-coverage-level=2 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=2 -S | FileCheck %s ; C++ source: ; 1: void foo(int *a) { diff --git a/llvm/test/Instrumentation/SanitizerCoverage/div-tracing.ll b/llvm/test/Instrumentation/SanitizerCoverage/div-tracing.ll index e52366707b7f5..0de2ddf68e759 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/div-tracing.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/div-tracing.ll @@ -1,6 +1,5 @@ ; Test -sanitizer-coverage-trace-divs=1 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-divs=1 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-divs=1 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/SanitizerCoverage/gep-tracing.ll b/llvm/test/Instrumentation/SanitizerCoverage/gep-tracing.ll index 924c2fe3eb3a5..ac6af4b37202e 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/gep-tracing.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/gep-tracing.ll @@ -1,6 +1,5 @@ ; 
Test -sanitizer-coverage-trace-geps=1 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-geps=1 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-geps=1 -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll b/llvm/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll index d4e30aab54eab..88141678b6cd4 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll @@ -1,6 +1,5 @@ ; Test -sanitizer-coverage-inline-8bit-counters=1 ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/SanitizerCoverage/interposable-symbol-nocomdat.ll b/llvm/test/Instrumentation/SanitizerCoverage/interposable-symbol-nocomdat.ll index e73f1516fa103..c79a2fb5fff0d 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/interposable-symbol-nocomdat.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/interposable-symbol-nocomdat.ll @@ -1,8 +1,6 @@ ; Test that interposable symbols do not get put in comdats. 
; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -mtriple x86_64-linux-gnu -S | FileCheck %s ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -mtriple x86_64-windows-msvc -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -mtriple x86_64-linux-gnu -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -mtriple x86_64-windows-msvc -S | FileCheck %s define void @Vanilla() { entry: diff --git a/llvm/test/Instrumentation/SanitizerCoverage/no-func.ll b/llvm/test/Instrumentation/SanitizerCoverage/no-func.ll index 683238c33c81e..ec9e121439176 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/no-func.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/no-func.ll @@ -1,7 +1,6 @@ ; Tests that we don't insert __sanitizer_cov_trace_pc_guard_init or some such ; when there is no instrumentation. 
; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/SanitizerCoverage/pc-table.ll b/llvm/test/Instrumentation/SanitizerCoverage/pc-table.ll index f8e2a3015ba70..888277a4c5099 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/pc-table.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/pc-table.ll @@ -1,8 +1,6 @@ ; Test -sanitizer-coverage-pc-table=1 ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -sanitizer-coverage-pc-table=1 -S | FileCheck %s ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-inline-8bit-counters -sanitizer-coverage-pc-table=1 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -sanitizer-coverage-pc-table=1 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-inline-8bit-counters -sanitizer-coverage-pc-table=1 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/SanitizerCoverage/postdominator_check.ll b/llvm/test/Instrumentation/SanitizerCoverage/postdominator_check.ll index ebcf3b276c9fb..c50d663eff825 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/postdominator_check.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/postdominator_check.ll @@ -1,7 +1,5 @@ ; RUN: opt < %s -sancov -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc 
-sanitizer-coverage-prune-blocks=1 -S | FileCheck %s ; RUN: opt < %s -sancov -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc -sanitizer-coverage-prune-blocks=0 -S | FileCheck %s --check-prefix=CHECK_NO_PRUNE -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc -sanitizer-coverage-prune-blocks=1 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc -sanitizer-coverage-prune-blocks=0 -S | FileCheck %s --check-prefix=CHECK_NO_PRUNE define i32 @foo(i32) #0 { %2 = icmp sgt i32 %0, 0 diff --git a/llvm/test/Instrumentation/SanitizerCoverage/seh.ll b/llvm/test/Instrumentation/SanitizerCoverage/seh.ll index b45f1e02ddd70..94d1a2e9acdfb 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/seh.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/seh.ll @@ -1,9 +1,6 @@ ; RUN: opt < %s -sancov -sanitizer-coverage-level=0 -S | FileCheck %s ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -S | FileCheck %s ; RUN: opt < %s -sancov -sanitizer-coverage-level=2 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=0 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=2 -S | FileCheck %s target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i686-pc-windows-msvc18.0.0" diff --git a/llvm/test/Instrumentation/SanitizerCoverage/stack-depth-variable-declared-by-user.ll b/llvm/test/Instrumentation/SanitizerCoverage/stack-depth-variable-declared-by-user.ll index ff14bff1b7f9c..1ad96f82a694a 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/stack-depth-variable-declared-by-user.ll +++ 
b/llvm/test/Instrumentation/SanitizerCoverage/stack-depth-variable-declared-by-user.ll @@ -2,8 +2,6 @@ ; user declares `__sancov_lowest_stack` with an unexpected type. ; RUN: not opt < %s -sancov -sanitizer-coverage-level=1 \ ; RUN: -sanitizer-coverage-stack-depth -S 2>&1 | FileCheck %s -; RUN: not opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 \ -; RUN: -sanitizer-coverage-stack-depth -S 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/SanitizerCoverage/stack-depth.ll b/llvm/test/Instrumentation/SanitizerCoverage/stack-depth.ll index 9deb2f04d789c..0c6db1a922ada 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/stack-depth.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/stack-depth.ll @@ -4,11 +4,6 @@ ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 \ ; RUN: -sanitizer-coverage-stack-depth -sanitizer-coverage-trace-pc-guard \ ; RUN: -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 \ -; RUN: -sanitizer-coverage-stack-depth -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 \ -; RUN: -sanitizer-coverage-stack-depth -sanitizer-coverage-trace-pc-guard \ -; RUN: -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/SanitizerCoverage/switch-tracing.ll b/llvm/test/Instrumentation/SanitizerCoverage/switch-tracing.ll index b109d80ff68d0..debb825db1bce 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/switch-tracing.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/switch-tracing.ll @@ -1,6 +1,5 @@ ; Test -sanitizer-coverage-trace-compares=1 (instrumenting a switch) ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1 -S | 
FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-comdat.ll b/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-comdat.ll index e6633e1c3ff6a..970ee0d3ac268 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-comdat.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-comdat.ll @@ -1,5 +1,4 @@ ; RUN: opt < %s -sancov -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s --check-prefix=CHECK_TRACE_PC_GUARD -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s --check-prefix=CHECK_TRACE_PC_GUARD target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-inline-8bit-counters.ll b/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-inline-8bit-counters.ll index 06cd192a8296e..d5c9ff451ab6f 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-inline-8bit-counters.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-inline-8bit-counters.ll @@ -1,5 +1,4 @@ ; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-trace-pc-guard -sanitizer-coverage-inline-8bit-counters -S | FileCheck %s -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-pc-guard 
-sanitizer-coverage-inline-8bit-counters -S | FileCheck %s ; Module ctors should have stable names across modules, not something like ; @sancov.module_ctor.3 that may cause duplicate ctors after linked together. diff --git a/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-nocomdat.ll b/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-nocomdat.ll index 006c662f1440f..1fe1886975e94 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-nocomdat.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/trace-pc-guard-nocomdat.ll @@ -1,5 +1,4 @@ ; RUN: opt < %s -sancov -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s --check-prefix=CHECK_TRACE_PC_GUARD -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=4 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s --check-prefix=CHECK_TRACE_PC_GUARD target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" diff --git a/llvm/test/Instrumentation/SanitizerCoverage/tracing-comdat.ll b/llvm/test/Instrumentation/SanitizerCoverage/tracing-comdat.ll index 012a19ba17f1d..baf4dc1e140e1 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/tracing-comdat.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/tracing-comdat.ll @@ -2,9 +2,6 @@ ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s ; Make sure asan does not instrument __sancov_gen_ ; RUN: opt < %s -sancov -asan -asan-module -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s - -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s -; RUN: opt < %s -passes='module(require,sancov-module,asan-module),function(sancov-func,asan)' 
-sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" $Foo = comdat any diff --git a/llvm/test/Instrumentation/SanitizerCoverage/tracing.ll b/llvm/test/Instrumentation/SanitizerCoverage/tracing.ll index c27fd0eac0bad..7bf8cf7e18e6e 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/tracing.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/tracing.ll @@ -3,10 +3,6 @@ ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s --check-prefix=CHECK_PC_GUARD ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S -mtriple=x86_64-apple-macosx | FileCheck %s --check-prefix=CHECK_PC_GUARD_DARWIN -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc -S | FileCheck %s --check-prefix=CHECK_PC -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s --check-prefix=CHECK_PC_GUARD -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S -mtriple=x86_64-apple-macosx | FileCheck %s --check-prefix=CHECK_PC_GUARD_DARWIN - target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" define void @foo(i32* %a) sanitize_address { diff --git a/llvm/test/Instrumentation/SanitizerCoverage/unreachable-critedge.ll b/llvm/test/Instrumentation/SanitizerCoverage/unreachable-critedge.ll index e3e31086ccb82..ad6cd574d7e0e 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/unreachable-critedge.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/unreachable-critedge.ll @@ -1,5 +1,4 @@ ; 
RUN: opt < %s -S -sancov -sanitizer-coverage-level=3 | FileCheck %s -; RUN: opt < %s -S -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 | FileCheck %s ; The critical edges to unreachable_bb should not be split. define i32 @foo(i32 %c, i32 %d) { diff --git a/llvm/test/Instrumentation/SanitizerCoverage/wineh.ll b/llvm/test/Instrumentation/SanitizerCoverage/wineh.ll index 350242bb3016d..87b44be5544f3 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/wineh.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/wineh.ll @@ -1,5 +1,4 @@ ; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc -S | FileCheck %s --check-prefix=CHECK -; RUN: opt < %s -passes='module(sancov-module),function(sancov-func)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc -S | FileCheck %s --check-prefix=CHECK ; Generated from this C++ source: ; $ clang -O2 t.cpp -S -emit-llvm From 93dfb93ad68cf2729701d0c3ee66af44076e9f17 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 15 Jul 2019 23:36:02 +0000 Subject: [PATCH 186/451] Temporarily Revert "[SLP] Recommit: Look-ahead operand reordering heuristic." As there are some reported miscompiles with AVX512 and performance regressions in Eigen. Verified with the original committer and testcases will be forthcoming. This reverts commit r364964. 
llvm-svn: 366154 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 294 +++--------------- .../Transforms/SLPVectorizer/X86/lookahead.ll | 208 ++----------- 2 files changed, 79 insertions(+), 423 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 8cd948ee55190..27a86c0bca914 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -147,20 +147,6 @@ static cl::opt MinTreeSize( "slp-min-tree-size", cl::init(3), cl::Hidden, cl::desc("Only vectorize small trees if they are fully vectorizable")); -// The maximum depth that the look-ahead score heuristic will explore. -// The higher this value, the higher the compilation time overhead. -static cl::opt LookAheadMaxDepth( - "slp-max-look-ahead-depth", cl::init(2), cl::Hidden, - cl::desc("The maximum look-ahead depth for operand reordering scores")); - -// The Look-ahead heuristic goes through the users of the bundle to calculate -// the users cost in getExternalUsesCost(). To avoid compilation time increase -// we limit the number of users visited to this value. -static cl::opt LookAheadUsersBudget( - "slp-look-ahead-users-budget", cl::init(2), cl::Hidden, - cl::desc("The maximum number of users to visit while visiting the " - "predecessors. This prevents compilation time increase.")); - static cl::opt ViewSLPTree("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz")); @@ -722,7 +708,6 @@ class BoUpSLP { const DataLayout &DL; ScalarEvolution &SE; - const BoUpSLP &R; /// \returns the operand data at \p OpIdx and \p Lane. OperandData &getData(unsigned OpIdx, unsigned Lane) { @@ -748,215 +733,6 @@ class BoUpSLP { std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]); } - // The hard-coded scores listed here are not very important. 
When computing - // the scores of matching one sub-tree with another, we are basically - // counting the number of values that are matching. So even if all scores - // are set to 1, we would still get a decent matching result. - // However, sometimes we have to break ties. For example we may have to - // choose between matching loads vs matching opcodes. This is what these - // scores are helping us with: they provide the order of preference. - - /// Loads from consecutive memory addresses, e.g. load(A[i]), load(A[i+1]). - static const int ScoreConsecutiveLoads = 3; - /// Constants. - static const int ScoreConstants = 2; - /// Instructions with the same opcode. - static const int ScoreSameOpcode = 2; - /// Instructions with alt opcodes (e.g, add + sub). - static const int ScoreAltOpcodes = 1; - /// Identical instructions (a.k.a. splat or broadcast). - static const int ScoreSplat = 1; - /// Matching with an undef is preferable to failing. - static const int ScoreUndef = 1; - /// Score for failing to find a decent match. - static const int ScoreFail = 0; - /// User exteranl to the vectorized code. - static const int ExternalUseCost = 1; - /// The user is internal but in a different lane. - static const int UserInDiffLaneCost = ExternalUseCost; - - /// \returns the score of placing \p V1 and \p V2 in consecutive lanes. - static int getShallowScore(Value *V1, Value *V2, const DataLayout &DL, - ScalarEvolution &SE) { - auto *LI1 = dyn_cast(V1); - auto *LI2 = dyn_cast(V2); - if (LI1 && LI2) - return isConsecutiveAccess(LI1, LI2, DL, SE) - ? VLOperands::ScoreConsecutiveLoads - : VLOperands::ScoreFail; - - auto *C1 = dyn_cast(V1); - auto *C2 = dyn_cast(V2); - if (C1 && C2) - return VLOperands::ScoreConstants; - - auto *I1 = dyn_cast(V1); - auto *I2 = dyn_cast(V2); - if (I1 && I2) { - if (I1 == I2) - return VLOperands::ScoreSplat; - InstructionsState S = getSameOpcode({I1, I2}); - // Note: Only consider instructions with <= 2 operands to avoid - // complexity explosion. 
- if (S.getOpcode() && S.MainOp->getNumOperands() <= 2) - return S.isAltShuffle() ? VLOperands::ScoreAltOpcodes - : VLOperands::ScoreSameOpcode; - } - - if (isa(V2)) - return VLOperands::ScoreUndef; - - return VLOperands::ScoreFail; - } - - /// Holds the values and their lane that are taking part in the look-ahead - /// score calculation. This is used in the external uses cost calculation. - SmallDenseMap InLookAheadValues; - - /// \Returns the additinal cost due to uses of \p LHS and \p RHS that are - /// either external to the vectorized code, or require shuffling. - int getExternalUsesCost(const std::pair &LHS, - const std::pair &RHS) { - int Cost = 0; - SmallVector, 2> Values = {LHS, RHS}; - for (int Idx = 0, IdxE = Values.size(); Idx != IdxE; ++Idx) { - Value *V = Values[Idx].first; - // Calculate the absolute lane, using the minimum relative lane of LHS - // and RHS as base and Idx as the offset. - int Ln = std::min(LHS.second, RHS.second) + Idx; - assert(Ln >= 0 && "Bad lane calculation"); - unsigned UsersBudget = LookAheadUsersBudget; - for (User *U : V->users()) { - if (const TreeEntry *UserTE = R.getTreeEntry(U)) { - // The user is in the VectorizableTree. Check if we need to insert. - auto It = llvm::find(UserTE->Scalars, U); - assert(It != UserTE->Scalars.end() && "U is in UserTE"); - int UserLn = std::distance(UserTE->Scalars.begin(), It); - assert(UserLn >= 0 && "Bad lane"); - if (UserLn != Ln) - Cost += UserInDiffLaneCost; - } else { - // Check if the user is in the look-ahead code. - auto It2 = InLookAheadValues.find(U); - if (It2 != InLookAheadValues.end()) { - // The user is in the look-ahead code. Check the lane. - if (It2->second != Ln) - Cost += UserInDiffLaneCost; - } else { - // The user is neither in SLP tree nor in the look-ahead code. - Cost += ExternalUseCost; - } - } - // Limit the number of visited uses to cap compilation time. 
- if (--UsersBudget == 0) - break; - } - } - return Cost; - } - - /// Go through the operands of \p LHS and \p RHS recursively until \p - /// MaxLevel, and return the cummulative score. For example: - /// \verbatim - /// A[0] B[0] A[1] B[1] C[0] D[0] B[1] A[1] - /// \ / \ / \ / \ / - /// + + + + - /// G1 G2 G3 G4 - /// \endverbatim - /// The getScoreAtLevelRec(G1, G2) function will try to match the nodes at - /// each level recursively, accumulating the score. It starts from matching - /// the additions at level 0, then moves on to the loads (level 1). The - /// score of G1 and G2 is higher than G1 and G3, because {A[0],A[1]} and - /// {B[0],B[1]} match with VLOperands::ScoreConsecutiveLoads, while - /// {A[0],C[0]} has a score of VLOperands::ScoreFail. - /// Please note that the order of the operands does not matter, as we - /// evaluate the score of all profitable combinations of operands. In - /// other words the score of G1 and G4 is the same as G1 and G2. This - /// heuristic is based on ideas described in: - /// Look-ahead SLP: Auto-vectorization in the presence of commutative - /// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha, - /// Luís F. W. Góes - int getScoreAtLevelRec(const std::pair &LHS, - const std::pair &RHS, int CurrLevel, - int MaxLevel) { - - Value *V1 = LHS.first; - Value *V2 = RHS.first; - // Get the shallow score of V1 and V2. - int ShallowScoreAtThisLevel = - std::max((int)ScoreFail, getShallowScore(V1, V2, DL, SE) - - getExternalUsesCost(LHS, RHS)); - int Lane1 = LHS.second; - int Lane2 = RHS.second; - - // If reached MaxLevel, - // or if V1 and V2 are not instructions, - // or if they are SPLAT, - // or if they are not consecutive, early return the current cost. 
- auto *I1 = dyn_cast(V1); - auto *I2 = dyn_cast(V2); - if (CurrLevel == MaxLevel || !(I1 && I2) || I1 == I2 || - ShallowScoreAtThisLevel == VLOperands::ScoreFail || - (isa(I1) && isa(I2) && ShallowScoreAtThisLevel)) - return ShallowScoreAtThisLevel; - assert(I1 && I2 && "Should have early exited."); - - // Keep track of in-tree values for determining the external-use cost. - InLookAheadValues[V1] = Lane1; - InLookAheadValues[V2] = Lane2; - - // Contains the I2 operand indexes that got matched with I1 operands. - SmallSet Op2Used; - - // Recursion towards the operands of I1 and I2. We are trying all possbile - // operand pairs, and keeping track of the best score. - for (unsigned OpIdx1 = 0, NumOperands1 = I1->getNumOperands(); - OpIdx1 != NumOperands1; ++OpIdx1) { - // Try to pair op1I with the best operand of I2. - int MaxTmpScore = 0; - unsigned MaxOpIdx2 = 0; - bool FoundBest = false; - // If I2 is commutative try all combinations. - unsigned FromIdx = isCommutative(I2) ? 0 : OpIdx1; - unsigned ToIdx = isCommutative(I2) - ? I2->getNumOperands() - : std::min(I2->getNumOperands(), OpIdx1 + 1); - assert(FromIdx <= ToIdx && "Bad index"); - for (unsigned OpIdx2 = FromIdx; OpIdx2 != ToIdx; ++OpIdx2) { - // Skip operands already paired with OpIdx1. - if (Op2Used.count(OpIdx2)) - continue; - // Recursively calculate the cost at each level - int TmpScore = getScoreAtLevelRec({I1->getOperand(OpIdx1), Lane1}, - {I2->getOperand(OpIdx2), Lane2}, - CurrLevel + 1, MaxLevel); - // Look for the best score. - if (TmpScore > VLOperands::ScoreFail && TmpScore > MaxTmpScore) { - MaxTmpScore = TmpScore; - MaxOpIdx2 = OpIdx2; - FoundBest = true; - } - } - if (FoundBest) { - // Pair {OpIdx1, MaxOpIdx2} was found to be best. Never revisit it. 
- Op2Used.insert(MaxOpIdx2); - ShallowScoreAtThisLevel += MaxTmpScore; - } - } - return ShallowScoreAtThisLevel; - } - - /// \Returns the look-ahead score, which tells us how much the sub-trees - /// rooted at \p LHS and \p RHS match, the more they match the higher the - /// score. This helps break ties in an informed way when we cannot decide on - /// the order of the operands by just considering the immediate - /// predecessors. - int getLookAheadScore(const std::pair &LHS, - const std::pair &RHS) { - InLookAheadValues.clear(); - return getScoreAtLevelRec(LHS, RHS, 1, LookAheadMaxDepth); - } - // Search all operands in Ops[*][Lane] for the one that matches best // Ops[OpIdx][LastLane] and return its opreand index. // If no good match can be found, return None. @@ -974,6 +750,9 @@ class BoUpSLP { // The linearized opcode of the operand at OpIdx, Lane. bool OpIdxAPO = getData(OpIdx, Lane).APO; + const unsigned BestScore = 2; + const unsigned GoodScore = 1; + // The best operand index and its score. // Sometimes we have more than one option (e.g., Opcode and Undefs), so we // are using the score to differentiate between the two. @@ -1002,19 +781,41 @@ class BoUpSLP { // Look for an operand that matches the current mode. switch (RMode) { case ReorderingMode::Load: + if (isa(Op)) { + // Figure out which is left and right, so that we can check for + // consecutive loads + bool LeftToRight = Lane > LastLane; + Value *OpLeft = (LeftToRight) ? OpLastLane : Op; + Value *OpRight = (LeftToRight) ? Op : OpLastLane; + if (isConsecutiveAccess(cast(OpLeft), + cast(OpRight), DL, SE)) + BestOp.Idx = Idx; + } + break; + case ReorderingMode::Opcode: + // We accept both Instructions and Undefs, but with different scores. + if ((isa(Op) && isa(OpLastLane) && + cast(Op)->getOpcode() == + cast(OpLastLane)->getOpcode()) || + (isa(OpLastLane) && isa(Op)) || + isa(Op)) { + // An instruction has a higher score than an undef. + unsigned Score = (isa(Op)) ? 
GoodScore : BestScore; + if (Score > BestOp.Score) { + BestOp.Idx = Idx; + BestOp.Score = Score; + } + } + break; case ReorderingMode::Constant: - case ReorderingMode::Opcode: { - bool LeftToRight = Lane > LastLane; - Value *OpLeft = (LeftToRight) ? OpLastLane : Op; - Value *OpRight = (LeftToRight) ? Op : OpLastLane; - unsigned Score = - getLookAheadScore({OpLeft, LastLane}, {OpRight, Lane}); - if (Score > BestOp.Score) { - BestOp.Idx = Idx; - BestOp.Score = Score; + if (isa(Op)) { + unsigned Score = (isa(Op)) ? GoodScore : BestScore; + if (Score > BestOp.Score) { + BestOp.Idx = Idx; + BestOp.Score = Score; + } } break; - } case ReorderingMode::Splat: if (Op == OpLastLane) BestOp.Idx = Idx; @@ -1145,8 +946,8 @@ class BoUpSLP { public: /// Initialize with all the operands of the instruction vector \p RootVL. VLOperands(ArrayRef RootVL, const DataLayout &DL, - ScalarEvolution &SE, const BoUpSLP &R) - : DL(DL), SE(SE), R(R) { + ScalarEvolution &SE) + : DL(DL), SE(SE) { // Append all the operands of RootVL. appendOperandsOfVL(RootVL); } @@ -1368,8 +1169,7 @@ class BoUpSLP { SmallVectorImpl &Left, SmallVectorImpl &Right, const DataLayout &DL, - ScalarEvolution &SE, - const BoUpSLP &R); + ScalarEvolution &SE); struct TreeEntry { using VecTreeTy = SmallVector, 8>; TreeEntry(VecTreeTy &Container) : Container(Container) {} @@ -2571,7 +2371,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // Commutative predicate - collect + sort operands of the instructions // so that each side is more likely to have the same opcode. assert(P0 == SwapP0 && "Commutative Predicate mismatch"); - reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this); + reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE); } else { // Collect operands - commute if it uses the swapped predicate. for (Value *V : VL) { @@ -2616,7 +2416,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // have the same opcode. 
if (isa(VL0) && VL0->isCommutative()) { ValueList Left, Right; - reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this); + reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE); buildTree_rec(Left, Depth + 1, {TE, 0}); buildTree_rec(Right, Depth + 1, {TE, 1}); return; @@ -2785,7 +2585,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // Reorder operands if reordering would enable vectorization. if (isa(VL0)) { ValueList Left, Right; - reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this); + reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE); buildTree_rec(Left, Depth + 1, {TE, 0}); buildTree_rec(Right, Depth + 1, {TE, 1}); return; @@ -3506,15 +3306,13 @@ int BoUpSLP::getGatherCost(ArrayRef VL) const { // Perform operand reordering on the instructions in VL and return the reordered // operands in Left and Right. -void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef VL, - SmallVectorImpl &Left, - SmallVectorImpl &Right, - const DataLayout &DL, - ScalarEvolution &SE, - const BoUpSLP &R) { +void BoUpSLP::reorderInputsAccordingToOpcode( + ArrayRef VL, SmallVectorImpl &Left, + SmallVectorImpl &Right, const DataLayout &DL, + ScalarEvolution &SE) { if (VL.empty()) return; - VLOperands Ops(VL, DL, SE, R); + VLOperands Ops(VL, DL, SE); // Reorder the operands in place. 
Ops.reorder(); Left = Ops.getVL(0); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll index f9cbf2ff01c1c..f89cae88a5fbc 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll @@ -27,19 +27,22 @@ define void @lookahead_basic(double* %array) { ; CHECK-NEXT: [[IDX5:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 5 ; CHECK-NEXT: [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6 ; CHECK-NEXT: [[IDX7:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[IDX0]] to <2 x double>* -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[IDX2]] to <2 x double>* -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[IDX4]] to <2 x double>* -; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[IDX6]] to <2 x double>* -; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast double* [[IDX0]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8 +; CHECK-NEXT: [[A_0:%.*]] = load double, double* [[IDX0]], align 8 +; CHECK-NEXT: [[A_1:%.*]] = load double, double* [[IDX1]], align 8 +; CHECK-NEXT: [[B_0:%.*]] = load double, double* [[IDX2]], align 8 +; CHECK-NEXT: [[B_1:%.*]] = load double, double* [[IDX3]], align 8 +; CHECK-NEXT: [[C_0:%.*]] = load double, double* [[IDX4]], align 8 +; CHECK-NEXT: [[C_1:%.*]] = load double, 
double* [[IDX5]], align 8 +; CHECK-NEXT: [[D_0:%.*]] = load double, double* [[IDX6]], align 8 +; CHECK-NEXT: [[D_1:%.*]] = load double, double* [[IDX7]], align 8 +; CHECK-NEXT: [[SUBAB_0:%.*]] = fsub fast double [[A_0]], [[B_0]] +; CHECK-NEXT: [[SUBCD_0:%.*]] = fsub fast double [[C_0]], [[D_0]] +; CHECK-NEXT: [[SUBAB_1:%.*]] = fsub fast double [[A_1]], [[B_1]] +; CHECK-NEXT: [[SUBCD_1:%.*]] = fsub fast double [[C_1]], [[D_1]] +; CHECK-NEXT: [[ADDABCD_0:%.*]] = fadd fast double [[SUBAB_0]], [[SUBCD_0]] +; CHECK-NEXT: [[ADDCDAB_1:%.*]] = fadd fast double [[SUBCD_1]], [[SUBAB_1]] +; CHECK-NEXT: store double [[ADDABCD_0]], double* [[IDX0]], align 8 +; CHECK-NEXT: store double [[ADDCDAB_1]], double* [[IDX1]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -161,23 +164,22 @@ define void @lookahead_alt2(double* %array) { ; CHECK-NEXT: [[IDX5:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 5 ; CHECK-NEXT: [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6 ; CHECK-NEXT: [[IDX7:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 7 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[IDX0]] to <2 x double>* -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[IDX2]] to <2 x double>* -; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[IDX4]] to <2 x double>* -; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[IDX6]] to <2 x double>* -; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[TMP6]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP1]], 
[[TMP3]] -; CHECK-NEXT: [[TMP12:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = fadd fast <2 x double> [[TMP13]], [[TMP10]] -; CHECK-NEXT: [[TMP15:%.*]] = bitcast double* [[IDX0]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP14]], <2 x double>* [[TMP15]], align 8 +; CHECK-NEXT: [[A_0:%.*]] = load double, double* [[IDX0]], align 8 +; CHECK-NEXT: [[A_1:%.*]] = load double, double* [[IDX1]], align 8 +; CHECK-NEXT: [[B_0:%.*]] = load double, double* [[IDX2]], align 8 +; CHECK-NEXT: [[B_1:%.*]] = load double, double* [[IDX3]], align 8 +; CHECK-NEXT: [[C_0:%.*]] = load double, double* [[IDX4]], align 8 +; CHECK-NEXT: [[C_1:%.*]] = load double, double* [[IDX5]], align 8 +; CHECK-NEXT: [[D_0:%.*]] = load double, double* [[IDX6]], align 8 +; CHECK-NEXT: [[D_1:%.*]] = load double, double* [[IDX7]], align 8 +; CHECK-NEXT: [[ADDAB_0:%.*]] = fadd fast double [[A_0]], [[B_0]] +; CHECK-NEXT: [[SUBCD_0:%.*]] = fsub fast double [[C_0]], [[D_0]] +; CHECK-NEXT: [[ADDCD_1:%.*]] = fadd fast double [[C_1]], [[D_1]] +; CHECK-NEXT: [[SUBAB_1:%.*]] = fsub fast double [[A_1]], [[B_1]] +; CHECK-NEXT: [[ADDABCD_0:%.*]] = fadd fast double [[ADDAB_0]], [[SUBCD_0]] +; CHECK-NEXT: [[ADDCDAB_1:%.*]] = fadd fast double [[ADDCD_1]], [[SUBAB_1]] +; CHECK-NEXT: store double [[ADDABCD_0]], double* [[IDX0]], align 8 +; CHECK-NEXT: store double [[ADDCDAB_1]], double* [[IDX1]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -237,97 +239,6 @@ define void @lookahead_external_uses(double* %A, double *%B, double *%C, double ; CHECK-NEXT: [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2 ; CHECK-NEXT: [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2 ; CHECK-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1 -; CHECK-NEXT: [[A0:%.*]] = load double, double* [[IDXA0]], align 8 -; CHECK-NEXT: [[C0:%.*]] = load 
double, double* [[IDXC0]], align 8 -; CHECK-NEXT: [[D0:%.*]] = load double, double* [[IDXD0]], align 8 -; CHECK-NEXT: [[A1:%.*]] = load double, double* [[IDXA1]], align 8 -; CHECK-NEXT: [[B2:%.*]] = load double, double* [[IDXB2]], align 8 -; CHECK-NEXT: [[A2:%.*]] = load double, double* [[IDXA2]], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[IDXB0]] to <2 x double>* -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[A1]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> undef, double [[D0]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[B2]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> undef, double [[A0]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[A2]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <2 x double> [[TMP8]], [[TMP1]] -; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP9]], [[TMP6]] -; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0 -; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast double* [[IDXS0]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8 -; CHECK-NEXT: store double [[A1]], double* [[EXT1:%.*]], align 8 -; CHECK-NEXT: ret void -; -entry: - %IdxA0 = getelementptr inbounds double, double* %A, i64 0 - %IdxB0 = getelementptr inbounds double, double* %B, i64 0 - %IdxC0 = getelementptr inbounds double, double* %C, i64 0 - %IdxD0 = getelementptr inbounds double, double* %D, i64 0 - - %IdxA1 = getelementptr inbounds double, double* %A, i64 1 - %IdxB2 = getelementptr inbounds double, double* %B, i64 2 - %IdxA2 = 
getelementptr inbounds double, double* %A, i64 2 - %IdxB1 = getelementptr inbounds double, double* %B, i64 1 - - %A0 = load double, double *%IdxA0, align 8 - %B0 = load double, double *%IdxB0, align 8 - %C0 = load double, double *%IdxC0, align 8 - %D0 = load double, double *%IdxD0, align 8 - - %A1 = load double, double *%IdxA1, align 8 - %B2 = load double, double *%IdxB2, align 8 - %A2 = load double, double *%IdxA2, align 8 - %B1 = load double, double *%IdxB1, align 8 - - %subA0B0 = fsub fast double %A0, %B0 - %subC0D0 = fsub fast double %C0, %D0 - - %subA1B2 = fsub fast double %A1, %B2 - %subA2B1 = fsub fast double %A2, %B1 - - %add0 = fadd fast double %subA0B0, %subC0D0 - %add1 = fadd fast double %subA1B2, %subA2B1 - - %IdxS0 = getelementptr inbounds double, double* %S, i64 0 - %IdxS1 = getelementptr inbounds double, double* %S, i64 1 - - store double %add0, double *%IdxS0, align 8 - store double %add1, double *%IdxS1, align 8 - - ; External use - store double %A1, double *%Ext1, align 8 - ret void -} - -; A[0] B[0] C[0] D[0] A[1] B[2] A[2] B[1] -; \ / \ / / \ / \ / \ -; - - U1,U2,U3 - - U4,U5 -; \ / \ / -; + + -; | | -; S[0] S[1] -; -; -; If we limit the users budget for the look-ahead heuristic to 2, then the -; look-ahead heuristic has no way of choosing B[1] (with 2 external users) -; over A[1] (with 3 external users). -; The result is that the operands are of the Add not reordered and the loads -; from A get vectorized instead of the loads from B. 
-; -define void @lookahead_limit_users_budget(double* %A, double *%B, double *%C, double *%D, double *%S, double *%Ext1, double *%Ext2, double *%Ext3, double *%Ext4, double *%Ext5) { -; CHECK-LABEL: @lookahead_limit_users_budget( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0 -; CHECK-NEXT: [[IDXB0:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 0 -; CHECK-NEXT: [[IDXC0:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 0 -; CHECK-NEXT: [[IDXD0:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i64 0 -; CHECK-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1 -; CHECK-NEXT: [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2 -; CHECK-NEXT: [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2 -; CHECK-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1 ; CHECK-NEXT: [[B0:%.*]] = load double, double* [[IDXB0]], align 8 ; CHECK-NEXT: [[C0:%.*]] = load double, double* [[IDXC0]], align 8 ; CHECK-NEXT: [[D0:%.*]] = load double, double* [[IDXD0]], align 8 @@ -351,10 +262,6 @@ define void @lookahead_limit_users_budget(double* %A, double *%B, double *%C, do ; CHECK-NEXT: store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; CHECK-NEXT: store double [[TMP12]], double* [[EXT1:%.*]], align 8 -; CHECK-NEXT: store double [[TMP12]], double* [[EXT2:%.*]], align 8 -; CHECK-NEXT: store double [[TMP12]], double* [[EXT3:%.*]], align 8 -; CHECK-NEXT: store double [[B1]], double* [[EXT4:%.*]], align 8 -; CHECK-NEXT: store double [[B1]], double* [[EXT5:%.*]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -393,56 +300,7 @@ entry: store double %add0, double *%IdxS0, align 8 store double %add1, double *%IdxS1, align 8 - ; External uses of A1 + ; External use store double %A1, double *%Ext1, align 8 - store double %A1, double *%Ext2, align 8 - 
store double %A1, double *%Ext3, align 8 - - ; External uses of B1 - store double %B1, double *%Ext4, align 8 - store double %B1, double *%Ext5, align 8 - - ret void -} - -; This checks that the lookahead code does not crash when instructions with the same opcodes have different numbers of operands (in this case the calls). - -%Class = type { i8 } -declare double @_ZN1i2ayEv(%Class*) -declare double @_ZN1i2axEv() - -define void @lookahead_crash(double* %A, double *%S, %Class *%Arg0) { -; CHECK-LABEL: @lookahead_crash( -; CHECK-NEXT: [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0 -; CHECK-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDXA0]] to <2 x double>* -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 -; CHECK-NEXT: [[C0:%.*]] = call double @_ZN1i2ayEv(%Class* [[ARG0:%.*]]) -; CHECK-NEXT: [[C1:%.*]] = call double @_ZN1i2axEv() -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[C1]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0 -; CHECK-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[IDXS0]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8 -; CHECK-NEXT: ret void -; - %IdxA0 = getelementptr inbounds double, double* %A, i64 0 - %IdxA1 = getelementptr inbounds double, double* %A, i64 1 - - %A0 = load double, double *%IdxA0, align 8 - %A1 = load double, double *%IdxA1, align 8 - - %C0 = call double @_ZN1i2ayEv(%Class *%Arg0) - %C1 = call double @_ZN1i2axEv() - - %add0 = fadd fast double %A0, %C0 - %add1 = fadd fast double %A1, %C1 - - %IdxS0 = getelementptr inbounds double, double* %S, i64 0 - 
%IdxS1 = getelementptr inbounds double, double* %S, i64 1 - store double %add0, double *%IdxS0, align 8 - store double %add1, double *%IdxS1, align 8 ret void } From fdcbd5fa48680a1f02809d2ead6259b30b00d0b1 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 15 Jul 2019 23:49:31 +0000 Subject: [PATCH 187/451] Temporarily Revert "fix unnamed fiefield issue and add tests for __builtin_preserve_access_index intrinsic" The commit had tests that would only work with names in the IR. This reverts commit r366076. llvm-svn: 366155 --- clang/lib/CodeGen/CGExpr.cpp | 21 +-- clang/lib/CodeGen/CodeGenFunction.h | 3 - .../CodeGen/builtin-preserve-access-index.c | 177 ------------------ .../test/Sema/builtin-preserve-access-index.c | 13 -- 4 files changed, 2 insertions(+), 212 deletions(-) delete mode 100644 clang/test/CodeGen/builtin-preserve-access-index.c delete mode 100644 clang/test/Sema/builtin-preserve-access-index.c diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 25d2424eb85ad..62d930ca8c455 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3892,23 +3892,6 @@ LValue CodeGenFunction::EmitLValueForLambdaField(const FieldDecl *Field) { return EmitLValueForField(LambdaLV, Field); } -/// Get the field index in the debug info. The debug info structure/union -/// will ignore the unnamed bitfields. -unsigned CodeGenFunction::getDebugInfoFIndex(const RecordDecl *Rec, - unsigned FieldIndex) { - unsigned I = 0, Skipped = 0; - - for (auto F : Rec->getDefinition()->fields()) { - if (I == FieldIndex) - break; - if (F->isUnnamedBitfield()) - Skipped++; - I++; - } - - return FieldIndex - Skipped; -} - /// Get the address of a zero-sized field within a record. The resulting /// address doesn't necessarily have the right type. 
static Address emitAddrOfZeroSizeField(CodeGenFunction &CGF, Address Base, @@ -3948,7 +3931,7 @@ static Address emitPreserveStructAccess(CodeGenFunction &CGF, Address base, CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field); return CGF.Builder.CreatePreserveStructAccessIndex( - base, idx, CGF.getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo); + base, idx, field->getFieldIndex(), DbgInfo); } static bool hasAnyVptr(const QualType Type, const ASTContext &Context) { @@ -4065,7 +4048,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, getContext().getRecordType(rec), rec->getLocation()); addr = Address( Builder.CreatePreserveUnionAccessIndex( - addr.getPointer(), getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo), + addr.getPointer(), field->getFieldIndex(), DbgInfo), addr.getAlignment()); } } else { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 0d534af42cddb..a51a9711ff170 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2652,9 +2652,6 @@ class CodeGenFunction : public CodeGenTypeCache { /// Converts Location to a DebugLoc, if debug information is enabled. llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Location); - /// Get the record field index as represented in debug info. 
- unsigned getDebugInfoFIndex(const RecordDecl *Rec, unsigned FieldIndex); - //===--------------------------------------------------------------------===// // Declaration Emission diff --git a/clang/test/CodeGen/builtin-preserve-access-index.c b/clang/test/CodeGen/builtin-preserve-access-index.c deleted file mode 100644 index c56c6ecc0e566..0000000000000 --- a/clang/test/CodeGen/builtin-preserve-access-index.c +++ /dev/null @@ -1,177 +0,0 @@ -// RUN: %clang -target x86_64 -emit-llvm -S -g %s -o - | FileCheck %s - -#define _(x) (__builtin_preserve_access_index(x)) - -const void *unit1(const void *arg) { - return _(arg); -} -// CHECK: define dso_local i8* @unit1(i8* %arg) -// CHECK-NOT: llvm.preserve.array.access.index -// CHECK-NOT: llvm.preserve.struct.access.index -// CHECK-NOT: llvm.preserve.union.access.index - -const void *unit2(void) { - return _((const void *)0xffffffffFFFF0000ULL); -} -// CHECK: define dso_local i8* @unit2() -// CHECK-NOT: llvm.preserve.array.access.index -// CHECK-NOT: llvm.preserve.struct.access.index -// CHECK-NOT: llvm.preserve.union.access.index - -const void *unit3(const int *arg) { - return _(arg + 1); -} -// CHECK: define dso_local i8* @unit3(i32* %arg) -// CHECK-NOT: llvm.preserve.array.access.index -// CHECK-NOT: llvm.preserve.struct.access.index -// CHECK-NOT: llvm.preserve.union.access.index - -const void *unit4(const int *arg) { - return _(&arg[1]); -} -// CHECK: define dso_local i8* @unit4(i32* %arg) -// CHECK-NOT: getelementptr -// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0i32(i32* %0, i32 0, i32 1) - -const void *unit5(const int *arg[5]) { - return _(&arg[1][2]); -} -// CHECK: define dso_local i8* @unit5(i32** %arg) -// CHECK-NOT: getelementptr -// CHECK: call i32** @llvm.preserve.array.access.index.p0p0i32.p0p0i32(i32** %0, i32 0, i32 1) -// CHECK-NOT: getelementptr -// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0i32(i32* %2, i32 0, i32 2) - -struct s1 { - char a; - int b; -}; - -struct s2 { - 
char a1:1; - char a2:1; - int b; -}; - -struct s3 { - char a1:1; - char a2:1; - char :6; - int b; -}; - -const void *unit6(struct s1 *arg) { - return _(&arg->a); -} -// CHECK: define dso_local i8* @unit6(%struct.s1* %arg) -// CHECK-NOT: getelementptr -// CHECK: call i8* @llvm.preserve.struct.access.index.p0i8.p0s_struct.s1s(%struct.s1* %0, i32 0, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S1:[0-9]+]] - -const void *unit7(struct s1 *arg) { - return _(&arg->b); -} -// CHECK: define dso_local i8* @unit7(%struct.s1* %arg) -// CHECK-NOT: getelementptr -// CHECK: call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s1s(%struct.s1* %0, i32 1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S1]] - -const void *unit8(struct s2 *arg) { - return _(&arg->b); -} -// CHECK: define dso_local i8* @unit8(%struct.s2* %arg) -// CHECK-NOT: getelementptr -// CHECK: call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s2s(%struct.s2* %0, i32 1, i32 2), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S2:[0-9]+]] - -const void *unit9(struct s3 *arg) { - return _(&arg->b); -} -// CHECK: define dso_local i8* @unit9(%struct.s3* %arg) -// CHECK-NOT: getelementptr -// CHECK: call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s3s(%struct.s3* %0, i32 1, i32 2), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S3:[0-9]+]] - -union u1 { - char a; - int b; -}; - -union u2 { - char a; - int :32; - int b; -}; - -const void *unit10(union u1 *arg) { - return _(&arg->a); -} -// CHECK: define dso_local i8* @unit10(%union.u1* %arg) -// CHECK-NOT: getelementptr -// CHECK: call %union.u1* @llvm.preserve.union.access.index.p0s_union.u1s.p0s_union.u1s(%union.u1* %0, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U1:[0-9]+]] - -const void *unit11(union u1 *arg) { - return _(&arg->b); -} -// CHECK: define dso_local i8* @unit11(%union.u1* %arg) -// CHECK-NOT: getelementptr -// CHECK: call %union.u1* 
@llvm.preserve.union.access.index.p0s_union.u1s.p0s_union.u1s(%union.u1* %0, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U1]] - -const void *unit12(union u2 *arg) { - return _(&arg->b); -} -// CHECK: define dso_local i8* @unit12(%union.u2* %arg) -// CHECK-NOT: getelementptr -// CHECK: call %union.u2* @llvm.preserve.union.access.index.p0s_union.u2s.p0s_union.u2s(%union.u2* %0, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U2:[0-9]+]] - -struct s4 { - char d; - union u { - int b[4]; - char a; - } c; -}; - -union u3 { - struct s { - int b[4]; - } c; - char a; -}; - -const void *unit13(struct s4 *arg) { - return _(&arg->c.b[2]); -} -// CHECK: define dso_local i8* @unit13(%struct.s4* %arg) -// CHECK: call %union.u* @llvm.preserve.struct.access.index.p0s_union.us.p0s_struct.s4s(%struct.s4* %0, i32 1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S4:[0-9]+]] -// CHECK: call %union.u* @llvm.preserve.union.access.index.p0s_union.us.p0s_union.us(%union.u* %1, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_I_U:[0-9]+]] -// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0a4i32([4 x i32]* %b, i32 1, i32 2) - -const void *unit14(union u3 *arg) { - return _(&arg->c.b[2]); -} -// CHECK: define dso_local i8* @unit14(%union.u3* %arg) -// CHECK: call %union.u3* @llvm.preserve.union.access.index.p0s_union.u3s.p0s_union.u3s(%union.u3* %0, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U3:[0-9]+]] -// CHECK: call [4 x i32]* @llvm.preserve.struct.access.index.p0a4i32.p0s_struct.ss(%struct.s* %c, i32 0, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_I_S:[0-9]+]] -// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0a4i32([4 x i32]* %2, i32 1, i32 2) - -const void *unit15(struct s4 *arg) { - return _(&arg[2].c.a); -} -// CHECK: define dso_local i8* @unit15(%struct.s4* %arg) -// CHECK: call %struct.s4* 
@llvm.preserve.array.access.index.p0s_struct.s4s.p0s_struct.s4s(%struct.s4* %0, i32 0, i32 2) -// CHECK: call %union.u* @llvm.preserve.struct.access.index.p0s_union.us.p0s_struct.s4s(%struct.s4* %1, i32 1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S4]] -// CHECK: call %union.u* @llvm.preserve.union.access.index.p0s_union.us.p0s_union.us(%union.u* %2, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_I_U]] - -const void *unit16(union u3 *arg) { - return _(&arg[2].a); -} -// CHECK: define dso_local i8* @unit16(%union.u3* %arg) -// CHECK: call %union.u3* @llvm.preserve.array.access.index.p0s_union.u3s.p0s_union.u3s(%union.u3* %0, i32 0, i32 2) -// CHECK: call %union.u3* @llvm.preserve.union.access.index.p0s_union.u3s.p0s_union.u3s(%union.u3* %1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U3]] - -// CHECK: ![[STRUCT_S1]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s1", -// CHECK: ![[STRUCT_S2]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s2", -// CHECK: ![[STRUCT_S3]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s3", -// CHECK: ![[UNION_U1]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u1", -// CHECK: ![[UNION_U2]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u2", -// CHECK: ![[STRUCT_S4]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s4", -// CHECK: ![[UNION_I_U]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u", -// CHECK: ![[UNION_U3]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u3", -// CHECK: ![[STRUCT_I_S]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s", diff --git a/clang/test/Sema/builtin-preserve-access-index.c b/clang/test/Sema/builtin-preserve-access-index.c deleted file mode 100644 index c10ceb5145b8c..0000000000000 --- a/clang/test/Sema/builtin-preserve-access-index.c +++ /dev/null @@ -1,13 +0,0 @@ -// RUN: %clang_cc1 -x c -triple x86_64-pc-linux-gnu 
-dwarf-version=4 -fsyntax-only -verify %s - -const void *invalid1(const int *arg) { - return __builtin_preserve_access_index(&arg[1], 1); // expected-error {{too many arguments to function call, expected 1, have 2}} -} - -void *invalid2(const int *arg) { - return __builtin_preserve_access_index(&arg[1]); // expected-warning {{returning 'const void *' from a function with result type 'void *' discards qualifiers}} -} - -const void *invalid3(const int *arg) { - return __builtin_preserve_access_index(1); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const void *'}} -} From c245249b7bd012b644c8756abf49157348b3fb15 Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Mon, 15 Jul 2019 23:55:22 +0000 Subject: [PATCH 188/451] Update some file changes, but there's a dependency loop so it doesn't quite work rigtht now. llvm-svn: 366156 --- lldb/lldb.xcodeproj/project.pbxproj | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/lldb/lldb.xcodeproj/project.pbxproj b/lldb/lldb.xcodeproj/project.pbxproj index 11b3b3ea87ed5..c02abef66a753 100644 --- a/lldb/lldb.xcodeproj/project.pbxproj +++ b/lldb/lldb.xcodeproj/project.pbxproj @@ -162,7 +162,7 @@ 2689007913353E1A00698AC0 /* CFCMutableDictionary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 26BC7EF310F1B8AD00F91463 /* CFCMutableDictionary.cpp */; }; 2689007A13353E1A00698AC0 /* CFCMutableSet.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 26BC7EF510F1B8AD00F91463 /* CFCMutableSet.cpp */; }; 2689007B13353E1A00698AC0 /* CFCString.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 26BC7EF810F1B8AD00F91463 /* CFCString.cpp */; }; - 268900E913353E6F00698AC0 /* CPPLanguageRuntime.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CB443BC1249920C00C13DC2 /* CPPLanguageRuntime.cpp */; }; + AF9E360C22DD3BFC000B7776 /* CPPLanguageRuntime.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AF9E360B22DD3BFB000B7776 /* CPPLanguageRuntime.cpp */; }; 
94B6385D1B8FB178004FE1E4 /* CPlusPlusLanguage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 94B6385B1B8FB174004FE1E4 /* CPlusPlusLanguage.cpp */; }; 23CB15341D66DA9300EDDDE1 /* CPlusPlusLanguageTest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 23CB14FA1D66CCF100EDDDE1 /* CPlusPlusLanguageTest.cpp */; }; 49F811F31E931B2100F4E163 /* CPlusPlusNameParser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49F811EF1E931B1500F4E163 /* CPlusPlusNameParser.cpp */; }; @@ -379,7 +379,6 @@ 260A63191861009E00FECF8E /* IOHandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 260A63181861009E00FECF8E /* IOHandler.cpp */; }; 260A63171861008E00FECF8E /* IOHandler.h in Headers */ = {isa = PBXBuildFile; fileRef = 260A63161861008E00FECF8E /* IOHandler.h */; }; 236124A41986B4E2004EFC37 /* IOObject.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 236124A21986B4E2004EFC37 /* IOObject.cpp */; }; - 2689006A13353E0E00698AC0 /* IRDynamicChecks.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49CF9829122C70BD007A0B96 /* IRDynamicChecks.cpp */; }; 2689006D13353E0E00698AC0 /* IRExecutionUnit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4C98D3DB118FB96F00E575D0 /* IRExecutionUnit.cpp */; }; 2689006B13353E0E00698AC0 /* IRForTarget.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49307AAD11DEA4D90081F992 /* IRForTarget.cpp */; }; 49A71FE7141FFA5C00D59478 /* IRInterpreter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 496B01581406DE8900F830D5 /* IRInterpreter.cpp */; }; @@ -1563,8 +1562,7 @@ 26792617211CA3E100EE1D10 /* CMakeLists.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = CMakeLists.txt; path = "tools/lldb-vscode/CMakeLists.txt"; sourceTree = ""; }; 9A1890311F47D5D400394BCA /* CMakeLists.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = CMakeLists.txt; path = TestingSupport/CMakeLists.txt; sourceTree = ""; }; AF352EDD22C17BD700D058B6 /* CMakeLists.txt */ = {isa = PBXFileReference; 
fileEncoding = 4; lastKnownFileType = text; path = CMakeLists.txt; sourceTree = ""; }; - 4CB443BC1249920C00C13DC2 /* CPPLanguageRuntime.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = CPPLanguageRuntime.cpp; path = source/Target/CPPLanguageRuntime.cpp; sourceTree = ""; }; - 4CB443BB1249920C00C13DC2 /* CPPLanguageRuntime.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CPPLanguageRuntime.h; path = include/lldb/Target/CPPLanguageRuntime.h; sourceTree = ""; }; + AF9E360B22DD3BFB000B7776 /* CPPLanguageRuntime.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CPPLanguageRuntime.cpp; sourceTree = ""; }; 94B6385B1B8FB174004FE1E4 /* CPlusPlusLanguage.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = CPlusPlusLanguage.cpp; path = Language/CPlusPlus/CPlusPlusLanguage.cpp; sourceTree = ""; }; 94B6385C1B8FB174004FE1E4 /* CPlusPlusLanguage.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = CPlusPlusLanguage.h; path = Language/CPlusPlus/CPlusPlusLanguage.h; sourceTree = ""; }; 23CB14FA1D66CCF100EDDDE1 /* CPlusPlusLanguageTest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CPlusPlusLanguageTest.cpp; sourceTree = ""; }; @@ -2013,8 +2011,6 @@ 236124A21986B4E2004EFC37 /* IOObject.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = IOObject.cpp; path = source/Utility/IOObject.cpp; sourceTree = ""; }; 236124A61986B50E004EFC37 /* IOObject.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = IOObject.h; path = include/lldb/Utility/IOObject.h; sourceTree = ""; }; 26BC7D6510F1B77400F91463 /* IOStreamMacros.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = IOStreamMacros.h; path = include/lldb/Core/IOStreamMacros.h; sourceTree = ""; 
}; - 49CF9829122C70BD007A0B96 /* IRDynamicChecks.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = IRDynamicChecks.cpp; path = source/Expression/IRDynamicChecks.cpp; sourceTree = ""; }; - 49CF9833122C718B007A0B96 /* IRDynamicChecks.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = IRDynamicChecks.h; path = include/lldb/Expression/IRDynamicChecks.h; sourceTree = ""; }; 4C98D3DB118FB96F00E575D0 /* IRExecutionUnit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = IRExecutionUnit.cpp; path = source/Expression/IRExecutionUnit.cpp; sourceTree = ""; }; 4C98D3E1118FB98F00E575D0 /* IRExecutionUnit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = IRExecutionUnit.h; path = include/lldb/Expression/IRExecutionUnit.h; sourceTree = ""; }; 49307AAD11DEA4D90081F992 /* IRForTarget.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = IRForTarget.cpp; path = ExpressionParser/Clang/IRForTarget.cpp; sourceTree = ""; }; @@ -5494,8 +5490,6 @@ 4C2479BE1BA39843009C9A7B /* ExpressionParser.h */, 26BC7DC310F1B79500F91463 /* DWARFExpression.h */, 26BC7ED810F1B86700F91463 /* DWARFExpression.cpp */, - 49CF9833122C718B007A0B96 /* IRDynamicChecks.h */, - 49CF9829122C70BD007A0B96 /* IRDynamicChecks.cpp */, 49C66B1C17011A43004D1922 /* IRMemoryMap.h */, 49DCF6FD170E6B4A0092F75E /* IRMemoryMap.cpp */, 4C98D3E1118FB98F00E575D0 /* IRExecutionUnit.h */, @@ -5679,8 +5673,6 @@ 23EDE3301926839700F6A132 /* NativeRegisterContext.cpp */, 497E7B331188ED300065CCA1 /* ABI.h */, 497E7B9D1188F6690065CCA1 /* ABI.cpp */, - 4CB443BB1249920C00C13DC2 /* CPPLanguageRuntime.h */, - 4CB443BC1249920C00C13DC2 /* CPPLanguageRuntime.cpp */, 26BC7DF110F1B81A00F91463 /* DynamicLoader.h */, 26BC7E7710F1B85900F91463 /* DynamicLoader.cpp */, 26BC7DF210F1B81A00F91463 /* ExecutionContext.h */, @@ -6272,6 
+6264,7 @@ 4CCA643B13B40B82003BDF98 /* CPlusPlus */ = { isa = PBXGroup; children = ( + AF9E360B22DD3BFB000B7776 /* CPPLanguageRuntime.cpp */, 4CCA643C13B40B82003BDF98 /* ItaniumABI */, ); path = CPlusPlus; @@ -8127,6 +8120,7 @@ AFF81FB320D1CC910010F95E /* PlatformiOSSimulatorCoreSimulatorSupport.mm in Sources */, 942612F71B95000000EF842E /* LanguageCategory.cpp in Sources */, AF116BEF20CF234B0071093F /* DebugNamesDWARFIndex.cpp in Sources */, + AF9E360C22DD3BFC000B7776 /* CPPLanguageRuntime.cpp in Sources */, 2689005E13353E0E00698AC0 /* ClangASTSource.cpp in Sources */, AFF1273622276F1600C25726 /* LocateSymbolFile.cpp in Sources */, 2689005F13353E0E00698AC0 /* ClangFunctionCaller.cpp in Sources */, @@ -8145,7 +8139,6 @@ AFC234091AF85CE100CDE8B6 /* CommandObjectLanguage.cpp in Sources */, 2689006813353E0E00698AC0 /* ASTResultSynthesizer.cpp in Sources */, 2689006913353E0E00698AC0 /* ASTStructExtractor.cpp in Sources */, - 2689006A13353E0E00698AC0 /* IRDynamicChecks.cpp in Sources */, 2689006B13353E0E00698AC0 /* IRForTarget.cpp in Sources */, AF2BA6EC1A707E3400C5248A /* UriParser.cpp in Sources */, 2689006D13353E0E00698AC0 /* IRExecutionUnit.cpp in Sources */, @@ -8321,7 +8314,6 @@ 268900E813353E6F00698AC0 /* ABI.cpp in Sources */, 4C56543119D1EFAA002E9C44 /* ThreadPlanPython.cpp in Sources */, 26AB92121819D74600E63F3E /* DWARFDataExtractor.cpp in Sources */, - 268900E913353E6F00698AC0 /* CPPLanguageRuntime.cpp in Sources */, 4CD44D4220B777850003557C /* DWARFBaseDIE.cpp in Sources */, 9485545A1DCBAE3B00345FF5 /* RenderScriptScriptGroup.cpp in Sources */, 268900EA13353E6F00698AC0 /* DynamicLoader.cpp in Sources */, From bf20b2ace68d300665cf920050fda50003bd1096 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 16 Jul 2019 00:02:40 +0000 Subject: [PATCH 189/451] Temporarily revert "add -fthinlto-index= option to clang-cl" This is causing testsuite failures on (at least) darwin release+asserts. This reverts commit r366146. 
llvm-svn: 366157 --- clang/include/clang/Driver/Options.td | 2 +- clang/test/Driver/cl-thinlto-backend.c | 9 --------- 2 files changed, 1 insertion(+), 10 deletions(-) delete mode 100644 clang/test/Driver/cl-thinlto-backend.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index dfd27fab796e3..957483c318647 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1270,7 +1270,7 @@ def flto_jobs_EQ : Joined<["-"], "flto-jobs=">, "of 0 means the number of threads will be derived from " "the number of CPUs detected)">; def fthinlto_index_EQ : Joined<["-"], "fthinlto-index=">, - Flags<[CoreOption, CC1Option]>, Group, + Flags<[CC1Option]>, Group, HelpText<"Perform ThinLTO importing using provided function summary index">; def fmacro_backtrace_limit_EQ : Joined<["-"], "fmacro-backtrace-limit=">, Group, Flags<[DriverOption, CoreOption]>; diff --git a/clang/test/Driver/cl-thinlto-backend.c b/clang/test/Driver/cl-thinlto-backend.c deleted file mode 100644 index a948c4ea33d9c..0000000000000 --- a/clang/test/Driver/cl-thinlto-backend.c +++ /dev/null @@ -1,9 +0,0 @@ -// RUN: %clang_cl -c -flto=thin -Fo%t.obj %s -// RUN: llvm-lto2 run -thinlto-distributed-indexes -o %t.exe %t.obj - -// -fthinlto_index should be passed to cc1 -// RUN: %clang_cl -### -c -fthinlto-index=%t.thinlto.bc -Fo%t1.obj \ -// RUN: %t.obj 2>&1 | FileCheck %s - -// CHECK: -fthinlto-index= -// CHECK: "-x" "ir" From 509903e887263182f580ae75c7fee2eaedb66fae Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 16 Jul 2019 00:37:17 +0000 Subject: [PATCH 190/451] [DebugInfo] Move function from line table to the prologue (NFC) In LLDB, when parsing type units, we don't need to parse the whole line table. Instead, we only need to parse the "support files" from the line table prologue. To make that possible, this patch moves the respective functions from the LineTable into the Prologue. 
Because I don't think users of the LineTable should have to know that these files come from the Prologue, I've left the original methods in place, and made them redirect to the LineTable. Differential revision: https://reviews.llvm.org/D64774 llvm-svn: 366158 --- .../llvm/DebugInfo/DWARF/DWARFDebugLine.h | 26 ++++--- llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 67 +++++++++---------- 2 files changed, 51 insertions(+), 42 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index 9a3ad2b08c99b..e7425c1923737 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -121,6 +121,17 @@ class DWARFDebugLine { return LineBase + (int8_t)LineRange - 1; } + /// Get DWARF-version aware access to the file name entry at the provided + /// index. + const llvm::DWARFDebugLine::FileNameEntry & + getFileNameEntry(uint64_t Index) const; + + bool hasFileAtIndex(uint64_t FileIndex) const; + + bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, + DILineInfoSpecifier::FileLineInfoKind Kind, + std::string &Result) const; + void clear(); void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const; Error parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr, @@ -240,16 +251,20 @@ class DWARFDebugLine { bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size, std::vector &Result) const; - bool hasFileAtIndex(uint64_t FileIndex) const; + bool hasFileAtIndex(uint64_t FileIndex) const { + return Prologue.hasFileAtIndex(FileIndex); + } /// Extracts filename by its index in filename table in prologue. /// In Dwarf 4, the files are 1-indexed and the current compilation file /// name is not represented in the list. In DWARF v5, the files are /// 0-indexed and the primary source file has the index 0. /// Returns true on success. 
- bool getFileNameByIndex(uint64_t FileIndex, const char *CompDir, + bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, DILineInfoSpecifier::FileLineInfoKind Kind, - std::string &Result) const; + std::string &Result) const { + return Prologue.getFileNameByIndex(FileIndex, CompDir, Kind, Result); + } /// Fills the Result argument with the file and line information /// corresponding to Address. Returns true on success. @@ -268,11 +283,6 @@ class DWARFDebugLine { std::function RecoverableErrorCallback, raw_ostream *OS = nullptr); - /// Get DWARF-version aware access to the file name entry at the provided - /// index. - const llvm::DWARFDebugLine::FileNameEntry & - getFileNameEntry(uint64_t Index) const; - using RowVector = std::vector; using RowIter = RowVector::const_iterator; using SequenceVector = std::vector; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 9f9aaabf1e89d..8a621084710e4 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -66,6 +66,26 @@ void DWARFDebugLine::ContentTypeTracker::trackContentType( DWARFDebugLine::Prologue::Prologue() { clear(); } +bool DWARFDebugLine::Prologue::hasFileAtIndex(uint64_t FileIndex) const { + uint16_t DwarfVersion = getVersion(); + assert(DwarfVersion != 0 && + "line table prologue has no dwarf version information"); + if (DwarfVersion >= 5) + return FileIndex < FileNames.size(); + return FileIndex != 0 && FileIndex <= FileNames.size(); +} + +const llvm::DWARFDebugLine::FileNameEntry & +DWARFDebugLine::Prologue::getFileNameEntry(uint64_t Index) const { + uint16_t DwarfVersion = getVersion(); + assert(DwarfVersion != 0 && + "line table prologue has no dwarf version information"); + // In DWARF v5 the file names are 0-indexed. 
+ if (DwarfVersion >= 5) + return FileNames[Index]; + return FileNames[Index - 1]; +} + void DWARFDebugLine::Prologue::clear() { TotalLength = PrologueLength = 0; SegSelectorSize = 0; @@ -968,30 +988,11 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl( return true; } -bool DWARFDebugLine::LineTable::hasFileAtIndex(uint64_t FileIndex) const { - uint16_t DwarfVersion = Prologue.getVersion(); - assert(DwarfVersion != 0 && "LineTable has no dwarf version information"); - if (DwarfVersion >= 5) - return FileIndex < Prologue.FileNames.size(); - return FileIndex != 0 && FileIndex <= Prologue.FileNames.size(); -} - -const llvm::DWARFDebugLine::FileNameEntry & -DWARFDebugLine::LineTable::getFileNameEntry(uint64_t Index) const { - uint16_t DwarfVersion = Prologue.getVersion(); - assert(DwarfVersion != 0 && "LineTable has no dwarf version information"); - // In DWARF v5 the file names are 0-indexed. - if (DwarfVersion >= 5) - return Prologue.FileNames[Index]; - else - return Prologue.FileNames[Index - 1]; -} - Optional DWARFDebugLine::LineTable::getSourceByIndex(uint64_t FileIndex, FileLineInfoKind Kind) const { - if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex)) + if (Kind == FileLineInfoKind::None || !Prologue.hasFileAtIndex(FileIndex)) return None; - const FileNameEntry &Entry = getFileNameEntry(FileIndex); + const FileNameEntry &Entry = Prologue.getFileNameEntry(FileIndex); if (Optional source = Entry.Source.getAsCString()) return StringRef(*source); return None; @@ -1005,10 +1006,10 @@ static bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) { sys::path::is_absolute(Path, sys::path::Style::windows); } -bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, - const char *CompDir, - FileLineInfoKind Kind, - std::string &Result) const { +bool DWARFDebugLine::Prologue::getFileNameByIndex(uint64_t FileIndex, + StringRef CompDir, + FileLineInfoKind Kind, + std::string &Result) const { if (Kind == FileLineInfoKind::None || 
!hasFileAtIndex(FileIndex)) return false; const FileNameEntry &Entry = getFileNameEntry(FileIndex); @@ -1022,20 +1023,18 @@ bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, SmallString<16> FilePath; StringRef IncludeDir; // Be defensive about the contents of Entry. - if (Prologue.getVersion() >= 5) { - if (Entry.DirIdx < Prologue.IncludeDirectories.size()) - IncludeDir = - Prologue.IncludeDirectories[Entry.DirIdx].getAsCString().getValue(); + if (getVersion() >= 5) { + if (Entry.DirIdx < IncludeDirectories.size()) + IncludeDir = IncludeDirectories[Entry.DirIdx].getAsCString().getValue(); } else { - if (0 < Entry.DirIdx && Entry.DirIdx <= Prologue.IncludeDirectories.size()) - IncludeDir = Prologue.IncludeDirectories[Entry.DirIdx - 1] - .getAsCString() - .getValue(); + if (0 < Entry.DirIdx && Entry.DirIdx <= IncludeDirectories.size()) + IncludeDir = + IncludeDirectories[Entry.DirIdx - 1].getAsCString().getValue(); // We may still need to append compilation directory of compile unit. // We know that FileName is not absolute, the only way to have an // absolute path at this point would be if IncludeDir is absolute. - if (CompDir && !isPathAbsoluteOnWindowsOrPosix(IncludeDir)) + if (!CompDir.empty() && !isPathAbsoluteOnWindowsOrPosix(IncludeDir)) sys::path::append(FilePath, CompDir); } From 50f0c824532886bea5b7536378a16ac9e3b12f9c Mon Sep 17 00:00:00 2001 From: Nathan Lanza Date: Tue, 16 Jul 2019 00:57:50 +0000 Subject: [PATCH 191/451] Allow for vendor prefixes in a list test Summary: Preprocessor/init.c contains a line that explicitly checks for the string __VERSION__ "Clang{{.*}} It's valid to have a toolchain configured to emit a vendor prefix before the word Clang. e.g. 
__VERSION__ "Vendor Clang{{.*}} Subscribers: fedor.sergeev, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D64772 llvm-svn: 366159 --- clang/test/Preprocessor/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c index fce85e05f63f5..d2d7efc0ae709 100644 --- a/clang/test/Preprocessor/init.c +++ b/clang/test/Preprocessor/init.c @@ -8169,7 +8169,7 @@ // SPARC:#define __UINT_LEAST8_MAX__ 255 // SPARC:#define __UINT_LEAST8_TYPE__ unsigned char // SPARC:#define __USER_LABEL_PREFIX__ -// SPARC:#define __VERSION__ "Clang{{.*}} +// SPARC:#define __VERSION__ "{{.*}}Clang{{.*}} // SPARC:#define __WCHAR_MAX__ 2147483647 // SPARC:#define __WCHAR_TYPE__ int // SPARC:#define __WCHAR_WIDTH__ 32 From 01ee172e9e4a32a326105b6817bb801a07f44bfa Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 16 Jul 2019 00:59:04 +0000 Subject: [PATCH 192/451] Revert "[DebugInfo] Move function from line table to the prologue (NFC)" This broke LLD, which I didn't have enabled. llvm-svn: 366160 --- .../llvm/DebugInfo/DWARF/DWARFDebugLine.h | 26 +++---- llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 67 ++++++++++--------- 2 files changed, 42 insertions(+), 51 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index e7425c1923737..9a3ad2b08c99b 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -121,17 +121,6 @@ class DWARFDebugLine { return LineBase + (int8_t)LineRange - 1; } - /// Get DWARF-version aware access to the file name entry at the provided - /// index. 
- const llvm::DWARFDebugLine::FileNameEntry & - getFileNameEntry(uint64_t Index) const; - - bool hasFileAtIndex(uint64_t FileIndex) const; - - bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, - DILineInfoSpecifier::FileLineInfoKind Kind, - std::string &Result) const; - void clear(); void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const; Error parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr, @@ -251,20 +240,16 @@ class DWARFDebugLine { bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size, std::vector &Result) const; - bool hasFileAtIndex(uint64_t FileIndex) const { - return Prologue.hasFileAtIndex(FileIndex); - } + bool hasFileAtIndex(uint64_t FileIndex) const; /// Extracts filename by its index in filename table in prologue. /// In Dwarf 4, the files are 1-indexed and the current compilation file /// name is not represented in the list. In DWARF v5, the files are /// 0-indexed and the primary source file has the index 0. /// Returns true on success. - bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, + bool getFileNameByIndex(uint64_t FileIndex, const char *CompDir, DILineInfoSpecifier::FileLineInfoKind Kind, - std::string &Result) const { - return Prologue.getFileNameByIndex(FileIndex, CompDir, Kind, Result); - } + std::string &Result) const; /// Fills the Result argument with the file and line information /// corresponding to Address. Returns true on success. @@ -283,6 +268,11 @@ class DWARFDebugLine { std::function RecoverableErrorCallback, raw_ostream *OS = nullptr); + /// Get DWARF-version aware access to the file name entry at the provided + /// index. 
+ const llvm::DWARFDebugLine::FileNameEntry & + getFileNameEntry(uint64_t Index) const; + using RowVector = std::vector; using RowIter = RowVector::const_iterator; using SequenceVector = std::vector; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 8a621084710e4..9f9aaabf1e89d 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -66,26 +66,6 @@ void DWARFDebugLine::ContentTypeTracker::trackContentType( DWARFDebugLine::Prologue::Prologue() { clear(); } -bool DWARFDebugLine::Prologue::hasFileAtIndex(uint64_t FileIndex) const { - uint16_t DwarfVersion = getVersion(); - assert(DwarfVersion != 0 && - "line table prologue has no dwarf version information"); - if (DwarfVersion >= 5) - return FileIndex < FileNames.size(); - return FileIndex != 0 && FileIndex <= FileNames.size(); -} - -const llvm::DWARFDebugLine::FileNameEntry & -DWARFDebugLine::Prologue::getFileNameEntry(uint64_t Index) const { - uint16_t DwarfVersion = getVersion(); - assert(DwarfVersion != 0 && - "line table prologue has no dwarf version information"); - // In DWARF v5 the file names are 0-indexed. 
- if (DwarfVersion >= 5) - return FileNames[Index]; - return FileNames[Index - 1]; -} - void DWARFDebugLine::Prologue::clear() { TotalLength = PrologueLength = 0; SegSelectorSize = 0; @@ -988,11 +968,30 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl( return true; } +bool DWARFDebugLine::LineTable::hasFileAtIndex(uint64_t FileIndex) const { + uint16_t DwarfVersion = Prologue.getVersion(); + assert(DwarfVersion != 0 && "LineTable has no dwarf version information"); + if (DwarfVersion >= 5) + return FileIndex < Prologue.FileNames.size(); + return FileIndex != 0 && FileIndex <= Prologue.FileNames.size(); +} + +const llvm::DWARFDebugLine::FileNameEntry & +DWARFDebugLine::LineTable::getFileNameEntry(uint64_t Index) const { + uint16_t DwarfVersion = Prologue.getVersion(); + assert(DwarfVersion != 0 && "LineTable has no dwarf version information"); + // In DWARF v5 the file names are 0-indexed. + if (DwarfVersion >= 5) + return Prologue.FileNames[Index]; + else + return Prologue.FileNames[Index - 1]; +} + Optional DWARFDebugLine::LineTable::getSourceByIndex(uint64_t FileIndex, FileLineInfoKind Kind) const { - if (Kind == FileLineInfoKind::None || !Prologue.hasFileAtIndex(FileIndex)) + if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex)) return None; - const FileNameEntry &Entry = Prologue.getFileNameEntry(FileIndex); + const FileNameEntry &Entry = getFileNameEntry(FileIndex); if (Optional source = Entry.Source.getAsCString()) return StringRef(*source); return None; @@ -1006,10 +1005,10 @@ static bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) { sys::path::is_absolute(Path, sys::path::Style::windows); } -bool DWARFDebugLine::Prologue::getFileNameByIndex(uint64_t FileIndex, - StringRef CompDir, - FileLineInfoKind Kind, - std::string &Result) const { +bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, + const char *CompDir, + FileLineInfoKind Kind, + std::string &Result) const { if (Kind == FileLineInfoKind::None || 
!hasFileAtIndex(FileIndex)) return false; const FileNameEntry &Entry = getFileNameEntry(FileIndex); @@ -1023,18 +1022,20 @@ bool DWARFDebugLine::Prologue::getFileNameByIndex(uint64_t FileIndex, SmallString<16> FilePath; StringRef IncludeDir; // Be defensive about the contents of Entry. - if (getVersion() >= 5) { - if (Entry.DirIdx < IncludeDirectories.size()) - IncludeDir = IncludeDirectories[Entry.DirIdx].getAsCString().getValue(); - } else { - if (0 < Entry.DirIdx && Entry.DirIdx <= IncludeDirectories.size()) + if (Prologue.getVersion() >= 5) { + if (Entry.DirIdx < Prologue.IncludeDirectories.size()) IncludeDir = - IncludeDirectories[Entry.DirIdx - 1].getAsCString().getValue(); + Prologue.IncludeDirectories[Entry.DirIdx].getAsCString().getValue(); + } else { + if (0 < Entry.DirIdx && Entry.DirIdx <= Prologue.IncludeDirectories.size()) + IncludeDir = Prologue.IncludeDirectories[Entry.DirIdx - 1] + .getAsCString() + .getValue(); // We may still need to append compilation directory of compile unit. // We know that FileName is not absolute, the only way to have an // absolute path at this point would be if IncludeDir is absolute. 
- if (!CompDir.empty() && !isPathAbsoluteOnWindowsOrPosix(IncludeDir)) + if (CompDir && !isPathAbsoluteOnWindowsOrPosix(IncludeDir)) sys::path::append(FilePath, CompDir); } From 0d121273181f89b5296b02084fbb967d159b2c69 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Tue, 16 Jul 2019 01:02:32 +0000 Subject: [PATCH 193/451] [Target] Remove unused method Target::GetDefaultClangModuleSearchPaths llvm-svn: 366161 --- lldb/include/lldb/Target/Target.h | 2 -- lldb/source/Target/Target.cpp | 7 ------- 2 files changed, 9 deletions(-) diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h index 875a8b1e2c187..4ed11afc31ba6 100644 --- a/lldb/include/lldb/Target/Target.h +++ b/lldb/include/lldb/Target/Target.h @@ -491,8 +491,6 @@ class Target : public std::enable_shared_from_this, static FileSpecList GetDefaultDebugFileSearchPaths(); - static FileSpecList GetDefaultClangModuleSearchPaths(); - static ArchSpec GetDefaultArchitecture(); static void SetDefaultArchitecture(const ArchSpec &arch); diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index ac15749663f4c..4941cb585c554 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -2330,13 +2330,6 @@ FileSpecList Target::GetDefaultDebugFileSearchPaths() { return FileSpecList(); } -FileSpecList Target::GetDefaultClangModuleSearchPaths() { - TargetPropertiesSP properties_sp(Target::GetGlobalProperties()); - if (properties_sp) - return properties_sp->GetClangModuleSearchPaths(); - return FileSpecList(); -} - ArchSpec Target::GetDefaultArchitecture() { TargetPropertiesSP properties_sp(Target::GetGlobalProperties()); if (properties_sp) From 543ba4e9e0c421bedaea2d8a0f1965092cec300e Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Tue, 16 Jul 2019 01:03:06 +0000 Subject: [PATCH 194/451] [InstructionSimplify] Apply sext/trunc after pointer stripping Summary: - As the pointer stripping could trace through `addrspacecast` now, need to sext/trunc the offset 
to ensure it has the same width as the pointer after stripping. Reviewers: jdoerfert Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64768 llvm-svn: 366162 --- llvm/lib/Analysis/InstructionSimplify.cpp | 4 ++++ llvm/test/Transforms/InstSimplify/compare.ll | 11 ++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index c0d69f9275d1d..e34bf6f4e43f5 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -660,6 +660,10 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V, APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth()); V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds); + // As that strip may trace through `addrspacecast`, need to sext or trunc + // the offset calculated. + IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType(); + Offset = Offset.sextOrTrunc(IntPtrTy->getIntegerBitWidth()); Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset); if (V->getType()->isVectorTy()) diff --git a/llvm/test/Transforms/InstSimplify/compare.ll b/llvm/test/Transforms/InstSimplify/compare.ll index 899f198d48a31..570239eaf0c6e 100644 --- a/llvm/test/Transforms/InstSimplify/compare.ll +++ b/llvm/test/Transforms/InstSimplify/compare.ll @@ -1,5 +1,5 @@ ; RUN: opt < %s -instsimplify -S | FileCheck %s -target datalayout = "p:32:32" +target datalayout = "p:32:32-p1:64:64" define i1 @ptrtoint() { ; CHECK-LABEL: @ptrtoint( @@ -1358,4 +1358,13 @@ define i1 @constant_fold_null_inttoptr() { ret i1 %x } +; CHECK-LABEL: @cmp_through_addrspacecast( +; CHECK-NEXT: ret i1 true +define i1 @cmp_through_addrspacecast(i32 addrspace(1)* %p1) { + %p0 = addrspacecast i32 addrspace(1)* %p1 to i32* + %p0.1 = getelementptr inbounds i32, i32* %p0, i64 1 + %cmp = icmp ne i32* %p0, %p0.1 + ret i1 %cmp +} + attributes #0 = { 
"null-pointer-is-valid"="true" } From fa52e00c85ce3feeec14be34265781f721b966c0 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Tue, 16 Jul 2019 01:13:36 +0000 Subject: [PATCH 195/451] [Sema] Suppress additional warnings for C's zero initializer Summary: D28148 relaxed some checks for assigning { 0 } to a structure for all C standards, but it failed to handle structures with non-integer subobjects. Relax -Wmissing-braces checks for such structures, and add some additional tests. This fixes PR39931. Patch By: al3xtjames Reviewed By: Lekensteyn Differential Revision: https://reviews.llvm.org/D61838 llvm-svn: 366163 --- clang/lib/AST/Expr.cpp | 4 ++-- clang/test/Sema/zero-initializer.c | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index f8017bb7adebe..10ab2bf72b72e 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -2303,11 +2303,11 @@ bool InitListExpr::isTransparent() const { bool InitListExpr::isIdiomaticZeroInitializer(const LangOptions &LangOpts) const { assert(isSyntacticForm() && "only test syntactic form as zero initializer"); - if (LangOpts.CPlusPlus || getNumInits() != 1) { + if (LangOpts.CPlusPlus || getNumInits() != 1 || !getInit(0)) { return false; } - const IntegerLiteral *Lit = dyn_cast(getInit(0)); + const IntegerLiteral *Lit = dyn_cast(getInit(0)->IgnoreImplicit()); return Lit && Lit->getValue() == 0; } diff --git a/clang/test/Sema/zero-initializer.c b/clang/test/Sema/zero-initializer.c index 0ab410d4c6d55..e54021a582c52 100644 --- a/clang/test/Sema/zero-initializer.c +++ b/clang/test/Sema/zero-initializer.c @@ -7,6 +7,8 @@ struct A { int a; }; struct B { struct A a; }; struct C { struct B b; }; struct D { struct C c; int n; }; +struct E { short e; }; +struct F { struct E e; int n; }; int main(void) { @@ -23,6 +25,9 @@ int main(void) struct C p = { 0 }; // no-warning struct C q = { 9 }; // warning suppressed for struct with single element struct D r = { 9 }; // 
expected-warning {{suggest braces around initialization of subobject}} expected-warning {{missing field 'n' initializer}} + struct F s = { 0 }; // no-warning + struct F t = { 9 }; // expected-warning {{suggest braces around initialization of subobject}} expected-warning {{missing field 'n' initializer}} + f = (struct foo ) { 0 }; // no-warning g = (struct foo ) { 9 }; // expected-warning {{missing field 'y' initializer}} h = (struct foo ) { 9, 9 }; // no-warning @@ -36,6 +41,8 @@ int main(void) p = (struct C) { 0 }; // no-warning q = (struct C) { 9 }; // warning suppressed for struct with single element r = (struct D) { 9 }; // expected-warning {{suggest braces around initialization of subobject}} expected-warning {{missing field 'n' initializer}} + s = (struct F) { 0 }; // no-warning + t = (struct F) { 9 }; // expected-warning {{suggest braces around initialization of subobject}} expected-warning {{missing field 'n' initializer}} return 0; } From ca16d280f7ed9509398ba8869a123c9f8ede72f7 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 16 Jul 2019 01:21:25 +0000 Subject: [PATCH 196/451] Re-land "[DebugInfo] Move function from line table to the prologue (NFC)" In LLDB, when parsing type units, we don't need to parse the whole line table. Instead, we only need to parse the "support files" from the line table prologue. To make that possible, this patch moves the respective functions from the LineTable into the Prologue. Because I don't think users of the LineTable should have to know that these files come from the Prologue, I've left the original methods in place, and made them redirect to the Prologue.
Differential revision: https://reviews.llvm.org/D64774 llvm-svn: 366164 --- lld/ELF/InputFiles.cpp | 2 +- .../llvm/DebugInfo/DWARF/DWARFDebugLine.h | 26 ++++--- llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 67 +++++++++---------- 3 files changed, 52 insertions(+), 43 deletions(-) diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 470d877f3fbf3..89b178decba2a 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -320,7 +320,7 @@ ObjFile::getVariableLoc(StringRef name) { // Take file name string from line table. std::string fileName; if (!it->second.lt->getFileNameByIndex( - it->second.file, nullptr, + it->second.file, {}, DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, fileName)) return None; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index 9a3ad2b08c99b..e7425c1923737 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -121,6 +121,17 @@ class DWARFDebugLine { return LineBase + (int8_t)LineRange - 1; } + /// Get DWARF-version aware access to the file name entry at the provided + /// index. + const llvm::DWARFDebugLine::FileNameEntry & + getFileNameEntry(uint64_t Index) const; + + bool hasFileAtIndex(uint64_t FileIndex) const; + + bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, + DILineInfoSpecifier::FileLineInfoKind Kind, + std::string &Result) const; + void clear(); void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const; Error parse(const DWARFDataExtractor &DebugLineData, uint32_t *OffsetPtr, @@ -240,16 +251,20 @@ class DWARFDebugLine { bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size, std::vector &Result) const; - bool hasFileAtIndex(uint64_t FileIndex) const; + bool hasFileAtIndex(uint64_t FileIndex) const { + return Prologue.hasFileAtIndex(FileIndex); + } /// Extracts filename by its index in filename table in prologue. 
/// In Dwarf 4, the files are 1-indexed and the current compilation file /// name is not represented in the list. In DWARF v5, the files are /// 0-indexed and the primary source file has the index 0. /// Returns true on success. - bool getFileNameByIndex(uint64_t FileIndex, const char *CompDir, + bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, DILineInfoSpecifier::FileLineInfoKind Kind, - std::string &Result) const; + std::string &Result) const { + return Prologue.getFileNameByIndex(FileIndex, CompDir, Kind, Result); + } /// Fills the Result argument with the file and line information /// corresponding to Address. Returns true on success. @@ -268,11 +283,6 @@ class DWARFDebugLine { std::function RecoverableErrorCallback, raw_ostream *OS = nullptr); - /// Get DWARF-version aware access to the file name entry at the provided - /// index. - const llvm::DWARFDebugLine::FileNameEntry & - getFileNameEntry(uint64_t Index) const; - using RowVector = std::vector; using RowIter = RowVector::const_iterator; using SequenceVector = std::vector; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 9f9aaabf1e89d..8a621084710e4 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -66,6 +66,26 @@ void DWARFDebugLine::ContentTypeTracker::trackContentType( DWARFDebugLine::Prologue::Prologue() { clear(); } +bool DWARFDebugLine::Prologue::hasFileAtIndex(uint64_t FileIndex) const { + uint16_t DwarfVersion = getVersion(); + assert(DwarfVersion != 0 && + "line table prologue has no dwarf version information"); + if (DwarfVersion >= 5) + return FileIndex < FileNames.size(); + return FileIndex != 0 && FileIndex <= FileNames.size(); +} + +const llvm::DWARFDebugLine::FileNameEntry & +DWARFDebugLine::Prologue::getFileNameEntry(uint64_t Index) const { + uint16_t DwarfVersion = getVersion(); + assert(DwarfVersion != 0 && + "line table prologue has no dwarf version 
information"); + // In DWARF v5 the file names are 0-indexed. + if (DwarfVersion >= 5) + return FileNames[Index]; + return FileNames[Index - 1]; +} + void DWARFDebugLine::Prologue::clear() { TotalLength = PrologueLength = 0; SegSelectorSize = 0; @@ -968,30 +988,11 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl( return true; } -bool DWARFDebugLine::LineTable::hasFileAtIndex(uint64_t FileIndex) const { - uint16_t DwarfVersion = Prologue.getVersion(); - assert(DwarfVersion != 0 && "LineTable has no dwarf version information"); - if (DwarfVersion >= 5) - return FileIndex < Prologue.FileNames.size(); - return FileIndex != 0 && FileIndex <= Prologue.FileNames.size(); -} - -const llvm::DWARFDebugLine::FileNameEntry & -DWARFDebugLine::LineTable::getFileNameEntry(uint64_t Index) const { - uint16_t DwarfVersion = Prologue.getVersion(); - assert(DwarfVersion != 0 && "LineTable has no dwarf version information"); - // In DWARF v5 the file names are 0-indexed. - if (DwarfVersion >= 5) - return Prologue.FileNames[Index]; - else - return Prologue.FileNames[Index - 1]; -} - Optional DWARFDebugLine::LineTable::getSourceByIndex(uint64_t FileIndex, FileLineInfoKind Kind) const { - if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex)) + if (Kind == FileLineInfoKind::None || !Prologue.hasFileAtIndex(FileIndex)) return None; - const FileNameEntry &Entry = getFileNameEntry(FileIndex); + const FileNameEntry &Entry = Prologue.getFileNameEntry(FileIndex); if (Optional source = Entry.Source.getAsCString()) return StringRef(*source); return None; @@ -1005,10 +1006,10 @@ static bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) { sys::path::is_absolute(Path, sys::path::Style::windows); } -bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, - const char *CompDir, - FileLineInfoKind Kind, - std::string &Result) const { +bool DWARFDebugLine::Prologue::getFileNameByIndex(uint64_t FileIndex, + StringRef CompDir, + FileLineInfoKind Kind, + std::string 
&Result) const { if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex)) return false; const FileNameEntry &Entry = getFileNameEntry(FileIndex); @@ -1022,20 +1023,18 @@ bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, SmallString<16> FilePath; StringRef IncludeDir; // Be defensive about the contents of Entry. - if (Prologue.getVersion() >= 5) { - if (Entry.DirIdx < Prologue.IncludeDirectories.size()) - IncludeDir = - Prologue.IncludeDirectories[Entry.DirIdx].getAsCString().getValue(); + if (getVersion() >= 5) { + if (Entry.DirIdx < IncludeDirectories.size()) + IncludeDir = IncludeDirectories[Entry.DirIdx].getAsCString().getValue(); } else { - if (0 < Entry.DirIdx && Entry.DirIdx <= Prologue.IncludeDirectories.size()) - IncludeDir = Prologue.IncludeDirectories[Entry.DirIdx - 1] - .getAsCString() - .getValue(); + if (0 < Entry.DirIdx && Entry.DirIdx <= IncludeDirectories.size()) + IncludeDir = + IncludeDirectories[Entry.DirIdx - 1].getAsCString().getValue(); // We may still need to append compilation directory of compile unit. // We know that FileName is not absolute, the only way to have an // absolute path at this point would be if IncludeDir is absolute. - if (CompDir && !isPathAbsoluteOnWindowsOrPosix(IncludeDir)) + if (!CompDir.empty() && !isPathAbsoluteOnWindowsOrPosix(IncludeDir)) sys::path::append(FilePath, CompDir); } From 492ce8cc8b0831924f7f0479f37f2e708a8dad3f Mon Sep 17 00:00:00 2001 From: Bob Haarman Date: Tue, 16 Jul 2019 01:35:49 +0000 Subject: [PATCH 197/451] reland "add -fthinlto-index= option to clang-cl" Summary: This is a reland of r366146, adding in the previously missing '--' flag that prevents filenames from being interpreted as flags. Original description: This adds a -fthinlto-index= option to clang-cl, which allows it to be used to drive ThinLTO backend passes. This allows clang-cl to be used for distributed ThinLTO. 
Tags: #clang llvm-svn: 366165 --- clang/include/clang/Driver/Options.td | 2 +- clang/test/Driver/cl-thinlto-backend.c | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 clang/test/Driver/cl-thinlto-backend.c diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 957483c318647..dfd27fab796e3 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1270,7 +1270,7 @@ def flto_jobs_EQ : Joined<["-"], "flto-jobs=">, "of 0 means the number of threads will be derived from " "the number of CPUs detected)">; def fthinlto_index_EQ : Joined<["-"], "fthinlto-index=">, - Flags<[CC1Option]>, Group, + Flags<[CoreOption, CC1Option]>, Group, HelpText<"Perform ThinLTO importing using provided function summary index">; def fmacro_backtrace_limit_EQ : Joined<["-"], "fmacro-backtrace-limit=">, Group, Flags<[DriverOption, CoreOption]>; diff --git a/clang/test/Driver/cl-thinlto-backend.c b/clang/test/Driver/cl-thinlto-backend.c new file mode 100644 index 0000000000000..4697a994906ab --- /dev/null +++ b/clang/test/Driver/cl-thinlto-backend.c @@ -0,0 +1,9 @@ +// RUN: %clang_cl -c -flto=thin -Fo%t.obj -- %s +// RUN: llvm-lto2 run -thinlto-distributed-indexes -o %t.exe %t.obj + +// -fthinlto_index should be passed to cc1 +// RUN: %clang_cl -### -c -fthinlto-index=%t.thinlto.bc -Fo%t1.obj \ +// RUN: -- %t.obj 2>&1 | FileCheck %s + +// CHECK: -fthinlto-index= +// CHECK: "-x" "ir" From 49e14cefbe42262b0dc6e4daa8ac4e48fed8cec5 Mon Sep 17 00:00:00 2001 From: Nathan Lanza Date: Tue, 16 Jul 2019 02:05:52 +0000 Subject: [PATCH 198/451] Change a lit test to permit vendor specific clang version A test manually checks for the string `__VERSION__ "Clang`. This needs to permit vendor specific variants. 
llvm-svn: 366166 --- clang/test/Preprocessor/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c index d2d7efc0ae709..954f02a014344 100644 --- a/clang/test/Preprocessor/init.c +++ b/clang/test/Preprocessor/init.c @@ -9041,7 +9041,7 @@ // X86_64-CLOUDABI:#define __UINT_LEAST8_MAX__ 255 // X86_64-CLOUDABI:#define __UINT_LEAST8_TYPE__ unsigned char // X86_64-CLOUDABI:#define __USER_LABEL_PREFIX__ -// X86_64-CLOUDABI:#define __VERSION__ "Clang{{.*}} +// X86_64-CLOUDABI:#define __VERSION__ "{{.*}}Clang{{.*}} // X86_64-CLOUDABI:#define __WCHAR_MAX__ 2147483647 // X86_64-CLOUDABI:#define __WCHAR_TYPE__ int // X86_64-CLOUDABI:#define __WCHAR_WIDTH__ 32 From 1739b700b17c93f6ad21cea2fc7e3febba51d22c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 02:46:05 +0000 Subject: [PATCH 199/451] AMDGPU: Avoid code predicates for extload PatFrags Use the MemoryVT field. This will be necessary for tablegen to automatically handle patterns for GlobalISel. Doesn't handle the d16 lo/hi patterns. Those are a special case since it involves the custom node type.
llvm-svn: 366168 --- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 45 +++++++------------ llvm/lib/Target/AMDGPU/BUFInstructions.td | 27 +++++++---- .../Target/AMDGPU/EvergreenInstructions.td | 4 -- llvm/lib/Target/AMDGPU/FLATInstructions.td | 18 +++++--- llvm/lib/Target/AMDGPU/R600Instructions.td | 26 +++++++++++ 5 files changed, 72 insertions(+), 48 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index b29b0e7b17e3d..9e9510e0fa4a0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -379,27 +379,6 @@ class FlatStoreAddress : CodePatPred<[{ AS == AMDGPUAS::GLOBAL_ADDRESS; }]>; -class AZExtLoadBase : PatFrag<(ops node:$ptr), - (ld_node node:$ptr), [{ - LoadSDNode *L = cast(N); - return L->getExtensionType() == ISD::ZEXTLOAD || - L->getExtensionType() == ISD::EXTLOAD; -}]>; - -def az_extload : AZExtLoadBase ; - -def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i8; -}]>; - -def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i16; -}]>; - -def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ - return cast(N)->getMemoryVT() == MVT::i32; -}]>; - class PrivateLoad : LoadFrag , PrivateAddress; class PrivateStore : StoreFrag , PrivateAddress; @@ -419,9 +398,11 @@ class ConstantLoad : LoadFrag , ConstantAddress; def load_private : PrivateLoad ; -def az_extloadi8_private : PrivateLoad ; +def extloadi8_private : PrivateLoad ; +def zextloadi8_private : PrivateLoad ; def sextloadi8_private : PrivateLoad ; -def az_extloadi16_private : PrivateLoad ; +def extloadi16_private : PrivateLoad ; +def zextloadi16_private : PrivateLoad ; def sextloadi16_private : PrivateLoad ; def store_private : PrivateStore ; @@ -433,9 +414,11 @@ def truncstorei8_hi16_private : StoreHi16, PrivateAddress; def load_global : GlobalLoad ; 
def sextloadi8_global : GlobalLoad ; -def az_extloadi8_global : GlobalLoad ; +def extloadi8_global : GlobalLoad ; +def zextloadi8_global : GlobalLoad ; def sextloadi16_global : GlobalLoad ; -def az_extloadi16_global : GlobalLoad ; +def extloadi16_global : GlobalLoad ; +def zextloadi16_global : GlobalLoad ; def atomic_load_global : GlobalLoad; def store_global : GlobalStore ; @@ -479,9 +462,11 @@ def store_align16_local : Aligned16Bytes < >; def load_flat : FlatLoad ; -def az_extloadi8_flat : FlatLoad ; +def extloadi8_flat : FlatLoad ; +def zextloadi8_flat : FlatLoad ; def sextloadi8_flat : FlatLoad ; -def az_extloadi16_flat : FlatLoad ; +def extloadi16_flat : FlatLoad ; +def zextloadi16_flat : FlatLoad ; def sextloadi16_flat : FlatLoad ; def atomic_load_flat : FlatLoad; @@ -495,9 +480,11 @@ def truncstorei16_hi16_flat : StoreHi16, FlatStoreAddress; def constant_load : ConstantLoad; def sextloadi8_constant : ConstantLoad ; -def az_extloadi8_constant : ConstantLoad ; +def extloadi8_constant : ConstantLoad ; +def zextloadi8_constant : ConstantLoad ; def sextloadi16_constant : ConstantLoad ; -def az_extloadi16_constant : ConstantLoad ; +def extloadi16_constant : ConstantLoad ; +def zextloadi16_constant : ConstantLoad ; class local_binary_atomic_op : diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 7d9ca59c6d08a..4ff9aeb2e314e 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -851,9 +851,11 @@ defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads < "buffer_load_dwordx4", VReg_128, v4i32 >; -defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, az_extloadi8_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, zextloadi8_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>; -defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, 
az_extloadi16_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, extloadi16_global>; +defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, zextloadi16_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, sextloadi16_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORD", i32, load_global>; defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX2", v2i32, load_global>; @@ -1437,9 +1439,11 @@ multiclass MUBUFLoad_Atomic_Pattern ; -def : MUBUFLoad_PatternADDR64 ; +def : MUBUFLoad_PatternADDR64 ; +def : MUBUFLoad_PatternADDR64 ; def : MUBUFLoad_PatternADDR64 ; -def : MUBUFLoad_PatternADDR64 ; +def : MUBUFLoad_PatternADDR64 ; +def : MUBUFLoad_PatternADDR64 ; defm : MUBUFLoad_Atomic_Pattern ; defm : MUBUFLoad_Atomic_Pattern ; @@ -1458,9 +1462,11 @@ multiclass MUBUFLoad_Pattern ; -defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern ; -defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern ; @@ -1497,11 +1503,14 @@ multiclass MUBUFScratchLoadPat_D16 ; -defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; -defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; -defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td index fbe7d757f3513..0550092ce1d6e 100644 --- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td @@ -30,10 +30,6 @@ class EGOrCaymanPat : AMDGPUPat { let SubtargetPredicate = isEGorCayman; } -// FIXME: These are deprecated -def az_extloadi8_local : LocalLoad ; -def az_extloadi16_local : LocalLoad ; - 
//===----------------------------------------------------------------------===// // Evergreen / Cayman store instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index df334790b858f..4070d94dd4ab2 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -767,11 +767,14 @@ class FlatSignedAtomicPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; def : FlatLoadPat ; -def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; def : FlatLoadPat ; -def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; @@ -849,11 +852,14 @@ def atomic_pk_fadd_global : global_binary_atomic_op_frag; +def : FlatLoadSignedPat ; +def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; -def : FlatLoadSignedPat ; +def : FlatLoadSignedPat ; +def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; -def : FlatLoadSignedPat ; +def : FlatLoadSignedPat ; +def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index dcbedbd8cfd68..d3ce7ffd673c5 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -295,6 +295,32 @@ class VTX_READ pattern> let VTXInst = 1; } +// FIXME: Deprecated. 
+class AZExtLoadBase : PatFrag<(ops node:$ptr), + (ld_node node:$ptr), [{ + LoadSDNode *L = cast(N); + return L->getExtensionType() == ISD::ZEXTLOAD || + L->getExtensionType() == ISD::EXTLOAD; +}]>; + +def az_extload : AZExtLoadBase ; + +def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; + +def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; + +def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + +// FIXME: These are deprecated +def az_extloadi8_local : LocalLoad ; +def az_extloadi16_local : LocalLoad ; + class LoadParamFrag : PatFrag < (ops node:$ptr), (load_type node:$ptr), [{ return isConstantLoad(cast(N), 0) || From e7e8789a632f3dcd029b0f78230e61773bdb3586 Mon Sep 17 00:00:00 2001 From: Ali Tamur Date: Tue, 16 Jul 2019 03:20:15 +0000 Subject: [PATCH 200/451] Revert "[OPENMP]Add support for analysis of if clauses." This reverts commit rL366068. The patch broke 86 tests under clang/test/OpenMP/ when run with address sanitizer. 
llvm-svn: 366169 --- clang/include/clang/AST/OpenMPClause.h | 7 +- clang/lib/AST/OpenMPClause.cpp | 19 - clang/test/Analysis/cfg-openmp.cpp | 532 ++++++++---------- clang/test/OpenMP/cancel_if_messages.cpp | 10 - .../distribute_parallel_for_if_messages.cpp | 7 - ...stribute_parallel_for_simd_if_messages.cpp | 7 - .../test/OpenMP/parallel_for_if_messages.cpp | 7 - .../OpenMP/parallel_for_simd_if_messages.cpp | 7 - clang/test/OpenMP/parallel_if_messages.cpp | 7 - .../OpenMP/parallel_sections_if_messages.cpp | 8 - clang/test/OpenMP/target_data_if_messages.cpp | 7 - .../OpenMP/target_enter_data_if_messages.cpp | 7 - .../OpenMP/target_exit_data_if_messages.cpp | 7 - clang/test/OpenMP/target_if_messages.cpp | 7 - .../target_parallel_for_if_messages.cpp | 7 - .../target_parallel_for_simd_if_messages.cpp | 7 - .../OpenMP/target_parallel_if_messages.cpp | 7 - clang/test/OpenMP/target_simd_if_messages.cpp | 7 - .../target_teams_distribute_if_messages.cpp | 7 - ...ms_distribute_parallel_for_if_messages.cpp | 7 - ...stribute_parallel_for_simd_if_messages.cpp | 8 - ...rget_teams_distribute_simd_if_messages.cpp | 7 - .../test/OpenMP/target_teams_if_messages.cpp | 7 - .../test/OpenMP/target_update_if_messages.cpp | 7 - clang/test/OpenMP/task_if_messages.cpp | 7 - ...ms_distribute_parallel_for_if_messages.cpp | 8 - ...stribute_parallel_for_simd_if_messages.cpp | 8 - 27 files changed, 239 insertions(+), 494 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index eadcc62a34575..c6daf73a623bd 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -501,10 +501,11 @@ class OMPIfClause : public OMPClause, public OMPClauseWithPreInit { return const_child_range(&Condition, &Condition + 1); } - child_range used_children(); + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } const_child_range used_children() const { - auto Children = 
const_cast(this)->used_children(); - return const_child_range(Children.begin(), Children.end()); + return const_child_range(const_child_iterator(), const_child_iterator()); } static bool classof(const OMPClause *T) { diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 9d8a7ebc3023e..41520b380276c 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -209,25 +209,6 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C) return nullptr; } -/// Gets the address of the original, non-captured, expression used in the -/// clause as the preinitializer. -static Stmt **getAddrOfExprAsWritten(Stmt *S) { - if (!S) - return nullptr; - if (auto *DS = dyn_cast(S)) { - assert(DS->isSingleDecl() && "Only single expression must be captured."); - if (auto *OED = dyn_cast(DS->getSingleDecl())) - return OED->getInitAddress(); - } - return nullptr; -} - -OMPClause::child_range OMPIfClause::used_children() { - if (Stmt **C = getAddrOfExprAsWritten(getPreInitStmt())) - return child_range(C, C + 1); - return child_range(&Condition, &Condition + 1); -} - OMPOrderedClause *OMPOrderedClause::Create(const ASTContext &C, Expr *Num, unsigned NumLoops, SourceLocation StartLoc, diff --git a/clang/test/Analysis/cfg-openmp.cpp b/clang/test/Analysis/cfg-openmp.cpp index b608606a83f8c..2f734d14b0216 100644 --- a/clang/test/Analysis/cfg-openmp.cpp +++ b/clang/test/Analysis/cfg-openmp.cpp @@ -1,402 +1,340 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG %s 2>&1 -fopenmp -fopenmp-version=45 | FileCheck %s +// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG %s 2>&1 -fopenmp | FileCheck %s // CHECK-LABEL: void xxx(int argc) void xxx(int argc) { // CHECK: [B1] // CHECK-NEXT: 1: int x; -// CHECK-NEXT: 2: int cond; - int x, cond; -// CHECK-NEXT: [[#ATOM:]]: x -// CHECK-NEXT: [[#ATOM+1]]: [B1.[[#ATOM]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#ATOM+2]]: argc -// CHECK-NEXT: 
[[#ATOM+3]]: [B1.[[#ATOM+2]]] = [B1.[[#ATOM+1]]] -// CHECK-NEXT: [[#ATOM+4]]: #pragma omp atomic read -// CHECK-NEXT: [B1.[[#ATOM+3]]]; + int x; +// CHECK-NEXT: 2: x +// CHECK-NEXT: 3: [B1.2] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 4: argc +// CHECK-NEXT: 5: [B1.4] = [B1.3] +// CHECK-NEXT: 6: #pragma omp atomic read +// CHECK-NEXT: [B1.5]; #pragma omp atomic read argc = x; -// CHECK-NEXT: [[#CRIT:]]: x -// CHECK-NEXT: [[#CRIT+1]]: [B1.[[#CRIT]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#CRIT+2]]: argc -// CHECK-NEXT: [[#CRIT+3]]: [B1.[[#CRIT+2]]] = [B1.[[#CRIT+1]]] -// CHECK-NEXT: [[#CRIT+4]]: #pragma omp critical -// CHECK-NEXT: [B1.[[#CRIT+3]]]; +// CHECK-NEXT: 7: x +// CHECK-NEXT: 8: [B1.7] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 9: argc +// CHECK-NEXT: 10: [B1.9] = [B1.8] +// CHECK-NEXT: 11: #pragma omp critical +// CHECK-NEXT: [B1.10]; #pragma omp critical argc = x; -// CHECK-NEXT: [[#DPF:]]: x -// CHECK-NEXT: [[#DPF+1]]: [B1.[[#DPF]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#DPF+2]]: argc -// CHECK-NEXT: [[#DPF+3]]: [B1.[[#DPF+2]]] = [B1.[[#DPF+1]]] -// CHECK-NEXT: [[#DPF+4]]: cond -// CHECK-NEXT: [[#DPF+5]]: [B1.[[#DPF+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#DPF+6]]: [B1.[[#DPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#DPF+7]]: #pragma omp distribute parallel for if(parallel: cond) +// CHECK-NEXT: 12: x +// CHECK-NEXT: 13: [B1.12] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 14: argc +// CHECK-NEXT: 15: [B1.14] = [B1.13] +// CHECK-NEXT: 16: #pragma omp distribute parallel for // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#DPF+3]]]; -#pragma omp distribute parallel for if(parallel:cond) +// CHECK-NEXT: [B1.15]; +#pragma omp distribute parallel for for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [[#DPFS:]]: x -// CHECK-NEXT: [[#DPFS+1]]: [B1.[[#DPFS]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 
[[#DPFS+2]]: argc -// CHECK-NEXT: [[#DPFS+3]]: [B1.[[#DPFS+2]]] = [B1.[[#DPFS+1]]] -// CHECK-NEXT: [[#DPFS+4]]: cond -// CHECK-NEXT: [[#DPFS+5]]: [B1.[[#DPFS+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#DPFS+6]]: [B1.[[#DPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#DPFS+7]]: #pragma omp distribute parallel for simd if(cond) +// CHECK-NEXT: 17: x +// CHECK-NEXT: 18: [B1.17] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 19: argc +// CHECK-NEXT: 20: [B1.19] = [B1.18] +// CHECK-NEXT: 21: #pragma omp distribute parallel for simd // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#DPFS+3]]]; -#pragma omp distribute parallel for simd if(cond) +// CHECK-NEXT: [B1.20]; +#pragma omp distribute parallel for simd for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [[#DS:]]: x -// CHECK-NEXT: [[#DS+1]]: [B1.[[#DS]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#DS+2]]: argc -// CHECK-NEXT: [[#DS+3]]: [B1.[[#DS+2]]] = [B1.[[#DS+1]]] -// CHECK-NEXT: [[#DS+4]]: #pragma omp distribute simd +// CHECK-NEXT: 22: x +// CHECK-NEXT: 23: [B1.22] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 24: argc +// CHECK-NEXT: 25: [B1.24] = [B1.23] +// CHECK-NEXT: 26: #pragma omp distribute simd // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#DS+3]]]; +// CHECK-NEXT: [B1.25]; #pragma omp distribute simd for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [[#FOR:]]: x -// CHECK-NEXT: [[#FOR+1]]: [B1.[[#FOR]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#FOR+2]]: argc -// CHECK-NEXT: [[#FOR+3]]: [B1.[[#FOR+2]]] = [B1.[[#FOR+1]]] -// CHECK-NEXT: [[#FOR+4]]: #pragma omp for +// CHECK-NEXT: 27: x +// CHECK-NEXT: 28: [B1.27] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 29: argc +// CHECK-NEXT: 30: [B1.29] = [B1.28] +// CHECK-NEXT: 31: #pragma omp for // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#FOR+3]]]; +// CHECK-NEXT: [B1.30]; #pragma omp 
for for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [[#FS:]]: x -// CHECK-NEXT: [[#FS+1]]: [B1.[[#FS]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#FS+2]]: argc -// CHECK-NEXT: [[#FS+3]]: [B1.[[#FS+2]]] = [B1.[[#FS+1]]] -// CHECK-NEXT: [[#FS+4]]: #pragma omp for simd +// CHECK-NEXT: 32: x +// CHECK-NEXT: 33: [B1.32] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 34: argc +// CHECK-NEXT: 35: [B1.34] = [B1.33] +// CHECK-NEXT: 36: #pragma omp for simd // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#FS+3]]]; +// CHECK-NEXT: [B1.35]; #pragma omp for simd for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [[#MASTER:]]: x -// CHECK-NEXT: [[#MASTER+1]]: [B1.[[#MASTER]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#MASTER+2]]: argc -// CHECK-NEXT: [[#MASTER+3]]: [B1.[[#MASTER+2]]] = [B1.[[#MASTER+1]]] -// CHECK-NEXT: [[#MASTER+4]]: #pragma omp master -// CHECK-NEXT: [B1.[[#MASTER+3]]]; +// CHECK-NEXT: 37: x +// CHECK-NEXT: 38: [B1.37] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 39: argc +// CHECK-NEXT: 40: [B1.39] = [B1.38] +// CHECK-NEXT: 41: #pragma omp master +// CHECK-NEXT: [B1.40]; #pragma omp master argc = x; -// CHECK-NEXT: [[#ORD:]]: x -// CHECK-NEXT: [[#ORD+1]]: [B1.[[#ORD]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#ORD+2]]: argc -// CHECK-NEXT: [[#ORD+3]]: [B1.[[#ORD+2]]] = [B1.[[#ORD+1]]] -// CHECK-NEXT: [[#ORD+4]]: #pragma omp ordered -// CHECK-NEXT: [B1.[[#ORD+3]]]; -// CHECK-NEXT: [[#ORD+5]]: #pragma omp for ordered +// CHECK-NEXT: 42: x +// CHECK-NEXT: 43: [B1.42] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 44: argc +// CHECK-NEXT: 45: [B1.44] = [B1.43] +// CHECK-NEXT: 46: #pragma omp ordered +// CHECK-NEXT: [B1.45]; +// CHECK-NEXT: 47: #pragma omp for ordered // CHECK-NEXT: for (int i = 0; i < 10; ++i) { -// CHECK-NEXT:[B1.[[#ORD+4]]] } +// CHECK-NEXT:[B1.46] } #pragma omp for ordered for (int i = 0; i < 10; ++i) { #pragma omp ordered argc = x; } -// 
CHECK-NEXT: [[#PF:]]: x -// CHECK-NEXT: [[#PF+1]]: [B1.[[#PF]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#PF+2]]: argc -// CHECK-NEXT: [[#PF+3]]: [B1.[[#PF+2]]] = [B1.[[#PF+1]]] -// CHECK-NEXT: [[#PF+4]]: cond -// CHECK-NEXT: [[#PF+5]]: [B1.[[#PF+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#PF+6]]: [B1.[[#PF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#PF+7]]: #pragma omp parallel for if(cond) +// CHECK-NEXT: 48: x +// CHECK-NEXT: 49: [B1.48] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 50: argc +// CHECK-NEXT: 51: [B1.50] = [B1.49] +// CHECK-NEXT: 52: #pragma omp parallel for // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#PF+3]]]; -#pragma omp parallel for if(cond) +// CHECK-NEXT: [B1.51]; +#pragma omp parallel for for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [[#PFS:]]: x -// CHECK-NEXT: [[#PFS+1]]: [B1.[[#PFS]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#PFS+2]]: argc -// CHECK-NEXT: [[#PFS+3]]: [B1.[[#PFS+2]]] = [B1.[[#PFS+1]]] -// CHECK-NEXT: [[#PFS+4]]: cond -// CHECK-NEXT: [[#PFS+5]]: [B1.[[#PFS+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#PFS+6]]: [B1.[[#PFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#PFS+7]]: #pragma omp parallel for simd if(cond) +// CHECK-NEXT: 53: x +// CHECK-NEXT: 54: [B1.53] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 55: argc +// CHECK-NEXT: 56: [B1.55] = [B1.54] +// CHECK-NEXT: 57: #pragma omp parallel for simd // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#PFS+3]]]; -#pragma omp parallel for simd if(cond) +// CHECK-NEXT: [B1.56]; +#pragma omp parallel for simd for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [[#PAR:]]: x -// CHECK-NEXT: [[#PAR+1]]: [B1.[[#PAR]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#PAR+2]]: argc -// CHECK-NEXT: [[#PAR+3]]: [B1.[[#PAR+2]]] = [B1.[[#PAR+1]]] -// CHECK-NEXT: [[#PAR+4]]: cond -// 
CHECK-NEXT: [[#PAR+5]]: [B1.[[#PAR+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#PAR+6]]: [B1.[[#PAR+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#PAR+7]]: #pragma omp parallel if(cond) -// CHECK-NEXT: [B1.[[#PAR+3]]]; -#pragma omp parallel if(cond) +// CHECK-NEXT: 58: x +// CHECK-NEXT: 59: [B1.58] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 60: argc +// CHECK-NEXT: 61: [B1.60] = [B1.59] +// CHECK-NEXT: 62: #pragma omp parallel +// CHECK-NEXT: [B1.61]; +#pragma omp parallel argc = x; -// CHECK-NEXT: [[#PSECT:]]: x -// CHECK-NEXT: [[#PSECT+1]]: [B1.[[#PSECT]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#PSECT+2]]: argc -// CHECK-NEXT: [[#PSECT+3]]: [B1.[[#PSECT+2]]] = [B1.[[#PSECT+1]]] -// CHECK-NEXT: [[#PSECT+4]]: cond -// CHECK-NEXT: [[#PSECT+5]]: [B1.[[#PSECT+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#PSECT+6]]: [B1.[[#PSECT+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#PSECT+7]]: #pragma omp parallel sections if(cond) +// CHECK-NEXT: 63: x +// CHECK-NEXT: 64: [B1.63] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 65: argc +// CHECK-NEXT: 66: [B1.65] = [B1.64] +// CHECK-NEXT: 67: #pragma omp parallel sections // CHECK-NEXT: { -// CHECK-NEXT: [B1.[[#PSECT+3]]]; +// CHECK-NEXT: [B1.66]; // CHECK-NEXT: } -#pragma omp parallel sections if(cond) +#pragma omp parallel sections { argc = x; } -// CHECK-NEXT: [[#SIMD:]]: x -// CHECK-NEXT: [[#SIMD+1]]: [B1.[[#SIMD]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#SIMD+2]]: argc -// CHECK-NEXT: [[#SIMD+3]]: [B1.[[#SIMD+2]]] = [B1.[[#SIMD+1]]] -// CHECK-NEXT: [[#SIMD+4]]: #pragma omp simd +// CHECK-NEXT: 68: x +// CHECK-NEXT: 69: [B1.68] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 70: argc +// CHECK-NEXT: 71: [B1.70] = [B1.69] +// CHECK-NEXT: 72: #pragma omp simd // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#SIMD+3]]]; +// CHECK-NEXT: [B1.71]; #pragma omp simd 
for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [[#SINGLE:]]: x -// CHECK-NEXT: [[#SINGLE+1]]: [B1.[[#SINGLE]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#SINGLE+2]]: argc -// CHECK-NEXT: [[#SINGLE+3]]: [B1.[[#SINGLE+2]]] = [B1.[[#SINGLE+1]]] -// CHECK-NEXT: [[#SINGLE+4]]: #pragma omp single -// CHECK-NEXT: [B1.[[#SINGLE+3]]]; +// CHECK-NEXT: 73: x +// CHECK-NEXT: 74: [B1.73] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 75: argc +// CHECK-NEXT: 76: [B1.75] = [B1.74] +// CHECK-NEXT: 77: #pragma omp single +// CHECK-NEXT: [B1.76]; #pragma omp single argc = x; -// CHECK-NEXT: [[#TARGET:]]: x -// CHECK-NEXT: [[#TARGET+1]]: [B1.[[#TARGET]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TARGET+2]]: argc -// CHECK-NEXT: [[#TARGET+3]]: [B1.[[#TARGET+2]]] = [B1.[[#TARGET+1]]] -// CHECK-NEXT: [[#TARGET+4]]: cond -// CHECK-NEXT: [[#TARGET+5]]: [B1.[[#TARGET+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TARGET+6]]: [B1.[[#TARGET+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#TARGET+7]]: #pragma omp target depend(in : argc) if(cond) -// CHECK-NEXT: [B1.[[#TARGET+3]]]; +// CHECK-NEXT: 78: x +// CHECK-NEXT: 79: [B1.78] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 80: argc +// CHECK-NEXT: 81: [B1.80] = [B1.79] +// CHECK-NEXT: 82: #pragma omp target depend(in : argc) +// CHECK-NEXT: [B1.81]; #pragma omp target depend(in \ - : argc) if(cond) + : argc) argc = x; -// CHECK-NEXT: [[#TPF:]]: x -// CHECK-NEXT: [[#TPF+1]]: [B1.[[#TPF]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TPF+2]]: argc -// CHECK-NEXT: [[#TPF+3]]: [B1.[[#TPF+2]]] = [B1.[[#TPF+1]]] -// CHECK-NEXT: [[#TPF+4]]: cond -// CHECK-NEXT: [[#TPF+5]]: [B1.[[#TPF+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TPF+6]]: [B1.[[#TPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#TPF+7]]: #pragma omp target parallel for if(parallel: cond) +// CHECK-NEXT: 83: x +// CHECK-NEXT: 84: 
[B1.83] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 85: argc +// CHECK-NEXT: 86: [B1.85] = [B1.84] +// CHECK-NEXT: 87: #pragma omp target parallel for // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#TPF+3]]]; -#pragma omp target parallel for if(parallel:cond) +// CHECK-NEXT: [B1.86]; +#pragma omp target parallel for for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [[#TPFS:]]: x -// CHECK-NEXT: [[#TPFS+1]]: [B1.[[#TPFS]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TPFS+2]]: argc -// CHECK-NEXT: [[#TPFS+3]]: [B1.[[#TPFS+2]]] = [B1.[[#TPFS+1]]] -// CHECK-NEXT: [[#TPFS+4]]: cond -// CHECK-NEXT: [[#TPFS+5]]: [B1.[[#TPFS+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TPFS+6]]: [B1.[[#TPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#TPFS+7]]: #pragma omp target parallel for simd if(target: cond) +// CHECK-NEXT: 88: x +// CHECK-NEXT: 89: [B1.88] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 90: argc +// CHECK-NEXT: 91: [B1.90] = [B1.89] +// CHECK-NEXT: 92: #pragma omp target parallel for simd // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#TPFS+3]]]; -#pragma omp target parallel for simd if(target:cond) +// CHECK-NEXT: [B1.91]; +#pragma omp target parallel for simd for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [[#TP:]]: x -// CHECK-NEXT: [[#TP+1]]: [B1.[[#TP]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TP+2]]: argc -// CHECK-NEXT: [[#TP+3]]: [B1.[[#TP+2]]] = [B1.[[#TP+1]]] -// CHECK-NEXT: [[#TP+4]]: cond -// CHECK-NEXT: [[#TP+5]]: [B1.[[#TP+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TP+6]]: [B1.[[#TP+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#TP+7]]: #pragma omp target parallel if(cond) -// CHECK-NEXT: [B1.[[#TP+3]]]; -#pragma omp target parallel if(cond) +// CHECK-NEXT: 93: x +// CHECK-NEXT: 94: [B1.93] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 95: argc +// 
CHECK-NEXT: 96: [B1.95] = [B1.94] +// CHECK-NEXT: 97: #pragma omp target parallel +// CHECK-NEXT: [B1.96]; +#pragma omp target parallel argc = x; -// CHECK-NEXT: [[#TSIMD:]]: x -// CHECK-NEXT: [[#TSIMD+1]]: [B1.[[#TSIMD]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TSIMD+2]]: argc -// CHECK-NEXT: [[#TSIMD+3]]: [B1.[[#TSIMD+2]]] = [B1.[[#TSIMD+1]]] -// CHECK-NEXT: [[#TSIMD+4]]: cond -// CHECK-NEXT: [[#TSIMD+5]]: [B1.[[#TSIMD+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TSIMD+6]]: [B1.[[#TSIMD+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#TSIMD+7]]: #pragma omp target simd if(cond) +// CHECK-NEXT: 98: x +// CHECK-NEXT: 99: [B1.98] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 100: argc +// CHECK-NEXT: 101: [B1.100] = [B1.99] +// CHECK-NEXT: 102: #pragma omp target simd // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#TSIMD+3]]]; -#pragma omp target simd if(cond) +// CHECK-NEXT: [B1.101]; +#pragma omp target simd for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [[#TTD:]]: x -// CHECK-NEXT: [[#TTD+1]]: [B1.[[#TTD]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TTD+2]]: argc -// CHECK-NEXT: [[#TTD+3]]: [B1.[[#TTD+2]]] = [B1.[[#TTD+1]]] -// CHECK-NEXT: [[#TTD+4]]: cond -// CHECK-NEXT: [[#TTD+5]]: [B1.[[#TTD+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TTD+6]]: [B1.[[#TTD+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#TTD+7]]: #pragma omp target teams distribute if(cond) +// CHECK-NEXT: 103: x +// CHECK-NEXT: 104: [B1.103] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 105: argc +// CHECK-NEXT: 106: [B1.105] = [B1.104] +// CHECK-NEXT: 107: #pragma omp target teams distribute // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#TTD+3]]]; -#pragma omp target teams distribute if(cond) +// CHECK-NEXT: [B1.106]; +#pragma omp target teams distribute for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 
[[#TTDPF:]]: x -// CHECK-NEXT: [[#TTDPF+1]]: [B1.[[#TTDPF]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TTDPF+2]]: argc -// CHECK-NEXT: [[#TTDPF+3]]: [B1.[[#TTDPF+2]]] = [B1.[[#TTDPF+1]]] -// CHECK-NEXT: [[#TTDPF+4]]: cond -// CHECK-NEXT: [[#TTDPF+5]]: [B1.[[#TTDPF+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TTDPF+6]]: [B1.[[#TTDPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#TTDPF+7]]: #pragma omp target teams distribute parallel for if(cond) +// CHECK-NEXT: 108: x +// CHECK-NEXT: 109: [B1.108] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 110: argc +// CHECK-NEXT: 111: [B1.110] = [B1.109] +// CHECK-NEXT: 112: #pragma omp target teams distribute parallel for // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#TTDPF+3]]]; -#pragma omp target teams distribute parallel for if(cond) +// CHECK-NEXT: [B1.111]; +#pragma omp target teams distribute parallel for for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [[#TTDPFS:]]: x -// CHECK-NEXT: [[#TTDPFS+1]]: [B1.[[#TTDPFS]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TTDPFS+2]]: argc -// CHECK-NEXT: [[#TTDPFS+3]]: [B1.[[#TTDPFS+2]]] = [B1.[[#TTDPFS+1]]] -// CHECK-NEXT: [[#TTDPFS+4]]: cond -// CHECK-NEXT: [[#TTDPFS+5]]: [B1.[[#TTDPFS+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TTDPFS+6]]: [B1.[[#TTDPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#TTDPFS+7]]: #pragma omp target teams distribute parallel for simd if(parallel: cond) +// CHECK-NEXT: 113: x +// CHECK-NEXT: 114: [B1.113] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 115: argc +// CHECK-NEXT: 116: [B1.115] = [B1.114] +// CHECK-NEXT: 117: #pragma omp target teams distribute parallel for simd // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#TTDPFS+3]]]; -#pragma omp target teams distribute parallel for simd if(parallel:cond) +// CHECK-NEXT: [B1.116]; +#pragma omp target teams distribute 
parallel for simd for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [[#TTDS:]]: x -// CHECK-NEXT: [[#TTDS+1]]: [B1.[[#TTDS]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TTDS+2]]: argc -// CHECK-NEXT: [[#TTDS+3]]: [B1.[[#TTDS+2]]] = [B1.[[#TTDS+1]]] -// CHECK-NEXT: [[#TTDS+4]]: cond -// CHECK-NEXT: [[#TTDS+5]]: [B1.[[#TTDS+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TTDS+6]]: [B1.[[#TTDS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#TTDS+7]]: #pragma omp target teams distribute simd if(cond) +// CHECK-NEXT: 118: x +// CHECK-NEXT: 119: [B1.118] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 120: argc +// CHECK-NEXT: 121: [B1.120] = [B1.119] +// CHECK-NEXT: 122: #pragma omp target teams distribute simd // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#TTDS+3]]]; -#pragma omp target teams distribute simd if(cond) +// CHECK-NEXT: [B1.121]; +#pragma omp target teams distribute simd for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [[#TT:]]: x -// CHECK-NEXT: [[#TT+1]]: [B1.[[#TT]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TT+2]]: argc -// CHECK-NEXT: [[#TT+3]]: [B1.[[#TT+2]]] = [B1.[[#TT+1]]] -// CHECK-NEXT: [[#TT+4]]: cond -// CHECK-NEXT: [[#TT+5]]: [B1.[[#TT+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TT+6]]: [B1.[[#TT+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#TT+7]]: #pragma omp target teams if(cond) -// CHECK-NEXT: [B1.[[#TT+3]]]; -#pragma omp target teams if(cond) +// CHECK-NEXT: 123: x +// CHECK-NEXT: 124: [B1.123] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 125: argc +// CHECK-NEXT: 126: [B1.125] = [B1.124] +// CHECK-NEXT: 127: #pragma omp target teams +// CHECK-NEXT: [B1.126]; +#pragma omp target teams argc = x; -// CHECK-NEXT: [[#TU:]]: cond -// CHECK-NEXT: [[#TU+1]]: [B1.[[#TU]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TU+2]]: [B1.[[#TU+1]]] (ImplicitCastExpr, 
IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#TU+3]]: #pragma omp target update to(x) if(target update: cond) -#pragma omp target update to(x) if(target update:cond) -// CHECK-NEXT: [[#TASK:]]: x -// CHECK-NEXT: [[#TASK+1]]: [B1.[[#TASK]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TASK+2]]: argc -// CHECK-NEXT: [[#TASK+3]]: [B1.[[#TASK+2]]] = [B1.[[#TASK+1]]] -// CHECK-NEXT: [[#TASK+4]]: cond -// CHECK-NEXT: [[#TASK+5]]: [B1.[[#TASK+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TASK+6]]: [B1.[[#TASK+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#TASK+7]]: #pragma omp task if(cond) -// CHECK-NEXT: [B1.[[#TASK+3]]]; -#pragma omp task if(cond) +// CHECK-NEXT: 128: #pragma omp target update to(x) +#pragma omp target update to(x) +// CHECK-NEXT: 129: x +// CHECK-NEXT: 130: [B1.129] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 131: argc +// CHECK-NEXT: 132: [B1.131] = [B1.130] argc = x; -// CHECK-NEXT: [[#TG:]]: x -// CHECK-NEXT: [[#TG+1]]: [B1.[[#TG]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TG+2]]: argc -// CHECK-NEXT: [[#TG+3]]: [B1.[[#TG+2]]] = [B1.[[#TG+1]]] -// CHECK-NEXT: [[#TG+4]]: #pragma omp taskgroup -// CHECK-NEXT: [B1.[[#TG+3]]]; +// CHECK-NEXT: 133: x +// CHECK-NEXT: 134: [B1.133] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 135: argc +// CHECK-NEXT: 136: [B1.135] = [B1.134] +// CHECK-NEXT: 137: #pragma omp task +// CHECK-NEXT: [B1.136]; +#pragma omp task + argc = x; +// CHECK-NEXT: 138: x +// CHECK-NEXT: 139: [B1.138] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 140: argc +// CHECK-NEXT: 141: [B1.140] = [B1.139] +// CHECK-NEXT: 142: #pragma omp taskgroup +// CHECK-NEXT: [B1.141]; #pragma omp taskgroup argc = x; -// CHECK-NEXT: [[#TL:]]: x -// CHECK-NEXT: [[#TL+1]]: [B1.[[#TL]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TL+2]]: argc -// CHECK-NEXT: [[#TL+3]]: [B1.[[#TL+2]]] = [B1.[[#TL+1]]] -// CHECK-NEXT: [[#TL+4]]: cond -// 
CHECK-NEXT: [[#TL+5]]: [B1.[[#TL+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TL+6]]: [B1.[[#TL+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#TL+7]]: #pragma omp taskloop if(cond) +// CHECK-NEXT: 143: x +// CHECK-NEXT: 144: [B1.143] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 145: argc +// CHECK-NEXT: 146: [B1.145] = [B1.144] +// CHECK-NEXT: 147: #pragma omp taskloop // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#TL+3]]]; -#pragma omp taskloop if(cond) +// CHECK-NEXT: [B1.146]; +#pragma omp taskloop for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [[#TLS:]]: x -// CHECK-NEXT: [[#TLS+1]]: [B1.[[#TLS]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TLS+2]]: argc -// CHECK-NEXT: [[#TLS+3]]: [B1.[[#TLS+2]]] = [B1.[[#TLS+1]]] -// CHECK-NEXT: [[#TLS+4]]: cond -// CHECK-NEXT: [[#TLS+5]]: [B1.[[#TLS+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TLS+6]]: [B1.[[#TLS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#TLS+7]]: #pragma omp taskloop simd if(cond) +// CHECK-NEXT: 148: x +// CHECK-NEXT: 149: [B1.148] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 150: argc +// CHECK-NEXT: 151: [B1.150] = [B1.149] +// CHECK-NEXT: 152: #pragma omp taskloop simd // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#TLS+3]]]; -#pragma omp taskloop simd if(cond) +// CHECK-NEXT: [B1.151]; +#pragma omp taskloop simd for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [[#TDPF:]]: x -// CHECK-NEXT: [[#TDPF+1]]: [B1.[[#TDPF]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TDPF+2]]: argc -// CHECK-NEXT: [[#TDPF+3]]: [B1.[[#TDPF+2]]] = [B1.[[#TDPF+1]]] -// CHECK-NEXT: [[#TDPF+4]]: cond -// CHECK-NEXT: [[#TDPF+5]]: [B1.[[#TDPF+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TDPF+6]]: [B1.[[#TDPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#TDPF+7]]: #pragma omp teams distribute parallel for 
if(cond) +// CHECK-NEXT: 153: x +// CHECK-NEXT: 154: [B1.153] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 155: argc +// CHECK-NEXT: 156: [B1.155] = [B1.154] +// CHECK-NEXT: 157: #pragma omp teams distribute parallel for // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#TDPF+3]]]; -// CHECK-NEXT: [[#TDPF+8]]: #pragma omp target +// CHECK-NEXT: [B1.156]; +// CHECK-NEXT: 158: #pragma omp target #pragma omp target -#pragma omp teams distribute parallel for if(cond) +#pragma omp teams distribute parallel for for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [B1.[[#TDPF+7]]] [[#TDPFS:]]: x -// CHECK-NEXT: [[#TDPFS+1]]: [B1.[[#TDPFS]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TDPFS+2]]: argc -// CHECK-NEXT: [[#TDPFS+3]]: [B1.[[#TDPFS+2]]] = [B1.[[#TDPFS+1]]] -// CHECK-NEXT: [[#TDPFS+4]]: cond -// CHECK-NEXT: [[#TDPFS+5]]: [B1.[[#TDPFS+4]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TDPFS+6]]: [B1.[[#TDPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-NEXT: [[#TDPFS+7]]: #pragma omp teams distribute parallel for simd +// CHECK-NEXT:[B1.157] 159: x +// CHECK-NEXT: 160: [B1.159] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 161: argc +// CHECK-NEXT: 162: [B1.161] = [B1.160] +// CHECK-NEXT: 163: #pragma omp teams distribute parallel for simd // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#TDPFS+3]]]; -// CHECK-NEXT: [[#TDPFS+8]]: #pragma omp target +// CHECK-NEXT: [B1.162]; +// CHECK-NEXT: 164: #pragma omp target #pragma omp target -#pragma omp teams distribute parallel for simd if(cond) +#pragma omp teams distribute parallel for simd for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [B1.[[#TDPFS+7]]] [[#TDS:]]: x -// CHECK-NEXT: [[#TDS+1]]: [B1.[[#TDS]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TDS+2]]: argc -// CHECK-NEXT: [[#TDS+3]]: [B1.[[#TDS+2]]] = [B1.[[#TDS+1]]] -// CHECK-NEXT: [[#TDS+4]]: #pragma omp teams distribute simd +// 
CHECK-NEXT:[B1.163] 165: x +// CHECK-NEXT: 166: [B1.165] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 167: argc +// CHECK-NEXT: 168: [B1.167] = [B1.166] +// CHECK-NEXT: 169: #pragma omp teams distribute simd // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.[[#TDS+3]]]; -// CHECK-NEXT: [[#TDS+5]]: #pragma omp target +// CHECK-NEXT: [B1.168]; +// CHECK-NEXT: 170: #pragma omp target #pragma omp target #pragma omp teams distribute simd for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: [B1.[[#TDS+4]]] [[#TEAMS:]]: x -// CHECK-NEXT: [[#TEAMS+1]]: [B1.[[#TEAMS]]] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: [[#TEAMS+2]]: argc -// CHECK-NEXT: [[#TEAMS+3]]: [B1.[[#TEAMS+2]]] = [B1.[[#TEAMS+1]]] -// CHECK-NEXT: [[#TEAMS+4]]: #pragma omp teams -// CHECK-NEXT: [B1.[[#TEAMS+3]]]; -// CHECK-NEXT: [[#TEAMS+5]]: #pragma omp target +// CHECK-NEXT:[B1.169] 171: x +// CHECK-NEXT: 172: [B1.171] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 173: argc +// CHECK-NEXT: 174: [B1.173] = [B1.172] +// CHECK-NEXT: 175: #pragma omp teams +// CHECK-NEXT: [B1.174]; +// CHECK-NEXT: 176: #pragma omp target #pragma omp target #pragma omp teams argc = x; -// CHECK-NEXT: [B1.[[#TEAMS+4]]] Preds +// CHECK-NEXT:[B1.175] Preds } diff --git a/clang/test/OpenMP/cancel_if_messages.cpp b/clang/test/OpenMP/cancel_if_messages.cpp index 222087ca9e61b..3d629c927e907 100644 --- a/clang/test/OpenMP/cancel_if_messages.cpp +++ b/clang/test/OpenMP/cancel_if_messages.cpp @@ -9,16 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp parallel - { -#pragma omp cancel parallel if (cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; - } -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git 
a/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp index e628a15c3ab44..a06ff2377c043 100644 --- a/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp distribute parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp index 6cf18faf0a87f..7769272026e6b 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp distribute parallel for simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/parallel_for_if_messages.cpp b/clang/test/OpenMP/parallel_for_if_messages.cpp index 56bb06be0cc71..32f9ef3a7defa 100644 --- a/clang/test/OpenMP/parallel_for_if_messages.cpp +++ b/clang/test/OpenMP/parallel_for_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 
10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/parallel_for_simd_if_messages.cpp index bab9339d49174..aa1e302d04242 100644 --- a/clang/test/OpenMP/parallel_for_simd_if_messages.cpp +++ b/clang/test/OpenMP/parallel_for_simd_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp parallel for simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/parallel_if_messages.cpp b/clang/test/OpenMP/parallel_if_messages.cpp index f095e66bbfa5e..7f802a9e4236a 100644 --- a/clang/test/OpenMP/parallel_if_messages.cpp +++ b/clang/test/OpenMP/parallel_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp parallel if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/parallel_sections_if_messages.cpp b/clang/test/OpenMP/parallel_sections_if_messages.cpp index b7c92df4f30df..8d36b6d5d3086 100644 --- a/clang/test/OpenMP/parallel_sections_if_messages.cpp +++ b/clang/test/OpenMP/parallel_sections_if_messages.cpp @@ -9,14 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp parallel sections if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - { - ; 
- } -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_data_if_messages.cpp b/clang/test/OpenMP/target_data_if_messages.cpp index 29f898c6d9fa7..c6f9b4b34eeea 100644 --- a/clang/test/OpenMP/target_data_if_messages.cpp +++ b/clang/test/OpenMP/target_data_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp target data map(argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} int main(int argc, char **argv) { diff --git a/clang/test/OpenMP/target_enter_data_if_messages.cpp b/clang/test/OpenMP/target_enter_data_if_messages.cpp index 21019e9ae7f8c..5123d607dc6a1 100644 --- a/clang/test/OpenMP/target_enter_data_if_messages.cpp +++ b/clang/test/OpenMP/target_enter_data_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp target enter data map(to:argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} int main(int argc, char **argv) { diff --git a/clang/test/OpenMP/target_exit_data_if_messages.cpp b/clang/test/OpenMP/target_exit_data_if_messages.cpp index 7b2385c16cd21..c45b32ff3fe75 100644 --- a/clang/test/OpenMP/target_exit_data_if_messages.cpp +++ b/clang/test/OpenMP/target_exit_data_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp target exit data map(from: argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when 
used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} int main(int argc, char **argv) { diff --git a/clang/test/OpenMP/target_if_messages.cpp b/clang/test/OpenMP/target_if_messages.cpp index f381e9eb91ebd..e6b667f2cffbf 100644 --- a/clang/test/OpenMP/target_if_messages.cpp +++ b/clang/test/OpenMP/target_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp target if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_parallel_for_if_messages.cpp b/clang/test/OpenMP/target_parallel_for_if_messages.cpp index a5a181b9d273a..445dc1775b0f2 100644 --- a/clang/test/OpenMP/target_parallel_for_if_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp target parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp index ef9a2089d1087..b0da8017019f0 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp target parallel for simd if(parallel: cond) // 
expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_parallel_if_messages.cpp b/clang/test/OpenMP/target_parallel_if_messages.cpp index ac498a7108b0d..460e0c8655f09 100644 --- a/clang/test/OpenMP/target_parallel_if_messages.cpp +++ b/clang/test/OpenMP/target_parallel_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp target parallel if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_simd_if_messages.cpp b/clang/test/OpenMP/target_simd_if_messages.cpp index 5f3e9e3910ac6..94d2ab308daa2 100644 --- a/clang/test/OpenMP/target_simd_if_messages.cpp +++ b/clang/test/OpenMP/target_simd_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp target simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_teams_distribute_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_if_messages.cpp index 499cd3ac58050..fd1ffb08cbe8c 100644 --- a/clang/test/OpenMP/target_teams_distribute_if_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} 
-#pragma omp target teams distribute if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp index 6df23076472ec..e1114028b6877 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp target teams distribute parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp index e88c1f1dbbfff..59c75893a1714 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp @@ -9,14 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp target teams distribute parallel for simd if (parallel \ - : cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp index 
53af6e759d21e..7134a8394cbb8 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp target teams distribute simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_teams_if_messages.cpp b/clang/test/OpenMP/target_teams_if_messages.cpp index 4bc82a349398d..8d3d690d631fa 100644 --- a/clang/test/OpenMP/target_teams_if_messages.cpp +++ b/clang/test/OpenMP/target_teams_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp target teams if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_update_if_messages.cpp b/clang/test/OpenMP/target_update_if_messages.cpp index d967713e456fb..9ded332b04eb9 100644 --- a/clang/test/OpenMP/target_update_if_messages.cpp +++ b/clang/test/OpenMP/target_update_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp target update to(argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/task_if_messages.cpp 
b/clang/test/OpenMP/task_if_messages.cpp index 2d47b32b9a153..305af22149d85 100644 --- a/clang/test/OpenMP/task_if_messages.cpp +++ b/clang/test/OpenMP/task_if_messages.cpp @@ -9,13 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp task if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp index b76599d41a46a..6f724b050178a 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp @@ -9,14 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp target -#pragma omp teams distribute parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp index 39a0b326383a2..c01e6e87e39a5 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp @@ -9,14 +9,6 @@ bool foobool(int argc) { return argc; } -void xxx(int argc) { - int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} -#pragma omp target -#pragma omp teams distribute parallel for simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} - 
for (int i = 0; i < 10; ++i) - ; -} - struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} From a17b1aed6ab205515adc31d19e953635e563e5c4 Mon Sep 17 00:00:00 2001 From: Zoe Carver Date: Tue, 16 Jul 2019 03:21:01 +0000 Subject: [PATCH 201/451] Add contains method to associative containers. This patch implements P0458R2, adding contains to map, multimap, unordered_map, unordered_multimap, set, multiset, unordered_set, and unordered_multiset. llvm-svn: 366170 --- libcxx/include/map | 16 ++++- libcxx/include/set | 19 +++++- libcxx/include/unordered_map | 10 +++ libcxx/include/unordered_set | 10 +++ .../associative/map/contains.pass.cpp | 62 +++++++++++++++++++ .../associative/set/contains.pass.cpp | 44 +++++++++++++ .../unord/unord.map/contains.pass.cpp | 62 +++++++++++++++++++ .../unord/unord.set/contains.pass.cpp | 44 +++++++++++++ libcxx/www/cxx2a_status.html | 2 +- 9 files changed, 263 insertions(+), 6 deletions(-) create mode 100644 libcxx/test/std/containers/associative/map/contains.pass.cpp create mode 100644 libcxx/test/std/containers/associative/set/contains.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.map/contains.pass.cpp create mode 100644 libcxx/test/std/containers/unord/unord.set/contains.pass.cpp diff --git a/libcxx/include/map b/libcxx/include/map index 6805a513394a3..eb6ae57b011f8 100644 --- a/libcxx/include/map +++ b/libcxx/include/map @@ -193,8 +193,8 @@ public: const_iterator find(const K& x) const; // C++14 template size_type count(const K& x) const; // C++14 - size_type count(const key_type& k) const; + bool contains(const key_type& x) const; // C++20 iterator lower_bound(const key_type& k); const_iterator lower_bound(const key_type& k) const; template @@ -407,8 +407,8 @@ public: const_iterator find(const K& x) const; // C++14 template size_type count(const K& x) const; // C++14 - size_type count(const key_type& k) const; + bool contains(const key_type& x) const; // C++20 iterator 
lower_bound(const key_type& k); const_iterator lower_bound(const key_type& k) const; template @@ -1398,6 +1398,12 @@ public: typename enable_if<__is_transparent<_Compare, _K2>::value,size_type>::type count(const _K2& __k) const {return __tree_.__count_multi(__k);} #endif + +#if _LIBCPP_STD_VER > 17 + _LIBCPP_INLINE_VISIBILITY + bool contains(const key_type& __k) const {return find(__k) != end();} +#endif // _LIBCPP_STD_VER > 17 + _LIBCPP_INLINE_VISIBILITY iterator lower_bound(const key_type& __k) {return __tree_.lower_bound(__k);} @@ -2055,6 +2061,12 @@ public: typename enable_if<__is_transparent<_Compare, _K2>::value,size_type>::type count(const _K2& __k) const {return __tree_.__count_multi(__k);} #endif + +#if _LIBCPP_STD_VER > 17 + _LIBCPP_INLINE_VISIBILITY + bool contains(const key_type& __k) const {return find(__k) != end();} +#endif // _LIBCPP_STD_VER > 17 + _LIBCPP_INLINE_VISIBILITY iterator lower_bound(const key_type& __k) {return __tree_.lower_bound(__k);} diff --git a/libcxx/include/set b/libcxx/include/set index 79e8f29f0b943..70ab4d37add20 100644 --- a/libcxx/include/set +++ b/libcxx/include/set @@ -155,9 +155,9 @@ public: template const_iterator find(const K& x) const; // C++14 template - size_type count(const K& x) const; // C++14 - + size_type count(const K& x) const; // C++14 size_type count(const key_type& k) const; + bool contains(const key_type& x) const; // C++20 iterator lower_bound(const key_type& k); const_iterator lower_bound(const key_type& k) const; template @@ -354,8 +354,10 @@ public: iterator find(const K& x); template const_iterator find(const K& x) const; // C++14 - + template + size_type count(const K& x) const; // C++14 size_type count(const key_type& k) const; + bool contains(const key_type& x) const; // C++20 iterator lower_bound(const key_type& k); const_iterator lower_bound(const key_type& k) const; template @@ -787,6 +789,12 @@ public: typename enable_if<__is_transparent<_Compare, _K2>::value,size_type>::type count(const _K2& 
__k) const {return __tree_.__count_multi(__k);} #endif + +#if _LIBCPP_STD_VER > 17 + _LIBCPP_INLINE_VISIBILITY + bool contains(const key_type& __k) const {return find(__k) != end();} +#endif // _LIBCPP_STD_VER > 17 + _LIBCPP_INLINE_VISIBILITY iterator lower_bound(const key_type& __k) {return __tree_.lower_bound(__k);} @@ -1307,6 +1315,11 @@ public: count(const _K2& __k) const {return __tree_.__count_multi(__k);} #endif +#if _LIBCPP_STD_VER > 17 + _LIBCPP_INLINE_VISIBILITY + bool contains(const key_type& __k) const {return find(__k) != end();} +#endif // _LIBCPP_STD_VER > 17 + _LIBCPP_INLINE_VISIBILITY iterator lower_bound(const key_type& __k) {return __tree_.lower_bound(__k);} diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map index 63aecc8bc0ef5..ad17f776c9388 100644 --- a/libcxx/include/unordered_map +++ b/libcxx/include/unordered_map @@ -174,6 +174,7 @@ public: iterator find(const key_type& k); const_iterator find(const key_type& k) const; size_type count(const key_type& k) const; + bool contains(const key_type& k) const; // C++20 pair equal_range(const key_type& k); pair equal_range(const key_type& k) const; @@ -355,6 +356,7 @@ public: iterator find(const key_type& k); const_iterator find(const key_type& k) const; size_type count(const key_type& k) const; + bool contains(const key_type& k) const; // C++20 pair equal_range(const key_type& k); pair equal_range(const key_type& k) const; @@ -1278,6 +1280,10 @@ public: const_iterator find(const key_type& __k) const {return __table_.find(__k);} _LIBCPP_INLINE_VISIBILITY size_type count(const key_type& __k) const {return __table_.__count_unique(__k);} + #if _LIBCPP_STD_VER > 17 + _LIBCPP_INLINE_VISIBILITY + bool contains(const key_type& __k) const {return find(__k) != end();} + #endif // _LIBCPP_STD_VER > 17 _LIBCPP_INLINE_VISIBILITY pair equal_range(const key_type& __k) {return __table_.__equal_range_unique(__k);} @@ -2049,6 +2055,10 @@ public: const_iterator find(const key_type& __k) const 
{return __table_.find(__k);} _LIBCPP_INLINE_VISIBILITY size_type count(const key_type& __k) const {return __table_.__count_multi(__k);} + #if _LIBCPP_STD_VER > 17 + _LIBCPP_INLINE_VISIBILITY + bool contains(const key_type& __k) const {return find(__k) != end();} + #endif // _LIBCPP_STD_VER > 17 _LIBCPP_INLINE_VISIBILITY pair equal_range(const key_type& __k) {return __table_.__equal_range_multi(__k);} diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set index 4a9f030932798..68f777a4ea3eb 100644 --- a/libcxx/include/unordered_set +++ b/libcxx/include/unordered_set @@ -146,6 +146,7 @@ public: iterator find(const key_type& k); const_iterator find(const key_type& k) const; size_type count(const key_type& k) const; + bool contains(const key_type& k) const; // C++20 pair equal_range(const key_type& k); pair equal_range(const key_type& k) const; @@ -310,6 +311,7 @@ public: iterator find(const key_type& k); const_iterator find(const key_type& k) const; size_type count(const key_type& k) const; + bool contains(const key_type& k) const; // C++20 pair equal_range(const key_type& k); pair equal_range(const key_type& k) const; @@ -677,6 +679,10 @@ public: const_iterator find(const key_type& __k) const {return __table_.find(__k);} _LIBCPP_INLINE_VISIBILITY size_type count(const key_type& __k) const {return __table_.__count_unique(__k);} + #if _LIBCPP_STD_VER > 17 + _LIBCPP_INLINE_VISIBILITY + bool contains(const key_type& __k) const {return find(__k) != end();} + #endif // _LIBCPP_STD_VER > 17 _LIBCPP_INLINE_VISIBILITY pair equal_range(const key_type& __k) {return __table_.__equal_range_unique(__k);} @@ -1304,6 +1310,10 @@ public: const_iterator find(const key_type& __k) const {return __table_.find(__k);} _LIBCPP_INLINE_VISIBILITY size_type count(const key_type& __k) const {return __table_.__count_multi(__k);} + #if _LIBCPP_STD_VER > 17 + _LIBCPP_INLINE_VISIBILITY + bool contains(const key_type& __k) const {return find(__k) != end();} + #endif // 
_LIBCPP_STD_VER > 17 _LIBCPP_INLINE_VISIBILITY pair equal_range(const key_type& __k) {return __table_.__equal_range_multi(__k);} diff --git a/libcxx/test/std/containers/associative/map/contains.pass.cpp b/libcxx/test/std/containers/associative/map/contains.pass.cpp new file mode 100644 index 0000000000000..5b71eedba4d82 --- /dev/null +++ b/libcxx/test/std/containers/associative/map/contains.pass.cpp @@ -0,0 +1,62 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 + +#include +#include + +// + +// bool contains(const key_type& x) const; + +template +void test(B bad, Pairs... args) { + T map; + P pairs[] = {args...}; + + for (auto& p : pairs) map.insert(p); + for (auto& p : pairs) assert(map.contains(p.first)); + + assert(!map.contains(bad)); +} + +struct E { int a = 1; double b = 1; char c = 1; }; + +int main(int, char**) +{ + { + test, std::pair >( + 'e', std::make_pair('a', 10), std::make_pair('b', 11), + std::make_pair('c', 12), std::make_pair('d', 13)); + + test, std::pair >( + 'e', std::make_pair('a', 'a'), std::make_pair('b', 'a'), + std::make_pair('c', 'a'), std::make_pair('d', 'b')); + + test, std::pair >( + -1, std::make_pair(1, E{}), std::make_pair(2, E{}), + std::make_pair(3, E{}), std::make_pair(4, E{})); + } + { + test, std::pair >( + 'e', std::make_pair('a', 10), std::make_pair('b', 11), + std::make_pair('c', 12), std::make_pair('d', 13)); + + test, std::pair >( + 'e', std::make_pair('a', 'a'), std::make_pair('b', 'a'), + std::make_pair('c', 'a'), std::make_pair('d', 'b')); + + test, std::pair >( + -1, std::make_pair(1, E{}), std::make_pair(2, E{}), + std::make_pair(3, E{}), 
std::make_pair(4, E{})); + } + + return 0; +} + diff --git a/libcxx/test/std/containers/associative/set/contains.pass.cpp b/libcxx/test/std/containers/associative/set/contains.pass.cpp new file mode 100644 index 0000000000000..2b09729048723 --- /dev/null +++ b/libcxx/test/std/containers/associative/set/contains.pass.cpp @@ -0,0 +1,44 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 + +#include +#include + +// + +// bool contains(const key_type& x) const; + +template +void test(B bad, Vals... args) { + T set; + V vals[] = {args...}; + + for (auto& v : vals) set.insert(v); + for (auto& v : vals) assert(set.contains(v)); + + assert(!set.contains(bad)); +} + +struct E { int a = 1; double b = 1; char c = 1; }; + +int main(int, char**) +{ + { + test, int>(14, 10, 11, 12, 13); + test, char>('e', 'a', 'b', 'c', 'd'); + } + { + test, int>(14, 10, 11, 12, 13); + test, char>('e', 'a', 'b', 'c', 'd'); + } + + return 0; +} + diff --git a/libcxx/test/std/containers/unord/unord.map/contains.pass.cpp b/libcxx/test/std/containers/unord/unord.map/contains.pass.cpp new file mode 100644 index 0000000000000..c591e197249f4 --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.map/contains.pass.cpp @@ -0,0 +1,62 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 + +#include +#include + +// + +// bool contains(const key_type& x) const; + +template +void test(B bad, Pairs... args) { + T map; + P pairs[] = {args...}; + + for (auto& p : pairs) map.insert(p); + for (auto& p : pairs) assert(map.contains(p.first)); + + assert(!map.contains(bad)); +} + +struct E { int a = 1; double b = 1; char c = 1; }; + +int main(int, char**) +{ + { + test, std::pair >( + 'e', std::make_pair('a', 10), std::make_pair('b', 11), + std::make_pair('c', 12), std::make_pair('d', 13)); + + test, std::pair >( + 'e', std::make_pair('a', 'a'), std::make_pair('b', 'a'), + std::make_pair('c', 'a'), std::make_pair('d', 'b')); + + test, std::pair >( + -1, std::make_pair(1, E{}), std::make_pair(2, E{}), + std::make_pair(3, E{}), std::make_pair(4, E{})); + } + { + test, std::pair >( + 'e', std::make_pair('a', 10), std::make_pair('b', 11), + std::make_pair('c', 12), std::make_pair('d', 13)); + + test, std::pair >( + 'e', std::make_pair('a', 'a'), std::make_pair('b', 'a'), + std::make_pair('c', 'a'), std::make_pair('d', 'b')); + + test, std::pair >( + -1, std::make_pair(1, E{}), std::make_pair(2, E{}), + std::make_pair(3, E{}), std::make_pair(4, E{})); + } + + return 0; +} + diff --git a/libcxx/test/std/containers/unord/unord.set/contains.pass.cpp b/libcxx/test/std/containers/unord/unord.set/contains.pass.cpp new file mode 100644 index 0000000000000..3b87f2fd0e04d --- /dev/null +++ b/libcxx/test/std/containers/unord/unord.set/contains.pass.cpp @@ -0,0 +1,44 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++98, c++03, c++11, c++14, c++17 + +#include +#include + +// + +// bool contains(const key_type& x) const; + +template +void test(B bad, Vals... args) { + T set; + V vals[] = {args...}; + + for (auto& v : vals) set.insert(v); + for (auto& v : vals) assert(set.contains(v)); + + assert(!set.contains(bad)); +} + +struct E { int a = 1; double b = 1; char c = 1; }; + +int main(int, char**) +{ + { + test, int>(14, 10, 11, 12, 13); + test, char>('e', 'a', 'b', 'c', 'd'); + } + { + test, int>(14, 10, 11, 12, 13); + test, char>('e', 'a', 'b', 'c', 'd'); + } + + return 0; +} + diff --git a/libcxx/www/cxx2a_status.html b/libcxx/www/cxx2a_status.html index 2b19eb86ba332..9489c07ef0bbe 100644 --- a/libcxx/www/cxx2a_status.html +++ b/libcxx/www/cxx2a_status.html @@ -83,7 +83,7 @@

    Paper Status

    P0019R8LWGAtomic RefRapperswil - P0458R2LWGChecking for Existence of an Element in Associative ContainersRapperswil + P0458R2LWGChecking for Existence of an Element in Associative ContainersRapperswilComplete P0475R1LWGLWG 2511: guaranteed copy elision for piecewise constructionRapperswil P0476R2LWGBit-casting object representationsRapperswil P0528R3CWGThe Curious Case of Padding Bits, Featuring Atomic Compare-and-ExchangeRapperswil From e5c4b468f06307bc1b8341af9ccf9dd69fa890f4 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Tue, 16 Jul 2019 03:25:50 +0000 Subject: [PATCH 202/451] hwasan: Pad arrays with non-1 size correctly. Spotted by eugenis. Differential Revision: https://reviews.llvm.org/D64783 llvm-svn: 366171 --- .../Instrumentation/HWAddressSanitizer.cpp | 14 +++++++++----- .../HWAddressSanitizer/alloca-array.ll | 15 +++++++++++++++ .../HWAddressSanitizer/alloca-with-calls.ll | 4 ++-- .../Instrumentation/HWAddressSanitizer/alloca.ll | 10 +++++----- .../HWAddressSanitizer/kernel-alloca.ll | 4 ++-- 5 files changed, 33 insertions(+), 14 deletions(-) create mode 100644 llvm/test/Instrumentation/HWAddressSanitizer/alloca-array.ll diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index a961c81358792..450ae2f79026e 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -1108,8 +1108,14 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { uint64_t AlignedSize = alignTo(Size, Mapping.getAllocaAlignment()); AI->setAlignment(std::max(AI->getAlignment(), 16u)); if (Size != AlignedSize) { + Type *AllocatedType = AI->getAllocatedType(); + if (AI->isArrayAllocation()) { + uint64_t ArraySize = + cast(AI->getArraySize())->getZExtValue(); + AllocatedType = ArrayType::get(AllocatedType, ArraySize); + } Type *TypeWithPadding = StructType::get( - AI->getAllocatedType(), ArrayType::get(Int8Ty, 
AlignedSize - Size)); + AllocatedType, ArrayType::get(Int8Ty, AlignedSize - Size)); auto *NewAI = new AllocaInst( TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI); NewAI->takeName(AI); @@ -1117,10 +1123,8 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) { NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca()); NewAI->setSwiftError(AI->isSwiftError()); NewAI->copyMetadata(*AI); - Value *Zero = ConstantInt::get(Int32Ty, 0); - auto *GEP = GetElementPtrInst::Create(TypeWithPadding, NewAI, - {Zero, Zero}, "", AI); - AI->replaceAllUsesWith(GEP); + auto *Bitcast = new BitCastInst(NewAI, AI->getType(), "", AI); + AI->replaceAllUsesWith(Bitcast); AllocaToPaddedAllocaMap[AI] = NewAI; } } diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/alloca-array.ll b/llvm/test/Instrumentation/HWAddressSanitizer/alloca-array.ll new file mode 100644 index 0000000000000..7a83d0e2bb026 --- /dev/null +++ b/llvm/test/Instrumentation/HWAddressSanitizer/alloca-array.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -hwasan -S | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android" + +declare void @use(i8*, i8*) + +define void @test_alloca() sanitize_hwaddress { + ; CHECK: alloca { [4 x i8], [12 x i8] }, align 16 + %x = alloca i8, i64 4 + ; CHECK: alloca i8, i64 16, align 16 + %y = alloca i8, i64 16 + call void @use(i8* %x, i8* %y) + ret void +} diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/alloca-with-calls.ll b/llvm/test/Instrumentation/HWAddressSanitizer/alloca-with-calls.ll index d47c38ff58902..2d0d113dedff9 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/alloca-with-calls.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/alloca-with-calls.ll @@ -9,10 +9,10 @@ declare void @use32(i32*) define void @test_alloca() sanitize_hwaddress { ; CHECK-LABEL: @test_alloca( -; CHECK: %[[GEP:[^ ]*]] = getelementptr { i32, [12 x i8] }, { i32, [12 x i8] }* %x, i32 0, i32 0 +; CHECK: 
%[[BC:[^ ]*]] = bitcast { i32, [12 x i8] }* %x to i32* ; CHECK: %[[T1:[^ ]*]] = call i8 @__hwasan_generate_tag() ; CHECK: %[[A:[^ ]*]] = zext i8 %[[T1]] to i64 -; CHECK: %[[B:[^ ]*]] = ptrtoint i32* %[[GEP]] to i64 +; CHECK: %[[B:[^ ]*]] = ptrtoint i32* %[[BC]] to i64 ; CHECK: %[[C:[^ ]*]] = shl i64 %[[A]], 56 ; CHECK: or i64 %[[B]], %[[C]] diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll b/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll index 65272caf6f4b8..ea2b566a744a5 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll @@ -17,28 +17,28 @@ define void @test_alloca() sanitize_hwaddress { ; CHECK: %[[BASE_TAG:[^ ]*]] = xor i64 %[[A]], %[[B]] ; CHECK: %[[X:[^ ]*]] = alloca { i32, [12 x i8] }, align 16 -; CHECK: %[[X_GEP:[^ ]*]] = getelementptr { i32, [12 x i8] }, { i32, [12 x i8] }* %[[X]], i32 0, i32 0 +; CHECK: %[[X_BC:[^ ]*]] = bitcast { i32, [12 x i8] }* %[[X]] to i32* ; CHECK: %[[X_TAG:[^ ]*]] = xor i64 %[[BASE_TAG]], 0 -; CHECK: %[[X1:[^ ]*]] = ptrtoint i32* %[[X_GEP]] to i64 +; CHECK: %[[X1:[^ ]*]] = ptrtoint i32* %[[X_BC]] to i64 ; CHECK: %[[C:[^ ]*]] = shl i64 %[[X_TAG]], 56 ; CHECK: %[[D:[^ ]*]] = or i64 %[[X1]], %[[C]] ; CHECK: %[[X_HWASAN:[^ ]*]] = inttoptr i64 %[[D]] to i32* ; CHECK: %[[X_TAG2:[^ ]*]] = trunc i64 %[[X_TAG]] to i8 -; CHECK: %[[E:[^ ]*]] = ptrtoint i32* %[[X_GEP]] to i64 +; CHECK: %[[E:[^ ]*]] = ptrtoint i32* %[[X_BC]] to i64 ; CHECK: %[[F:[^ ]*]] = lshr i64 %[[E]], 4 ; DYNAMIC-SHADOW: %[[X_SHADOW:[^ ]*]] = getelementptr i8, i8* %.hwasan.shadow, i64 %[[F]] ; ZERO-BASED-SHADOW: %[[X_SHADOW:[^ ]*]] = inttoptr i64 %[[F]] to i8* ; CHECK: %[[X_SHADOW_GEP:[^ ]*]] = getelementptr i8, i8* %[[X_SHADOW]], i32 0 ; CHECK: store i8 4, i8* %[[X_SHADOW_GEP]] -; CHECK: %[[X_I8:[^ ]*]] = bitcast i32* %[[X_GEP]] to i8* +; CHECK: %[[X_I8:[^ ]*]] = bitcast i32* %[[X_BC]] to i8* ; CHECK: %[[X_I8_GEP:[^ ]*]] = getelementptr i8, i8* %[[X_I8]], i32 15 ; CHECK: 
store i8 %[[X_TAG2]], i8* %[[X_I8_GEP]] ; CHECK: call void @use32(i32* nonnull %[[X_HWASAN]]) ; UAR-TAGS: %[[BASE_TAG_COMPL:[^ ]*]] = xor i64 %[[BASE_TAG]], 255 ; UAR-TAGS: %[[X_TAG_UAR:[^ ]*]] = trunc i64 %[[BASE_TAG_COMPL]] to i8 -; CHECK: %[[E2:[^ ]*]] = ptrtoint i32* %[[X_GEP]] to i64 +; CHECK: %[[E2:[^ ]*]] = ptrtoint i32* %[[X_BC]] to i64 ; CHECK: %[[F2:[^ ]*]] = lshr i64 %[[E2]], 4 ; DYNAMIC-SHADOW: %[[X_SHADOW2:[^ ]*]] = getelementptr i8, i8* %.hwasan.shadow, i64 %[[F2]] ; ZERO-BASED-SHADOW: %[[X_SHADOW2:[^ ]*]] = inttoptr i64 %[[F2]] to i8* diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/kernel-alloca.ll b/llvm/test/Instrumentation/HWAddressSanitizer/kernel-alloca.ll index 8ab8725bcedc0..ddf81c482ccd9 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/kernel-alloca.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/kernel-alloca.ll @@ -15,9 +15,9 @@ define void @test_alloca() sanitize_hwaddress { ; CHECK: %[[BASE_TAG:[^ ]*]] = xor i64 %[[A]], %[[B]] ; CHECK: %[[X:[^ ]*]] = alloca { i32, [12 x i8] }, align 16 -; CHECK: %[[X_GEP:[^ ]*]] = getelementptr { i32, [12 x i8] }, { i32, [12 x i8] }* %[[X]], i32 0, i32 0 +; CHECK: %[[X_BC:[^ ]*]] = bitcast { i32, [12 x i8] }* %[[X]] to i32* ; CHECK: %[[X_TAG:[^ ]*]] = xor i64 %[[BASE_TAG]], 0 -; CHECK: %[[X1:[^ ]*]] = ptrtoint i32* %[[X_GEP]] to i64 +; CHECK: %[[X1:[^ ]*]] = ptrtoint i32* %[[X_BC]] to i64 ; CHECK: %[[C:[^ ]*]] = shl i64 %[[X_TAG]], 56 ; CHECK: %[[D:[^ ]*]] = or i64 %[[C]], 72057594037927935 ; CHECK: %[[E:[^ ]*]] = and i64 %[[X1]], %[[D]] From 4ac0b9be230596e24e439109f2d23ea3dd81ebfd Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Tue, 16 Jul 2019 03:47:34 +0000 Subject: [PATCH 203/451] [RISCV] Make RISCVELFObjectWriter::getRelocType check IsPCRel Previously, this function didn't check the IsPCRel argument. But doing so is a useful check for errors, and also seemingly necessary for FK_Data_4 (which we produce a R_RISCV_32_PCREL relocation for if IsPCRel). 
Other than R_RISCV_32_PCREL, this should be NFC. Future exception handling related patches will include tests that capture this behaviour. llvm-svn: 366172 --- .../MCTargetDesc/RISCVELFObjectWriter.cpp | 61 +++++++++++-------- 1 file changed, 36 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp index c910f2ca39fec..3ccbc86d2619a 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp @@ -48,7 +48,42 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, const MCFixup &Fixup, bool IsPCRel) const { // Determine the type of the relocation - switch ((unsigned)Fixup.getKind()) { + unsigned Kind = Fixup.getKind(); + if (IsPCRel) { + switch (Kind) { + default: + llvm_unreachable("invalid fixup kind!"); + case FK_Data_4: + case FK_PCRel_4: + return ELF::R_RISCV_32_PCREL; + case RISCV::fixup_riscv_pcrel_hi20: + return ELF::R_RISCV_PCREL_HI20; + case RISCV::fixup_riscv_pcrel_lo12_i: + return ELF::R_RISCV_PCREL_LO12_I; + case RISCV::fixup_riscv_pcrel_lo12_s: + return ELF::R_RISCV_PCREL_LO12_S; + case RISCV::fixup_riscv_got_hi20: + return ELF::R_RISCV_GOT_HI20; + case RISCV::fixup_riscv_tls_got_hi20: + return ELF::R_RISCV_TLS_GOT_HI20; + case RISCV::fixup_riscv_tls_gd_hi20: + return ELF::R_RISCV_TLS_GD_HI20; + case RISCV::fixup_riscv_jal: + return ELF::R_RISCV_JAL; + case RISCV::fixup_riscv_branch: + return ELF::R_RISCV_BRANCH; + case RISCV::fixup_riscv_rvc_jump: + return ELF::R_RISCV_RVC_JUMP; + case RISCV::fixup_riscv_rvc_branch: + return ELF::R_RISCV_RVC_BRANCH; + case RISCV::fixup_riscv_call: + return ELF::R_RISCV_CALL; + case RISCV::fixup_riscv_call_plt: + return ELF::R_RISCV_CALL_PLT; + } + } + + switch (Kind) { default: llvm_unreachable("invalid fixup kind!"); case FK_Data_4: @@ -77,14 +112,6 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, return 
ELF::R_RISCV_LO12_I; case RISCV::fixup_riscv_lo12_s: return ELF::R_RISCV_LO12_S; - case RISCV::fixup_riscv_pcrel_hi20: - return ELF::R_RISCV_PCREL_HI20; - case RISCV::fixup_riscv_pcrel_lo12_i: - return ELF::R_RISCV_PCREL_LO12_I; - case RISCV::fixup_riscv_pcrel_lo12_s: - return ELF::R_RISCV_PCREL_LO12_S; - case RISCV::fixup_riscv_got_hi20: - return ELF::R_RISCV_GOT_HI20; case RISCV::fixup_riscv_tprel_hi20: return ELF::R_RISCV_TPREL_HI20; case RISCV::fixup_riscv_tprel_lo12_i: @@ -93,22 +120,6 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_RISCV_TPREL_LO12_S; case RISCV::fixup_riscv_tprel_add: return ELF::R_RISCV_TPREL_ADD; - case RISCV::fixup_riscv_tls_got_hi20: - return ELF::R_RISCV_TLS_GOT_HI20; - case RISCV::fixup_riscv_tls_gd_hi20: - return ELF::R_RISCV_TLS_GD_HI20; - case RISCV::fixup_riscv_jal: - return ELF::R_RISCV_JAL; - case RISCV::fixup_riscv_branch: - return ELF::R_RISCV_BRANCH; - case RISCV::fixup_riscv_rvc_jump: - return ELF::R_RISCV_RVC_JUMP; - case RISCV::fixup_riscv_rvc_branch: - return ELF::R_RISCV_RVC_BRANCH; - case RISCV::fixup_riscv_call: - return ELF::R_RISCV_CALL; - case RISCV::fixup_riscv_call_plt: - return ELF::R_RISCV_CALL_PLT; case RISCV::fixup_riscv_relax: return ELF::R_RISCV_RELAX; case RISCV::fixup_riscv_align: From a3c7b27419a52d612fe2cad319aafba1e226502b Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Tue, 16 Jul 2019 03:54:08 +0000 Subject: [PATCH 204/451] [RISCV][NFC] Fix HasStedExtA -> HasStdExtA typo in comment Differential Revision: https://reviews.llvm.org/D64011 Patch by James Clarke. 
llvm-svn: 366173 --- llvm/lib/Target/RISCV/RISCVInstrInfoA.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index b35c2f128bae0..b768c9347b385 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -84,7 +84,7 @@ defm AMOMIN_D : AMO_rr_aq_rl<0b10000, 0b011, "amomin.d">; defm AMOMAX_D : AMO_rr_aq_rl<0b10100, 0b011, "amomax.d">; defm AMOMINU_D : AMO_rr_aq_rl<0b11000, 0b011, "amominu.d">; defm AMOMAXU_D : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">; -} // Predicates = [HasStedExtA, IsRV64] +} // Predicates = [HasStdExtA, IsRV64] //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns From ef8577ef98717c1c6a66293de3b2cc0f09e8c3ff Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Tue, 16 Jul 2019 03:56:45 +0000 Subject: [PATCH 205/451] [RISCV][NFC] Split PseudoCALL pattern out from instruction Since PseudoCALL defines AsmString, it can be generated from assembly, and so code-gen patterns should be defined separately to be consistent with the style of the RISCV backend. Other pseudo-instructions exist that have code-gen patterns defined directly, but these instructions are purely for code-gen and cannot be written in assembly. Differential Revision: https://reviews.llvm.org/D64012 Patch by James Clarke. llvm-svn: 366174 --- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 6c5218ba78359..b017307b46173 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -886,11 +886,11 @@ def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []> { // Define AsmString to print "call" when compile with -S flag. 
// Define isCodeGenOnly = 0 to support parsing assembly "call" instruction. let isCall = 1, Defs = [X1], isCodeGenOnly = 0 in -def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), - [(riscv_call tglobaladdr:$func)]> { +def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []> { let AsmString = "call\t$func"; } +def : Pat<(riscv_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>; def : Pat<(riscv_call texternalsym:$func), (PseudoCALL texternalsym:$func)>; def : Pat<(riscv_uret_flag), (URET X0, X0)>; From e9ad0cf6cf79cfa5f8ce99db0f7161e110850011 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Tue, 16 Jul 2019 04:37:19 +0000 Subject: [PATCH 206/451] [RISCV] Fix a potential issue in shouldInsertFixupForCodeAlign() The bool result of shouldInsertExtraNopBytesForCodeAlign() is not checked but the returned nop count is unconditionally read even though it could be uninitialized. Differential Revision: https://reviews.llvm.org/D63285 Patch by Edward Jones. llvm-svn: 366175 --- llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index db25efb160f60..821ac2033c939 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -329,11 +329,10 @@ bool RISCVAsmBackend::shouldInsertFixupForCodeAlign(MCAssembler &Asm, if (!STI.getFeatureBits()[RISCV::FeatureRelax]) return false; - // Calculate total Nops we need to insert. + // Calculate total Nops we need to insert. If there are none to insert + // then simply return. unsigned Count; - shouldInsertExtraNopBytesForCodeAlign(AF, Count); - // No Nop need to insert, simply return. 
- if (Count == 0) + if (!shouldInsertExtraNopBytesForCodeAlign(AF, Count) || (Count == 0)) return false; MCContext &Ctx = Asm.getContext(); From bb479ca311958120279cf2c21da3a9d8d06ceb17 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Tue, 16 Jul 2019 04:40:25 +0000 Subject: [PATCH 207/451] [RISCV] Avoid overflow when determining number of nops for code align RISCVAsmBackend::shouldInsertExtraNopBytesForCodeAlign() assumed that the align specified would be greater than or equal to the minimum nop length, but that is not always the case - for example if a user specifies ".align 0" in assembly. Differential Revision: https://reviews.llvm.org/D63274 Patch by Edward Jones. llvm-svn: 366176 --- llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 8 ++++++-- llvm/test/MC/RISCV/align.s | 7 +++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index 821ac2033c939..ee5f760ebcb0e 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -313,8 +313,12 @@ bool RISCVAsmBackend::shouldInsertExtraNopBytesForCodeAlign( bool HasStdExtC = STI.getFeatureBits()[RISCV::FeatureStdExtC]; unsigned MinNopLen = HasStdExtC ? 2 : 4; - Size = AF.getAlignment() - MinNopLen; - return true; + if (AF.getAlignment() <= MinNopLen) { + return false; + } else { + Size = AF.getAlignment() - MinNopLen; + return true; + } } // We need to insert R_RISCV_ALIGN relocation type to indicate the diff --git a/llvm/test/MC/RISCV/align.s b/llvm/test/MC/RISCV/align.s index e62af93155587..b4b3e6aa778ae 100644 --- a/llvm/test/MC/RISCV/align.s +++ b/llvm/test/MC/RISCV/align.s @@ -90,6 +90,13 @@ test: ret # NORELAX-RELOC-NOT: R_RISCV # C-EXT-NORELAX-RELOC-NOT: R_RISCV +# Code alignment of a byte size less than the size of a nop must be treated +# as no alignment. 
This used to trigger a fatal error with relaxation enabled +# as the calculation to emit the worst-case sequence of nops would overflow. + .p2align 1 + add a0, a0, a1 + .p2align 0 + add a0, a0, a1 # We only need to insert R_RISCV_ALIGN for code section # when the linker relaxation enabled. .data From 49a3ad21d6034eb20f99f228dbebcc5f65a748d8 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Tue, 16 Jul 2019 04:46:31 +0000 Subject: [PATCH 208/451] Fix parameter name comments using clang-tidy. NFC. This patch applies clang-tidy's bugprone-argument-comment tool to LLVM, clang and lld source trees. Here is how I created this patch: $ git clone https://github.com/llvm/llvm-project.git $ cd llvm-project $ mkdir build $ cd build $ cmake -GNinja -DCMAKE_BUILD_TYPE=Debug \ -DLLVM_ENABLE_PROJECTS='clang;lld;clang-tools-extra' \ -DCMAKE_EXPORT_COMPILE_COMMANDS=On -DLLVM_ENABLE_LLD=On \ -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ ../llvm $ ninja $ parallel clang-tidy -checks='-*,bugprone-argument-comment' \ -config='{CheckOptions: [{key: StrictMode, value: 1}]}' -fix \ ::: ../llvm/lib/**/*.{cpp,h} ../clang/lib/**/*.{cpp,h} ../lld/**/*.{cpp,h} llvm-svn: 366177 --- clang/lib/ARCMigrate/ARCMT.cpp | 2 +- clang/lib/ARCMigrate/ObjCMT.cpp | 4 +- clang/lib/ARCMigrate/TransGCAttrs.cpp | 2 +- clang/lib/AST/Expr.cpp | 2 +- clang/lib/AST/ItaniumMangle.cpp | 2 +- clang/lib/AST/Mangle.cpp | 2 +- clang/lib/AST/ScanfFormatString.cpp | 2 +- clang/lib/AST/Type.cpp | 4 +- clang/lib/AST/VTableBuilder.cpp | 4 +- clang/lib/Analysis/BodyFarm.cpp | 24 +++++----- clang/lib/Analysis/CFG.cpp | 4 +- clang/lib/Basic/FixedPoint.cpp | 4 +- clang/lib/CodeGen/CGBuilder.h | 2 +- clang/lib/CodeGen/CGBuiltin.cpp | 12 ++--- clang/lib/CodeGen/CGCXX.cpp | 2 +- clang/lib/CodeGen/CGCXXABI.cpp | 2 +- clang/lib/CodeGen/CGCall.cpp | 6 +-- clang/lib/CodeGen/CGCoroutine.cpp | 2 +- clang/lib/CodeGen/CGDecl.cpp | 2 +- clang/lib/CodeGen/CGException.cpp | 8 ++-- clang/lib/CodeGen/CGExpr.cpp | 12 ++--- 
clang/lib/CodeGen/CGExprCXX.cpp | 2 +- clang/lib/CodeGen/CGExprConstant.cpp | 2 +- clang/lib/CodeGen/CGObjC.cpp | 2 +- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 26 +++++------ clang/lib/CodeGen/CGStmtOpenMP.cpp | 4 +- clang/lib/CodeGen/CodeGenABITypes.cpp | 2 +- clang/lib/CodeGen/CodeGenModule.cpp | 2 +- clang/lib/CodeGen/CoverageMappingGen.cpp | 2 +- clang/lib/CodeGen/ItaniumCXXABI.cpp | 22 +++++----- clang/lib/CodeGen/MicrosoftCXXABI.cpp | 32 +++++++------- clang/lib/CodeGen/TargetInfo.cpp | 2 +- clang/lib/Driver/Driver.cpp | 4 +- clang/lib/Driver/ToolChains/Clang.cpp | 8 ++-- clang/lib/Driver/ToolChains/MSVC.cpp | 4 +- clang/lib/Format/UnwrappedLineFormatter.cpp | 2 +- clang/lib/Frontend/FrontendActions.cpp | 4 +- .../Frontend/Rewrite/RewriteModernObjC.cpp | 10 ++--- clang/lib/Frontend/Rewrite/RewriteObjC.cpp | 10 ++--- clang/lib/Index/IndexDecl.cpp | 4 +- clang/lib/Lex/HeaderSearch.cpp | 8 ++-- clang/lib/Lex/PPDirectives.cpp | 6 +-- clang/lib/Lex/PPMacroExpansion.cpp | 2 +- clang/lib/Lex/Pragma.cpp | 10 ++--- clang/lib/Lex/Preprocessor.cpp | 2 +- clang/lib/Parse/ParseCXXInlineMethods.cpp | 2 +- clang/lib/Parse/ParseDecl.cpp | 2 +- clang/lib/Parse/ParseDeclCXX.cpp | 4 +- clang/lib/Parse/ParseExpr.cpp | 8 ++-- clang/lib/Parse/ParseExprCXX.cpp | 16 +++---- clang/lib/Parse/ParseObjc.cpp | 12 ++--- clang/lib/Parse/ParsePragma.cpp | 6 +-- clang/lib/Parse/ParseTemplate.cpp | 2 +- clang/lib/Sema/SemaCast.cpp | 6 +-- clang/lib/Sema/SemaChecking.cpp | 2 +- clang/lib/Sema/SemaCodeComplete.cpp | 10 ++--- clang/lib/Sema/SemaDecl.cpp | 2 +- clang/lib/Sema/SemaDeclAttr.cpp | 6 +-- clang/lib/Sema/SemaDeclCXX.cpp | 8 ++-- clang/lib/Sema/SemaDeclObjC.cpp | 2 +- clang/lib/Sema/SemaExpr.cpp | 30 ++++++------- clang/lib/Sema/SemaExprCXX.cpp | 2 +- clang/lib/Sema/SemaExprMember.cpp | 14 +++--- clang/lib/Sema/SemaInit.cpp | 6 +-- clang/lib/Sema/SemaLambda.cpp | 10 ++--- clang/lib/Sema/SemaModule.cpp | 4 +- clang/lib/Sema/SemaObjCProperty.cpp | 2 +- clang/lib/Sema/SemaOpenMP.cpp | 4 +- 
clang/lib/Sema/SemaOverload.cpp | 44 +++++++++---------- clang/lib/Sema/SemaStmt.cpp | 4 +- clang/lib/Sema/SemaStmtAsm.cpp | 2 +- clang/lib/Sema/SemaTemplate.cpp | 6 +-- clang/lib/Sema/SemaTemplateDeduction.cpp | 6 +-- clang/lib/Sema/SemaType.cpp | 2 +- clang/lib/Serialization/ASTReader.cpp | 2 +- clang/lib/Serialization/ASTReaderStmt.cpp | 2 +- clang/lib/Serialization/ASTWriter.cpp | 2 +- clang/lib/Serialization/GlobalModuleIndex.cpp | 4 +- clang/lib/Serialization/ModuleManager.cpp | 12 ++--- .../Checkers/DynamicTypePropagation.cpp | 6 +-- .../Checkers/GCDAntipatternChecker.cpp | 2 +- .../Checkers/IdenticalExprChecker.cpp | 2 +- .../StaticAnalyzer/Checkers/MallocChecker.cpp | 2 +- .../Checkers/NullabilityChecker.cpp | 2 +- .../Checkers/OSObjectCStyleCast.cpp | 2 +- .../Checkers/ObjCAutoreleaseWriteChecker.cpp | 2 +- .../RetainCountChecker/RetainCountChecker.cpp | 2 +- .../RetainCountDiagnostics.cpp | 2 +- .../RunLoopAutoreleaseLeakChecker.cpp | 2 +- .../Checkers/TrustNonnullChecker.cpp | 4 +- .../StaticAnalyzer/Core/AnalysisManager.cpp | 2 +- clang/lib/StaticAnalyzer/Core/BugReporter.cpp | 2 +- .../StaticAnalyzer/Core/DynamicTypeMap.cpp | 2 +- clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 2 +- .../Core/ExprEngineCallAndReturn.cpp | 12 ++--- clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 2 +- .../StaticAnalyzer/Core/SimpleSValBuilder.cpp | 2 +- lld/COFF/Driver.cpp | 4 +- lld/COFF/DriverUtils.cpp | 2 +- lld/ELF/Driver.cpp | 8 ++-- lld/ELF/InputFiles.cpp | 6 +-- lld/ELF/ScriptParser.cpp | 12 ++--- lld/ELF/Thunks.cpp | 2 +- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 4 +- llvm/lib/CodeGen/EdgeBundles.cpp | 2 +- llvm/lib/CodeGen/MachineBasicBlock.cpp | 2 +- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 2 +- llvm/lib/CodeGen/MachineFrameInfo.cpp | 2 +- llvm/lib/CodeGen/MachineFunction.cpp | 2 +- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 30 ++++++------- .../SelectionDAG/FunctionLoweringInfo.cpp | 2 +- llvm/lib/CodeGen/TargetLoweringBase.cpp | 2 +- 
llvm/lib/CodeGen/WinEHPrepare.cpp | 4 +- llvm/lib/IR/LegacyPassManager.cpp | 2 +- llvm/lib/Support/APSInt.cpp | 6 +-- llvm/lib/Support/LowLevelType.cpp | 4 +- llvm/lib/Support/raw_ostream.cpp | 2 +- llvm/lib/Target/AArch64/AArch64FastISel.cpp | 6 +-- .../AMDGPUOpenCLEnqueuedBlockLowering.cpp | 4 +- llvm/lib/Target/ARM/ARMISelLowering.cpp | 6 +-- .../MCTargetDesc/LanaiELFObjectWriter.cpp | 2 +- .../MCTargetDesc/SystemZMCObjectWriter.cpp | 4 +- .../MCTargetDesc/WebAssemblyInstPrinter.cpp | 2 +- .../WebAssembly/WebAssemblyFastISel.cpp | 2 +- .../WebAssembly/WebAssemblyRegisterInfo.cpp | 6 +-- llvm/lib/Target/X86/X86FastISel.cpp | 2 +- llvm/lib/Target/X86/X86FrameLowering.cpp | 2 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 4 +- llvm/lib/Target/X86/X86WinAllocaExpander.cpp | 2 +- llvm/lib/Transforms/Coroutines/CoroEarly.cpp | 2 +- llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 2 +- llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 2 +- .../InstCombine/InstCombineAndOrXor.cpp | 2 +- .../InstCombine/InstCombineMulDivRem.cpp | 8 ++-- .../Instrumentation/HWAddressSanitizer.cpp | 2 +- .../Scalar/CorrelatedValuePropagation.cpp | 2 +- llvm/lib/Transforms/Scalar/Float2Int.cpp | 2 +- .../Transforms/Scalar/LoopStrengthReduce.cpp | 2 +- llvm/lib/Transforms/Utils/LowerSwitch.cpp | 2 +- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 2 +- 140 files changed, 372 insertions(+), 372 deletions(-) diff --git a/clang/lib/ARCMigrate/ARCMT.cpp b/clang/lib/ARCMigrate/ARCMT.cpp index 6a5efc00d7e85..568e06f21fba6 100644 --- a/clang/lib/ARCMigrate/ARCMT.cpp +++ b/clang/lib/ARCMigrate/ARCMT.cpp @@ -514,7 +514,7 @@ MigrationProcess::MigrationProcess( IntrusiveRefCntPtr Diags( new DiagnosticsEngine(DiagID, &CI.getDiagnosticOpts(), DiagClient, /*ShouldOwnClient=*/false)); - Remapper.initFromDisk(outputDir, *Diags, /*ignoreIfFilesChanges=*/true); + Remapper.initFromDisk(outputDir, *Diags, /*ignoreIfFilesChanged=*/true); } } diff --git a/clang/lib/ARCMigrate/ObjCMT.cpp b/clang/lib/ARCMigrate/ObjCMT.cpp 
index f22e03f490f47..7126a0873ea0c 100644 --- a/clang/lib/ARCMigrate/ObjCMT.cpp +++ b/clang/lib/ARCMigrate/ObjCMT.cpp @@ -110,7 +110,7 @@ class ObjCMigrateASTConsumer : public ASTConsumer { if (!Summaries) Summaries.reset(new RetainSummaryManager(Ctx, /*TrackNSCFObjects=*/true, - /*TrackOSObjects=*/false)); + /*trackOSObjects=*/false)); return *Summaries; } @@ -216,7 +216,7 @@ ObjCMigrateAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { bool ObjCMigrateAction::BeginInvocation(CompilerInstance &CI) { Remapper.initFromDisk(MigrateDir, CI.getDiagnostics(), - /*ignoreIfFilesChanges=*/true); + /*ignoreIfFilesChanged=*/true); CompInst = &CI; CI.getDiagnostics().setIgnoreAllWarnings(true); return true; diff --git a/clang/lib/ARCMigrate/TransGCAttrs.cpp b/clang/lib/ARCMigrate/TransGCAttrs.cpp index fdbe1d119af81..5e3162197ed1b 100644 --- a/clang/lib/ARCMigrate/TransGCAttrs.cpp +++ b/clang/lib/ARCMigrate/TransGCAttrs.cpp @@ -269,7 +269,7 @@ static void checkAllAtProps(MigrationContext &MigrateCtx, StringRef toAttr = "strong"; if (hasWeak) { if (canApplyWeak(MigrateCtx.Pass.Ctx, IndProps.front()->getType(), - /*AllowOnUnkwownClass=*/true)) + /*AllowOnUnknownClass=*/true)) toAttr = "weak"; else toAttr = "unsafe_unretained"; diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 10ab2bf72b72e..6ef77b8aee684 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -2201,7 +2201,7 @@ APValue SourceLocExpr::EvaluateInContext(const ASTContext &Ctx, case SourceLocExpr::Line: case SourceLocExpr::Column: { llvm::APSInt IntVal(Ctx.getIntWidth(Ctx.UnsignedIntTy), - /*IsUnsigned=*/true); + /*isUnsigned=*/true); IntVal = getIdentKind() == SourceLocExpr::Line ? 
PLoc.getLine() : PLoc.getColumn(); return APValue(IntVal); diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 5f4644b0f7c31..6c813f09a4b3c 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -3787,7 +3787,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) { if (TypeSourceInfo *ScopeInfo = PDE->getScopeTypeInfo()) { if (Qualifier) { mangleUnresolvedPrefix(Qualifier, - /*Recursive=*/true); + /*recursive=*/true); mangleUnresolvedTypeOrSimpleId(ScopeInfo->getType()); Out << 'E'; } else { diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp index 5582dc582aca2..625282368a4d1 100644 --- a/clang/lib/AST/Mangle.cpp +++ b/clang/lib/AST/Mangle.cpp @@ -379,7 +379,7 @@ class ASTNameGenerator::Implementation { auto hasDefaultCXXMethodCC = [](ASTContext &C, const CXXMethodDecl *MD) { auto DefaultCC = C.getDefaultCallingConvention(/*IsVariadic=*/false, - /*IsCSSMethod=*/true); + /*IsCXXMethod=*/true); auto CC = MD->getType()->getAs()->getCallConv(); return CC == DefaultCC; }; diff --git a/clang/lib/AST/ScanfFormatString.cpp b/clang/lib/AST/ScanfFormatString.cpp index 1a87de70f86b7..8d763f28e57fd 100644 --- a/clang/lib/AST/ScanfFormatString.cpp +++ b/clang/lib/AST/ScanfFormatString.cpp @@ -142,7 +142,7 @@ static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, } // Look for the length modifier. - if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) { + if (ParseLengthModifier(FS, I, E, LO, /*IsScanf=*/true) && I == E) { // No more characters left? 
H.HandleIncompleteSpecifier(Start, E - Start); return true; diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 01e93c11aad87..ed75a0b5bcd85 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -4080,7 +4080,7 @@ CXXRecordDecl *MemberPointerType::getMostRecentCXXRecordDecl() const { void clang::FixedPointValueToString(SmallVectorImpl &Str, llvm::APSInt Val, unsigned Scale) { FixedPointSemantics FXSema(Val.getBitWidth(), Scale, Val.isSigned(), - /*isSaturated=*/false, - /*hasUnsignedPadding=*/false); + /*IsSaturated=*/false, + /*HasUnsignedPadding=*/false); APFixedPoint(Val, FXSema).toString(Str); } diff --git a/clang/lib/AST/VTableBuilder.cpp b/clang/lib/AST/VTableBuilder.cpp index 5c79ea37ab11d..0c699571555d5 100644 --- a/clang/lib/AST/VTableBuilder.cpp +++ b/clang/lib/AST/VTableBuilder.cpp @@ -1272,7 +1272,7 @@ ThisAdjustment ItaniumVTableBuilder::ComputeThisAdjustment( // We don't have vcall offsets for this virtual base, go ahead and // build them. VCallAndVBaseOffsetBuilder Builder(MostDerivedClass, MostDerivedClass, - /*FinalOverriders=*/nullptr, + /*Overriders=*/nullptr, BaseSubobject(Offset.VirtualBase, CharUnits::Zero()), /*BaseIsVirtual=*/true, @@ -2245,7 +2245,7 @@ ItaniumVTableContext::getVirtualBaseOffsetOffset(const CXXRecordDecl *RD, if (I != VirtualBaseClassOffsetOffsets.end()) return I->second; - VCallAndVBaseOffsetBuilder Builder(RD, RD, /*FinalOverriders=*/nullptr, + VCallAndVBaseOffsetBuilder Builder(RD, RD, /*Overriders=*/nullptr, BaseSubobject(RD, CharUnits::Zero()), /*BaseIsVirtual=*/false, /*OffsetInLayoutClass=*/CharUnits::Zero()); diff --git a/clang/lib/Analysis/BodyFarm.cpp b/clang/lib/Analysis/BodyFarm.cpp index 7e636ed1f5889..576f86516017b 100644 --- a/clang/lib/Analysis/BodyFarm.cpp +++ b/clang/lib/Analysis/BodyFarm.cpp @@ -293,7 +293,7 @@ static CallExpr *create_call_once_lambda_call(ASTContext &C, ASTMaker M, return CXXOperatorCallExpr::Create( /*AstContext=*/C, OO_Call, callOperatorDeclRef, - 
/*args=*/CallArgs, + /*Args=*/CallArgs, /*QualType=*/C.VoidTy, /*ExprValueType=*/VK_RValue, /*SourceLocation=*/SourceLocation(), FPOptions()); @@ -465,10 +465,10 @@ static Stmt *create_call_once(ASTContext &C, const FunctionDecl *D) { auto *Out = IfStmt::Create(C, SourceLocation(), /* IsConstexpr=*/false, - /* init=*/nullptr, - /* var=*/nullptr, - /* cond=*/FlagCheck, - /* then=*/M.makeCompound({CallbackCall, FlagAssignment})); + /* Init=*/nullptr, + /* Var=*/nullptr, + /* Cond=*/FlagCheck, + /* Then=*/M.makeCompound({CallbackCall, FlagAssignment})); return Out; } @@ -511,7 +511,7 @@ static Stmt *create_dispatch_once(ASTContext &C, const FunctionDecl *D) { CallExpr *CE = CallExpr::Create( /*ASTContext=*/C, /*StmtClass=*/M.makeLvalueToRvalue(/*Expr=*/Block), - /*args=*/None, + /*Args=*/None, /*QualType=*/C.VoidTy, /*ExprValueType=*/VK_RValue, /*SourceLocation=*/SourceLocation()); @@ -549,10 +549,10 @@ static Stmt *create_dispatch_once(ASTContext &C, const FunctionDecl *D) { // (5) Create the 'if' statement. auto *If = IfStmt::Create(C, SourceLocation(), /* IsConstexpr=*/false, - /* init=*/nullptr, - /* var=*/nullptr, - /* cond=*/GuardCondition, - /* then=*/CS); + /* Init=*/nullptr, + /* Var=*/nullptr, + /* Cond=*/GuardCondition, + /* Then=*/CS); return If; } @@ -657,8 +657,8 @@ static Stmt *create_OSAtomicCompareAndSwap(ASTContext &C, const FunctionDecl *D) /// Construct the If. auto *If = IfStmt::Create(C, SourceLocation(), /* IsConstexpr=*/false, - /* init=*/nullptr, - /* var=*/nullptr, Comparison, Body, + /* Init=*/nullptr, + /* Var=*/nullptr, Comparison, Body, SourceLocation(), Else); return If; diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp index f92d311111e7b..de89105a29257 100644 --- a/clang/lib/Analysis/CFG.cpp +++ b/clang/lib/Analysis/CFG.cpp @@ -2931,8 +2931,8 @@ CFGBlock *CFGBuilder::VisitIfStmt(IfStmt *I) { // Add the successors. If we know that specific branches are // unreachable, inform addSuccessor() of that knowledge. 
- addSuccessor(Block, ThenBlock, /* isReachable = */ !KnownVal.isFalse()); - addSuccessor(Block, ElseBlock, /* isReachable = */ !KnownVal.isTrue()); + addSuccessor(Block, ThenBlock, /* IsReachable = */ !KnownVal.isFalse()); + addSuccessor(Block, ElseBlock, /* IsReachable = */ !KnownVal.isTrue()); // Add the condition as the last statement in the new block. This may // create new blocks as the condition may contain control-flow. Any newly diff --git a/clang/lib/Basic/FixedPoint.cpp b/clang/lib/Basic/FixedPoint.cpp index f049e6f64a502..05600dfc6d212 100644 --- a/clang/lib/Basic/FixedPoint.cpp +++ b/clang/lib/Basic/FixedPoint.cpp @@ -190,12 +190,12 @@ void APFixedPoint::toString(llvm::SmallVectorImpl &Str) const { llvm::APInt FractPartMask = llvm::APInt::getAllOnesValue(Scale).zext(Width); llvm::APInt RadixInt = llvm::APInt(Width, 10); - IntPart.toString(Str, /*radix=*/10); + IntPart.toString(Str, /*Radix=*/10); Str.push_back('.'); do { (FractPart * RadixInt) .lshr(Scale) - .toString(Str, /*radix=*/10, Val.isSigned()); + .toString(Str, /*Radix=*/10, Val.isSigned()); FractPart = (FractPart * RadixInt) & FractPartMask; } while (FractPart != 0); } diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h index 7a26ae2fdfab8..68c8c641139f2 100644 --- a/clang/lib/CodeGen/CGBuilder.h +++ b/clang/lib/CodeGen/CGBuilder.h @@ -263,7 +263,7 @@ class CGBuilderTy : public CGBuilderBaseTy { Addr.getElementType(), Addr.getPointer(), Idx0, Idx1, Name)); llvm::APInt Offset( DL.getIndexSizeInBits(Addr.getType()->getPointerAddressSpace()), 0, - /*IsSigned=*/true); + /*isSigned=*/true); if (!GEP->accumulateConstantOffset(DL, Offset)) llvm_unreachable("offset of GEP with constants is always computable"); return Address(GEP, Addr.getAlignment().alignmentAtOffset( diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0a75c96a74b9c..c58d1018fa0ec 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -589,7 +589,7 
@@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, auto DIter = LocalDeclMap.find(D); assert(DIter != LocalDeclMap.end()); - return EmitLoadOfScalar(DIter->second, /*volatile=*/false, + return EmitLoadOfScalar(DIter->second, /*Volatile=*/false, getContext().getSizeType(), E->getBeginLoc()); } } @@ -719,7 +719,7 @@ static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF, llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false); llvm::InlineAsm *IA = - llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true); + llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); return CGF.Builder.CreateCall(IA, {BitBase, BitPos}); } @@ -1063,7 +1063,7 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, } llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); llvm::InlineAsm *IA = - llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true); + llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::NoReturn); @@ -5999,9 +5999,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, llvm::InlineAsm *Emit = IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "", - /*SideEffects=*/true) + /*hasSideEffects=*/true) : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "", - /*SideEffects=*/true); + /*hasSideEffects=*/true); return Builder.CreateCall(Emit); } @@ -12120,7 +12120,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // This syscall signals a driver assertion failure in x86 NT kernels. 
llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); llvm::InlineAsm *IA = - llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true); + llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true); llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::NoReturn); diff --git a/clang/lib/CodeGen/CGCXX.cpp b/clang/lib/CodeGen/CGCXX.cpp index adaeacfe868e0..6d903a0d09e23 100644 --- a/clang/lib/CodeGen/CGCXX.cpp +++ b/clang/lib/CodeGen/CGCXX.cpp @@ -239,7 +239,7 @@ llvm::FunctionCallee CodeGenModule::getAddrAndTypeOfCXXStructor( llvm::Constant *Ptr = GetOrCreateLLVMFunction( getMangledName(GD), FnType, GD, /*ForVTable=*/false, DontDefer, - /*isThunk=*/false, /*ExtraAttrs=*/llvm::AttributeList(), IsForDefinition); + /*IsThunk=*/false, /*ExtraAttrs=*/llvm::AttributeList(), IsForDefinition); return {FnType, Ptr}; } diff --git a/clang/lib/CodeGen/CGCXXABI.cpp b/clang/lib/CodeGen/CGCXXABI.cpp index 9c3973fb9f1c4..041c0f8959fd7 100644 --- a/clang/lib/CodeGen/CGCXXABI.cpp +++ b/clang/lib/CodeGen/CGCXXABI.cpp @@ -291,7 +291,7 @@ llvm::GlobalValue::LinkageTypes CGCXXABI::getCXXDestructorLinkage( GVALinkage Linkage, const CXXDestructorDecl *Dtor, CXXDtorType DT) const { // Delegate back to CGM by default. 
return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage, - /*isConstantVariable=*/false); + /*IsConstantVariable=*/false); } bool CGCXXABI::NeedsVTTParameter(GlobalDecl GD) { diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 705998d3b2161..5f1fb10074829 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1810,7 +1810,7 @@ void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone, void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) { llvm::AttrBuilder FuncAttrs; ConstructDefaultFnAttrList(F.getName(), F.hasOptNone(), - /* AttrOnCallsite = */ false, FuncAttrs); + /* AttrOnCallSite = */ false, FuncAttrs); F.addAttributes(llvm::AttributeList::FunctionIndex, FuncAttrs); } @@ -2490,7 +2490,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, assert(NumIRArgs == 1); auto AI = FnArgs[FirstIRArg]; AI->setName(Arg->getName() + ".coerce"); - CreateCoercedStore(AI, Ptr, /*DestIsVolatile=*/false, *this); + CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this); } // Match to what EmitParmDecl is expecting for this type. 
@@ -3537,7 +3537,7 @@ RValue CallArg::getRValue(CodeGenFunction &CGF) const { void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const { LValue Dst = CGF.MakeAddrLValue(Addr, Ty); if (!HasLV && RV.isScalar()) - CGF.EmitStoreOfScalar(RV.getScalarVal(), Dst, /*init=*/true); + CGF.EmitStoreOfScalar(RV.getScalarVal(), Dst, /*isInit=*/true); else if (!HasLV && RV.isComplex()) CGF.EmitStoreOfComplex(RV.getComplexVal(), Dst, /*init=*/true); else { diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index c529c4d900c97..aee5a927a055b 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -405,7 +405,7 @@ struct CallCoroEnd final : public EHScopeStack::Cleanup { if (Bundles.empty()) { // Otherwise, (landingpad model), create a conditional branch that leads // either to a cleanup block or a block with EH resume instruction. - auto *ResumeBB = CGF.getEHResumeBlock(/*cleanup=*/true); + auto *ResumeBB = CGF.getEHResumeBlock(/*isCleanup=*/true); auto *CleanupContBB = CGF.createBasicBlock("cleanup.cont"); CGF.Builder.CreateCondBr(CoroEnd, ResumeBB, CleanupContBB); CGF.EmitBlock(CleanupContBB); diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index 739b2d858cb2e..19a9e75cc5ac9 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -176,7 +176,7 @@ void CodeGenFunction::EmitVarDecl(const VarDecl &D) { return; llvm::GlobalValue::LinkageTypes Linkage = - CGM.getLLVMLinkageVarDefinition(&D, /*isConstant=*/false); + CGM.getLLVMLinkageVarDefinition(&D, /*IsConstant=*/false); // FIXME: We need to force the emission/use of a guard variable for // some variables even if we can constant-evaluate them because diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp index 748029b860960..3b7a88a0b7693 100644 --- a/clang/lib/CodeGen/CGException.cpp +++ b/clang/lib/CodeGen/CGException.cpp @@ -32,7 +32,7 @@ static llvm::FunctionCallee 
getFreeExceptionFn(CodeGenModule &CGM) { // void __cxa_free_exception(void *thrown_exception); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, "__cxa_free_exception"); } @@ -41,7 +41,7 @@ static llvm::FunctionCallee getUnexpectedFn(CodeGenModule &CGM) { // void __cxa_call_unexpected(void *thrown_exception); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, "__cxa_call_unexpected"); } @@ -50,7 +50,7 @@ llvm::FunctionCallee CodeGenModule::getTerminateFn() { // void __terminate(); llvm::FunctionType *FTy = - llvm::FunctionType::get(VoidTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(VoidTy, /*isVarArg=*/false); StringRef name; @@ -75,7 +75,7 @@ llvm::FunctionCallee CodeGenModule::getTerminateFn() { static llvm::FunctionCallee getCatchallRethrowFn(CodeGenModule &CGM, StringRef Name) { llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, Name); } diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 62d930ca8c455..8340f48abcb64 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2031,7 +2031,7 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, // Cast the source to the storage type and shift it into place. 
SrcVal = Builder.CreateIntCast(SrcVal, Ptr.getElementType(), - /*IsSigned=*/false); + /*isSigned=*/false); llvm::Value *MaskedVal = SrcVal; // See if there are other bits in the bitfield's storage we'll need to load @@ -2611,7 +2611,7 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { // some reason; most likely, because it's in an outer function. } else if (VD->isStaticLocal()) { addr = Address(CGM.getOrCreateStaticVarDecl( - *VD, CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false)), + *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false)), getContext().getDeclAlign(VD)); // No other cases for now. @@ -3749,7 +3749,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, Idx = Builder.CreateNSWMul(Idx, NumElements); EltPtr = emitArraySubscriptGEP(*this, Base, Idx, VLA->getElementType(), !getLangOpts().isSignedOverflowDefined(), - /*SignedIndices=*/false, E->getExprLoc()); + /*signedIndices=*/false, E->getExprLoc()); } else if (const Expr *Array = isSimpleArrayDecayOperand(E->getBase())) { // If this is A[i] where A is an array, the frontend will have decayed the // base to be a ArrayToPointerDecay implicit cast. 
While correct, it is @@ -3769,7 +3769,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, EltPtr = emitArraySubscriptGEP( *this, ArrayLV.getAddress(), {CGM.getSize(CharUnits::Zero()), Idx}, ResultExprTy, !getLangOpts().isSignedOverflowDefined(), - /*SignedIndices=*/false, E->getExprLoc()); + /*signedIndices=*/false, E->getExprLoc()); BaseInfo = ArrayLV.getBaseInfo(); TBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, ResultExprTy); } else { @@ -3778,7 +3778,7 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, IsLowerBound); EltPtr = emitArraySubscriptGEP(*this, Base, Idx, ResultExprTy, !getLangOpts().isSignedOverflowDefined(), - /*SignedIndices=*/false, E->getExprLoc()); + /*signedIndices=*/false, E->getExprLoc()); } return MakeAddrLValue(EltPtr, ResultExprTy, BaseInfo, TBAAInfo); @@ -4867,7 +4867,7 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee E->getDirectCallee(), /*ParamsToSkip*/ 0, Order); const CGFunctionInfo &FnInfo = CGM.getTypes().arrangeFreeFunctionCall( - Args, FnType, /*isChainCall=*/Chain); + Args, FnType, /*ChainCall=*/Chain); // C99 6.5.2.2p6: // If the expression that denotes the called function has a type diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index 25b0abbc03045..8ad229fc0c362 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -1277,7 +1277,7 @@ static RValue EmitNewDeleteCall(CodeGenFunction &CGF, CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(CalleeDecl)); RValue RV = CGF.EmitCall(CGF.CGM.getTypes().arrangeFreeFunctionCall( - Args, CalleeType, /*chainCall=*/false), + Args, CalleeType, /*ChainCall=*/false), Callee, ReturnValueSlot(), Args, &CallOrInvoke); /// C++1y [expr.new]p10: diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index cc5c463224a14..31cf2aef1ba0b 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ 
b/clang/lib/CodeGen/CGExprConstant.cpp @@ -1878,7 +1878,7 @@ ConstantLValueEmitter::tryEmitBase(const APValue::LValueBase &base) { if (VD->isLocalVarDecl()) { return CGM.getOrCreateStaticVarDecl( - *VD, CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false)); + *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false)); } } } diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index 37d8bd08db991..1dd7ec52230ee 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -3735,7 +3735,7 @@ void CodeGenModule::emitAtAvailableLinkGuard() { llvm::FunctionType *CheckFTy = llvm::FunctionType::get(VoidTy, {}, false); llvm::FunctionCallee CFLinkCheckFuncRef = CreateRuntimeFunction( CheckFTy, "__clang_at_available_requires_core_foundation_framework", - llvm::AttributeList(), /*IsLocal=*/true); + llvm::AttributeList(), /*Local=*/true); llvm::Function *CFLinkCheckFunc = cast(CFLinkCheckFuncRef.getCallee()->stripPointerCasts()); if (CFLinkCheckFunc->empty()) { diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index e3a9c0b0d63b4..27e7175da841f 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2898,7 +2898,7 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, getThreadID(CGF, SourceLocation()), CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, - /*IsSigned=*/false), + /*isSigned=*/false), getOrCreateInternalVariable( CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; return Address( @@ -5254,7 +5254,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, if (const auto *ASE = dyn_cast(E->IgnoreParenImpCasts())) { LValue UpAddrLVal = - CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false); + CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); llvm::Value *UpAddr = 
CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); llvm::Value *LowIntPtr = @@ -6293,7 +6293,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); if (DelayedCreation) { CGF.EmitStoreOfScalar( - llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true), + llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), FlagsLVal); } else CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); @@ -6649,7 +6649,7 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, CGF.EmitScalarExpr(NumTeams, /*IgnoreResultAssign*/ true); return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, - /*IsSigned=*/true); + /*isSigned=*/true); } return Bld.getInt32(0); } @@ -6673,7 +6673,7 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, CGF.EmitScalarExpr(NumTeams, /*IgnoreResultAssign*/ true); return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, - /*IsSigned=*/true); + /*isSigned=*/true); } return Bld.getInt32(0); } @@ -6801,7 +6801,7 @@ static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, } NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, - /*IsSigned=*/false); + /*isSigned=*/false); if (DefaultThreadLimitVal) NumThreads = CGF.Builder.CreateSelect( CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), @@ -6875,7 +6875,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, llvm::Value *ThreadLimit = CGF.EmitScalarExpr( ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); } if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { @@ -6902,7 +6902,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, llvm::Value *ThreadLimit = 
CGF.EmitScalarExpr( ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); } const CapturedStmt *CS = D.getInnermostCapturedStmt(); if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) @@ -6925,7 +6925,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, llvm::Value *ThreadLimit = CGF.EmitScalarExpr( ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); } return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); case OMPD_target_parallel: @@ -6963,7 +6963,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, llvm::Value *ThreadLimit = CGF.EmitScalarExpr( ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/false); + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); } if (D.hasClausesOfKind()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); @@ -6971,7 +6971,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, llvm::Value *NumThreads = CGF.EmitScalarExpr( NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); NumThreadsVal = - Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/false); + Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); ThreadLimitVal = ThreadLimitVal ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, ThreadLimitVal), @@ -7865,7 +7865,7 @@ class MappableExprsHandler { llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, - /*isSinged=*/false); + /*isSigned=*/false); Sizes.push_back(Size); // Map type is always TARGET_PARAM Types.push_back(OMP_MAP_TARGET_PARAM); @@ -8416,7 +8416,7 @@ class MappableExprsHandler { CGF.Builder.CreateMemCpy( CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(), Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), - CurSizes.back(), /*isVolatile=*/false); + CurSizes.back(), /*IsVolatile=*/false); // Use new global variable as the base pointers. CurBasePointers.push_back(Addr); CurPointers.push_back(Addr); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index a5396a362f222..e8fbca5108ade 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -3603,7 +3603,7 @@ static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, CGF.EmitAtomicStore(RVal, LVal, IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent : llvm::AtomicOrdering::Monotonic, - LVal.isVolatile(), /*IsInit=*/false); + LVal.isVolatile(), /*isInit=*/false); } } @@ -4095,7 +4095,7 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, // Emit calculation of the iterations count. 
llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations()); NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty, - /*IsSigned=*/false); + /*isSigned=*/false); return NumIterations; }; if (IsOffloadEntry) diff --git a/clang/lib/CodeGen/CodeGenABITypes.cpp b/clang/lib/CodeGen/CodeGenABITypes.cpp index c047587dc00c1..6b6a116cf259b 100644 --- a/clang/lib/CodeGen/CodeGenABITypes.cpp +++ b/clang/lib/CodeGen/CodeGenABITypes.cpp @@ -59,7 +59,7 @@ CodeGen::arrangeFreeFunctionCall(CodeGenModule &CGM, FunctionType::ExtInfo info, RequiredArgs args) { return CGM.getTypes().arrangeLLVMFunctionInfo( - returnType, /*IsInstanceMethod=*/false, /*IsChainCall=*/false, argTypes, + returnType, /*instanceMethod=*/false, /*chainCall=*/false, argTypes, info, {}, args); } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 7ab960e8bcee5..6ff72ec045e62 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1199,7 +1199,7 @@ CodeGenModule::getFunctionLinkage(GlobalDecl GD) { return llvm::GlobalValue::InternalLinkage; } - return getLLVMLinkageForDeclarator(D, Linkage, /*isConstantVariable=*/false); + return getLLVMLinkageForDeclarator(D, Linkage, /*IsConstantVariable=*/false); } llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) { diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index d900c7b238378..6d18027f16a80 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -1281,7 +1281,7 @@ std::string getCoverageSection(const CodeGenModule &CGM) { std::string normalizeFilename(StringRef Filename) { llvm::SmallString<256> Path(Filename); llvm::sys::fs::make_absolute(Path); - llvm::sys::path::remove_dots(Path, /*remove_dot_dots=*/true); + llvm::sys::path::remove_dots(Path, /*remove_dot_dot=*/true); return Path.str().str(); } diff --git 
a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index cb22239559ad1..7367ff37cf45c 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -1138,7 +1138,7 @@ void ItaniumCXXABI::emitRethrow(CodeGenFunction &CGF, bool isNoReturn) { // void __cxa_rethrow(); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); llvm::FunctionCallee Fn = CGM.CreateRuntimeFunction(FTy, "__cxa_rethrow"); @@ -1152,7 +1152,7 @@ static llvm::FunctionCallee getAllocateExceptionFn(CodeGenModule &CGM) { // void *__cxa_allocate_exception(size_t thrown_size); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.Int8PtrTy, CGM.SizeTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.Int8PtrTy, CGM.SizeTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, "__cxa_allocate_exception"); } @@ -1163,7 +1163,7 @@ static llvm::FunctionCallee getThrowFn(CodeGenModule &CGM) { llvm::Type *Args[3] = { CGM.Int8PtrTy, CGM.Int8PtrTy, CGM.Int8PtrTy }; llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.VoidTy, Args, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, Args, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, "__cxa_throw"); } @@ -2402,7 +2402,7 @@ static bool isThreadWrapperReplaceable(const VarDecl *VD, static llvm::GlobalValue::LinkageTypes getThreadLocalWrapperLinkage(const VarDecl *VD, CodeGen::CodeGenModule &CGM) { llvm::GlobalValue::LinkageTypes VarLinkage = - CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false); + CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); // For internal linkage variables, we don't need an external or weak wrapper. if (llvm::GlobalValue::isLocalLinkage(VarLinkage)) @@ -2780,7 +2780,7 @@ ItaniumRTTIBuilder::GetAddrOfExternalRTTIDescriptor(QualType Ty) { // RTTI, check if emitting vtables opportunistically need any adjustment. 
GV = new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy, - /*Constant=*/true, + /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr, Name); const CXXRecordDecl *RD = Ty->getAsCXXRecordDecl(); @@ -3385,7 +3385,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( llvm::GlobalVariable *OldGV = M.getNamedGlobal(Name); llvm::GlobalVariable *GV = new llvm::GlobalVariable(M, Init->getType(), - /*Constant=*/true, Linkage, Init, Name); + /*isConstant=*/true, Linkage, Init, Name); // If there's already an old global variable, replace it with the new one. if (OldGV) { @@ -3906,7 +3906,7 @@ void ItaniumCXXABI::emitCXXStructor(GlobalDecl GD) { static llvm::FunctionCallee getBeginCatchFn(CodeGenModule &CGM) { // void *__cxa_begin_catch(void*); llvm::FunctionType *FTy = llvm::FunctionType::get( - CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); + CGM.Int8PtrTy, CGM.Int8PtrTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, "__cxa_begin_catch"); } @@ -3914,7 +3914,7 @@ static llvm::FunctionCallee getBeginCatchFn(CodeGenModule &CGM) { static llvm::FunctionCallee getEndCatchFn(CodeGenModule &CGM) { // void __cxa_end_catch(); llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.VoidTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, "__cxa_end_catch"); } @@ -3922,7 +3922,7 @@ static llvm::FunctionCallee getEndCatchFn(CodeGenModule &CGM) { static llvm::FunctionCallee getGetExceptionPtrFn(CodeGenModule &CGM) { // void *__cxa_get_exception_ptr(void*); llvm::FunctionType *FTy = llvm::FunctionType::get( - CGM.Int8PtrTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); + CGM.Int8PtrTy, CGM.Int8PtrTy, /*isVarArg=*/false); return CGM.CreateRuntimeFunction(FTy, "__cxa_get_exception_ptr"); } @@ -4196,9 +4196,9 @@ void ItaniumCXXABI::emitBeginCatch(CodeGenFunction &CGF, /// This code is used only in C++. 
static llvm::FunctionCallee getClangCallTerminateFn(CodeGenModule &CGM) { llvm::FunctionType *fnTy = - llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, CGM.Int8PtrTy, /*isVarArg=*/false); llvm::FunctionCallee fnRef = CGM.CreateRuntimeFunction( - fnTy, "__clang_call_terminate", llvm::AttributeList(), /*IsLocal=*/true); + fnTy, "__clang_call_terminate", llvm::AttributeList(), /*Local=*/true); llvm::Function *fn = cast(fnRef.getCallee()->stripPointerCasts()); if (fn->empty()) { diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index c37bfe3a59440..a91a949d024f8 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -352,7 +352,7 @@ class MicrosoftCXXABI : public CGCXXABI { ? llvm::GlobalValue::LinkOnceODRLinkage : llvm::GlobalValue::InternalLinkage; auto *VDispMap = new llvm::GlobalVariable( - CGM.getModule(), VDispMapTy, /*Constant=*/true, Linkage, + CGM.getModule(), VDispMapTy, /*isConstant=*/true, Linkage, /*Initializer=*/Init, MangledName); return VDispMap; } @@ -436,7 +436,7 @@ class MicrosoftCXXABI : public CGCXXABI { friend struct MSRTTIBuilder; bool isImageRelative() const { - return CGM.getTarget().getPointerWidth(/*AddressSpace=*/0) == 64; + return CGM.getTarget().getPointerWidth(/*AddrSpace=*/0) == 64; } // 5 routines for constructing the llvm types for MS RTTI structs. @@ -730,7 +730,7 @@ class MicrosoftCXXABI : public CGCXXABI { // which describes the exception. llvm::Type *Args[] = {CGM.Int8PtrTy, getThrowInfoType()->getPointerTo()}; llvm::FunctionType *FTy = - llvm::FunctionType::get(CGM.VoidTy, Args, /*IsVarArgs=*/false); + llvm::FunctionType::get(CGM.VoidTy, Args, /*isVarArg=*/false); llvm::FunctionCallee Throw = CGM.CreateRuntimeFunction(FTy, "_CxxThrowException"); // _CxxThrowException is stdcall on 32-bit x86 platforms. 
@@ -1313,7 +1313,7 @@ llvm::GlobalValue::LinkageTypes MicrosoftCXXABI::getCXXDestructorLinkage( // The base destructor most closely tracks the user-declared constructor, so // we delegate back to the normal declarator case. return CGM.getLLVMLinkageForDeclarator(Dtor, Linkage, - /*isConstantVariable=*/false); + /*IsConstantVariable=*/false); case Dtor_Complete: // The complete destructor is like an inline function, but it may be // imported and therefore must be exported as well. This requires changing @@ -2250,7 +2250,7 @@ static void emitGlobalDtorWithTLRegDtor(CodeGenFunction &CGF, const VarDecl &VD, // extern "C" int __tlregdtor(void (*f)(void)); llvm::FunctionType *TLRegDtorTy = llvm::FunctionType::get( - CGF.IntTy, DtorStub->getType(), /*IsVarArg=*/false); + CGF.IntTy, DtorStub->getType(), /*isVarArg=*/false); llvm::FunctionCallee TLRegDtor = CGF.CGM.CreateRuntimeFunction( TLRegDtorTy, "__tlregdtor", llvm::AttributeList(), /*Local=*/true); @@ -2291,7 +2291,7 @@ void MicrosoftCXXABI::EmitThreadLocalInitFuncs( // pointers at start-up time and, eventually, at thread-creation time. 
auto AddToXDU = [&CGM](llvm::Function *InitFunc) { llvm::GlobalVariable *InitFuncPtr = new llvm::GlobalVariable( - CGM.getModule(), InitFunc->getType(), /*IsConstant=*/true, + CGM.getModule(), InitFunc->getType(), /*isConstant=*/true, llvm::GlobalVariable::InternalLinkage, InitFunc, Twine(InitFunc->getName(), "$initializer$")); InitFuncPtr->setSection(".CRT$XDU"); @@ -2340,7 +2340,7 @@ static ConstantAddress getInitThreadEpochPtr(CodeGenModule &CGM) { return ConstantAddress(GV, Align); auto *GV = new llvm::GlobalVariable( CGM.getModule(), CGM.IntTy, - /*Constant=*/false, llvm::GlobalVariable::ExternalLinkage, + /*isConstant=*/false, llvm::GlobalVariable::ExternalLinkage, /*Initializer=*/nullptr, VarName, /*InsertBefore=*/nullptr, llvm::GlobalVariable::GeneralDynamicTLSModel); GV->setAlignment(Align.getQuantity()); @@ -3400,7 +3400,7 @@ static llvm::GlobalVariable *getTypeInfoVTable(CodeGenModule &CGM) { if (auto VTable = CGM.getModule().getNamedGlobal(MangledName)) return VTable; return new llvm::GlobalVariable(CGM.getModule(), CGM.Int8PtrTy, - /*Constant=*/true, + /*isConstant=*/true, llvm::GlobalVariable::ExternalLinkage, /*Initializer=*/nullptr, MangledName); } @@ -3580,7 +3580,7 @@ llvm::GlobalVariable *MSRTTIBuilder::getClassHierarchyDescriptor() { // Forward-declare the class hierarchy descriptor auto Type = ABI.getClassHierarchyDescriptorType(); - auto CHD = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage, + auto CHD = new llvm::GlobalVariable(Module, Type, /*isConstant=*/true, Linkage, /*Initializer=*/nullptr, MangledName); if (CHD->isWeakForLinker()) @@ -3619,7 +3619,7 @@ MSRTTIBuilder::getBaseClassArray(SmallVectorImpl &Classes) { auto *ArrType = llvm::ArrayType::get(PtrType, Classes.size() + 1); auto *BCA = new llvm::GlobalVariable(Module, ArrType, - /*Constant=*/true, Linkage, + /*isConstant=*/true, Linkage, /*Initializer=*/nullptr, MangledName); if (BCA->isWeakForLinker()) 
BCA->setComdat(CGM.getModule().getOrInsertComdat(BCA->getName())); @@ -3661,7 +3661,7 @@ MSRTTIBuilder::getBaseClassDescriptor(const MSRTTIClass &Class) { // Forward-declare the base class descriptor. auto Type = ABI.getBaseClassDescriptorType(); auto BCD = - new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage, + new llvm::GlobalVariable(Module, Type, /*isConstant=*/true, Linkage, /*Initializer=*/nullptr, MangledName); if (BCD->isWeakForLinker()) BCD->setComdat(CGM.getModule().getOrInsertComdat(BCD->getName())); @@ -3707,7 +3707,7 @@ MSRTTIBuilder::getCompleteObjectLocator(const VPtrInfo &Info) { // Forward-declare the complete object locator. llvm::StructType *Type = ABI.getCompleteObjectLocatorType(); - auto COL = new llvm::GlobalVariable(Module, Type, /*Constant=*/true, Linkage, + auto COL = new llvm::GlobalVariable(Module, Type, /*isConstant=*/true, Linkage, /*Initializer=*/nullptr, MangledName); // Initialize the CompleteObjectLocator. @@ -3822,7 +3822,7 @@ llvm::Constant *MicrosoftCXXABI::getAddrOfRTTIDescriptor(QualType Type) { llvm::StructType *TypeDescriptorType = getTypeDescriptorType(TypeInfoString); auto *Var = new llvm::GlobalVariable( - CGM.getModule(), TypeDescriptorType, /*Constant=*/false, + CGM.getModule(), TypeDescriptorType, /*isConstant=*/false, getLinkageForRTTI(Type), llvm::ConstantStruct::get(TypeDescriptorType, Fields), MangledName); @@ -4061,7 +4061,7 @@ llvm::Constant *MicrosoftCXXABI::getCatchableType(QualType T, }; llvm::StructType *CTType = getCatchableTypeType(); auto *GV = new llvm::GlobalVariable( - CGM.getModule(), CTType, /*Constant=*/true, getLinkageForRTTI(T), + CGM.getModule(), CTType, /*isConstant=*/true, getLinkageForRTTI(T), llvm::ConstantStruct::get(CTType, Fields), MangledName); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); GV->setSection(".xdata"); @@ -4179,7 +4179,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getCatchableTypeArray(QualType T) { getMangleContext().mangleCXXCatchableTypeArray(T, 
NumEntries, Out); } CTA = new llvm::GlobalVariable( - CGM.getModule(), CTAType, /*Constant=*/true, getLinkageForRTTI(T), + CGM.getModule(), CTAType, /*isConstant=*/true, getLinkageForRTTI(T), llvm::ConstantStruct::get(CTAType, Fields), MangledName); CTA->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); CTA->setSection(".xdata"); @@ -4248,7 +4248,7 @@ llvm::GlobalVariable *MicrosoftCXXABI::getThrowInfo(QualType T) { PointerToCatchableTypes // CatchableTypeArray }; auto *GV = new llvm::GlobalVariable( - CGM.getModule(), TIType, /*Constant=*/true, getLinkageForRTTI(T), + CGM.getModule(), TIType, /*isConstant=*/true, getLinkageForRTTI(T), llvm::ConstantStruct::get(TIType, Fields), StringRef(MangledName)); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); GV->setSection(".xdata"); diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 458b9a6025bfe..5da988fb8a3c5 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -833,7 +833,7 @@ ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const { Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const { - return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect=*/ false, + return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*IsIndirect=*/ false, getContext().getTypeInfoInChars(Ty), CharUnits::fromQuantity(4), /*AllowHigherAlign=*/ true); diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 087335562d0ae..396ddf4dd8162 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4023,9 +4023,9 @@ InputInfo Driver::BuildJobsForActionNoCache( Input.claim(); if (Input.getOption().matches(options::OPT_INPUT)) { const char *Name = Input.getValue(); - return InputInfo(A, Name, /* BaseInput = */ Name); + return InputInfo(A, Name, /* _BaseInput = */ Name); } - return InputInfo(A, &Input, /* BaseInput = */ ""); + return InputInfo(A, &Input, /* _BaseInput = */ 
""); } if (const BindArchAction *BAA = dyn_cast(A)) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 6c3074b69e9f9..edc64581172ff 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2971,7 +2971,7 @@ static void RenderObjCOptions(const ToolChain &TC, const Driver &D, // We default off for Objective-C, on for Objective-C++. if (Args.hasFlag(options::OPT_fobjc_arc_exceptions, options::OPT_fno_objc_arc_exceptions, - /*default=*/types::isCXX(Input.getType()))) + /*Default=*/types::isCXX(Input.getType()))) CmdArgs.push_back("-fobjc-arc-exceptions"); } @@ -5702,7 +5702,7 @@ static EHFlags parseClangCLEHFlags(const Driver &D, const ArgList &Args) { // The default is that /GX is not specified. if (EHArgs.empty() && Args.hasFlag(options::OPT__SLASH_GX, options::OPT__SLASH_GX_, - /*default=*/false)) { + /*Default=*/false)) { EH.Synch = true; EH.NoUnwindC = true; } @@ -5771,13 +5771,13 @@ void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType, // This controls whether or not we emit RTTI data for polymorphic types. if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR, - /*default=*/false)) + /*Default=*/false)) CmdArgs.push_back("-fno-rtti-data"); // This controls whether or not we emit stack-protector instrumentation. // In MSVC, Buffer Security Check (/GS) is on by default. 
if (Args.hasFlag(options::OPT__SLASH_GS, options::OPT__SLASH_GS_, - /*default=*/true)) { + /*Default=*/true)) { CmdArgs.push_back("-stack-protector"); CmdArgs.push_back(Args.MakeArgString(Twine(LangOptions::SSPStrong))); } diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index 3a789627c5519..6ed80a8f47523 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -626,11 +626,11 @@ std::unique_ptr visualstudio::Compiler::GetCommand( // FIXME: How can we ensure this stays in sync with relevant clang-cl options? if (Args.hasFlag(options::OPT__SLASH_GR_, options::OPT__SLASH_GR, - /*default=*/false)) + /*Default=*/false)) CmdArgs.push_back("/GR-"); if (Args.hasFlag(options::OPT__SLASH_GS_, options::OPT__SLASH_GS, - /*default=*/false)) + /*Default=*/false)) CmdArgs.push_back("/GS-"); if (Arg *A = Args.getLastArg(options::OPT_ffunction_sections, diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 36a18de017868..3f3c80bc1ccf1 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -833,7 +833,7 @@ class NoLineBreakFormatter : public LineFormatter { LineState State = Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun); while (State.NextToken) { - formatChildren(State, /*Newline=*/false, DryRun, Penalty); + formatChildren(State, /*NewLine=*/false, DryRun, Penalty); Indenter->addTokenToState( State, /*Newline=*/State.NextToken->MustBreakBefore, DryRun); } diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index 7d54d665146b6..e37afae5332a0 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -139,7 +139,7 @@ GeneratePCHAction::CreateOutputFile(CompilerInstance &CI, StringRef InFile, std::unique_ptr OS = CI.createOutputFile(CI.getFrontendOpts().OutputFile, /*Binary=*/true, 
/*RemoveFileOnSignal=*/false, InFile, - /*Extension=*/"", /*useTemporary=*/true); + /*Extension=*/"", /*UseTemporary=*/true); if (!OS) return nullptr; @@ -215,7 +215,7 @@ GenerateModuleFromModuleMapAction::CreateOutputFile(CompilerInstance &CI, // We use a temporary to avoid race conditions. return CI.createOutputFile(CI.getFrontendOpts().OutputFile, /*Binary=*/true, /*RemoveFileOnSignal=*/false, InFile, - /*Extension=*/"", /*useTemporary=*/true, + /*Extension=*/"", /*UseTemporary=*/true, /*CreateMissingDirectories=*/true); } diff --git a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp index 170149d5053ff..bd091ee033512 100644 --- a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp +++ b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp @@ -2428,7 +2428,7 @@ void RewriteModernObjC::SynthMsgSendFunctionDecl() { assert(!argT.isNull() && "Can't find 'SEL' type"); ArgTys.push_back(argT); QualType msgSendType = getSimpleFunctionType(Context->getObjCIdType(), - ArgTys, /*isVariadic=*/true); + ArgTys, /*variadic=*/true); MsgSendFunctionDecl = FunctionDecl::Create(*Context, TUDecl, SourceLocation(), SourceLocation(), @@ -2442,7 +2442,7 @@ void RewriteModernObjC::SynthMsgSendSuperFunctionDecl() { SmallVector ArgTys; ArgTys.push_back(Context->VoidTy); QualType msgSendType = getSimpleFunctionType(Context->getObjCIdType(), - ArgTys, /*isVariadic=*/true); + ArgTys, /*variadic=*/true); MsgSendSuperFunctionDecl = FunctionDecl::Create(*Context, TUDecl, SourceLocation(), SourceLocation(), @@ -2461,7 +2461,7 @@ void RewriteModernObjC::SynthMsgSendStretFunctionDecl() { assert(!argT.isNull() && "Can't find 'SEL' type"); ArgTys.push_back(argT); QualType msgSendType = getSimpleFunctionType(Context->getObjCIdType(), - ArgTys, /*isVariadic=*/true); + ArgTys, /*variadic=*/true); MsgSendStretFunctionDecl = FunctionDecl::Create(*Context, TUDecl, SourceLocation(), SourceLocation(), @@ -2477,7 +2477,7 @@ void 
RewriteModernObjC::SynthMsgSendSuperStretFunctionDecl() { SmallVector ArgTys; ArgTys.push_back(Context->VoidTy); QualType msgSendType = getSimpleFunctionType(Context->getObjCIdType(), - ArgTys, /*isVariadic=*/true); + ArgTys, /*variadic=*/true); MsgSendSuperStretFunctionDecl = FunctionDecl::Create(*Context, TUDecl, SourceLocation(), SourceLocation(), @@ -2497,7 +2497,7 @@ void RewriteModernObjC::SynthMsgSendFpretFunctionDecl() { assert(!argT.isNull() && "Can't find 'SEL' type"); ArgTys.push_back(argT); QualType msgSendType = getSimpleFunctionType(Context->DoubleTy, - ArgTys, /*isVariadic=*/true); + ArgTys, /*variadic=*/true); MsgSendFpretFunctionDecl = FunctionDecl::Create(*Context, TUDecl, SourceLocation(), SourceLocation(), diff --git a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp index 2ff230dfff1b5..05078baee790c 100644 --- a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp +++ b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp @@ -2335,7 +2335,7 @@ void RewriteObjC::SynthMsgSendFunctionDecl() { assert(!argT.isNull() && "Can't find 'SEL' type"); ArgTys.push_back(argT); QualType msgSendType = getSimpleFunctionType(Context->getObjCIdType(), - ArgTys, /*isVariadic=*/true); + ArgTys, /*variadic=*/true); MsgSendFunctionDecl = FunctionDecl::Create(*Context, TUDecl, SourceLocation(), SourceLocation(), @@ -2357,7 +2357,7 @@ void RewriteObjC::SynthMsgSendSuperFunctionDecl() { assert(!argT.isNull() && "Can't find 'SEL' type"); ArgTys.push_back(argT); QualType msgSendType = getSimpleFunctionType(Context->getObjCIdType(), - ArgTys, /*isVariadic=*/true); + ArgTys, /*variadic=*/true); MsgSendSuperFunctionDecl = FunctionDecl::Create(*Context, TUDecl, SourceLocation(), SourceLocation(), @@ -2376,7 +2376,7 @@ void RewriteObjC::SynthMsgSendStretFunctionDecl() { assert(!argT.isNull() && "Can't find 'SEL' type"); ArgTys.push_back(argT); QualType msgSendType = getSimpleFunctionType(Context->getObjCIdType(), - ArgTys, /*isVariadic=*/true); + ArgTys, 
/*variadic=*/true); MsgSendStretFunctionDecl = FunctionDecl::Create(*Context, TUDecl, SourceLocation(), SourceLocation(), @@ -2400,7 +2400,7 @@ void RewriteObjC::SynthMsgSendSuperStretFunctionDecl() { assert(!argT.isNull() && "Can't find 'SEL' type"); ArgTys.push_back(argT); QualType msgSendType = getSimpleFunctionType(Context->getObjCIdType(), - ArgTys, /*isVariadic=*/true); + ArgTys, /*variadic=*/true); MsgSendSuperStretFunctionDecl = FunctionDecl::Create(*Context, TUDecl, SourceLocation(), SourceLocation(), @@ -2420,7 +2420,7 @@ void RewriteObjC::SynthMsgSendFpretFunctionDecl() { assert(!argT.isNull() && "Can't find 'SEL' type"); ArgTys.push_back(argT); QualType msgSendType = getSimpleFunctionType(Context->DoubleTy, - ArgTys, /*isVariadic=*/true); + ArgTys, /*variadic=*/true); MsgSendFpretFunctionDecl = FunctionDecl::Create(*Context, TUDecl, SourceLocation(), SourceLocation(), diff --git a/clang/lib/Index/IndexDecl.cpp b/clang/lib/Index/IndexDecl.cpp index e41b5fecdd24b..5bbbb0d32bf45 100644 --- a/clang/lib/Index/IndexDecl.cpp +++ b/clang/lib/Index/IndexDecl.cpp @@ -416,7 +416,7 @@ class IndexingDeclVisitor : public ConstDeclVisitor { if (D->isThisDeclarationADefinition()) { TRY_DECL(D, IndexCtx.handleDecl(D)); TRY_TO(handleReferencedProtocols(D->getReferencedProtocols(), D, - /*superLoc=*/SourceLocation())); + /*SuperLoc=*/SourceLocation())); TRY_TO(IndexCtx.indexDeclContext(D)); } else { return IndexCtx.handleReference(D, D->getLocation(), nullptr, @@ -466,7 +466,7 @@ class IndexingDeclVisitor : public ConstDeclVisitor { CategoryLoc = D->getLocation(); TRY_TO(IndexCtx.handleDecl(D, CategoryLoc)); TRY_TO(handleReferencedProtocols(D->getReferencedProtocols(), D, - /*superLoc=*/SourceLocation())); + /*SuperLoc=*/SourceLocation())); TRY_TO(IndexCtx.indexDeclContext(D)); return true; } diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index ca94883ebecbb..108630cc26f69 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ 
b/clang/lib/Lex/HeaderSearch.cpp @@ -539,7 +539,7 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( FrameworkName.append(Filename.begin()+SlashPos+1, Filename.end()); const FileEntry *FE = FileMgr.getFile(FrameworkName, - /*openFile=*/!SuggestedModule); + /*OpenFile=*/!SuggestedModule); if (!FE) { // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h" const char *Private = "Private"; @@ -549,7 +549,7 @@ const FileEntry *DirectoryLookup::DoFrameworkLookup( SearchPath->insert(SearchPath->begin()+OrigSize, Private, Private+strlen(Private)); - FE = FileMgr.getFile(FrameworkName, /*openFile=*/!SuggestedModule); + FE = FileMgr.getFile(FrameworkName, /*OpenFile=*/!SuggestedModule); } // If we found the header and are allowed to suggest a module, do so now. @@ -1047,7 +1047,7 @@ LookupSubframeworkHeader(StringRef Filename, } HeadersFilename.append(Filename.begin()+SlashPos+1, Filename.end()); - if (!(FE = FileMgr.getFile(HeadersFilename, /*openFile=*/true))) { + if (!(FE = FileMgr.getFile(HeadersFilename, /*OpenFile=*/true))) { // Check ".../Frameworks/HIToolbox.framework/PrivateHeaders/HIToolbox.h" HeadersFilename = FrameworkName; HeadersFilename += "PrivateHeaders/"; @@ -1058,7 +1058,7 @@ LookupSubframeworkHeader(StringRef Filename, } HeadersFilename.append(Filename.begin()+SlashPos+1, Filename.end()); - if (!(FE = FileMgr.getFile(HeadersFilename, /*openFile=*/true))) + if (!(FE = FileMgr.getFile(HeadersFilename, /*OpenFile=*/true))) return nullptr; } diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index 311c0e02fc696..2756042f23eb2 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -1419,7 +1419,7 @@ void Preprocessor::HandleMacroPublicDirective(Token &Tok) { // Note that this macro has now been exported. 
appendMacroDirective(II, AllocateVisibilityMacroDirective( - MacroNameTok.getLocation(), /*IsPublic=*/true)); + MacroNameTok.getLocation(), /*isPublic=*/true)); } /// Handle a #private directive. @@ -1446,7 +1446,7 @@ void Preprocessor::HandleMacroPrivateDirective() { // Note that this macro has now been marked private. appendMacroDirective(II, AllocateVisibilityMacroDirective( - MacroNameTok.getLocation(), /*IsPublic=*/false)); + MacroNameTok.getLocation(), /*isPublic=*/false)); } //===----------------------------------------------------------------------===// @@ -1937,7 +1937,7 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport( // and making the module loader convert it back again. ModuleLoadResult Imported = TheModuleLoader.loadModule( IncludeTok.getLocation(), Path, Module::Hidden, - /*IsIncludeDirective=*/true); + /*IsInclusionDirective=*/true); assert((Imported == nullptr || Imported == SuggestedModule.getModule()) && "the imported module is different than the suggested one"); diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 5021ff8aac52d..687b9a9d3b7bd 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -1707,7 +1707,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { HasLexedNextToken = Tok.is(tok::string_literal); if (!FinishLexStringLiteral(Tok, WarningName, "'__has_warning'", - /*MacroExpansion=*/false)) + /*AllowMacroExpansion=*/false)) return false; // FIXME: Should we accept "-R..." 
flags here, or should that be diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp index e680e8d4bbd71..4e4db668551f8 100644 --- a/clang/lib/Lex/Pragma.cpp +++ b/clang/lib/Lex/Pragma.cpp @@ -1191,7 +1191,7 @@ struct PragmaDiagnosticHandler : public PragmaHandler { std::string WarningName; if (!PP.FinishLexStringLiteral(Tok, WarningName, "pragma diagnostic", - /*MacroExpansion=*/false)) + /*AllowMacroExpansion=*/false)) return; if (Tok.isNot(tok::eod)) { @@ -1389,7 +1389,7 @@ struct PragmaExecCharsetHandler : public PragmaHandler { std::string ExecCharset; if (!PP.FinishLexStringLiteral(Tok, ExecCharset, "pragma execution_character_set", - /*MacroExpansion=*/false)) + /*AllowMacroExpansion=*/false)) return; // MSVC supports either of these, but nothing else. @@ -1490,7 +1490,7 @@ struct PragmaMessageHandler : public PragmaHandler { std::string MessageString; if (!PP.FinishLexStringLiteral(Tok, MessageString, PragmaKind(Kind), - /*MacroExpansion=*/true)) + /*AllowMacroExpansion=*/true)) return; if (ExpectClosingParen) { @@ -1540,7 +1540,7 @@ struct PragmaModuleImportHandler : public PragmaHandler { // If we have a non-empty module path, load the named module. Module *Imported = PP.getModuleLoader().loadModule(ImportLoc, ModuleName, Module::Hidden, - /*IsIncludeDirective=*/false); + /*IsInclusionDirective=*/false); if (!Imported) return; @@ -1666,7 +1666,7 @@ struct PragmaModuleLoadHandler : public PragmaHandler { // Load the module, don't make it visible. 
PP.getModuleLoader().loadModule(Loc, ModuleName, Module::Hidden, - /*IsIncludeDirective=*/false); + /*IsInclusionDirective=*/false); } }; diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index dd61938955699..bdc5fbcd2beab 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -1263,7 +1263,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) { Imported = TheModuleLoader.loadModule(ModuleImportLoc, ModuleImportPath, Module::Hidden, - /*IsIncludeDirective=*/false); + /*IsInclusionDirective=*/false); if (Imported) makeModuleVisible(Imported, SemiLoc); } diff --git a/clang/lib/Parse/ParseCXXInlineMethods.cpp b/clang/lib/Parse/ParseCXXInlineMethods.cpp index 14151efd3d2a8..a1abf8269c451 100644 --- a/clang/lib/Parse/ParseCXXInlineMethods.cpp +++ b/clang/lib/Parse/ParseCXXInlineMethods.cpp @@ -1058,7 +1058,7 @@ bool Parser::ConsumeAndStoreInitializer(CachedTokens &Toks, case CIK_DefaultArgument: bool InvalidAsDeclaration = false; Result = TryParseParameterDeclarationClause( - &InvalidAsDeclaration, /*VersusTemplateArgument=*/true); + &InvalidAsDeclaration, /*VersusTemplateArg=*/true); // If this is an expression or a declaration with a missing // 'typename', assume it's not a declaration. 
if (Result == TPResult::Ambiguous && InvalidAsDeclaration) diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 0b57c8ab66345..97fd92bec7875 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -3189,7 +3189,7 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, Actions.getTypeName(*Next.getIdentifierInfo(), Next.getLocation(), getCurScope(), &SS, false, false, nullptr, /*IsCtorOrDtorName=*/false, - /*WantNonTrivialSourceInfo=*/true, + /*WantNontrivialTypeSourceInfo=*/true, isClassTemplateDeductionContext(DSContext)); // If the referenced identifier is not a type, then this declspec is diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index 81a56a025048c..9c61c4da447aa 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -1208,9 +1208,9 @@ TypeResult Parser::ParseBaseTypeSpecifier(SourceLocation &BaseLoc, // We have an identifier; check whether it is actually a type. IdentifierInfo *CorrectedII = nullptr; ParsedType Type = Actions.getTypeName( - *Id, IdLoc, getCurScope(), &SS, /*IsClassName=*/true, false, nullptr, + *Id, IdLoc, getCurScope(), &SS, /*isClassName=*/true, false, nullptr, /*IsCtorOrDtorName=*/false, - /*NonTrivialTypeSourceInfo=*/true, + /*WantNontrivialTypeSourceInfo=*/true, /*IsClassTemplateDeductionContext*/ false, &CorrectedII); if (!Type) { Diag(IdLoc, diag::err_expected_class_name); diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index b5c01fe60fb04..7a0c07bd3b04e 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -1770,7 +1770,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { if (Tok.is(tok::code_completion)) { tok::TokenKind CorrectedOpKind = OpKind == tok::arrow ? 
tok::period : tok::arrow; - ExprResult CorrectedLHS(/*IsInvalid=*/true); + ExprResult CorrectedLHS(/*Invalid=*/true); if (getLangOpts().CPlusPlus && OrigLHS) { const bool DiagsAreSuppressed = Diags.getSuppressAllDiagnostics(); Diags.setSuppressAllDiagnostics(true); @@ -2052,7 +2052,7 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() { if (isCastExpr) return Actions.ActOnUnaryExprOrTypeTraitExpr(OpTok.getLocation(), ExprKind, - /*isType=*/true, + /*IsType=*/true, CastTy.getAsOpaquePtr(), CastRange); @@ -2063,7 +2063,7 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() { if (!Operand.isInvalid()) Operand = Actions.ActOnUnaryExprOrTypeTraitExpr(OpTok.getLocation(), ExprKind, - /*isType=*/false, + /*IsType=*/false, Operand.get(), CastRange); return Operand; @@ -3055,7 +3055,7 @@ ExprResult Parser::ParseBlockLiteralExpression() { /*IsAmbiguous=*/false, /*RParenLoc=*/NoLoc, /*ArgInfo=*/nullptr, - /*NumArgs=*/0, + /*NumParams=*/0, /*EllipsisLoc=*/NoLoc, /*RParenLoc=*/NoLoc, /*RefQualifierIsLvalueRef=*/true, diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index 9d296f18527db..85c7e6c6bcdf9 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -1334,10 +1334,10 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( SourceLocation NoLoc; D.AddTypeInfo(DeclaratorChunk::getFunction( - /*hasProto=*/true, - /*isAmbiguous=*/false, LParenLoc, ParamInfo.data(), + /*HasProto=*/true, + /*IsAmbiguous=*/false, LParenLoc, ParamInfo.data(), ParamInfo.size(), EllipsisLoc, RParenLoc, - /*RefQualifierIsLValueRef=*/true, + /*RefQualifierIsLvalueRef=*/true, /*RefQualifierLoc=*/NoLoc, MutableLoc, ESpecType, ESpecRange, DynamicExceptions.data(), DynamicExceptionRanges.data(), DynamicExceptions.size(), @@ -1394,14 +1394,14 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( SourceLocation NoLoc; D.AddTypeInfo(DeclaratorChunk::getFunction( - /*hasProto=*/true, - /*isAmbiguous=*/false, + 
/*HasProto=*/true, + /*IsAmbiguous=*/false, /*LParenLoc=*/NoLoc, /*Params=*/nullptr, /*NumParams=*/0, /*EllipsisLoc=*/NoLoc, /*RParenLoc=*/NoLoc, - /*RefQualifierIsLValueRef=*/true, + /*RefQualifierIsLvalueRef=*/true, /*RefQualifierLoc=*/NoLoc, MutableLoc, EST_None, /*ESpecRange=*/SourceRange(), /*Exceptions=*/nullptr, @@ -1701,7 +1701,7 @@ Parser::ParseCXXPseudoDestructor(Expr *Base, SourceLocation OpLoc, ParseUnqualifiedIdTemplateId(SS, SourceLocation(), Name, NameLoc, false, ObjectType, SecondTypeName, - /*AssumeTemplateName=*/true)) + /*AssumeTemplateId=*/true)) return ExprError(); return Actions.ActOnPseudoDestructorExpr(getCurScope(), Base, OpLoc, OpKind, @@ -3061,7 +3061,7 @@ void Parser::ParseDirectNewDeclarator(Declarator &D) { MaybeParseCXX11Attributes(Attrs); D.AddTypeInfo(DeclaratorChunk::getArray(0, - /*static=*/false, /*star=*/false, + /*isStatic=*/false, /*isStar=*/false, Size.get(), T.getOpenLocation(), T.getCloseLocation()), std::move(Attrs), T.getCloseLocation()); diff --git a/clang/lib/Parse/ParseObjc.cpp b/clang/lib/Parse/ParseObjc.cpp index a8d6f7fd60270..8937a0986c956 100644 --- a/clang/lib/Parse/ParseObjc.cpp +++ b/clang/lib/Parse/ParseObjc.cpp @@ -3194,15 +3194,15 @@ Parser::ParseObjCMessageExpressionBody(SourceLocation LBracLoc, if (SuperLoc.isValid()) Actions.CodeCompleteObjCSuperMessage(getCurScope(), SuperLoc, KeyIdents, - /*AtArgumentEpression=*/true); + /*AtArgumentExpression=*/true); else if (ReceiverType) Actions.CodeCompleteObjCClassMessage(getCurScope(), ReceiverType, KeyIdents, - /*AtArgumentEpression=*/true); + /*AtArgumentExpression=*/true); else Actions.CodeCompleteObjCInstanceMessage(getCurScope(), ReceiverExpr, KeyIdents, - /*AtArgumentEpression=*/true); + /*AtArgumentExpression=*/true); cutOffParsing(); return ExprError(); @@ -3232,15 +3232,15 @@ Parser::ParseObjCMessageExpressionBody(SourceLocation LBracLoc, if (SuperLoc.isValid()) Actions.CodeCompleteObjCSuperMessage(getCurScope(), SuperLoc, KeyIdents, - 
/*AtArgumentEpression=*/false); + /*AtArgumentExpression=*/false); else if (ReceiverType) Actions.CodeCompleteObjCClassMessage(getCurScope(), ReceiverType, KeyIdents, - /*AtArgumentEpression=*/false); + /*AtArgumentExpression=*/false); else Actions.CodeCompleteObjCInstanceMessage(getCurScope(), ReceiverExpr, KeyIdents, - /*AtArgumentEpression=*/false); + /*AtArgumentExpression=*/false); cutOffParsing(); return ExprError(); } diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp index 5b9749c80aa6d..f81ecc738c283 100644 --- a/clang/lib/Parse/ParsePragma.cpp +++ b/clang/lib/Parse/ParsePragma.cpp @@ -2491,7 +2491,7 @@ void PragmaDetectMismatchHandler::HandlePragma(Preprocessor &PP, std::string NameString; if (!PP.LexStringLiteral(Tok, NameString, "pragma detect_mismatch", - /*MacroExpansion=*/true)) + /*AllowMacroExpansion=*/true)) return; // Read the comma followed by a second string literal. @@ -2502,7 +2502,7 @@ void PragmaDetectMismatchHandler::HandlePragma(Preprocessor &PP, } if (!PP.LexStringLiteral(Tok, ValueString, "pragma detect_mismatch", - /*MacroExpansion=*/true)) + /*AllowMacroExpansion=*/true)) return; if (Tok.isNot(tok::r_paren)) { @@ -2584,7 +2584,7 @@ void PragmaCommentHandler::HandlePragma(Preprocessor &PP, std::string ArgumentString; if (Tok.is(tok::comma) && !PP.LexStringLiteral(Tok, ArgumentString, "pragma comment", - /*MacroExpansion=*/true)) + /*AllowMacroExpansion=*/true)) return; // FIXME: warn that 'exestr' is deprecated. 
diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp index 6ae75eda24312..9bb5b6eac37e2 100644 --- a/clang/lib/Parse/ParseTemplate.cpp +++ b/clang/lib/Parse/ParseTemplate.cpp @@ -303,7 +303,7 @@ Decl *Parser::ParseSingleDeclarationAfterTemplate( return ParseFunctionDefinition( DeclaratorInfo, ParsedTemplateInfo(&FakedParamLists, /*isSpecialization=*/true, - /*LastParamListWasEmpty=*/true), + /*lastParameterListWasEmpty=*/true), &LateParsedAttrs); } } diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index 9ca6e4ab0e21e..85d6da700eaad 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -2044,7 +2044,7 @@ static TryCastResult TryReinterpretCast(Sema &Self, ExprResult &SrcExpr, if (!CStyle) { Self.CheckCompatibleReinterpretCast(SrcType, DestType, - /*isDereference=*/false, OpRange); + /*IsDereference=*/false, OpRange); } // C++ 5.2.10p10: [...] a reference cast reinterpret_cast(x) has the @@ -2881,7 +2881,7 @@ ExprResult Sema::BuildCStyleCastExpr(SourceLocation LPLoc, Op.OpRange = SourceRange(LPLoc, CastExpr->getEndLoc()); if (getLangOpts().CPlusPlus) { - Op.CheckCXXCStyleCast(/*FunctionalStyle=*/ false, + Op.CheckCXXCStyleCast(/*FunctionalCast=*/ false, isa(CastExpr)); } else { Op.CheckCStyleCast(); @@ -2908,7 +2908,7 @@ ExprResult Sema::BuildCXXFunctionalCastExpr(TypeSourceInfo *CastTypeInfo, Op.DestRange = CastTypeInfo->getTypeLoc().getSourceRange(); Op.OpRange = SourceRange(Op.DestRange.getBegin(), CastExpr->getEndLoc()); - Op.CheckCXXCStyleCast(/*FunctionalStyle=*/true, /*ListInit=*/false); + Op.CheckCXXCStyleCast(/*FunctionalCast=*/true, /*ListInit=*/false); if (Op.SrcExpr.isInvalid()) return ExprError(); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 1c3d12ea0983a..f9f82cdeef432 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -11391,7 +11391,7 @@ CheckImplicitConversion(Sema &S, Expr *E, QualType T, 
SourceLocation CC, if (Overflowed) { S.DiagRuntimeBehavior(E->getExprLoc(), E, S.PDiag(diag::warn_impcast_fixed_point_range) - << Value.toString(/*radix=*/10) << T + << Value.toString(/*Radix=*/10) << T << E->getSourceRange() << clang::SourceRange(CC)); return; diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index 7770f345cd051..93a104e3ade5f 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -5136,7 +5136,7 @@ QualType Sema::ProduceCallSignatureHelp(Scope *S, Expr *Fn, Decls.append(UME->decls_begin(), UME->decls_end()); const bool FirstArgumentIsBase = !UME->isImplicitAccess() && UME->getBase(); AddFunctionCandidates(Decls, ArgExprs, CandidateSet, TemplateArgs, - /*SuppressUsedConversions=*/false, + /*SuppressUserConversions=*/false, /*PartialOverloading=*/true, FirstArgumentIsBase); } else { FunctionDecl *FD = nullptr; @@ -5151,7 +5151,7 @@ QualType Sema::ProduceCallSignatureHelp(Scope *S, Expr *Fn, else AddOverloadCandidate(FD, DeclAccessPair::make(FD, FD->getAccess()), Args, CandidateSet, - /*SuppressUsedConversions=*/false, + /*SuppressUserConversions=*/false, /*PartialOverloading=*/true); } else if (auto DC = NakedFn->getType()->getAsCXXRecordDecl()) { @@ -5168,7 +5168,7 @@ QualType Sema::ProduceCallSignatureHelp(Scope *S, Expr *Fn, ArgExprs.append(Args.begin(), Args.end()); AddFunctionCandidates(R.asUnresolvedSet(), ArgExprs, CandidateSet, /*ExplicitArgs=*/nullptr, - /*SuppressUsedConversions=*/false, + /*SuppressUserConversions=*/false, /*PartialOverloading=*/true); } } else { @@ -5216,14 +5216,14 @@ QualType Sema::ProduceConstructorSignatureHelp(Scope *S, QualType Type, if (auto *FD = dyn_cast(C)) { AddOverloadCandidate(FD, DeclAccessPair::make(FD, C->getAccess()), Args, CandidateSet, - /*SuppressUsedConversions=*/false, + /*SuppressUserConversions=*/false, /*PartialOverloading=*/true, /*AllowExplicit*/ true); } else if (auto *FTD = dyn_cast(C)) { 
AddTemplateOverloadCandidate( FTD, DeclAccessPair::make(FTD, C->getAccess()), /*ExplicitTemplateArgs=*/nullptr, Args, CandidateSet, - /*SuppressUsedConversions=*/false, + /*SuppressUserConversions=*/false, /*PartialOverloading=*/true); } } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index ee7950de45e73..0709c926ed90e 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -716,7 +716,7 @@ void Sema::DiagnoseUnknownTypeName(IdentifierInfo *&II, getTypeName(*Corrected.getCorrectionAsIdentifierInfo(), IILoc, S, tmpSS.isSet() ? &tmpSS : SS, false, false, nullptr, /*IsCtorOrDtorName=*/false, - /*NonTrivialTypeSourceInfo=*/true); + /*WantNontrivialTypeSourceInfo=*/true); } return; } diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 802ca52371a58..725a7770d67d7 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -900,7 +900,7 @@ static void handleAllocSizeAttr(Sema &S, Decl *D, const ParsedAttr &AL) { const Expr *SizeExpr = AL.getArgAsExpr(0); int SizeArgNoVal; // Parameter indices are 1-indexed, hence Index=1 - if (!checkPositiveIntArgument(S, AL, SizeExpr, SizeArgNoVal, /*Index=*/1)) + if (!checkPositiveIntArgument(S, AL, SizeExpr, SizeArgNoVal, /*Idx=*/1)) return; if (!checkParamIsIntegerType(S, FD, AL, /*AttrArgNo=*/0)) return; @@ -911,7 +911,7 @@ static void handleAllocSizeAttr(Sema &S, Decl *D, const ParsedAttr &AL) { const Expr *NumberExpr = AL.getArgAsExpr(1); int Val; // Parameter indices are 1-based, hence Index=2 - if (!checkPositiveIntArgument(S, AL, NumberExpr, Val, /*Index=*/2)) + if (!checkPositiveIntArgument(S, AL, NumberExpr, Val, /*Idx=*/2)) return; if (!checkParamIsIntegerType(S, FD, AL, /*AttrArgNo=*/1)) return; @@ -1673,7 +1673,7 @@ void Sema::AddAllocAlignAttr(SourceRange AttrRange, Decl *D, Expr *ParamExpr, ParamIdx Idx; const auto *FuncDecl = cast(D); if (!checkFunctionOrMethodParameterIndex(*this, FuncDecl, TmpAttr, - 
/*AttrArgNo=*/1, ParamExpr, Idx)) + /*AttrArgNum=*/1, ParamExpr, Idx)) return; QualType Ty = getFunctionOrMethodParamType(D, Idx.getASTIndex()); diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index b23352bffbf17..cb6b4188039f2 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -11168,8 +11168,8 @@ Sema::findInheritingConstructor(SourceLocation Loc, CXXConstructorDecl *DerivedCtor = CXXConstructorDecl::Create( Context, Derived, UsingLoc, NameInfo, TInfo->getType(), TInfo, - BaseCtor->getExplicitSpecifier(), /*Inline=*/true, - /*ImplicitlyDeclared=*/true, + BaseCtor->getExplicitSpecifier(), /*isInline=*/true, + /*isImplicitlyDeclared=*/true, Constexpr ? BaseCtor->getConstexprKind() : CSK_unspecified, InheritedConstructor(Shadow, BaseCtor)); if (Shadow->isInvalidDecl()) @@ -11190,7 +11190,7 @@ Sema::findInheritingConstructor(SourceLocation Loc, Context.getTrivialTypeSourceInfo(FPT->getParamType(I), UsingLoc); ParmVarDecl *PD = ParmVarDecl::Create( Context, DerivedCtor, UsingLoc, UsingLoc, /*IdentifierInfo=*/nullptr, - FPT->getParamType(I), TInfo, SC_None, /*DefaultArg=*/nullptr); + FPT->getParamType(I), TInfo, SC_None, /*DefArg=*/nullptr); PD->setScopeInfo(0, I); PD->setImplicit(); // Ensure attributes are propagated onto parameters (this matters for @@ -11751,7 +11751,7 @@ buildSingleCopyAssignRecursively(Sema &S, SourceLocation Loc, QualType T, // Create the reference to operator=. 
ExprResult OpEqualRef - = S.BuildMemberReferenceExpr(To.build(S, Loc), T, Loc, /*isArrow=*/false, + = S.BuildMemberReferenceExpr(To.build(S, Loc), T, Loc, /*IsArrow=*/false, SS, /*TemplateKWLoc=*/SourceLocation(), /*FirstQualifierInScope=*/nullptr, OpLookup, diff --git a/clang/lib/Sema/SemaDeclObjC.cpp b/clang/lib/Sema/SemaDeclObjC.cpp index 21d9b8c32266d..e629837eb71d1 100644 --- a/clang/lib/Sema/SemaDeclObjC.cpp +++ b/clang/lib/Sema/SemaDeclObjC.cpp @@ -1587,7 +1587,7 @@ void Sema::actOnObjCTypeArgsOrProtocolQualifiers( // add the '*'. if (type->getAs()) { SourceLocation starLoc = getLocForEndOfToken(loc); - D.AddTypeInfo(DeclaratorChunk::getPointer(/*typeQuals=*/0, starLoc, + D.AddTypeInfo(DeclaratorChunk::getPointer(/*TypeQuals=*/0, starLoc, SourceLocation(), SourceLocation(), SourceLocation(), diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 1e49a363ab330..d8869ffe945a2 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1083,8 +1083,8 @@ static QualType handleFloatConversion(Sema &S, ExprResult &LHS, LHSType = S.Context.FloatTy; return handleIntToFloatConversion(S, LHS, RHS, LHSType, RHSType, - /*convertFloat=*/!IsCompAssign, - /*convertInt=*/ true); + /*ConvertFloat=*/!IsCompAssign, + /*ConvertInt=*/ true); } assert(RHSFloat); return handleIntToFloatConversion(S, RHS, LHS, RHSType, LHSType, @@ -2151,7 +2151,7 @@ recoverFromMSUnqualifiedLookup(Sema &S, ASTContext &Context, return CXXDependentScopeMemberExpr::Create( Context, /*This=*/nullptr, ThisType, /*IsArrow=*/true, /*Op=*/SourceLocation(), NestedNameSpecifierLoc(), TemplateKWLoc, - /*FirstQualifierInScope=*/nullptr, NameInfo, TemplateArgs); + /*FirstQualifierFoundInScope=*/nullptr, NameInfo, TemplateArgs); } // Synthesize a fake NNS that points to the derived class. This will @@ -7308,10 +7308,10 @@ QualType Sema::CheckConditionalOperands(ExprResult &Cond, ExprResult &LHS, // GCC compatibility: soften pointer/integer mismatch. 
Note that // null pointers have been filtered out by this point. if (checkPointerIntegerMismatch(*this, LHS, RHS.get(), QuestionLoc, - /*isIntFirstExpr=*/true)) + /*IsIntFirstExpr=*/true)) return RHSTy; if (checkPointerIntegerMismatch(*this, RHS, LHS.get(), QuestionLoc, - /*isIntFirstExpr=*/false)) + /*IsIntFirstExpr=*/false)) return LHSTy; // Emit a better diagnostic if one of the expressions is a null pointer @@ -9105,7 +9105,7 @@ static void DiagnoseBadDivideOrRemainderValues(Sema& S, ExprResult &LHS, QualType Sema::CheckMultiplyDivideOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign, bool IsDiv) { - checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false); + checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false); if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) @@ -9129,7 +9129,7 @@ QualType Sema::CheckMultiplyDivideOperands(ExprResult &LHS, ExprResult &RHS, QualType Sema::CheckRemainderOperands( ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, bool IsCompAssign) { - checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false); + checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false); if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { @@ -9418,7 +9418,7 @@ static void diagnosePointerIncompatibility(Sema &S, SourceLocation Loc, QualType Sema::CheckAdditionOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc, QualType* CompLHSTy) { - checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false); + checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false); if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { @@ -9512,7 +9512,7 @@ QualType Sema::CheckAdditionOperands(ExprResult &LHS, ExprResult &RHS, QualType Sema::CheckSubtractionOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, QualType* CompLHSTy) { - checkArithmeticNull(*this, LHS, RHS, Loc, 
/*isCompare=*/false); + checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false); if (LHS.get()->getType()->isVectorType() || RHS.get()->getType()->isVectorType()) { @@ -9800,7 +9800,7 @@ static QualType checkVectorShift(Sema &S, ExprResult &LHS, ExprResult &RHS, QualType Sema::CheckShiftOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc, bool IsCompAssign) { - checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false); + checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false); // Vector shifts promote their scalar inputs to vector type. if (LHS.get()->getType()->isVectorType() || @@ -9969,7 +9969,7 @@ static bool hasIsEqualMethod(Sema &S, const Expr *LHS, const Expr *RHS) { Selector IsEqualSel = S.NSAPIObj->getIsEqualSelector(); ObjCMethodDecl *Method = S.LookupMethodInObjectType(IsEqualSel, InterfaceType, - /*instance=*/true); + /*IsInstance=*/true); if (!Method) { if (Type->isObjCIdType()) { // For 'id', just check the global pool. @@ -9978,7 +9978,7 @@ static bool hasIsEqualMethod(Sema &S, const Expr *LHS, const Expr *RHS) { } else { // Check protocols. Method = S.LookupMethodInQualifiedType(IsEqualSel, Type, - /*instance=*/true); + /*IsInstance=*/true); } } @@ -10478,7 +10478,7 @@ QualType Sema::CheckCompareOperands(ExprResult &LHS, ExprResult &RHS, return QualType(); } - checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/true); + checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/true); // Handle vector comparisons separately. 
if (LHS.get()->getType()->isVectorType() || @@ -10988,7 +10988,7 @@ QualType Sema::CheckVectorLogicalOperands(ExprResult &LHS, ExprResult &RHS, inline QualType Sema::CheckBitwiseOperands(ExprResult &LHS, ExprResult &RHS, SourceLocation Loc, BinaryOperatorKind Opc) { - checkArithmeticNull(*this, LHS, RHS, Loc, /*isCompare=*/false); + checkArithmeticNull(*this, LHS, RHS, Loc, /*IsCompare=*/false); bool IsCompAssign = Opc == BO_AndAssign || Opc == BO_OrAssign || Opc == BO_XorAssign; @@ -15626,7 +15626,7 @@ static bool captureInLambda(LambdaScopeInfo *LSI, // Add the capture. if (BuildAndDiagnose) - LSI->addCapture(Var, /*IsBlock=*/false, ByRef, RefersToCapturedVariable, + LSI->addCapture(Var, /*isBlock=*/false, ByRef, RefersToCapturedVariable, Loc, EllipsisLoc, CaptureType, Invalid); return !Invalid; diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 3029861f2303a..df8638a013623 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1254,7 +1254,7 @@ ExprResult Sema::ActOnCXXThis(SourceLocation Loc) { QualType ThisTy = getCurrentThisType(); if (ThisTy.isNull()) return Diag(Loc, diag::err_invalid_this_use); - return BuildCXXThisExpr(Loc, ThisTy, /*isImplicit=*/false); + return BuildCXXThisExpr(Loc, ThisTy, /*IsImplicit=*/false); } Expr *Sema::BuildCXXThisExpr(SourceLocation Loc, QualType Type, diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp index 2431f96fb3f75..c856e37e99e7a 100644 --- a/clang/lib/Sema/SemaExprMember.cpp +++ b/clang/lib/Sema/SemaExprMember.cpp @@ -1106,7 +1106,7 @@ Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType, SourceLocation Loc = R.getNameLoc(); if (SS.getRange().isValid()) Loc = SS.getRange().getBegin(); - BaseExpr = BuildCXXThisExpr(Loc, BaseExprType, /*isImplicit=*/true); + BaseExpr = BuildCXXThisExpr(Loc, BaseExprType, /*IsImplicit=*/true); } // Check the use of this member. 
@@ -1130,7 +1130,7 @@ Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType, if (VarDecl *Var = dyn_cast(MemberDecl)) { return BuildMemberExpr(BaseExpr, IsArrow, OpLoc, &SS, TemplateKWLoc, Var, - FoundDecl, /*MultipleCandidates=*/false, + FoundDecl, /*HadMultipleCandidates=*/false, MemberNameInfo, Var->getType().getNonReferenceType(), VK_LValue, OK_Ordinary); } @@ -1147,14 +1147,14 @@ Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType, } return BuildMemberExpr(BaseExpr, IsArrow, OpLoc, &SS, TemplateKWLoc, - MemberFn, FoundDecl, /*MultipleCandidates=*/false, + MemberFn, FoundDecl, /*HadMultipleCandidates=*/false, MemberNameInfo, type, valueKind, OK_Ordinary); } assert(!isa(MemberDecl) && "member function not C++ method?"); if (EnumConstantDecl *Enum = dyn_cast(MemberDecl)) { return BuildMemberExpr(BaseExpr, IsArrow, OpLoc, &SS, TemplateKWLoc, Enum, - FoundDecl, /*MultipleCandidates=*/false, + FoundDecl, /*HadMultipleCandidates=*/false, MemberNameInfo, Enum->getType(), VK_RValue, OK_Ordinary); } @@ -1163,7 +1163,7 @@ Sema::BuildMemberReferenceExpr(Expr *BaseExpr, QualType BaseExprType, *this, VarTempl, TemplateArgs, MemberNameInfo, TemplateKWLoc)) return BuildMemberExpr( BaseExpr, IsArrow, OpLoc, &SS, TemplateKWLoc, Var, FoundDecl, - /*MultipleCandidates=*/false, MemberNameInfo, + /*HadMultipleCandidates=*/false, MemberNameInfo, Var->getType().getNonReferenceType(), VK_LValue, OK_Ordinary); return ExprError(); } @@ -1822,7 +1822,7 @@ Sema::BuildFieldReferenceExpr(Expr *BaseExpr, bool IsArrow, return BuildMemberExpr(Base.get(), IsArrow, OpLoc, &SS, /*TemplateKWLoc=*/SourceLocation(), Field, FoundDecl, - /*MultipleCandidates=*/false, MemberNameInfo, + /*HadMultipleCandidates=*/false, MemberNameInfo, MemberType, VK, OK); } @@ -1851,7 +1851,7 @@ Sema::BuildImplicitMemberExpr(const CXXScopeSpec &SS, SourceLocation Loc = R.getNameLoc(); if (SS.getRange().isValid()) Loc = SS.getRange().getBegin(); - baseExpr = BuildCXXThisExpr(loc, ThisTy, 
/*isImplicit=*/true); + baseExpr = BuildCXXThisExpr(loc, ThisTy, /*IsImplicit=*/true); } return BuildMemberReferenceExpr(baseExpr, ThisTy, diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index d2a4a0979d15a..b3b34699eb64f 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -3931,7 +3931,7 @@ static void TryConstructorInitialization(Sema &S, Result = ResolveConstructorOverload(S, Kind.getLocation(), Args, CandidateSet, DestType, Ctors, Best, CopyInitialization, AllowExplicit, - /*OnlyListConstructor=*/true, + /*OnlyListConstructors=*/true, IsListInit); } @@ -4118,7 +4118,7 @@ static void TryReferenceListInitialization(Sema &S, if (Sequence) { if (DestType->isRValueReferenceType() || (T1Quals.hasConst() && !T1Quals.hasVolatile())) - Sequence.AddReferenceBindingStep(cv1T1, /*bindingTemporary=*/true); + Sequence.AddReferenceBindingStep(cv1T1, /*BindingTemporary=*/true); else Sequence.SetFailed( InitializationSequence::FK_NonConstLValueReferenceBindingToTemporary); @@ -4833,7 +4833,7 @@ static void TryReferenceInitializationCore(Sema &S, return; } - Sequence.AddReferenceBindingStep(cv1T1IgnoreAS, /*bindingTemporary=*/true); + Sequence.AddReferenceBindingStep(cv1T1IgnoreAS, /*BindingTemporary=*/true); if (T1Quals.hasAddressSpace()) { if (!Qualifiers::isAddressSpaceSupersetOf(T1Quals.getAddressSpace(), diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp index 268e15c20ef3e..986524e6d56b4 100644 --- a/clang/lib/Sema/SemaLambda.cpp +++ b/clang/lib/Sema/SemaLambda.cpp @@ -1328,7 +1328,7 @@ static void addFunctionPointerConversion(Sema &S, S.Context.getTranslationUnitDecl(), From->getBeginLoc(), From->getLocation(), From->getIdentifier(), From->getType(), From->getTypeSourceInfo(), From->getStorageClass(), - /*DefaultArg=*/nullptr)); + /*DefArg=*/nullptr)); CallOpConvTL.setParam(I, From); CallOpConvNameTL.setParam(I, From); } @@ -1376,7 +1376,7 @@ static void addFunctionPointerConversion(Sema &S, 
CXXMethodDecl *Invoke = CXXMethodDecl::Create( S.Context, Class, Loc, DeclarationNameInfo(InvokerName, Loc), InvokerFunctionTy, CallOperator->getTypeSourceInfo(), SC_Static, - /*IsInline=*/true, CSK_unspecified, CallOperator->getBody()->getEndLoc()); + /*isInline=*/true, CSK_unspecified, CallOperator->getBody()->getEndLoc()); for (unsigned I = 0, N = CallOperator->getNumParams(); I != N; ++I) InvokerParams[I]->setOwningFunction(Invoke); Invoke->setParams(InvokerParams); @@ -1860,7 +1860,7 @@ ExprResult Sema::BuildBlockForLambdaConversion(SourceLocation CurrentLocation, Context, Block, From->getBeginLoc(), From->getLocation(), From->getIdentifier(), From->getType(), From->getTypeSourceInfo(), From->getStorageClass(), - /*DefaultArg=*/nullptr)); + /*DefArg=*/nullptr)); } Block->setParams(BlockParams); @@ -1875,8 +1875,8 @@ ExprResult Sema::BuildBlockForLambdaConversion(SourceLocation CurrentLocation, ConvLocation, nullptr, Src->getType(), CapVarTSI, SC_None); - BlockDecl::Capture Capture(/*Variable=*/CapVar, /*ByRef=*/false, - /*Nested=*/false, /*Copy=*/Init.get()); + BlockDecl::Capture Capture(/*variable=*/CapVar, /*byRef=*/false, + /*nested=*/false, /*copy=*/Init.get()); Block->setCaptures(Context, Capture, /*CapturesCXXThis=*/false); // Add a fake function body to the block. IR generation is responsible diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index 68c2286cf492e..10de0ca91221c 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -206,7 +206,7 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, PP.getIdentifierInfo(ModuleName), Path[0].second); Mod = getModuleLoader().loadModule(ModuleLoc, {ModuleNameLoc}, Module::AllVisible, - /*IsIncludeDirective=*/false); + /*IsInclusionDirective=*/false); if (!Mod) { Diag(ModuleLoc, diag::err_module_not_defined) << ModuleName; // Create an empty module interface unit for error recovery. 
@@ -323,7 +323,7 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc, Module *Mod = getModuleLoader().loadModule(ImportLoc, Path, Module::AllVisible, - /*IsIncludeDirective=*/false); + /*IsInclusionDirective=*/false); if (!Mod) return true; diff --git a/clang/lib/Sema/SemaObjCProperty.cpp b/clang/lib/Sema/SemaObjCProperty.cpp index 2521441f8bc71..e5c014501431b 100644 --- a/clang/lib/Sema/SemaObjCProperty.cpp +++ b/clang/lib/Sema/SemaObjCProperty.cpp @@ -1288,7 +1288,7 @@ Decl *Sema::ActOnPropertyImplDecl(Scope *S, Ivar = ObjCIvarDecl::Create(Context, ClassImpDecl, PropertyIvarLoc,PropertyIvarLoc, PropertyIvar, - PropertyIvarType, /*Dinfo=*/nullptr, + PropertyIvarType, /*TInfo=*/nullptr, ObjCIvarDecl::Private, (Expr *)nullptr, true); if (RequireNonAbstractType(PropertyIvarLoc, diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index b669929e655f3..222d042b6da59 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -6000,7 +6000,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, CollapseLoopCountExpr->EvaluateAsInt(Result, SemaRef.getASTContext())) { NestedLoopCount = Result.Val.getInt().getLimitedValue(); } else { - Built.clear(/*size=*/1); + Built.clear(/*Size=*/1); return 1; } } @@ -6022,7 +6022,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, } OrderedLoopCount = Result.getLimitedValue(); } else { - Built.clear(/*size=*/1); + Built.clear(/*Size=*/1); return 1; } } diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 11595fac2cb6b..77e6767c2b814 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -1950,7 +1950,7 @@ IsTransparentUnionStandardConversion(Sema &S, Expr* From, // It's compatible if the expression matches any of the fields. 
for (const auto *it : UD->fields()) { if (IsStandardConversion(S, From, it->getType(), InOverloadResolution, SCS, - CStyle, /*ObjCWritebackConversion=*/false)) { + CStyle, /*AllowObjCWritebackConversion=*/false)) { ToType = it->getType(); return true; } @@ -5423,7 +5423,7 @@ static ExprResult CheckConvertedConstantExpression(Sema &S, Expr *From, : TryCopyInitialization(S, From, T, /*SuppressUserConversions=*/false, /*InOverloadResolution=*/false, - /*AllowObjcWritebackConversion=*/false, + /*AllowObjCWritebackConversion=*/false, /*AllowExplicit=*/false); StandardConversionSequence *SCS = nullptr; switch (ICS.getKind()) { @@ -7319,7 +7319,7 @@ void Sema::AddMemberOperatorCandidates(OverloadedOperatorKind Op, ++Oper) AddMethodCandidate(Oper.getPair(), Args[0]->getType(), Args[0]->Classify(Context), Args.slice(1), - CandidateSet, /*SuppressUserConversions=*/false); + CandidateSet, /*SuppressUserConversion=*/false); } } @@ -8420,7 +8420,7 @@ class BuiltinOperatorOverloadBuilder { isEqualOp ? 
*Ptr : S.Context.getPointerDiffType(), }; S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet, - /*IsAssigmentOperator=*/ isEqualOp); + /*IsAssignmentOperator=*/ isEqualOp); bool NeedVolatile = !(*Ptr).isVolatileQualified() && VisibleTypeConversionsQuals.hasVolatile(); @@ -8429,7 +8429,7 @@ class BuiltinOperatorOverloadBuilder { ParamTypes[0] = S.Context.getLValueReferenceType(S.Context.getVolatileType(*Ptr)); S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet, - /*IsAssigmentOperator=*/isEqualOp); + /*IsAssignmentOperator=*/isEqualOp); } if (!(*Ptr).isRestrictQualified() && @@ -8438,7 +8438,7 @@ class BuiltinOperatorOverloadBuilder { ParamTypes[0] = S.Context.getLValueReferenceType(S.Context.getRestrictType(*Ptr)); S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet, - /*IsAssigmentOperator=*/isEqualOp); + /*IsAssignmentOperator=*/isEqualOp); if (NeedVolatile) { // volatile restrict version @@ -8448,7 +8448,7 @@ class BuiltinOperatorOverloadBuilder { (Qualifiers::Volatile | Qualifiers::Restrict))); S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet, - /*IsAssigmentOperator=*/isEqualOp); + /*IsAssignmentOperator=*/isEqualOp); } } } @@ -8469,7 +8469,7 @@ class BuiltinOperatorOverloadBuilder { // non-volatile version S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet, - /*IsAssigmentOperator=*/true); + /*IsAssignmentOperator=*/true); bool NeedVolatile = !(*Ptr).isVolatileQualified() && VisibleTypeConversionsQuals.hasVolatile(); @@ -8478,7 +8478,7 @@ class BuiltinOperatorOverloadBuilder { ParamTypes[0] = S.Context.getLValueReferenceType(S.Context.getVolatileType(*Ptr)); S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet, - /*IsAssigmentOperator=*/true); + /*IsAssignmentOperator=*/true); } if (!(*Ptr).isRestrictQualified() && @@ -8487,7 +8487,7 @@ class BuiltinOperatorOverloadBuilder { ParamTypes[0] = S.Context.getLValueReferenceType(S.Context.getRestrictType(*Ptr)); S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet, - /*IsAssigmentOperator=*/true); + 
/*IsAssignmentOperator=*/true); if (NeedVolatile) { // volatile restrict version @@ -8497,7 +8497,7 @@ class BuiltinOperatorOverloadBuilder { (Qualifiers::Volatile | Qualifiers::Restrict))); S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet, - /*IsAssigmentOperator=*/true); + /*IsAssignmentOperator=*/true); } } } @@ -8530,14 +8530,14 @@ class BuiltinOperatorOverloadBuilder { // Add this built-in operator as a candidate (VQ is empty). ParamTypes[0] = S.Context.getLValueReferenceType(LeftBaseTy); S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet, - /*IsAssigmentOperator=*/isEqualOp); + /*IsAssignmentOperator=*/isEqualOp); // Add this built-in operator as a candidate (VQ is 'volatile'). if (VisibleTypeConversionsQuals.hasVolatile()) { ParamTypes[0] = S.Context.getVolatileType(LeftBaseTy); ParamTypes[0] = S.Context.getLValueReferenceType(ParamTypes[0]); S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet, - /*IsAssigmentOperator=*/isEqualOp); + /*IsAssignmentOperator=*/isEqualOp); } } } @@ -8556,14 +8556,14 @@ class BuiltinOperatorOverloadBuilder { // Add this built-in operator as a candidate (VQ is empty). ParamTypes[0] = S.Context.getLValueReferenceType(*Vec1); S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet, - /*IsAssigmentOperator=*/isEqualOp); + /*IsAssignmentOperator=*/isEqualOp); // Add this built-in operator as a candidate (VQ is 'volatile'). 
if (VisibleTypeConversionsQuals.hasVolatile()) { ParamTypes[0] = S.Context.getVolatileType(*Vec1); ParamTypes[0] = S.Context.getLValueReferenceType(ParamTypes[0]); S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet, - /*IsAssigmentOperator=*/isEqualOp); + /*IsAssignmentOperator=*/isEqualOp); } } } @@ -9012,7 +9012,7 @@ Sema::AddArgumentDependentLookupCandidates(DeclarationName Name, continue; AddOverloadCandidate(FD, FoundDecl, Args, CandidateSet, - /*SupressUserConversions=*/false, PartialOverloading, + /*SuppressUserConversions=*/false, PartialOverloading, /*AllowExplicit*/ true, /*AllowExplicitConversions*/ false, ADLCallKind::UsesADL); @@ -11763,7 +11763,7 @@ static void AddOverloadedCallCandidate(Sema &S, return; S.AddOverloadCandidate(Func, FoundDecl, Args, CandidateSet, - /*SuppressUsedConversions=*/false, + /*SuppressUserConversions=*/false, PartialOverloading); return; } @@ -11772,7 +11772,7 @@ static void AddOverloadedCallCandidate(Sema &S, = dyn_cast(Callee)) { S.AddTemplateOverloadCandidate(FuncTemplate, FoundDecl, ExplicitTemplateArgs, Args, CandidateSet, - /*SuppressUsedConversions=*/false, + /*SuppressUserConversions=*/false, PartialOverloading); return; } @@ -13066,7 +13066,7 @@ Sema::BuildCallToMemberFunction(Scope *S, Expr *MemExprE, AddMethodTemplateCandidate( cast(Func), I.getPair(), ActingDC, TemplateArgs, ObjectType, ObjectClassification, Args, CandidateSet, - /*SuppressUsedConversions=*/false); + /*SuppressUserConversions=*/false); } } @@ -13262,7 +13262,7 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj, Oper != OperEnd; ++Oper) { AddMethodCandidate(Oper.getPair(), Object.get()->getType(), Object.get()->Classify(Context), Args, CandidateSet, - /*SuppressUserConversions=*/false); + /*SuppressUserConversion=*/false); } // C++ [over.call.object]p2: @@ -13537,7 +13537,7 @@ Sema::BuildOverloadedArrowExpr(Scope *S, Expr *Base, SourceLocation OpLoc, for (LookupResult::iterator Oper = R.begin(), OperEnd = R.end(); Oper != OperEnd; ++Oper) 
{ AddMethodCandidate(Oper.getPair(), Base->getType(), Base->Classify(Context), - None, CandidateSet, /*SuppressUserConversions=*/false); + None, CandidateSet, /*SuppressUserConversion=*/false); } bool HadMultipleCandidates = (CandidateSet.size() > 1); @@ -13919,7 +13919,7 @@ Expr *Sema::FixOverloadedFunctionReference(Expr *E, DeclAccessPair Found, if (MemExpr->getQualifier()) Loc = MemExpr->getQualifierLoc().getBeginLoc(); Base = - BuildCXXThisExpr(Loc, MemExpr->getBaseType(), /*isImplicit=*/true); + BuildCXXThisExpr(Loc, MemExpr->getBaseType(), /*IsImplicit=*/true); } } else Base = MemExpr->getBase(); diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index 0e5881e327a8b..480155df89901 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -2447,7 +2447,7 @@ StmtResult Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, ExprResult SizeOfVLAExprR = ActOnUnaryExprOrTypeTraitExpr( EndVar->getLocation(), UETT_SizeOf, - /*isType=*/true, + /*IsType=*/true, CreateParsedType(VAT->desugar(), Context.getTrivialTypeSourceInfo( VAT->desugar(), RangeLoc)) .getAsOpaquePtr(), @@ -2457,7 +2457,7 @@ StmtResult Sema::BuildCXXForRangeStmt(SourceLocation ForLoc, ExprResult SizeOfEachElementExprR = ActOnUnaryExprOrTypeTraitExpr( EndVar->getLocation(), UETT_SizeOf, - /*isType=*/true, + /*IsType=*/true, CreateParsedType(VAT->desugar(), Context.getTrivialTypeSourceInfo( VAT->getElementType(), RangeLoc)) diff --git a/clang/lib/Sema/SemaStmtAsm.cpp b/clang/lib/Sema/SemaStmtAsm.cpp index ec8958c3c5f90..b123a739a7ab1 100644 --- a/clang/lib/Sema/SemaStmtAsm.cpp +++ b/clang/lib/Sema/SemaStmtAsm.cpp @@ -849,7 +849,7 @@ Sema::LookupInlineAsmVarDeclField(Expr *E, StringRef Member, return CXXDependentScopeMemberExpr::Create( Context, E, T, /*IsArrow=*/false, AsmLoc, NestedNameSpecifierLoc(), SourceLocation(), - /*FirstQualifierInScope=*/nullptr, NameInfo, /*TemplateArgs=*/nullptr); + /*FirstQualifierFoundInScope=*/nullptr, NameInfo, 
/*TemplateArgs=*/nullptr); } const RecordType *RT = T->getAs(); diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index fba8cd4eee063..3212281cc34d2 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -8428,8 +8428,8 @@ bool Sema::CheckFunctionTemplateSpecialization( // here that have a different target. if (LangOpts.CUDA && IdentifyCUDATarget(Specialization, - /* IgnoreImplicitHDAttributes = */ true) != - IdentifyCUDATarget(FD, /* IgnoreImplicitHDAttributes = */ true)) { + /* IgnoreImplicitHDAttr = */ true) != + IdentifyCUDATarget(FD, /* IgnoreImplicitHDAttr = */ true)) { FailedCandidates.addCandidate().set( I.getPair(), FunTmpl->getTemplatedDecl(), MakeDeductionFailureInfo(Context, TDK_CUDATargetMismatch, Info)); @@ -9587,7 +9587,7 @@ DeclResult Sema::ActOnExplicitInstantiation(Scope *S, // have a different target. if (LangOpts.CUDA && IdentifyCUDATarget(Specialization, - /* IgnoreImplicitHDAttributes = */ true) != + /* IgnoreImplicitHDAttr = */ true) != IdentifyCUDATarget(D.getDeclSpec().getAttributes())) { FailedCandidates.addCandidate().set( P.getPair(), FunTmpl->getTemplatedDecl(), diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 263bc3104efe8..b55a232d26c22 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -2872,7 +2872,7 @@ Sema::DeduceTemplateArguments(ClassTemplatePartialSpecializationDecl *Partial, return Sema::TDK_SubstitutionFailure; return ::FinishTemplateArgumentDeduction( - *this, Partial, /*PartialOrdering=*/false, TemplateArgs, Deduced, Info); + *this, Partial, /*IsPartialOrdering=*/false, TemplateArgs, Deduced, Info); } /// Perform template argument deduction to determine whether @@ -2913,7 +2913,7 @@ Sema::DeduceTemplateArguments(VarTemplatePartialSpecializationDecl *Partial, return Sema::TDK_SubstitutionFailure; return ::FinishTemplateArgumentDeduction( - *this, Partial, 
/*PartialOrdering=*/false, TemplateArgs, Deduced, Info); + *this, Partial, /*IsPartialOrdering=*/false, TemplateArgs, Deduced, Info); } /// Determine whether the given type T is a simple-template-id type. @@ -5067,7 +5067,7 @@ static bool isAtLeastAsSpecializedAs(Sema &S, QualType T1, QualType T2, Info); auto *TST1 = T1->castAs(); if (FinishTemplateArgumentDeduction( - S, P2, /*PartialOrdering=*/true, + S, P2, /*IsPartialOrdering=*/true, TemplateArgumentList(TemplateArgumentList::OnStack, TST1->template_arguments()), Deduced, Info)) diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 514cbd90d3b85..12bad62d1f1dd 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -751,7 +751,7 @@ static void maybeSynthesizeBlockSignature(TypeProcessingState &state, /*IsAmbiguous=*/false, /*LParenLoc=*/NoLoc, /*ArgInfo=*/nullptr, - /*NumArgs=*/0, + /*NumParams=*/0, /*EllipsisLoc=*/NoLoc, /*RParenLoc=*/NoLoc, /*RefQualifierIsLvalueRef=*/true, diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index c3f3a370efc21..7f2c7f09e8a3e 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -8252,7 +8252,7 @@ void ASTReader::UpdateSema() { // Update the state of pragmas. Use the same API as if we had encountered the // pragma in the source. 
if(OptimizeOffPragmaLocation.isValid()) - SemaObj->ActOnPragmaOptimize(/* IsOn = */ false, OptimizeOffPragmaLocation); + SemaObj->ActOnPragmaOptimize(/* On = */ false, OptimizeOffPragmaLocation); if (PragmaMSStructState != -1) SemaObj->ActOnPragmaMSStruct((PragmaMSStructKind)PragmaMSStructState); if (PointersToMembersPragmaLocation.isValid()) { diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 51a1e81525187..afaaa543bb27f 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2861,7 +2861,7 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { case STMT_CXX_TRY: S = CXXTryStmt::Create(Context, Empty, - /*NumHandlers=*/Record[ASTStmtReader::NumStmtFields]); + /*numHandlers=*/Record[ASTStmtReader::NumStmtFields]); break; case STMT_CXX_FOR_RANGE: diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index a22e97aaa5cd3..10946f9b0d985 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1447,7 +1447,7 @@ ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP, Stream.EmitRecord(DIAGNOSTIC_OPTIONS, Record); // Write out the diagnostic/pragma mappings. - WritePragmaDiagnosticMappings(Diags, /* IsModule = */ WritingModule); + WritePragmaDiagnosticMappings(Diags, /* isModule = */ WritingModule); // Leave the options block. Stream.ExitBlock(); diff --git a/clang/lib/Serialization/GlobalModuleIndex.cpp b/clang/lib/Serialization/GlobalModuleIndex.cpp index f90eab322785a..2db8f830c46de 100644 --- a/clang/lib/Serialization/GlobalModuleIndex.cpp +++ b/clang/lib/Serialization/GlobalModuleIndex.cpp @@ -658,8 +658,8 @@ llvm::Error GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) { // Find the imported module file. 
const FileEntry *DependsOnFile - = FileMgr.getFile(ImportedFile, /*openFile=*/false, - /*cacheFailure=*/false); + = FileMgr.getFile(ImportedFile, /*OpenFile=*/false, + /*CacheFailure=*/false); if (!DependsOnFile) return llvm::createStringError(std::errc::bad_file_descriptor, diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp index 3e45b30e250bd..6ae0c4f575519 100644 --- a/clang/lib/Serialization/ModuleManager.cpp +++ b/clang/lib/Serialization/ModuleManager.cpp @@ -42,8 +42,8 @@ using namespace clang; using namespace serialization; ModuleFile *ModuleManager::lookupByFileName(StringRef Name) const { - const FileEntry *Entry = FileMgr.getFile(Name, /*openFile=*/false, - /*cacheFailure=*/false); + const FileEntry *Entry = FileMgr.getFile(Name, /*OpenFile=*/false, + /*CacheFailure=*/false); if (Entry) return lookup(Entry); @@ -68,8 +68,8 @@ ModuleFile *ModuleManager::lookup(const FileEntry *File) const { std::unique_ptr ModuleManager::lookupBuffer(StringRef Name) { - const FileEntry *Entry = FileMgr.getFile(Name, /*openFile=*/false, - /*cacheFailure=*/false); + const FileEntry *Entry = FileMgr.getFile(Name, /*OpenFile=*/false, + /*CacheFailure=*/false); return std::move(InMemoryBuffers[Entry]); } @@ -184,7 +184,7 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type, } else { // Get a buffer of the file and close the file descriptor when done. Buf = FileMgr.getBufferForFile(NewModule->File, - /*IsVolatile=*/false, + /*isVolatile=*/false, /*ShouldClose=*/true); } @@ -447,7 +447,7 @@ bool ModuleManager::lookupModuleFile(StringRef FileName, // Open the file immediately to ensure there is no race between stat'ing and // opening the file. 
- File = FileMgr.getFile(FileName, /*openFile=*/true, /*cacheFailure=*/false); + File = FileMgr.getFile(FileName, /*OpenFile=*/true, /*CacheFailure=*/false); if (!File) return false; diff --git a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp index 916a20e559f86..3cfe4dc82a100 100644 --- a/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DynamicTypePropagation.cpp @@ -144,7 +144,7 @@ static void recordFixedType(const MemRegion *Region, const CXXMethodDecl *MD, QualType Ty = Ctx.getPointerType(Ctx.getRecordType(MD->getParent())); ProgramStateRef State = C.getState(); - State = setDynamicTypeInfo(State, Region, Ty, /*CanBeSubclass=*/false); + State = setDynamicTypeInfo(State, Region, Ty, /*CanBeSubClassed=*/false); C.addTransition(State); } @@ -307,7 +307,7 @@ void DynamicTypePropagation::checkPostStmt(const CXXNewExpr *NewE, return; C.addTransition(setDynamicTypeInfo(C.getState(), MR, NewE->getType(), - /*CanBeSubclass=*/false)); + /*CanBeSubClassed=*/false)); } const ObjCObjectType * @@ -887,7 +887,7 @@ void DynamicTypePropagation::checkPostObjCMessage(const ObjCMethodCall &M, // MostSpecializedTypeArgsMap. We should only store anything in the later if // the stored data differs from the one stored in the former. 
State = setDynamicTypeInfo(State, RetRegion, ResultType, - /*CanBeSubclass=*/true); + /*CanBeSubClassed=*/true); Pred = C.addTransition(State); } diff --git a/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp index 0637c2b29645b..d471c23b83bfc 100644 --- a/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp @@ -196,7 +196,7 @@ static void emitDiagnostics(const BoundNodes &Nodes, ADC->getDecl(), Checker, /*Name=*/"GCD performance anti-pattern", - /*Category=*/"Performance", + /*BugCategory=*/"Performance", OS.str(), PathDiagnosticLocation::createBegin(SW, BR.getSourceManager(), ADC), SW->getSourceRange()); diff --git a/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp index d575b2fd6ecb3..cc2cfb7742270 100644 --- a/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/IdenticalExprChecker.cpp @@ -115,7 +115,7 @@ bool FindIdenticalExprVisitor::VisitIfStmt(const IfStmt *I) { if (const CompoundStmt *CS = dyn_cast(Stmt1)) { if (!CS->body_empty()) { const IfStmt *InnerIf = dyn_cast(*CS->body_begin()); - if (InnerIf && isIdenticalStmt(AC->getASTContext(), I->getCond(), InnerIf->getCond(), /*ignoreSideEffects=*/ false)) { + if (InnerIf && isIdenticalStmt(AC->getASTContext(), I->getCond(), InnerIf->getCond(), /*IgnoreSideEffects=*/ false)) { PathDiagnosticLocation ELoc(InnerIf->getCond(), BR.getSourceManager(), AC); BR.EmitBasicReport(AC->getDecl(), Checker, "Identical conditions", categories::LogicError, diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index 03e779f3c52d8..8d9ab1f9e4834 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -1208,7 +1208,7 @@ void 
MallocChecker::checkPostObjCMessage(const ObjCMethodCall &Call, ProgramStateRef State = FreeMemAux(C, Call.getArgExpr(0), Call.getOriginExpr(), C.getState(), /*Hold=*/true, ReleasedAllocatedMemory, - /*RetNullOnFailure=*/true); + /*ReturnsNullOnFailure=*/true); C.addTransition(State); } diff --git a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp index b7bf9f3db3ff6..af21c84b995b4 100644 --- a/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/NullabilityChecker.cpp @@ -478,7 +478,7 @@ void NullabilityChecker::checkEvent(ImplicitNullDerefEvent Event) const { return; const MemRegion *Region = - getTrackRegion(Event.Location, /*CheckSuperregion=*/true); + getTrackRegion(Event.Location, /*CheckSuperRegion=*/true); if (!Region) return; diff --git a/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp b/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp index 27dadd09d7038..5b9895c338d81 100644 --- a/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/OSObjectCStyleCast.cpp @@ -49,7 +49,7 @@ static void emitDiagnostics(const BoundNodes &Nodes, ADC->getDecl(), Checker, /*Name=*/"OSObject C-Style Cast", - /*Category=*/"Security", + /*BugCategory=*/"Security", OS.str(), PathDiagnosticLocation::createBegin(CE, BR.getSourceManager(), ADC), CE->getSourceRange()); diff --git a/clang/lib/StaticAnalyzer/Checkers/ObjCAutoreleaseWriteChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ObjCAutoreleaseWriteChecker.cpp index 40f82214e9495..d2371fe60d21d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ObjCAutoreleaseWriteChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ObjCAutoreleaseWriteChecker.cpp @@ -136,7 +136,7 @@ static void emitDiagnostics(BoundNodes &Match, const Decl *D, BugReporter &BR, ADC->getDecl(), Checker, /*Name=*/(llvm::Twine(ActionMsg) + " autoreleasing out parameter inside autorelease pool").str(), - 
/*Category=*/"Memory", + /*BugCategory=*/"Memory", (llvm::Twine(ActionMsg) + " autoreleasing out parameter " + (IsCapture ? "'" + PVD->getName() + "'" + " " : "") + "inside " + "autorelease pool that may exit before " + Name + " returns; consider " diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp index 31d2d7c125e26..4a3a8dae23a7f 100644 --- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountChecker.cpp @@ -951,7 +951,7 @@ bool RetainCountChecker::evalCall(const CallEvent &Call, // And on the original branch assume that both input and // output are non-zero. if (auto L = RetVal.getAs()) - state = state->assume(*L, /*Assumption=*/true); + state = state->assume(*L, /*assumption=*/true); } } diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp index 927e9ae443609..796fd882ffd5e 100644 --- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp @@ -65,7 +65,7 @@ StringRef RefCountBug::getDescription() const { RefCountBug::RefCountBug(const CheckerBase *Checker, RefCountBugType BT) : BugType(Checker, bugTypeToName(BT), categories::MemoryRefCount, - /*SupressOnSink=*/BT == LeakWithinFunction || BT == LeakAtReturn), + /*SuppressOnSink=*/BT == LeakWithinFunction || BT == LeakAtReturn), BT(BT), Checker(Checker) {} static bool isNumericLiteralExpression(const Expr *E) { diff --git a/clang/lib/StaticAnalyzer/Checkers/RunLoopAutoreleaseLeakChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/RunLoopAutoreleaseLeakChecker.cpp index e744ff9d7c9e5..5e305aa709b64 100644 --- a/clang/lib/StaticAnalyzer/Checkers/RunLoopAutoreleaseLeakChecker.cpp 
+++ b/clang/lib/StaticAnalyzer/Checkers/RunLoopAutoreleaseLeakChecker.cpp @@ -115,7 +115,7 @@ static void emitDiagnostics(BoundNodes &Match, BR.EmitBasicReport(ADC->getDecl(), Checker, /*Name=*/"Memory leak inside autorelease pool", - /*Category=*/"Memory", + /*BugCategory=*/"Memory", /*Name=*/ (Twine("Temporary objects allocated in the") + " autorelease pool " + diff --git a/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp index 417b07d14be51..62a4c2ab0209c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/TrustNonnullChecker.cpp @@ -87,7 +87,7 @@ class TrustNonnullChecker : public Checker()) - State = State->assume(*L, /*Assumption=*/true); + State = State->assume(*L, /*assumption=*/true); C.addTransition(State); } @@ -106,7 +106,7 @@ class TrustNonnullChecker : public Checker()) - State = State->assume(*L, /*Assumption=*/true); + State = State->assume(*L, /*assumption=*/true); } // Record an implication: index is non-null if the output is non-null. 
diff --git a/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp b/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp index 95f2b703cdd65..1b1ffff5ade82 100644 --- a/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/AnalysisManager.cpp @@ -23,7 +23,7 @@ AnalysisManager::AnalysisManager(ASTContext &ASTCtx, DiagnosticsEngine &diags, : AnaCtxMgr( ASTCtx, Options.UnoptimizedCFG, Options.ShouldIncludeImplicitDtorsInCFG, - /*AddInitializers=*/true, + /*addInitializers=*/true, Options.ShouldIncludeTemporaryDtorsInCFG, Options.ShouldIncludeLifetimeInCFG, // Adding LoopExit elements to the CFG is a requirement for loop diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp index 6627633f39332..e5a0794f10e2c 100644 --- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp @@ -1259,7 +1259,7 @@ generateEmptyDiagnosticForReport(BugReport *R, SourceManager &SM) { return llvm::make_unique( R->getBugType().getCheckName(), R->getDeclWithIssue(), R->getBugType().getName(), R->getDescription(), - R->getShortDescription(/*Fallback=*/false), BT.getCategory(), + R->getShortDescription(/*UseFallback=*/false), BT.getCategory(), R->getUniqueingLocation(), R->getUniqueingDecl(), findExecutedLines(SM, R->getErrorNode())); } diff --git a/clang/lib/StaticAnalyzer/Core/DynamicTypeMap.cpp b/clang/lib/StaticAnalyzer/Core/DynamicTypeMap.cpp index 75ae2606910ac..79424452240d7 100644 --- a/clang/lib/StaticAnalyzer/Core/DynamicTypeMap.cpp +++ b/clang/lib/StaticAnalyzer/Core/DynamicTypeMap.cpp @@ -36,7 +36,7 @@ DynamicTypeInfo getDynamicTypeInfo(ProgramStateRef State, // Otherwise, fall back to what we know about the region. 
if (const auto *TR = dyn_cast(Reg)) - return DynamicTypeInfo(TR->getLocationType(), /*CanBeSubclass=*/false); + return DynamicTypeInfo(TR->getLocationType(), /*CanBeSub=*/false); if (const auto *SR = dyn_cast(Reg)) { SymbolRef Sym = SR->getSymbol(); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 12094c6a1e6c9..1fef5b3c1edd5 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1568,7 +1568,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, ProgramStateRef NewState = createTemporaryRegionIfNeeded(State, LCtx, OCE->getArg(0)); if (NewState != State) { - Pred = Bldr.generateNode(OCE, Pred, NewState, /*Tag=*/nullptr, + Pred = Bldr.generateNode(OCE, Pred, NewState, /*tag=*/nullptr, ProgramPoint::PreStmtKind); // Did we cache out? if (!Pred) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp index e00a08b2162c1..b935e3afe34b2 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp @@ -327,30 +327,30 @@ void ExprEngine::processCallExit(ExplodedNode *CEBNode) { ExplodedNodeSet DstPostPostCallCallback; getCheckerManager().runCheckersForPostCall(DstPostPostCallCallback, CEENode, *UpdatedCall, *this, - /*WasInlined=*/true); + /*wasInlined=*/true); for (auto I : DstPostPostCallCallback) { getCheckerManager().runCheckersForNewAllocator( CNE, *getObjectUnderConstruction(I->getState(), CNE, calleeCtx->getParent()), DstPostCall, I, *this, - /*WasInlined=*/true); + /*wasInlined=*/true); } } else { getCheckerManager().runCheckersForPostCall(DstPostCall, CEENode, *UpdatedCall, *this, - /*WasInlined=*/true); + /*wasInlined=*/true); } ExplodedNodeSet Dst; if (const ObjCMethodCall *Msg = dyn_cast(Call)) { getCheckerManager().runCheckersForPostObjCMessage(Dst, DstPostCall, *Msg, *this, - 
/*WasInlined=*/true); + /*wasInlined=*/true); } else if (CE && !(isa(CE) && // Called when visiting CXXNewExpr. AMgr.getAnalyzerOptions().MayInlineCXXAllocator)) { getCheckerManager().runCheckersForPostStmt(Dst, DstPostCall, CE, - *this, /*WasInlined=*/true); + *this, /*wasInlined=*/true); } else { Dst.insert(DstPostCall); } @@ -645,7 +645,7 @@ ProgramStateRef ExprEngine::bindReturnValue(const CallEvent &Call, ITraits.setTrait(TargetR, RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); State = State->invalidateRegions(TargetR, E, Count, LCtx, - /* CausedByPointerEscape=*/false, nullptr, + /* CausesPointerEscape=*/false, nullptr, &Call, &ITraits); R = State->getSVal(Target.castAs(), E->getType()); diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index a389619f84260..d2aea1fd92dda 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -1253,7 +1253,7 @@ RegionStoreManager::invalidateGlobalRegion(MemRegion::Kind K, // Bind the globals memory space to a new symbol that we will use to derive // the bindings for all globals. 
const GlobalsSpaceRegion *GS = MRMgr.getGlobalsRegion(K); - SVal V = svalBuilder.conjureSymbolVal(/* SymbolTag = */ (const void*) GS, Ex, LCtx, + SVal V = svalBuilder.conjureSymbolVal(/* symbolTag = */ (const void*) GS, Ex, LCtx, /* type does not matter */ Ctx.IntTy, Count); diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp index aaf29abd47309..84c52f53ca5e7 100644 --- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -525,7 +525,7 @@ SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state, case BO_Sub: if (resultTy->isIntegralOrEnumerationType()) return makeIntVal(0, resultTy); - return evalCastFromNonLoc(makeIntVal(0, /*Unsigned=*/false), resultTy); + return evalCastFromNonLoc(makeIntVal(0, /*isUnsigned=*/false), resultTy); case BO_Or: case BO_And: return evalCastFromNonLoc(lhs, resultTy); diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index d003937825bcd..bb5c0bb711b99 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -1606,7 +1606,7 @@ void LinkerDriver::link(ArrayRef argsArr) { // Handle generation of import library from a def file. if (!args.hasArg(OPT_INPUT)) { fixupExports(); - createImportLibrary(/*AsLib=*/true); + createImportLibrary(/*asLib=*/true); return; } @@ -1830,7 +1830,7 @@ void LinkerDriver::link(ArrayRef argsArr) { // need to create a .lib file. if (!config->exports.empty() || config->dll) { fixupExports(); - createImportLibrary(/*AsLib=*/false); + createImportLibrary(/*asLib=*/false); assignExportOrdinals(); } diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp index 59bdaec2edd8b..edc3b3707c7b6 100644 --- a/lld/COFF/DriverUtils.cpp +++ b/lld/COFF/DriverUtils.cpp @@ -348,7 +348,7 @@ class TemporaryFile { // IsVolatileSize=true forces MemoryBuffer to not use mmap(). 
return CHECK(MemoryBuffer::getFile(path, /*FileSize=*/-1, /*RequiresNullTerminator=*/false, - /*IsVolatileSize=*/true), + /*IsVolatile=*/true), "could not open " + path); } diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index f4f52b4b6fa2f..17c5860a0d785 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -272,7 +272,7 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) { // Add a given library by searching it from input search paths. void LinkerDriver::addLibrary(StringRef name) { if (Optional path = searchLibrary(name)) - addFile(*path, /*WithLOption=*/true); + addFile(*path, /*withLOption=*/true); else error("unable to find library -l" + name); } @@ -1118,7 +1118,7 @@ void LinkerDriver::createFiles(opt::InputArgList &args) { addLibrary(arg->getValue()); break; case OPT_INPUT: - addFile(arg->getValue(), /*WithLOption=*/false); + addFile(arg->getValue(), /*withLOption=*/false); break; case OPT_defsym: { StringRef from; @@ -1386,7 +1386,7 @@ static void replaceCommonSymbols() { bss->markDead(); inputSections.push_back(bss); s->replace(Defined{s->file, s->getName(), s->binding, s->stOther, s->type, - /*Value=*/0, s->size, bss}); + /*value=*/0, s->size, bss}); }); } @@ -1540,7 +1540,7 @@ template void LinkerDriver::compileBitcodeFiles() { for (InputFile *file : lto->compile()) { auto *obj = cast>(file); - obj->parse(/*IgnoreComdats=*/true); + obj->parse(/*ignoreComdats=*/true); for (Symbol *sym : obj->getGlobalSymbols()) sym->parseSymbolVersion(); objectFiles.push_back(file); diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 89b178decba2a..fda2a544aed88 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -552,11 +552,11 @@ static void addDependentLibrary(StringRef specifier, const InputFile *f) { if (!config->dependentLibraries) return; if (fs::exists(specifier)) - driver->addFile(specifier, /*WithLOption=*/false); + driver->addFile(specifier, /*withLOption=*/false); else if (Optional s = 
findFromSearchPaths(specifier)) - driver->addFile(*s, /*WithLOption=*/true); + driver->addFile(*s, /*withLOption=*/true); else if (Optional s = searchLibraryBaseName(specifier)) - driver->addFile(*s, /*WithLOption=*/true); + driver->addFile(*s, /*withLOption=*/true); else error(toString(f) + ": unable to find library from dependent library specifier: " + diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index e227d0bb7093c..8f0aa660145a4 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -280,26 +280,26 @@ void ScriptParser::addFile(StringRef s) { SmallString<128> pathData; StringRef path = (config->sysroot + s).toStringRef(pathData); if (sys::fs::exists(path)) { - driver->addFile(saver.save(path), /*WithLOption=*/false); + driver->addFile(saver.save(path), /*withLOption=*/false); return; } } if (s.startswith("/")) { - driver->addFile(s, /*WithLOption=*/false); + driver->addFile(s, /*withLOption=*/false); } else if (s.startswith("=")) { if (config->sysroot.empty()) - driver->addFile(s.substr(1), /*WithLOption=*/false); + driver->addFile(s.substr(1), /*withLOption=*/false); else driver->addFile(saver.save(config->sysroot + "/" + s.substr(1)), - /*WithLOption=*/false); + /*withLOption=*/false); } else if (s.startswith("-l")) { driver->addLibrary(s.substr(2)); } else if (sys::fs::exists(s)) { - driver->addFile(s, /*WithLOption=*/false); + driver->addFile(s, /*withLOption=*/false); } else { if (Optional path = findFromSearchPaths(s)) - driver->addFile(saver.save(*path), /*WithLOption=*/true); + driver->addFile(saver.save(*path), /*withLOption=*/true); else setError("unable to find " + s); } diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index 33a3c87afc105..32e935126e790 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -319,7 +319,7 @@ class PPC64PDLongBranchThunk final : public PPC64LongBranchThunk { Defined *Thunk::addSymbol(StringRef name, uint8_t type, uint64_t value, InputSectionBase §ion) { - Defined *d = 
addSyntheticLocal(name, type, value, /*Size=*/0, section); + Defined *d = addSyntheticLocal(name, type, value, /*size=*/0, section); syms.push_back(d); return d; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 173be72e7fe07..c2d050d9ec855 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1278,12 +1278,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::experimental_vector_reduce_fmin: return ConcreteTTI->getMinMaxReductionCost( Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, - /*IsSigned=*/true); + /*IsUnsigned=*/true); case Intrinsic::experimental_vector_reduce_umax: case Intrinsic::experimental_vector_reduce_umin: return ConcreteTTI->getMinMaxReductionCost( Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false, - /*IsSigned=*/false); + /*IsUnsigned=*/false); case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { Type *CondTy = Type::getInt1Ty(RetTy->getContext()); diff --git a/llvm/lib/CodeGen/EdgeBundles.cpp b/llvm/lib/CodeGen/EdgeBundles.cpp index e073c84f30904..486720cadd270 100644 --- a/llvm/lib/CodeGen/EdgeBundles.cpp +++ b/llvm/lib/CodeGen/EdgeBundles.cpp @@ -27,7 +27,7 @@ ViewEdgeBundles("view-edge-bundles", cl::Hidden, char EdgeBundles::ID = 0; INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges", - /* cfg = */true, /* analysis = */ true) + /* cfg = */true, /* is_analysis = */ true) char &llvm::EdgeBundlesID = EdgeBundles::ID; diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index c3e9c185be9a4..4d29e883d879c 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -998,7 +998,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, while (!KilledRegs.empty()) { unsigned Reg = KilledRegs.pop_back_val(); for (instr_iterator I = instr_end(), E 
= instr_begin(); I != E;) { - if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false)) + if (!(--I)->addRegisterKilled(Reg, TRI, /* AddIfNotFound= */ false)) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) LV->getVarInfo(Reg).Kills.push_back(&*I); diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 2235689793715..639b588766a14 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -3040,7 +3040,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), getAnalysisIfAvailable(), MLI, - /*AfterBlockPlacement=*/true)) { + /*AfterPlacement=*/true)) { // Redo the layout if tail merging creates/removes/moves blocks. BlockToChain.clear(); ComputedEdges.clear(); diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp index 989a6a775afa1..bae3a4333bda7 100644 --- a/llvm/lib/CodeGen/MachineFrameInfo.cpp +++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp @@ -92,7 +92,7 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment); Objects.insert(Objects.begin(), StackObject(Size, Alignment, SPOffset, IsImmutable, - /*isSpillSlot=*/false, /*Alloca=*/nullptr, + /*IsSpillSlot=*/false, /*Alloca=*/nullptr, IsAliased)); return -++NumFixedObjects; } diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 9c75d0fd81e0d..4df5ce2dcedc1 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -165,7 +165,7 @@ void MachineFunction::init() { !F.hasFnAttribute("no-realign-stack"); FrameInfo = new (Allocator) MachineFrameInfo( getFnStackAlignment(STI, F), /*StackRealignable=*/CanRealignSP, - /*ForceRealign=*/CanRealignSP && + /*ForcedRealign=*/CanRealignSP && 
F.hasFnAttribute(Attribute::StackAlignment)); if (F.hasFnAttribute(Attribute::StackAlignment)) diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 72c4c0d82930f..22c23ba877e88 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -781,7 +781,7 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl &Ops, unsigned Reg = getRegForValue(Val); if (!Reg) return false; - Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false)); } } return true; @@ -830,8 +830,8 @@ bool FastISel::selectStackmap(const CallInst *I) { const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); for (unsigned i = 0; ScratchRegs[i]; ++i) Ops.push_back(MachineOperand::CreateReg( - ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, - /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false, + /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true)); // Issue CALLSEQ_START unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); @@ -941,7 +941,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) { assert(CLI.NumResultRegs == 0 && "Unexpected result register."); CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64)); CLI.NumResultRegs = 1; - Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*IsDef=*/true)); + Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*isDef=*/true)); } // Add the and constants. @@ -990,13 +990,13 @@ bool FastISel::selectPatchpoint(const CallInst *I) { unsigned Reg = getRegForValue(I->getArgOperand(i)); if (!Reg) return false; - Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false)); } } // Push the arguments from the call instruction. 
for (auto Reg : CLI.OutRegs) - Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false)); // Push live variables for the stack map. if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs)) @@ -1010,13 +1010,13 @@ bool FastISel::selectPatchpoint(const CallInst *I) { const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); for (unsigned i = 0; ScratchRegs[i]; ++i) Ops.push_back(MachineOperand::CreateReg( - ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, - /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false, + /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true)); // Add implicit defs (return values). for (auto Reg : CLI.InRegs) - Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/true, - /*IsImpl=*/true)); + Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/true, + /*isImp=*/true)); // Insert the patchpoint instruction before the call generated by the target. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc, @@ -1044,9 +1044,9 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) { return true; // don't do anything to this instruction. SmallVector Ops; Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)), - /*IsDef=*/false)); + /*isDef=*/false)); Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)), - /*IsDef=*/false)); + /*isDef=*/false)); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::PATCHABLE_EVENT_CALL)); @@ -1063,11 +1063,11 @@ bool FastISel::selectXRayTypedEvent(const CallInst *I) { return true; // don't do anything to this instruction. 
SmallVector Ops; Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)), - /*IsDef=*/false)); + /*isDef=*/false)); Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)), - /*IsDef=*/false)); + /*isDef=*/false)); Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)), - /*IsDef=*/false)); + /*isDef=*/false)); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL)); diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 8b405562904f3..8b1759246b764 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -151,7 +151,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, auto Iter = CatchObjects.find(AI); if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) { FrameIndex = MF->getFrameInfo().CreateFixedObject( - TySize, 0, /*Immutable=*/false, /*isAliased=*/true); + TySize, 0, /*IsImmutable=*/false, /*isAliased=*/true); MF->getFrameInfo().setObjectAlignment(FrameIndex, Align); } else { FrameIndex = diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 7c135864766fe..9b28c1a6c4501 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1476,7 +1476,7 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType, Flags.setZExt(); for (unsigned i = 0; i < NumParts; ++i) - Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isFixed=*/true, 0, 0)); + Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0)); } } diff --git a/llvm/lib/CodeGen/WinEHPrepare.cpp b/llvm/lib/CodeGen/WinEHPrepare.cpp index d97d8e1dec57d..cdf79374e974f 100644 --- a/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -1224,14 +1224,14 @@ void 
WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot, if (!Load) Load = new LoadInst(V->getType(), SpillSlot, Twine(V->getName(), ".wineh.reload"), - /*Volatile=*/false, IncomingBlock->getTerminator()); + /*isVolatile=*/false, IncomingBlock->getTerminator()); U.set(Load); } else { // Reload right before the old use. auto *Load = new LoadInst(V->getType(), SpillSlot, Twine(V->getName(), ".wineh.reload"), - /*Volatile=*/false, UsingInst); + /*isVolatile=*/false, UsingInst); U.set(Load); } } diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp index eb3d2070e5bcf..c575d6e782b93 100644 --- a/llvm/lib/IR/LegacyPassManager.cpp +++ b/llvm/lib/IR/LegacyPassManager.cpp @@ -310,7 +310,7 @@ void PassManagerPrettyStackEntry::print(raw_ostream &OS) const { OS << "value"; OS << " '"; - V->printAsOperand(OS, /*PrintTy=*/false, M); + V->printAsOperand(OS, /*PrintType=*/false, M); OS << "'\n"; } diff --git a/llvm/lib/Support/APSInt.cpp b/llvm/lib/Support/APSInt.cpp index d2dc860f7384f..7c48880f96eac 100644 --- a/llvm/lib/Support/APSInt.cpp +++ b/llvm/lib/Support/APSInt.cpp @@ -22,18 +22,18 @@ APSInt::APSInt(StringRef Str) { // (Over-)estimate the required number of bits. 
unsigned NumBits = ((Str.size() * 64) / 19) + 2; - APInt Tmp(NumBits, Str, /*Radix=*/10); + APInt Tmp(NumBits, Str, /*radix=*/10); if (Str[0] == '-') { unsigned MinBits = Tmp.getMinSignedBits(); if (MinBits > 0 && MinBits < NumBits) Tmp = Tmp.trunc(MinBits); - *this = APSInt(Tmp, /*IsUnsigned=*/false); + *this = APSInt(Tmp, /*isUnsigned=*/false); return; } unsigned ActiveBits = Tmp.getActiveBits(); if (ActiveBits > 0 && ActiveBits < NumBits) Tmp = Tmp.trunc(ActiveBits); - *this = APSInt(Tmp, /*IsUnsigned=*/true); + *this = APSInt(Tmp, /*isUnsigned=*/true); } void APSInt::Profile(FoldingSetNodeID& ID) const { diff --git a/llvm/lib/Support/LowLevelType.cpp b/llvm/lib/Support/LowLevelType.cpp index cffcfff7c735e..fe77cb3db4139 100644 --- a/llvm/lib/Support/LowLevelType.cpp +++ b/llvm/lib/Support/LowLevelType.cpp @@ -17,14 +17,14 @@ using namespace llvm; LLT::LLT(MVT VT) { if (VT.isVector()) { - init(/*isPointer=*/false, VT.getVectorNumElements() > 1, + init(/*IsPointer=*/false, VT.getVectorNumElements() > 1, VT.getVectorNumElements(), VT.getVectorElementType().getSizeInBits(), /*AddressSpace=*/0); } else if (VT.isValid()) { // Aggregates are no different from real scalars as far as GlobalISel is // concerned. assert(VT.getSizeInBits() != 0 && "invalid zero-sized type"); - init(/*isPointer=*/false, /*isVector=*/false, /*NumElements=*/0, + init(/*IsPointer=*/false, /*IsVector=*/false, /*NumElements=*/0, VT.getSizeInBits(), /*AddressSpace=*/0); } else { IsPointer = false; diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp index 4124121b86b1f..2baccaa0cbd7a 100644 --- a/llvm/lib/Support/raw_ostream.cpp +++ b/llvm/lib/Support/raw_ostream.cpp @@ -612,7 +612,7 @@ raw_fd_ostream::~raw_fd_ostream() { // destructing raw_ostream objects which may have errors. 
if (has_error()) report_fatal_error("IO failure on output stream: " + error().message(), - /*GenCrashDiag=*/false); + /*gen_crash_diag=*/false); } #if defined(_WIN32) diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 911946111791b..8dc2768b95976 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -2365,7 +2365,7 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { AArch64::sub_32); if ((BW < 32) && !IsBitTest) - SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*IsZExt=*/true); + SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true); // Emit the combined compare and branch instruction. SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); @@ -4272,7 +4272,7 @@ unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, const TargetRegisterClass *RC = (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; if (NeedTrunc) { - Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*IsZExt=*/false); + Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false); Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); Op0IsKill = Op1IsKill = true; } @@ -4952,7 +4952,7 @@ std::pair AArch64FastISel::getRegForGEPIndex(const Value *Idx) { MVT PtrVT = TLI.getPointerTy(DL); EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); if (IdxVT.bitsLT(PtrVT)) { - IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false); + IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false); IdxNIsKill = true; } else if (IdxVT.bitsGT(PtrVT)) llvm_unreachable("AArch64 FastISel doesn't support types larger than i64"); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp index 50871e1a0f150..f7231471c1077 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp +++ 
b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp @@ -119,11 +119,11 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) { auto T = ArrayType::get(Type::getInt64Ty(C), 2); auto *GV = new GlobalVariable( M, T, - /*IsConstant=*/false, GlobalValue::ExternalLinkage, + /*isConstant=*/false, GlobalValue::ExternalLinkage, /*Initializer=*/Constant::getNullValue(T), RuntimeHandle, /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::GLOBAL_ADDRESS, - /*IsExternallyInitialized=*/false); + /*isExternallyInitialized=*/false); LLVM_DEBUG(dbgs() << "runtime handle created: " << *GV << '\n'); for (auto U : F.users()) { diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 4eb8e0738a900..09b78115f2e3c 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2259,7 +2259,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned TargetFlags = GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG; - Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, + Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*offset=*/0, TargetFlags); if (GV->hasDLLImportStorageClass()) Callee = @@ -2914,7 +2914,7 @@ SDValue ARMTargetLowering::LowerConstantPool(SDValue Op, auto M = const_cast(DAG.getMachineFunction(). getFunction().getParent()); auto GV = new GlobalVariable( - *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C, + *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C, Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" + Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" + Twine(AFI->createPICLabelUId()) @@ -3467,7 +3467,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into two nodes. 
Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, - DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0, + DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0, TargetFlags)); if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB)) Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp index d983ef2fc4f10..4313fa5a82b54 100644 --- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp @@ -34,7 +34,7 @@ class LanaiELFObjectWriter : public MCELFObjectTargetWriter { LanaiELFObjectWriter::LanaiELFObjectWriter(uint8_t OSABI) : MCELFObjectTargetWriter(/*Is64Bit_=*/false, OSABI, ELF::EM_LANAI, - /*HasRelocationAddend=*/true) {} + /*HasRelocationAddend_=*/true) {} unsigned LanaiELFObjectWriter::getRelocType(MCContext & /*Ctx*/, const MCValue & /*Target*/, diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp index 13bd7ee4be956..8d8ba5644e103 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -36,8 +36,8 @@ class SystemZObjectWriter : public MCELFObjectTargetWriter { } // end anonymous namespace SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI) - : MCELFObjectTargetWriter(/*Is64Bit=*/true, OSABI, ELF::EM_S390, - /*HasRelocationAddend=*/ true) {} + : MCELFObjectTargetWriter(/*Is64Bit_=*/true, OSABI, ELF::EM_S390, + /*HasRelocationAddend_=*/ true) {} // Return the relocation type for an absolute value of MCFixupKind Kind. 
static unsigned getAbsoluteReloc(unsigned Kind) { diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp index a439b724d9674..b5d4d369b7265 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp @@ -194,7 +194,7 @@ static std::string toString(const APFloat &FP) { static const size_t BufBytes = 128; char Buf[BufBytes]; auto Written = FP.convertToHexString( - Buf, /*hexDigits=*/0, /*upperCase=*/false, APFloat::rmNearestTiesToEven); + Buf, /*HexDigits=*/0, /*UpperCase=*/false, APFloat::rmNearestTiesToEven); (void)Written; assert(Written != 0); assert(Written < BufBytes); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 1a24f749b5644..312b203859d51 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -115,7 +115,7 @@ class WebAssemblyFastISel final : public FastISel { private: // Utility helper routines MVT::SimpleValueType getSimpleType(Type *Ty) { - EVT VT = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true); + EVT VT = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true); return VT.isSimple() ? 
VT.getSimpleVT().SimpleTy : MVT::INVALID_SIMPLE_VALUE_TYPE; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index 0bfebc32a820e..ea9cfc00adfdb 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -81,7 +81,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( if (static_cast(Offset) <= std::numeric_limits::max()) { MI.getOperand(OffsetOperandNum).setImm(Offset); MI.getOperand(FIOperandNum) - .ChangeToRegister(FrameRegister, /*IsDef=*/false); + .ChangeToRegister(FrameRegister, /*isDef=*/false); return; } } @@ -102,7 +102,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( MachineOperand &ImmMO = Def->getOperand(1); ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset)); MI.getOperand(FIOperandNum) - .ChangeToRegister(FrameRegister, /*IsDef=*/false); + .ChangeToRegister(FrameRegister, /*isDef=*/false); return; } } @@ -127,7 +127,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex( .addReg(FrameRegister) .addReg(OffsetOp); } - MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false); + MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*isDef=*/false); } Register diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index 31cd83d942096..7b9ce02712053 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -289,7 +289,7 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, } bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { - EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true); + EVT evt = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true); if (evt == MVT::Other || !evt.isSimple()) // Unhandled type. Halt "fast" selection and bail. 
return false; diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index f50e91fdabe9d..e310fe0691171 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -3170,7 +3170,7 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized( MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8; int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize; int UnwindHelpFI = - MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*Immutable=*/false); + MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false); EHInfo.UnwindHelpFrameIdx = UnwindHelpFI; // Store -2 into UnwindHelp on function entry. We have to scan forwards past diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 34a85806f563e..23926ca80527d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3021,7 +3021,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, // load from our portion of it. This assumes that if the first part of an // argument is in memory, the rest will also be in memory. int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(), - /*Immutable=*/false); + /*IsImmutable=*/false); PartAddr = DAG.getFrameIndex(FI, PtrVT); return DAG.getLoad( ValVT, dl, Chain, PartAddr, @@ -23719,7 +23719,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // Set up a frame object for the return address. 
unsigned SlotSize = RegInfo->getSlotSize(); FrameAddrIndex = MF.getFrameInfo().CreateFixedObject( - SlotSize, /*Offset=*/0, /*IsImmutable=*/false); + SlotSize, /*SPOffset=*/0, /*IsImmutable=*/false); FuncInfo->setFAIndex(FrameAddrIndex); } return DAG.getFrameIndex(FrameAddrIndex, VT); diff --git a/llvm/lib/Target/X86/X86WinAllocaExpander.cpp b/llvm/lib/Target/X86/X86WinAllocaExpander.cpp index 22d168c1e391c..9e499db1d7ee7 100644 --- a/llvm/lib/Target/X86/X86WinAllocaExpander.cpp +++ b/llvm/lib/Target/X86/X86WinAllocaExpander.cpp @@ -250,7 +250,7 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) { // Do the probe. STI->getFrameLowering()->emitStackProbe(*MBB->getParent(), *MBB, MI, DL, - /*InPrologue=*/false); + /*InProlog=*/false); } else { // Sub BuildMI(*MBB, I, DL, diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp index e6f501e5d196c..692697d6f32e8 100644 --- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -113,7 +113,7 @@ void Lowerer::lowerCoroNoop(IntrinsicInst *II) { StructType *FrameTy = StructType::create(C, "NoopCoro.Frame"); auto *FramePtrTy = FrameTy->getPointerTo(); auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy, - /*IsVarArgs=*/false); + /*isVarArg=*/false); auto *FnPtrTy = FnTy->getPointerTo(); FrameTy->setBody({FnPtrTy, FnPtrTy}); diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 174430da171f6..58bf22bee29b4 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -378,7 +378,7 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, StructType *FrameTy = StructType::create(C, Name); auto *FramePtrTy = FrameTy->getPointerTo(); auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy, - /*IsVarArgs=*/false); + /*isVarArg=*/false); auto *FnPtrTy = FnTy->getPointerTo(); // Figure out 
how wide should be an integer type storing the suspend index. diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 8afb2f0ff115e..5458e70ff16ad 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -866,7 +866,7 @@ static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) { LLVMContext &C = M.getContext(); auto *FnTy = FunctionType::get(Type::getVoidTy(C), Type::getInt8PtrTy(C), - /*IsVarArgs=*/false); + /*isVarArg=*/false); Function *DevirtFn = Function::Create(FnTy, GlobalValue::LinkageTypes::PrivateLinkage, CORO_DEVIRT_TRIGGER_FN, &M); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 3a8ec1ecd38b9..2b9859b602f49 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -967,7 +967,7 @@ static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1, // Can it be decomposed into icmp eq (X & Mask), 0 ? if (llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1), Pred, X, UnsetBitsMask, - /*LookThruTrunc=*/false) && + /*LookThroughTrunc=*/false) && Pred == ICmpInst::ICMP_EQ) return true; // Is it icmp eq (X & Mask), 0 already? 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 73172877d9ecc..cc753ce05313e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -624,7 +624,7 @@ static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient, if (IsSigned && C1.isMinSignedValue() && C2.isAllOnesValue()) return false; - APInt Remainder(C1.getBitWidth(), /*Val=*/0ULL, IsSigned); + APInt Remainder(C1.getBitWidth(), /*val=*/0ULL, IsSigned); if (IsSigned) APInt::sdivrem(C1, C2, Quotient, Remainder); else @@ -661,7 +661,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { // (X / C1) / C2 -> X / (C1*C2) if ((IsSigned && match(Op0, m_SDiv(m_Value(X), m_APInt(C1)))) || (!IsSigned && match(Op0, m_UDiv(m_Value(X), m_APInt(C1))))) { - APInt Product(C1->getBitWidth(), /*Val=*/0ULL, IsSigned); + APInt Product(C1->getBitWidth(), /*val=*/0ULL, IsSigned); if (!multiplyOverflows(*C1, *C2, Product, IsSigned)) return BinaryOperator::Create(I.getOpcode(), X, ConstantInt::get(Ty, Product)); @@ -669,7 +669,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { if ((IsSigned && match(Op0, m_NSWMul(m_Value(X), m_APInt(C1)))) || (!IsSigned && match(Op0, m_NUWMul(m_Value(X), m_APInt(C1))))) { - APInt Quotient(C1->getBitWidth(), /*Val=*/0ULL, IsSigned); + APInt Quotient(C1->getBitWidth(), /*val=*/0ULL, IsSigned); // (X * C1) / C2 -> X / (C2 / C1) if C2 is a multiple of C1. 
if (isMultiple(*C2, *C1, Quotient, IsSigned)) { @@ -693,7 +693,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { if ((IsSigned && match(Op0, m_NSWShl(m_Value(X), m_APInt(C1))) && *C1 != C1->getBitWidth() - 1) || (!IsSigned && match(Op0, m_NUWShl(m_Value(X), m_APInt(C1))))) { - APInt Quotient(C1->getBitWidth(), /*Val=*/0ULL, IsSigned); + APInt Quotient(C1->getBitWidth(), /*val=*/0ULL, IsSigned); APInt C1Shifted = APInt::getOneBitSet( C1->getBitWidth(), static_cast(C1->getLimitedValue())); diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 450ae2f79026e..22e8b4ee2e298 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -354,7 +354,7 @@ void HWAddressSanitizer::initializeModule(Module &M) { if (!TargetTriple.isAndroid()) { Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] { - auto *GV = new GlobalVariable(M, IntptrTy, /*isConstantGlobal=*/false, + auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false, GlobalValue::ExternalLinkage, nullptr, "__hwasan_tls", nullptr, GlobalVariable::InitialExecTLSModel); diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 8ec071536ec2d..89497177524fb 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -541,7 +541,7 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) { // Find the smallest power of two bitwidth that's sufficient to hold Instr's // operands. 
auto OrigWidth = Instr->getType()->getIntegerBitWidth(); - ConstantRange OperandRange(OrigWidth, /*isFullset=*/false); + ConstantRange OperandRange(OrigWidth, /*isFullSet=*/false); for (Value *Operand : Instr->operands()) { OperandRange = OperandRange.unionWith( LVI->getConstantRange(Operand, Instr->getParent())); diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp index a0935efa264be..4f83e869b3032 100644 --- a/llvm/lib/Transforms/Scalar/Float2Int.cpp +++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp @@ -436,7 +436,7 @@ Value *Float2IntPass::convert(Instruction *I, Type *ToTy) { } else if (Instruction *VI = dyn_cast(V)) { NewOperands.push_back(convert(VI, ToTy)); } else if (ConstantFP *CF = dyn_cast(V)) { - APSInt Val(ToTy->getPrimitiveSizeInBits(), /*IsUnsigned=*/false); + APSInt Val(ToTy->getPrimitiveSizeInBits(), /*isUnsigned=*/false); bool Exact; CF->getValueAPF().convertToInteger(Val, APFloat::rmNearestTiesToEven, diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 6590f1d387f6a..59a387a186b83 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -3125,7 +3125,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand); int64_t IncOffset = IncConst->getValue()->getSExtValue(); if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr, - IncOffset, /*HaseBaseReg=*/false)) + IncOffset, /*HasBaseReg=*/false)) return false; return true; diff --git a/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/llvm/lib/Transforms/Utils/LowerSwitch.cpp index 8062fe4990832..8256e3b5f5afd 100644 --- a/llvm/lib/Transforms/Utils/LowerSwitch.cpp +++ b/llvm/lib/Transforms/Utils/LowerSwitch.cpp @@ -494,7 +494,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, KnownBits Known = computeKnownBits(Val, DL, /*Depth=*/0, 
AC, SI); // TODO Shouldn't this create a signed range? ConstantRange KnownBitsRange = - ConstantRange::fromKnownBits(Known, /*ForSigned=*/false); + ConstantRange::fromKnownBits(Known, /*IsSigned=*/false); const ConstantRange LVIRange = LVI->getConstantRange(Val, OrigBlock, SI); ConstantRange ValRange = KnownBitsRange.intersectWith(LVIRange); // We delegate removal of unreachable non-default cases to other passes. In diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 3e301d7c548eb..11651d040dc0d 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5025,7 +5025,7 @@ SwitchLookupTable::SwitchLookupTable( ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize); Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); - Array = new GlobalVariable(M, ArrayTy, /*constant=*/true, + Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true, GlobalVariable::PrivateLinkage, Initializer, "switch.table." + FuncName); Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); From 1ffceaa54361af7120c87656bd949880d5426355 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Tue, 16 Jul 2019 04:56:43 +0000 Subject: [PATCH 209/451] [RISCV] Match GNU tools canonical JALR and add aliases The canonical GNU form of JALR resembles a load/store instruction rather than placing the immediate offset as a separate argument, so match this behaviour. Also add parser-only aliases for the three-operand form, and add other shorter aliases also emitted by GNU tools. Differential Revision: https://reviews.llvm.org/D55277 Patch by James Clarke. 
llvm-svn: 366179 --- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 25 ++++++++++++----- llvm/test/CodeGen/RISCV/branch-relaxation.ll | 3 ++- llvm/test/CodeGen/RISCV/indirectbr.ll | 2 +- llvm/test/CodeGen/RISCV/option-rvc.ll | 2 +- llvm/test/MC/RISCV/compress-rv32i.s | 4 +-- llvm/test/MC/RISCV/fixups.s | 6 ++--- llvm/test/MC/RISCV/rv32e-valid.s | 2 +- llvm/test/MC/RISCV/rv32i-valid.s | 24 +++++++---------- llvm/test/MC/RISCV/rvi-aliases-valid.s | 28 ++++++++++++++++---- 9 files changed, 60 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index b017307b46173..69bde15f12187 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -357,7 +357,7 @@ def JAL : RVInstJ; + "jalr", "$rd, ${imm12}(${rs1})">; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 def BEQ : BranchCC_rri<0b000, "beq">; @@ -597,12 +597,23 @@ def : InstAlias<"bgtu $rs, $rt, $offset", def : InstAlias<"bleu $rs, $rt, $offset", (BGEU GPR:$rt, GPR:$rs, simm13_lsb0:$offset), 0>; -// "ret" has more weight since "ret" and "jr" alias the same "jalr" instruction. -def : InstAlias<"j $offset", (JAL X0, simm21_lsb0_jal:$offset)>; -def : InstAlias<"jal $offset", (JAL X1, simm21_lsb0_jal:$offset)>; -def : InstAlias<"jr $rs", (JALR X0, GPR:$rs, 0)>; -def : InstAlias<"jalr $rs", (JALR X1, GPR:$rs, 0)>; -def : InstAlias<"ret", (JALR X0, X1, 0), 2>; +def : InstAlias<"j $offset", (JAL X0, simm21_lsb0_jal:$offset)>; +def : InstAlias<"jal $offset", (JAL X1, simm21_lsb0_jal:$offset)>; + +// Non-zero offset aliases of "jalr" are the lowest weight, followed by the +// two-register form, then the one-register forms and finally "ret". 
+def : InstAlias<"jr $rs", (JALR X0, GPR:$rs, 0), 3>; +def : InstAlias<"jr ${offset}(${rs})", (JALR X0, GPR:$rs, simm12:$offset)>; +def : InstAlias<"jalr $rs", (JALR X1, GPR:$rs, 0), 3>; +def : InstAlias<"jalr ${offset}(${rs})", (JALR X1, GPR:$rs, simm12:$offset)>; +def : InstAlias<"jalr $rd, $rs", (JALR GPR:$rd, GPR:$rs, 0), 2>; +def : InstAlias<"ret", (JALR X0, X1, 0), 4>; + +// Non-canonical forms for jump targets also accepted by the assembler. +def : InstAlias<"jr $rs, $offset", (JALR X0, GPR:$rs, simm12:$offset), 0>; +def : InstAlias<"jalr $rs, $offset", (JALR X1, GPR:$rs, simm12:$offset), 0>; +def : InstAlias<"jalr $rd, $rs, $offset", (JALR GPR:$rd, GPR:$rs, simm12:$offset), 0>; + // TODO call // TODO tail diff --git a/llvm/test/CodeGen/RISCV/branch-relaxation.ll b/llvm/test/CodeGen/RISCV/branch-relaxation.ll index cd589dd9cab37..56f0f27a06488 100644 --- a/llvm/test/CodeGen/RISCV/branch-relaxation.ll +++ b/llvm/test/CodeGen/RISCV/branch-relaxation.ll @@ -25,6 +25,7 @@ tail: ret void } +; TODO: Extend simm12's MCOperandPredicate so the jalr zero is printed as a jr. 
define i32 @relax_jal(i1 %a) nounwind { ; CHECK-LABEL: relax_jal: ; CHECK: # %bb.0: @@ -32,7 +33,7 @@ define i32 @relax_jal(i1 %a) nounwind { ; CHECK-NEXT: bnez a0, .LBB1_1 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: lui a0, %hi(.LBB1_2) -; CHECK-NEXT: jalr zero, a0, %lo(.LBB1_2) +; CHECK-NEXT: jalr zero, %lo(.LBB1_2)(a0) ; CHECK-NEXT: .LBB1_1: # %iftrue ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP diff --git a/llvm/test/CodeGen/RISCV/indirectbr.ll b/llvm/test/CodeGen/RISCV/indirectbr.ll index 1d916b8e10846..e734de3c8e496 100644 --- a/llvm/test/CodeGen/RISCV/indirectbr.ll +++ b/llvm/test/CodeGen/RISCV/indirectbr.ll @@ -25,7 +25,7 @@ define i32 @indirectbr_with_offset(i8* %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) -; RV32I-NEXT: jalr zero, a0, 1380 +; RV32I-NEXT: jr 1380(a0) ; RV32I-NEXT: .LBB1_1: ; RV32I-NEXT: mv a0, zero ; RV32I-NEXT: lw ra, 12(sp) diff --git a/llvm/test/CodeGen/RISCV/option-rvc.ll b/llvm/test/CodeGen/RISCV/option-rvc.ll index 3c207bd424fa6..a0f09c052a49b 100644 --- a/llvm/test/CodeGen/RISCV/option-rvc.ll +++ b/llvm/test/CodeGen/RISCV/option-rvc.ll @@ -8,7 +8,7 @@ define i32 @add(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: add: ; CHECK: add a0, a1, a0 -; CHECK-NEXT: jalr zero, ra, 0 +; CHECK-NEXT: jalr zero, 0(ra) tail call void asm sideeffect ".option rvc", ""() %add = add nsw i32 %b, %a ret i32 %add diff --git a/llvm/test/MC/RISCV/compress-rv32i.s b/llvm/test/MC/RISCV/compress-rv32i.s index 149279c433044..1f8835faf35de 100644 --- a/llvm/test/MC/RISCV/compress-rv32i.s +++ b/llvm/test/MC/RISCV/compress-rv32i.s @@ -168,7 +168,7 @@ lw ra, 252(sp) # CHECK-ALIAS: ret # CHECK-INST: c.jr ra # CHECK: # encoding: [0x82,0x80] -jalr zero, ra, 0 +jalr zero, 0(ra) # CHECK-BYTES: 92 80 # CHECK-ALIAS: add ra, zero, tp @@ -192,7 +192,7 @@ ebreak # CHECK-ALIAS: jalr s0 # CHECK-INST: c.jalr s0 # CHECK: # encoding: [0x02,0x94] -jalr ra, s0, 0 +jalr ra, 0(s0) # CHECK-BYTES: 3e 94 # CHECK-ALIAS: add s0, s0, a5 diff --git 
a/llvm/test/MC/RISCV/fixups.s b/llvm/test/MC/RISCV/fixups.s index f0377debabb9e..ca0ee131fb8af 100644 --- a/llvm/test/MC/RISCV/fixups.s +++ b/llvm/test/MC/RISCV/fixups.s @@ -68,16 +68,16 @@ func: call func # CHECK-FIXUP: fixup A - offset: 0, value: func, kind: fixup_riscv_call # CHECK-INSTR: auipc ra, 0 -# CHECK-INSTR: jalr ra, ra, -100 +# CHECK-INSTR: jalr ra, -100(ra) .fill 10000 call func # CHECK-FIXUP: fixup A - offset: 0, value: func, kind: fixup_riscv_call # CHECK-INSTR: auipc ra, 1048574 -# CHECK-INSTR: jalr ra, ra, -1916 +# CHECK-INSTR: jalr ra, -1916(ra) .fill 20888 call func # CHECK-FIXUP: fixup A - offset: 0, value: func, kind: fixup_riscv_call # CHECK-INSTR: auipc ra, 1048568 -# CHECK-INSTR: jalr ra, ra, 1764 +# CHECK-INSTR: jalr ra, 1764(ra) diff --git a/llvm/test/MC/RISCV/rv32e-valid.s b/llvm/test/MC/RISCV/rv32e-valid.s index 25419768ad96a..42b85fb0535e6 100644 --- a/llvm/test/MC/RISCV/rv32e-valid.s +++ b/llvm/test/MC/RISCV/rv32e-valid.s @@ -14,7 +14,7 @@ auipc x1, 2 # CHECK-ASM-AND-OBJ: jal sp, 4 jal x2, 4 -# CHECK-ASM-AND-OBJ: jalr gp, gp, 4 +# CHECK-ASM-AND-OBJ: jalr gp, 4(gp) jalr x3, x3, 4 # CHECK-ASM-AND-OBJ: beq tp, t0, 8 diff --git a/llvm/test/MC/RISCV/rv32i-valid.s b/llvm/test/MC/RISCV/rv32i-valid.s index 3611d71ac4309..23ed9a2b4e847 100644 --- a/llvm/test/MC/RISCV/rv32i-valid.s +++ b/llvm/test/MC/RISCV/rv32i-valid.s @@ -94,27 +94,21 @@ jal s0, (0xff-99) # CHECK-OBJ: jal zero, 0 jal zero, . 
-# CHECK-ASM-AND-OBJ: jalr a0, a1, -2048 +# CHECK-ASM-AND-OBJ: jalr a0, -2048(a1) # CHECK-ASM: encoding: [0x67,0x85,0x05,0x80] -jalr a0, a1, -2048 -# CHECK-ASM-AND-OBJ: jalr a0, a1, -2048 +jalr a0, -2048(a1) +# CHECK-ASM-AND-OBJ: jalr a0, -2048(a1) # CHECK-ASM: encoding: [0x67,0x85,0x05,0x80] -jalr a0, a1, ~2047 -# CHECK-ASM-AND-OBJ: jalr a0, a1, 0 -# CHECK-ASM: encoding: [0x67,0x85,0x05,0x00] -jalr a0, a1, !1 -# CHECK-ASM-AND-OBJ: jalr a0, a1, -2048 -# CHECK-ASM: encoding: [0x67,0x85,0x05,0x80] -jalr a0, a1, %lo(2048) -# CHECK-ASM-AND-OBJ: jalr t2, t1, 2047 +jalr a0, %lo(2048)(a1) +# CHECK-ASM-AND-OBJ: jalr t2, 2047(t1) # CHECK-ASM: encoding: [0xe7,0x03,0xf3,0x7f] -jalr t2, t1, 2047 -# CHECK-ASM-AND-OBJ: jalr sp, zero, 256 +jalr t2, 2047(t1) +# CHECK-ASM-AND-OBJ: jalr sp, 256(zero) # CHECK-ASM: encoding: [0x67,0x01,0x00,0x10] jalr sp, zero, 256 -# CHECK-ASM-AND-OBJ: jalr a1, a2, 30 +# CHECK-ASM-AND-OBJ: jalr a1, 30(a2) # CHECK-ASM: encoding: [0xe7,0x05,0xe6,0x01] -jalr a1, a2, CONST +jalr a1, CONST(a2) # CHECK-ASM-AND-OBJ: beq s1, s1, 102 # CHECK-ASM: encoding: [0x63,0x83,0x94,0x06] diff --git a/llvm/test/MC/RISCV/rvi-aliases-valid.s b/llvm/test/MC/RISCV/rvi-aliases-valid.s index a3050aa46f621..71a50ec95aa41 100644 --- a/llvm/test/MC/RISCV/rvi-aliases-valid.s +++ b/llvm/test/MC/RISCV/rvi-aliases-valid.s @@ -139,13 +139,31 @@ jal foo # CHECK-OBJ: jal 0 # CHECK-OBJ: R_RISCV_JAL a0 jal a0 -# CHECK-S-OBJ-NOALIAS: jalr zero, s4, 0 +# CHECK-S-OBJ-NOALIAS: jalr zero, 0(s4) # CHECK-S-OBJ: jr s4 jr x20 -# CHECK-S-OBJ-NOALIAS: jalr ra, s5, 0 -# CHECK-S-OBJ: jalr s5 -jalr x21 -# CHECK-S-OBJ-NOALIAS: jalr zero, ra, 0 +# CHECK-S-OBJ-NOALIAS: jalr zero, 6(s5) +# CHECK-S-OBJ: jr 6(s5) +jr 6(x21) +# CHECK-S-OBJ-NOALIAS: jalr zero, 7(s6) +# CHECK-S-OBJ: jr 7(s6) +jr x22, 7 +# CHECK-S-OBJ-NOALIAS: jalr ra, 0(s4) +# CHECK-S-OBJ: jalr s4 +jalr x20 +# CHECK-S-OBJ-NOALIAS: jalr ra, 8(s5) +# CHECK-S-OBJ: jalr 8(s5) +jalr 8(x21) +# CHECK-S-OBJ-NOALIAS: jalr s6, 0(s7) +# CHECK-S-OBJ: jalr 
s6, s7 +jalr x22, x23 +# CHECK-S-OBJ-NOALIAS: jalr ra, 9(s8) +# CHECK-S-OBJ: jalr 9(s8) +jalr x24, 9 +# CHECK-S-OBJ-NOALIAS: jalr s9, 11(s10) +# CHECK-S-OBJ: jalr s9, 11(s10) +jalr x25, x26, 11 +# CHECK-S-OBJ-NOALIAS: jalr zero, 0(ra) # CHECK-S-OBJ: ret ret # TODO call From 47cfe8f321515418ce8d1c00708bebdef1c330eb Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 16 Jul 2019 05:50:45 +0000 Subject: [PATCH 210/451] [ELF] Fix variable names in comments after VariableName -> variableName change Also fix some typos. llvm-svn: 366181 --- lld/ELF/AArch64ErrataFix.cpp | 20 +++++------ lld/ELF/AArch64ErrataFix.h | 2 +- lld/ELF/Arch/AArch64.cpp | 4 +-- lld/ELF/Arch/ARM.cpp | 12 +++---- lld/ELF/Arch/RISCV.cpp | 2 +- lld/ELF/CallGraphSort.cpp | 6 ++-- lld/ELF/Config.h | 2 +- lld/ELF/Driver.cpp | 18 +++++----- lld/ELF/InputFiles.cpp | 12 +++---- lld/ELF/InputFiles.h | 4 +-- lld/ELF/InputSection.cpp | 6 ++-- lld/ELF/InputSection.h | 4 +-- lld/ELF/LTO.cpp | 6 ++-- lld/ELF/LinkerScript.cpp | 6 ++-- lld/ELF/Relocations.cpp | 24 ++++++------- lld/ELF/ScriptParser.h | 2 +- lld/ELF/SymbolTable.cpp | 4 +-- lld/ELF/SyntheticSections.cpp | 36 +++++++++---------- lld/ELF/SyntheticSections.h | 8 ++--- lld/ELF/Target.h | 2 +- lld/ELF/Thunks.cpp | 4 +-- lld/ELF/Writer.cpp | 8 ++--- lld/test/ELF/Inputs/gdb-index-multiple-cu-2.s | 2 +- lld/test/ELF/gdb-index-multiple-cu-2.s | 4 +-- lld/test/ELF/gdb-index-multiple-cu.s | 12 +++---- 25 files changed, 104 insertions(+), 106 deletions(-) diff --git a/lld/ELF/AArch64ErrataFix.cpp b/lld/ELF/AArch64ErrataFix.cpp index 7473ab61c4567..b2eda4dcbc4e9 100644 --- a/lld/ELF/AArch64ErrataFix.cpp +++ b/lld/ELF/AArch64ErrataFix.cpp @@ -413,8 +413,8 @@ void lld::elf::Patch843419Section::writeTo(uint8_t *buf) { write32le(buf, read32le(patchee->data().begin() + patcheeOffset)); // Apply any relocation transferred from the original PatcheeSection. 
- // For a SyntheticSection Buf already has OutSecOff added, but relocateAlloc - // also adds OutSecOff so we need to subtract to avoid double counting. + // For a SyntheticSection Buf already has outSecOff added, but relocateAlloc + // also adds outSecOff so we need to subtract to avoid double counting. this->relocateAlloc(buf - outSecOff, buf - outSecOff + getSize()); // Return address is the next instruction after the one we have just copied. @@ -427,7 +427,7 @@ void AArch64Err843419Patcher::init() { // The AArch64 ABI permits data in executable sections. We must avoid scanning // this data as if it were instructions to avoid false matches. We use the // mapping symbols in the InputObjects to identify this data, caching the - // results in SectionMap so we don't have to recalculate it each pass. + // results in sectionMap so we don't have to recalculate it each pass. // The ABI Section 4.5.4 Mapping symbols; defines local symbols that describe // half open intervals [Symbol Value, Next Symbol Value) of code and data @@ -489,7 +489,7 @@ void AArch64Err843419Patcher::insertPatches( uint64_t patchUpperBound = prevIsecLimit + target->getThunkSectionSpacing(); uint64_t outSecAddr = isd.sections.front()->getParent()->addr; - // Set the OutSecOff of patches to the place where we want to insert them. + // Set the outSecOff of patches to the place where we want to insert them. // We use a similar strategy to Thunk placement. Place patches roughly // every multiple of maximum branch range. auto patchIt = patches.begin(); @@ -511,10 +511,10 @@ void AArch64Err843419Patcher::insertPatches( (*patchIt)->outSecOff = isecLimit; } - // merge all patch sections. We use the OutSecOff assigned above to + // merge all patch sections. We use the outSecOff assigned above to // determine the insertion point. This is ok as we only merge into an // InputSectionDescription once per pass, and at the end of the pass - // assignAddresses() will recalculate all the OutSecOff values. 
+ // assignAddresses() will recalculate all the outSecOff values. std::vector tmp; tmp.reserve(isd.sections.size() + patches.size()); auto mergeCmp = [](const InputSection *a, const InputSection *b) { @@ -530,8 +530,8 @@ void AArch64Err843419Patcher::insertPatches( isd.sections = std::move(tmp); } -// Given an erratum sequence that starts at address AdrpAddr, with an -// instruction that we need to patch at PatcheeOffset from the start of +// Given an erratum sequence that starts at address adrpAddr, with an +// instruction that we need to patch at patcheeOffset from the start of // InputSection IS, create a Patch843419 Section and add it to the // Patches that we need to insert. static void implementPatch(uint64_t adrpAddr, uint64_t patcheeOffset, @@ -587,10 +587,10 @@ AArch64Err843419Patcher::patchInputSectionDescription( // LLD doesn't use the erratum sequence in SyntheticSections. if (isa(isec)) continue; - // Use SectionMap to make sure we only scan code and not inline data. + // Use sectionMap to make sure we only scan code and not inline data. // We have already sorted MapSyms in ascending order and removed consecutive // mapping symbols of the same type. Our range of executable instructions to - // scan is therefore [CodeSym->Value, DataSym->Value) or [CodeSym->Value, + // scan is therefore [codeSym->value, dataSym->value) or [codeSym->value, // section size). std::vector &mapSyms = sectionMap[isec]; diff --git a/lld/ELF/AArch64ErrataFix.h b/lld/ELF/AArch64ErrataFix.h index e4752e7bb8a9c..0548b58751ff9 100644 --- a/lld/ELF/AArch64ErrataFix.h +++ b/lld/ELF/AArch64ErrataFix.h @@ -36,7 +36,7 @@ class AArch64Err843419Patcher { void init(); - // A cache of the mapping symbols defined by the InputSecion sorted in order + // A cache of the mapping symbols defined by the InputSection sorted in order // of ascending value with redundant symbols removed. These describe // the ranges of code and data in an executable InputSection. 
std::map> sectionMap; diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 9b6599be38fc1..4d4789702f03d 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -517,7 +517,7 @@ void AArch64BtiPac::writePltHeader(uint8_t *buf) const { uint64_t plt = in.plt->getVA(); if (btiHeader) { - // PltHeader is called indirectly by Plt[N]. Prefix PltData with a BTI C + // PltHeader is called indirectly by plt[N]. Prefix pltData with a BTI C // instruction. memcpy(buf, btiData, sizeof(btiData)); buf += sizeof(btiData); @@ -538,7 +538,7 @@ void AArch64BtiPac::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr, int32_t index, unsigned relOff) const { // The PLT entry is of the form: - // [BtiData] AddrInst (PacBr | StdBr) [NopData] + // [btiData] addrInst (pacBr | stdBr) [nopData] const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c const uint8_t addrInst[] = { 0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.plt.got[n])) diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index b69974fc72c67..64adc33c07ae2 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -299,13 +299,13 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file, uint32_t ARM::getThunkSectionSpacing() const { // The placing of pre-created ThunkSections is controlled by the value - // ThunkSectionSpacing returned by getThunkSectionSpacing(). The aim is to + // thunkSectionSpacing returned by getThunkSectionSpacing(). The aim is to // place the ThunkSection such that all branches from the InputSections // prior to the ThunkSection can reach a Thunk placed at the end of the // ThunkSection. 
Graphically: - // | up to ThunkSectionSpacing .text input sections | + // | up to thunkSectionSpacing .text input sections | // | ThunkSection | - // | up to ThunkSectionSpacing .text input sections | + // | up to thunkSectionSpacing .text input sections | // | ThunkSection | // Pre-created ThunkSections are spaced roughly 16MiB apart on ARMv7. This @@ -316,14 +316,14 @@ uint32_t ARM::getThunkSectionSpacing() const { // Thumb B.W range +/- 1MiB // If a branch cannot reach a pre-created ThunkSection a new one will be // created so we can handle the rare cases of a Thumb 2 conditional branch. - // We intentionally use a lower size for ThunkSectionSpacing than the maximum + // We intentionally use a lower size for thunkSectionSpacing than the maximum // branch range so the end of the ThunkSection is more likely to be within // range of the branch instruction that is furthest away. The value we shorten - // ThunkSectionSpacing by is set conservatively to allow us to create 16,384 + // thunkSectionSpacing by is set conservatively to allow us to create 16,384 // 12 byte Thunks at any offset in a ThunkSection without risk of a branch to // one of the Thunks going out of range. - // On Arm the ThunkSectionSpacing depends on the range of the Thumb Branch + // On Arm the thunkSectionSpacing depends on the range of the Thumb Branch // range. On earlier Architectures such as ARMv4, ARMv5 and ARMv6 (except // ARMv6T2) the range is +/- 4MiB. 
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index ddd9b9f7e5cd4..6f16ade571773 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -144,7 +144,7 @@ void RISCV::writePltHeader(uint8_t *buf) const { // 1: auipc t2, %pcrel_hi(.got.plt) // sub t1, t1, t3 // l[wd] t3, %pcrel_lo(1b)(t2); t3 = _dl_runtime_resolve - // addi t1, t1, -PltHeaderSize-12; t1 = &.plt[i] - &.plt[0] + // addi t1, t1, -pltHeaderSize-12; t1 = &.plt[i] - &.plt[0] // addi t0, t2, %pcrel_lo(1b) // srli t1, t1, (rv64?1:2); t1 = &.got.plt[i] - &.got.plt[0] // l[wd] t0, Wordsize(t0); t0 = link_map diff --git a/lld/ELF/CallGraphSort.cpp b/lld/ELF/CallGraphSort.cpp index c9a62f69accad..9aaadd4818336 100644 --- a/lld/ELF/CallGraphSort.cpp +++ b/lld/ELF/CallGraphSort.cpp @@ -177,7 +177,7 @@ void CallGraphSort::groupClusters() { }); for (int si : sortedSecs) { - // Clusters[SI] is the same as SecToClusters[SI] here because it has not + // clusters[si] is the same as secToClusters[si] here because it has not // been merged into another cluster yet. Cluster &c = clusters[si]; @@ -233,8 +233,8 @@ DenseMap CallGraphSort::run() { return orderMap; } - // Print the symbols ordered by C3, in the order of increasing CurOrder - // Instead of sorting all the OrderMap, just repeat the loops above. + // Print the symbols ordered by C3, in the order of increasing curOrder + // Instead of sorting all the orderMap, just repeat the loops above. for (const Cluster &c : clusters) for (int secIndex : c.sections) // Search all the symbols in the file of the section diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 54991c7277829..ff9d3dc0933c0 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -249,7 +249,7 @@ struct Configuration { // True if the target is little-endian. False if big-endian. bool isLE; - // endianness::little if IsLE is true. endianness::big otherwise. + // endianness::little if isLE is true. endianness::big otherwise. 
llvm::support::endianness endianness; // True if the target is the little-endian MIPS64. diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 17c5860a0d785..98551d2cb34dc 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1255,7 +1255,7 @@ static uint64_t getCommonPageSize(opt::InputArgList &args) { warn("-z common-page-size set, but paging disabled by omagic or nmagic"); return 1; } - // CommonPageSize can't be larger than MaxPageSize. + // commonPageSize can't be larger than maxPageSize. if (val > config->maxPageSize) val = config->maxPageSize; return val; @@ -1263,7 +1263,7 @@ static uint64_t getCommonPageSize(opt::InputArgList &args) { // Parses -image-base option. static Optional getImageBase(opt::InputArgList &args) { - // Because we are using "Config->MaxPageSize" here, this function has to be + // Because we are using "Config->maxPageSize" here, this function has to be // called after the variable is initialized. auto *arg = args.getLastArg(OPT_image_base); if (!arg) @@ -1406,8 +1406,8 @@ static void demoteSharedSymbols() { }); } -// The section referred to by S is considered address-significant. Set the -// KeepUnique flag on the section if appropriate. +// The section referred to by `s` is considered address-significant. Set the +// keepUnique flag on the section if appropriate. static void markAddrsig(Symbol *s) { if (auto *d = dyn_cast_or_null(s)) if (d->section) @@ -1772,7 +1772,7 @@ template void LinkerDriver::link(opt::InputArgList &args) { if (args.hasArg(OPT_exclude_libs)) excludeLibs(args); - // Create ElfHeader early. We need a dummy section in + // Create elfHeader early. We need a dummy section in // addReservedSymbols to mark the created symbols as not absolute. 
Out::elfHeader = make("", 0, SHF_ALLOC); Out::elfHeader->size = sizeof(typename ELFT::Ehdr); @@ -1854,14 +1854,14 @@ template void LinkerDriver::link(opt::InputArgList &args) { target = getTarget(); config->eflags = target->calcEFlags(); - // MaxPageSize (sometimes called abi page size) is the maximum page size that + // maxPageSize (sometimes called abi page size) is the maximum page size that // the output can be run on. For example if the OS can use 4k or 64k page - // sizes then MaxPageSize must be 64 for the output to be useable on both. + // sizes then maxPageSize must be 64k for the output to be useable on both. // All important alignment decisions must use this value. config->maxPageSize = getMaxPageSize(args); - // CommonPageSize is the most common page size that the output will be run on. + // commonPageSize is the most common page size that the output will be run on. // For example if an OS can use 4k or 64k page sizes and 4k is more common - // than 64k then CommonPageSize is set to 4k. CommonPageSize can be used for + // than 64k then commonPageSize is set to 4k. commonPageSize can be used for // optimizations such as DATA_SEGMENT_ALIGN in linker scripts. LLD's use of it // is limited to writing trap instructions on the last executable segment. config->commonPageSize = getCommonPageSize(args); diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index fda2a544aed88..98b88283cf093 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -229,7 +229,7 @@ static std::string getSrcMsgAux(ObjFile &file, const Symbol &sym, file.getVariableLoc(sym.getName())) return createFileLineMsg(fileLine->first, fileLine->second); - // File.SourceFile contains STT_FILE symbol, and that is a last resort. + // File.sourceFile contains STT_FILE symbol, and that is a last resort. 
return file.sourceFile; } @@ -269,7 +269,7 @@ template void ObjFile::initializeDwarf() { continue; lineTables.push_back(lt); - // Loop over variable records and insert them to VariableLoc. + // Loop over variable records and insert them to variableLoc. for (const auto &entry : cu->dies()) { DWARFDie die(cu.get(), &entry); // Skip all tags that are not variables. @@ -290,7 +290,7 @@ template void ObjFile::initializeDwarf() { // Get the line number on which the variable is declared. unsigned line = dwarf::toUnsigned(die.find(dwarf::DW_AT_decl_line), 0); - // Here we want to take the variable name to add it into VariableLoc. + // Here we want to take the variable name to add it into variableLoc. // Variable can have regular and linkage name associated. At first, we try // to get linkage name as it can be different, for example when we have // two variables in different namespaces of the same object. Use common @@ -450,7 +450,7 @@ template ArrayRef ObjFile::getGlobalSymbols() { } template void ObjFile::parse(bool ignoreComdats) { - // Read a section table. JustSymbols is usually false. + // Read a section table. justSymbols is usually false. if (this->justSymbols) initializeJustSymbols(); else @@ -1178,7 +1178,7 @@ static std::vector parseVerdefs(const uint8_t *base, // We cannot determine the largest verdef identifier without inspecting // every Elf_Verdef, but both bfd and gold assign verdef identifiers // sequentially starting from 1, so we predict that the largest identifier - // will be VerdefCount. + // will be verdefCount. unsigned verdefCount = sec->sh_info; std::vector verdefs(verdefCount + 1); @@ -1262,7 +1262,7 @@ template void SharedFile::parse() { return; } - // Search for a DT_SONAME tag to initialize this->SoName. + // Search for a DT_SONAME tag to initialize this->soName. 
for (const Elf_Dyn &dyn : dynamicTags) { if (dyn.d_tag == DT_NEEDED) { uint64_t val = dyn.getVal(); diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 760f72fd198a4..5ccc3d402b376 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -117,7 +117,7 @@ class InputFile { // True if this is an argument for --just-symbols. Usually false. bool justSymbols = false; - // OutSecOff of .got2 in the current file. This is used by PPC32 -fPIC/-fPIE + // outSecOff of .got2 in the current file. This is used by PPC32 -fPIC/-fPIE // to compute offsets in PLT call stubs. uint32_t ppc32Got2OutSecOff = 0; @@ -132,7 +132,7 @@ class InputFile { // [.got, .got + 0xFFFC]. bool ppc64SmallCodeModelTocRelocs = false; - // GroupId is used for --warn-backrefs which is an optional error + // groupId is used for --warn-backrefs which is an optional error // checking feature. All files within the same --{start,end}-group or // --{start,end}-lib get the same group ID. Otherwise, each file gets a new // group ID. For more info, see checkDependency() in SymbolTable.cpp. diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 1ca520c3740ef..a024ac307b0a9 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -206,9 +206,9 @@ OutputSection *SectionBase::getOutputSection() { return sec ? sec->getParent() : nullptr; } -// When a section is compressed, `RawData` consists with a header followed +// When a section is compressed, `rawData` consists with a header followed // by zlib-compressed data. This function parses a header to initialize -// `UncompressedSize` member and remove the header from `RawData`. +// `uncompressedSize` member and remove the header from `rawData`. 
void InputSectionBase::parseCompressedHeader() { using Chdr64 = typename ELF64LE::Chdr; using Chdr32 = typename ELF32LE::Chdr; @@ -306,7 +306,7 @@ std::string InputSectionBase::getLocation(uint64_t offset) { return info->FileName + ":" + std::to_string(info->Line) + ":(" + secAndOffset + ")"; - // File->SourceFile contains STT_FILE symbol that contains a + // File->sourceFile contains STT_FILE symbol that contains a // source file name. If it's missing, we use an object file name. std::string srcFile = getFile()->sourceFile; if (srcFile.empty()) diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index dcd4848a0ab1e..3a974074e0e51 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -219,8 +219,8 @@ class InputSectionBase : public SectionBase { mutable ArrayRef rawData; - // This field stores the uncompressed size of the compressed data in RawData, - // or -1 if RawData is not compressed (either because the section wasn't + // This field stores the uncompressed size of the compressed data in rawData, + // or -1 if rawData is not compressed (either because the section wasn't // compressed in the first place, or because we ended up uncompressing it). // Since the feature is not used often, this is usually -1. mutable int64_t uncompressedSize = -1; diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 7230cc51044e4..28d4bfe77c5d4 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -124,11 +124,11 @@ static lto::Config createConfig() { } BitcodeCompiler::BitcodeCompiler() { - // Initialize IndexFile. + // Initialize indexFile. if (!config->thinLTOIndexOnlyArg.empty()) indexFile = openFile(config->thinLTOIndexOnlyArg); - // Initialize LTOObj. + // Initialize ltoObj. 
lto::ThinBackend backend; if (config->thinLTOIndexOnly) { auto onIndexWrite = [&](StringRef s) { thinIndices.erase(s); }; @@ -142,7 +142,7 @@ BitcodeCompiler::BitcodeCompiler() { ltoObj = llvm::make_unique(createConfig(), backend, config->ltoPartitions); - // Initialize UsedStartStop. + // Initialize usedStartStop. symtab->forEachSymbol([&](Symbol *sym) { StringRef s = sym->getName(); for (StringRef prefix : {"__start_", "__stop_"}) diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index ce009149710fe..49e44d7804761 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -115,7 +115,7 @@ void LinkerScript::expandMemoryRegions(uint64_t size) { if (ctx->memRegion) expandMemoryRegion(ctx->memRegion, size, ctx->memRegion->name, ctx->outSec->name); - // Only expand the LMARegion if it is different from MemRegion. + // Only expand the LMARegion if it is different from memRegion. if (ctx->lmaRegion && ctx->memRegion != ctx->lmaRegion) expandMemoryRegion(ctx->lmaRegion, size, ctx->lmaRegion->name, ctx->outSec->name); @@ -1035,8 +1035,8 @@ static uint64_t getInitialDot() { return config->imageBase ? *config->imageBase : 0; uint64_t startAddr = UINT64_MAX; - // The Sections with -T
<section> have been sorted in order of ascending - // address. We must lower StartAddr if the lowest -T<section address> as + // The sections with -T<section> have been sorted in order of ascending + // address. We must lower startAddr if the lowest -T<section address>
    as // calls to setDot() must be monotonically increasing. for (auto &kv : config->sectionStartMap) startAddr = std::min(startAddr, kv.second); diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index cd71961804b56..ee48f48081360 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -829,7 +829,7 @@ class OffsetGetter { // Translates offsets in input sections to offsets in output sections. // Given offset must increase monotonically. We assume that Piece is - // sorted by InputOff. + // sorted by inputOff. uint64_t get(uint64_t off) { if (pieces.empty()) return off; @@ -859,10 +859,10 @@ static void addRelativeReloc(InputSectionBase *isec, uint64_t offsetInSec, RelType type) { Partition &part = isec->getPartition(); - // Add a relative relocation. If RelrDyn section is enabled, and the + // Add a relative relocation. If relrDyn section is enabled, and the // relocation offset is guaranteed to be even, add the relocation to - // the RelrDyn section, otherwise add it to the RelaDyn section. - // RelrDyn sections don't support odd offsets. Also, RelrDyn sections + // the relrDyn section, otherwise add it to the relaDyn section. + // relrDyn sections don't support odd offsets. Also, relrDyn sections // don't store the addend values, so we must write it to the relocated // address. if (part.relrDyn && isec->alignment >= 2 && offsetInSec % 2 == 0) { @@ -922,7 +922,7 @@ static bool canDefineSymbolInExecutable(Symbol &sym) { // executable will preempt it. // Note that we want the visibility of the shared symbol itself, not // the visibility of the symbol in the output file we are producing. That is - // why we use Sym.StOther. + // why we use Sym.stOther. if ((sym.stOther & 0x3) == STV_DEFAULT) return true; @@ -1010,7 +1010,7 @@ static void processRelocAux(InputSectionBase &sec, RelExpr expr, RelType type, // Copy relocations (for STT_OBJECT) and canonical PLT (for STT_FUNC) are only // possible in an executable. 
// - // Among R_ABS relocatoin types, SymbolicRel has the same size as the word + // Among R_ABS relocatoin types, symbolicRel has the same size as the word // size. Others have fewer bits and may cause runtime overflow in -pie/-shared // mode. Disallow them. if (config->shared || @@ -1237,8 +1237,8 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, // GOT-generating or PLT-generating, the handling of an ifunc is // relatively straightforward. We create a PLT entry in Iplt, which is // usually at the end of .plt, which makes an indirect call using a - // matching GOT entry in IgotPlt, which is usually at the end of .got.plt. - // The GOT entry is relocated using an IRELATIVE relocation in RelaIplt, + // matching GOT entry in igotPlt, which is usually at the end of .got.plt. + // The GOT entry is relocated using an IRELATIVE relocation in relaIplt, // which is usually at the end of .rela.plt. Unlike most relocations in // .rela.plt, which may be evaluated lazily without -z now, dynamic // loaders evaluate IRELATIVE relocs eagerly, which means that for @@ -1274,13 +1274,13 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, // variable containing a pointer to the ifunc) needs to be relocated in // the exact same way as a GOT entry, so we can avoid needing to make the // PLT entry canonical by translating such relocations into IRELATIVE - // relocations in the RelaIplt. + // relocations in the relaIplt. if (!sym.isInPlt()) { // Create PLT and GOTPLT slots for the symbol. sym.isInIplt = true; // Create a copy of the symbol to use as the target of the IRELATIVE - // relocation in the IgotPlt. This is in case we make the PLT canonical + // relocation in the igotPlt. This is in case we make the PLT canonical // later, which would overwrite the original symbol. // // FIXME: Creating a copy of the symbol here is a bit of a hack. 
All @@ -1526,7 +1526,7 @@ void ThunkCreator::mergeThunks(ArrayRef outputSections) { // ISD->ThunkSections contains all created ThunkSections, including // those inserted in previous passes. Extract the Thunks created this - // pass and order them in ascending OutSecOff. + // pass and order them in ascending outSecOff. std::vector newThunks; for (const std::pair ts : isd->thunkSections) if (ts.second == pass) @@ -1536,7 +1536,7 @@ void ThunkCreator::mergeThunks(ArrayRef outputSections) { return a->outSecOff < b->outSecOff; }); - // Merge sorted vectors of Thunks and InputSections by OutSecOff + // Merge sorted vectors of Thunks and InputSections by outSecOff std::vector tmp; tmp.reserve(isd->sections.size() + newThunks.size()); diff --git a/lld/ELF/ScriptParser.h b/lld/ELF/ScriptParser.h index 110684761c76d..c953fb302b9a7 100644 --- a/lld/ELF/ScriptParser.h +++ b/lld/ELF/ScriptParser.h @@ -16,7 +16,7 @@ namespace lld { namespace elf { // Parses a linker script. Calling this function updates -// Config and ScriptConfig. +// lld::elf::config and lld::elf::script. void readLinkerScript(MemoryBufferRef mb); // Parses a version script. diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index fde08064a606d..3faeed8c2bdc3 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -99,7 +99,7 @@ Symbol *SymbolTable::find(StringRef name) { return sym; } -// Initialize DemangledSyms with a map from demangled symbols to symbol +// Initialize demangledSyms with a map from demangled symbols to symbol // objects. Used to handle "extern C++" directive in version scripts. // // The map will contain all demangled symbols. That can be very large, @@ -225,7 +225,7 @@ void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId) { b->versionId = versionId; } -// This function processes version scripts by updating VersionId +// This function processes version scripts by updating the versionId // member of symbols. 
// If there's only one anonymous version definition in a version // script file, the script does not actually define any symbol version, diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 918849bd6ab22..f6d0f190d84d0 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -579,11 +579,9 @@ void EhFrameSection::writeTo(uint8_t *buf) { GotSection::GotSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, config->wordsize, ".got") { - // PPC64 saves the ElfSym::GlobalOffsetTable .TOC. as the first entry in the - // .got. If there are no references to .TOC. in the symbol table, - // ElfSym::GlobalOffsetTable will not be defined and we won't need to save - // .TOC. in the .got. When it is defined, we increase NumEntries by the number - // of entries used to emit ElfSym::GlobalOffsetTable. + // If ElfSym::globalOffsetTable is relative to .got and is referenced, + // increase numEntries by the number of entries used to emit + // ElfSym::globalOffsetTable. if (ElfSym::globalOffsetTable && !target->gotBaseSymInGotPlt) numEntries += target->gotHeaderEntriesNum; } @@ -861,9 +859,9 @@ void MipsGotSection::build() { } else { // If this is the first time we failed to merge with the primary GOT, // MergedGots.back() will also be the primary GOT. We must make sure not - // to try to merge again with IsPrimary=false, as otherwise, if the + // to try to merge again with isPrimary=false, as otherwise, if the // inputs are just right, we could allow the primary GOT to become 1 or 2 - // words too big due to ignoring the header size. + // words bigger due to ignoring the header size. 
if (mergedGots.size() == 1 || !tryMergeGots(mergedGots.back(), srcGot, false)) { mergedGots.emplace_back(); @@ -888,7 +886,7 @@ void MipsGotSection::build() { for (std::pair &p : got.pagesMap) { // For each output section referenced by GOT page relocations calculate - // and save into PagesMap an upper bound of MIPS GOT entries required + // and save into pagesMap an upper bound of MIPS GOT entries required // to store page addresses of local symbols. We assume the worst case - // each 64kb page of the output section has at least one GOT relocation // against it. And take in account the case when the section intersects @@ -910,7 +908,7 @@ void MipsGotSection::build() { } } - // Update Symbol::GotIndex field to use this + // Update Symbol::gotIndex field to use this // value later in the `sortMipsSymbols` function. for (auto &p : primGot->global) p.first->gotIndex = p.second; @@ -936,7 +934,7 @@ void MipsGotSection::build() { } else { // When building a shared library we still need a dynamic relocation // for the module index. Therefore only checking for - // S->IsPreemptible is not sufficient (this happens e.g. for + // S->isPreemptible is not sufficient (this happens e.g. for // thread-locals that have been marked as local through a linker script) if (!s->isPreemptible && !config->isPic) continue; @@ -1140,7 +1138,7 @@ StringTableSection::StringTableSection(StringRef name, bool dynamic) addString(""); } -// Adds a string to the string table. If HashIt is true we hash and check for +// Adds a string to the string table. If `hashIt` is true we hash and check for // duplicates. It is optional because the name of global symbols are already // uniqued and hashing them again has a big cost for a small value: uniquing // them with some other string that happens to be the same. @@ -1335,9 +1333,9 @@ template void DynamicSection::finalizeContents() { } // .rel[a].plt section usually consists of two parts, containing plt and // iplt relocations. 
It is possible to have only iplt relocations in the - // output. In that case RelaPlt is empty and have zero offset, the same offset - // as RelaIplt have. And we still want to emit proper dynamic tags for that - // case, so here we always use RelaPlt as marker for the begining of + // output. In that case relaPlt is empty and have zero offset, the same offset + // as relaIplt has. And we still want to emit proper dynamic tags for that + // case, so here we always use relaPlt as marker for the begining of // .rel[a].plt section. if (isMain && (in.relaPlt->isNeeded() || in.relaIplt->isNeeded())) { addInSec(DT_JMPREL, in.relaPlt); @@ -2365,7 +2363,7 @@ void PltSection::writeTo(uint8_t *buf) { RelocationBaseSection *relSec = isIplt ? in.relaIplt : in.relaPlt; - // The IPlt is immediately after the Plt, account for this in RelOff + // The IPlt is immediately after the Plt, account for this in relOff size_t pltOff = isIplt ? in.plt->getSize() : 0; for (size_t i = 0, e = entries.size(); i != e; ++i) { @@ -2491,9 +2489,9 @@ readPubNamesAndTypes(const LLDDwarfObj &obj, for (const DWARFSection *pub : {&pubNames, &pubTypes}) { DWARFDebugPubTable table(obj, *pub, config->isLE, true); for (const DWARFDebugPubTable::Set &set : table.getData()) { - // The value written into the constant pool is Kind << 24 | CuIndex. As we + // The value written into the constant pool is kind << 24 | cuIndex. As we // don't know how many compilation units precede this object to compute - // CuIndex, we compute (Kind << 24 | CuIndexInThisObject) instead, and add + // cuIndex, we compute (kind << 24 | cuIndexInThisObject) instead, and add // the number of preceding compilation units later. uint32_t i = lower_bound(cUs, set.Offset, @@ -2945,7 +2943,7 @@ void MergeTailSection::finalizeContents() { // finalize() fixed tail-optimized strings, so we can now get // offsets of strings. Get an offset for each string and save it - // to a corresponding StringPiece for easy access. 
+ // to a corresponding SectionPiece for easy access. for (MergeInputSection *sec : sections) for (size_t i = 0, e = sec->pieces.size(); i != e; ++i) if (sec->pieces[i].live) @@ -3330,7 +3328,7 @@ bool PPC32Got2Section::isNeeded() const { void PPC32Got2Section::finalizeContents() { // PPC32 may create multiple GOT sections for -fPIC/-fPIE, one per file in - // .got2 . This function computes OutSecOff of each .got2 to be used in + // .got2 . This function computes outSecOff of each .got2 to be used in // PPC32PltCallStub::writeTo(). The purpose of this empty synthetic section is // to collect input sections named ".got2". uint32_t offset = 0; diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index ab2995554c478..1c4dd06e02776 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -434,8 +434,8 @@ class DynamicReloc { uint32_t getSymIndex(SymbolTableBaseSection *symTab) const; // Computes the addend of the dynamic relocation. Note that this is not the - // same as the Addend member variable as it also includes the symbol address - // if UseSymVA is true. + // same as the addend member variable as it also includes the symbol address + // if useSymVA is true. int64_t computeAddend() const; RelType type; @@ -1026,7 +1026,7 @@ class ARMExidxSyntheticSection : public SyntheticSection { // thunks including ARM interworking and Mips LA25 PI to non-PI thunks. class ThunkSection : public SyntheticSection { public: - // ThunkSection in OS, with desired OutSecOff of Off + // ThunkSection in OS, with desired outSecOff of Off ThunkSection(OutputSection *os, uint64_t off); // Add a newly created Thunk to this container: @@ -1044,7 +1044,7 @@ class ThunkSection : public SyntheticSection { size_t size = 0; }; -// Used to compute OutSecOff of .got2 in each object file. This is needed to +// Used to compute outSecOff of .got2 in each object file. This is needed to // synthesize PLT entries for PPC32 Secure PLT ABI. 
class PPC32Got2Section final : public SyntheticSection { public: diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index 00c93a9c2e6b3..effa6001f6d96 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -73,7 +73,7 @@ class TargetInfo { virtual bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end, uint8_t stOther) const; - // Return true if we can reach Dst from Src with Relocation RelocType + // Return true if we can reach dst from src with RelType type. virtual bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const; diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index 32e935126e790..73208f932031d 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -388,7 +388,7 @@ static uint64_t getARMThunkDestVA(const Symbol &s) { } // This function returns true if the target is not Thumb and is within 2^26, and -// it has not previously returned false (see comment for MayUseShortThunk). +// it has not previously returned false (see comment for mayUseShortThunk). bool ARMThunk::getMayUseShortThunk() { if (!mayUseShortThunk) return false; @@ -426,7 +426,7 @@ bool ARMThunk::isCompatibleWith(const InputSection &isec, } // This function returns true if the target is Thumb and is within 2^25, and -// it has not previously returned false (see comment for MayUseShortThunk). +// it has not previously returned false (see comment for mayUseShortThunk). bool ThumbThunk::getMayUseShortThunk() { if (!mayUseShortThunk) return false; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index d1dc8ef7606c4..3cf7b056064f3 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -504,7 +504,7 @@ template static void createSyntheticSections() { config->isRela ? ".rela.plt" : ".rel.plt", /*sort=*/false); add(in.relaPlt); - // The RelaIplt immediately follows .rel.plt (.rel.dyn for ARM) to ensure + // The relaIplt immediately follows .rel.plt (.rel.dyn for ARM) to ensure // that the IRelative relocations are processed last by the dynamic loader. 
// We cannot place the iplt section in .rel.dyn when Android relocation // packing is enabled because that would cause a section type mismatch. @@ -1023,7 +1023,7 @@ template void Writer::addRelIpltSymbols() { // By default, __rela_iplt_{start,end} belong to a dummy section 0 // because .rela.plt might be empty and thus removed from output. - // We'll override Out::ElfHeader with In.RelaIplt later when we are + // We'll override Out::elfHeader with In.relaIplt later when we are // sure that .rela.plt exists in output. ElfSym::relaIpltStart = addOptionalRegular( config->isRela ? "__rela_iplt_start" : "__rel_iplt_start", @@ -1424,7 +1424,7 @@ template void Writer::sortSections() { continue; os->sortRank = getSectionRank(os); - // We want to assign rude approximation values to OutSecOff fields + // We want to assign rude approximation values to outSecOff fields // to know the relative order of the input sections. We use it for // sorting SHF_LINK_ORDER sections. See resolveShfLinkOrder(). uint64_t i = 0; @@ -1884,7 +1884,7 @@ template void Writer::finalizeSections() { finalizeSynthetic(in.partIndex); // Dynamic section must be the last one in this list and dynamic - // symbol table section (DynSymTab) must be the first one. + // symbol table section (dynSymTab) must be the first one. 
for (Partition &part : partitions) { finalizeSynthetic(part.armExidx); finalizeSynthetic(part.dynSymTab); diff --git a/lld/test/ELF/Inputs/gdb-index-multiple-cu-2.s b/lld/test/ELF/Inputs/gdb-index-multiple-cu-2.s index 80c738eab1ff3..997eb6be3e523 100644 --- a/lld/test/ELF/Inputs/gdb-index-multiple-cu-2.s +++ b/lld/test/ELF/Inputs/gdb-index-multiple-cu-2.s @@ -31,7 +31,7 @@ _start: .byte 0 .Lcu_end0: -# .debug_gnu_pubnames has just one set, associated with .Lcu_begin1 (CuIndex: 1) +# .debug_gnu_pubnames has just one set, associated with .Lcu_begin1 (cuIndex: 1) .section .debug_gnu_pubnames,"",@progbits .long .LpubNames_end0 - .LpubNames_begin0 .LpubNames_begin0: diff --git a/lld/test/ELF/gdb-index-multiple-cu-2.s b/lld/test/ELF/gdb-index-multiple-cu-2.s index 9cf2a0c826934..06316860871da 100644 --- a/lld/test/ELF/gdb-index-multiple-cu-2.s +++ b/lld/test/ELF/gdb-index-multiple-cu-2.s @@ -4,8 +4,8 @@ # RUN: ld.lld --gdb-index %t.o %t1.o -o %t # RUN: llvm-dwarfdump -gdb-index %t | FileCheck %s -# %t.o has 2 CUs while %t1 has 1, thus _start in %t1.o should have CuIndex 2. -# Attributes << 24 | CuIndex = 48 << 24 | 2 = 0x30000002 +# %t.o has 2 CUs while %t1 has 1, thus _start in %t1.o should have cuIndex 2. 
+# attributes << 24 | cuIndex = 48 << 24 | 2 = 0x30000002 # CHECK: Constant pool # CHECK-NEXT: 0(0x0): 0x30000002 diff --git a/lld/test/ELF/gdb-index-multiple-cu.s b/lld/test/ELF/gdb-index-multiple-cu.s index 9a8c2eae78d4a..8702d9f3924db 100644 --- a/lld/test/ELF/gdb-index-multiple-cu.s +++ b/lld/test/ELF/gdb-index-multiple-cu.s @@ -3,10 +3,10 @@ # RUN: ld.lld --gdb-index %t.o -o %t # RUN: llvm-dwarfdump -gdb-index %t | FileCheck %s -# CuIndexAndAttrs of _start: -# Attributes << 24 | CuIndex = 48 << 24 | 0 = 0x30000000 -# CuIndexAndAttrs of foo: -# Attributes << 24 | CuIndex = 48 << 24 | 1 = 0x30000001 +# cuIndexAndAttrs of _start: +# attributes << 24 | cuIndex = 48 << 24 | 0 = 0x30000000 +# cuIndexAndAttrs of foo: +# attributes << 24 | cuIndex = 48 << 24 | 1 = 0x30000001 # CHECK: Symbol table # CHECK-DAG: String name: _start, CU vector index: 0 # CHECK-DAG: String name: foo, CU vector index: 1 @@ -63,7 +63,7 @@ foo: # Swap sets to test the case where pubnames are in a # different order than the CUs they refer to. .section .debug_gnu_pubnames,"",@progbits - # CuIndex: 1 + # cuIndex: 1 .long .LpubNames_end1 - .LpubNames_begin1 .LpubNames_begin1: .short 2 # Version @@ -75,7 +75,7 @@ foo: .long 0 .LpubNames_end1: - # CuIndex: 0 + # cuIndex: 0 .long .LpubNames_end0 - .LpubNames_begin0 .LpubNames_begin0: .short 2 # Version From c0b2ed664bc0c793051eb95e89d51c02aa5871f7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 16 Jul 2019 05:52:27 +0000 Subject: [PATCH 211/451] [X86] In combineStore, don't convert v2f32 load/store pairs to f64 loads/stores. Type legalization can take care of this. This gives DAG combine a little more time with the original types. 
llvm-svn: 366182 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 23926ca80527d..62499a28dff85 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -40093,7 +40093,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat); bool F64IsLegal = !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2(); - if ((VT.isVector() || + if (((VT.isVector() && !VT.isFloatingPoint()) || (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit())) && isa(St->getValue()) && !cast(St->getValue())->isVolatile() && @@ -40116,8 +40116,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store // pair instead. if (Subtarget.is64Bit() || F64IsLegal) { - MVT LdVT = (Subtarget.is64Bit() && - (!VT.isFloatingPoint() || !F64IsLegal)) ? MVT::i64 : MVT::f64; + MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64; SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getMemOperand()); From 3e10905c49ffda36d7a90e24be1ab30832fa4afc Mon Sep 17 00:00:00 2001 From: Puyan Lotfi Date: Tue, 16 Jul 2019 05:58:03 +0000 Subject: [PATCH 212/451] [NFC][test] Fix for riscv tests. 
Following tests need updating for: https://reviews.llvm.org/D55277 llvm-svn: 366183 --- lld/test/ELF/riscv-call.s | 8 ++++---- lld/test/ELF/riscv-plt.s | 12 ++++++------ lld/test/ELF/riscv-tls-gd.s | 8 ++++---- lld/test/ELF/riscv-tls-ld.s | 4 ++-- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/lld/test/ELF/riscv-call.s b/lld/test/ELF/riscv-call.s index d5b9b370e807c..a9a73841fd746 100644 --- a/lld/test/ELF/riscv-call.s +++ b/lld/test/ELF/riscv-call.s @@ -8,18 +8,18 @@ # RUN: llvm-objdump -d %t.rv32 | FileCheck %s # RUN: llvm-objdump -d %t.rv64 | FileCheck %s # CHECK: 97 00 00 00 auipc ra, 0 -# CHECK-NEXT: e7 80 80 00 jalr ra, ra, 8 +# CHECK-NEXT: e7 80 80 00 jalr 8(ra) # CHECK: 97 00 00 00 auipc ra, 0 -# CHECK-NEXT: e7 80 80 ff jalr ra, ra, -8 +# CHECK-NEXT: e7 80 80 ff jalr -8(ra) # RUN: ld.lld %t.rv32.o --defsym foo=_start+0x7ffff7ff --defsym bar=_start+8-0x80000800 -o %t.rv32.limits # RUN: ld.lld %t.rv64.o --defsym foo=_start+0x7ffff7ff --defsym bar=_start+8-0x80000800 -o %t.rv64.limits # RUN: llvm-objdump -d %t.rv32.limits | FileCheck --check-prefix=LIMITS %s # RUN: llvm-objdump -d %t.rv64.limits | FileCheck --check-prefix=LIMITS %s # LIMITS: 97 f0 ff 7f auipc ra, 524287 -# LIMITS-NEXT: e7 80 f0 7f jalr ra, ra, 2047 +# LIMITS-NEXT: e7 80 f0 7f jalr 2047(ra) # LIMITS-NEXT: 97 00 00 80 auipc ra, 524288 -# LIMITS-NEXT: e7 80 00 80 jalr ra, ra, -2048 +# LIMITS-NEXT: e7 80 00 80 jalr -2048(ra) # RUN: ld.lld %t.rv32.o --defsym foo=_start+0x7ffff800 --defsym bar=_start+8-0x80000801 -o %t # RUN: not ld.lld %t.rv64.o --defsym foo=_start+0x7ffff800 --defsym bar=_start+8-0x80000801 -o %t 2>&1 | FileCheck --check-prefix=ERROR %s diff --git a/lld/test/ELF/riscv-plt.s b/lld/test/ELF/riscv-plt.s index 2a199f20cf509..0afd5b0057fad 100644 --- a/lld/test/ELF/riscv-plt.s +++ b/lld/test/ELF/riscv-plt.s @@ -47,16 +47,16 @@ ## Direct call ## foo - . 
= 0x11020-0x11000 = 32 # DIS-NEXT: auipc ra, 0 -# DIS-NEXT: 11004: jalr ra, ra, 32 +# DIS-NEXT: 11004: jalr 32(ra) ## bar@plt - . = 0x11050-0x1100c = 72 # DIS-NEXT: auipc ra, 0 -# DIS-NEXT: 1100c: jalr ra, ra, 72 +# DIS-NEXT: 1100c: jalr 72(ra) ## bar@plt - . = 0x11050-0x11014 = 64 # DIS-NEXT: auipc ra, 0 -# DIS-NEXT: 11014: jalr ra, ra, 64 +# DIS-NEXT: 11014: jalr 64(ra) ## weak@plt - . = 0x11060-0x1101c = 72 # DIS-NEXT: auipc ra, 0 -# DIS-NEXT: 1101c: jalr ra, ra, 72 +# DIS-NEXT: 1101c: jalr 72(ra) # DIS: foo: # DIS-NEXT: 11020: @@ -79,14 +79,14 @@ # DIS: 11050: auipc t3, 2 # DIS32-NEXT: lw t3, -72(t3) # DIS64-NEXT: ld t3, -64(t3) -# DIS-NEXT: jalr t1, t3, 0 +# DIS-NEXT: jalr t1, t3 # DIS-NEXT: nop ## 32-bit: &.got.plt[weak]-. = 0x1300c-0x11060 = 4096*2-84 # DIS: 11060: auipc t3, 2 # DIS32-NEXT: lw t3, -84(t3) # DIS64-NEXT: ld t3, -72(t3) -# DIS-NEXT: jalr t1, t3, 0 +# DIS-NEXT: jalr t1, t3 # DIS-NEXT: nop .global _start, foo, bar diff --git a/lld/test/ELF/riscv-tls-gd.s b/lld/test/ELF/riscv-tls-gd.s index 21a8695405692..3f5735aabf2de 100644 --- a/lld/test/ELF/riscv-tls-gd.s +++ b/lld/test/ELF/riscv-tls-gd.s @@ -56,13 +56,13 @@ # GD32: 1000: auipc a0, 1 # GD32-NEXT: addi a0, a0, 112 # GD32-NEXT: auipc ra, 0 -# GD32-NEXT: jalr ra, ra, 56 +# GD32-NEXT: jalr 56(ra) ## &DTPMOD(b) - . = 0x2078 - 0x1010 = 4096*1+104 # GD32: 1010: auipc a0, 1 # GD32-NEXT: addi a0, a0, 104 # GD32-NEXT: auipc ra, 0 -# GD32-NEXT: jalr ra, ra, 40 +# GD32-NEXT: jalr 40(ra) # GD64-REL: .rela.dyn { # GD64-REL-NEXT: 0x20E0 R_RISCV_TLS_DTPMOD64 a 0x0 @@ -75,13 +75,13 @@ # GD64: 1000: auipc a0, 1 # GD64-NEXT: addi a0, a0, 224 # GD64-NEXT: auipc ra, 0 -# GD64-NEXT: jalr ra, ra, 56 +# GD64-NEXT: jalr 56(ra) ## &DTPMOD(b) - . 
= 0x20f0 - 0x1010 = 4096*1+224 # GD64: 1010: auipc a0, 1 # GD64-NEXT: addi a0, a0, 224 # GD64-NEXT: auipc ra, 0 -# GD64-NEXT: jalr ra, ra, 40 +# GD64-NEXT: jalr 40(ra) # NOREL: no relocations diff --git a/lld/test/ELF/riscv-tls-ld.s b/lld/test/ELF/riscv-tls-ld.s index a2a6768899088..6563cf874634d 100644 --- a/lld/test/ELF/riscv-tls-ld.s +++ b/lld/test/ELF/riscv-tls-ld.s @@ -55,7 +55,7 @@ # LD32-NEXT: addi a0, a0, 124 # LD64-NEXT: addi a0, a0, 248 # LD-NEXT: auipc ra, 0 -# LD-NEXT: jalr ra, ra, 56 +# LD-NEXT: jalr 56(ra) # NOREL: no relocations @@ -74,7 +74,7 @@ # LE32-NEXT: addi a0, a0, 4 # LE64-NEXT: addi a0, a0, 8 # LE-NEXT: auipc ra, 0 -# LE-NEXT: jalr ra, ra, 24 +# LE-NEXT: jalr 24(ra) la.tls.gd a0, .LANCHOR0 call __tls_get_addr@plt From e215996a2932ed7c472f4e94dc4345b30fd0c373 Mon Sep 17 00:00:00 2001 From: Stephan Bergmann Date: Tue, 16 Jul 2019 06:23:27 +0000 Subject: [PATCH 213/451] Finish "Adapt -fsanitize=function to SANITIZER_NON_UNIQUE_TYPEINFO" i.e., recent 5745eccef54ddd3caca278d1d292a88b2281528b: * Bump the function_type_mismatch handler version, as its signature has changed. * The function_type_mismatch handler can return successfully now, so SanitizerKind::Function must be AlwaysRecoverable (like for SanitizerKind::Vptr). * But the minimal runtime would still unconditionally treat a call to the function_type_mismatch handler as failure, so disallow -fsanitize=function in combination with -fsanitize-minimal-runtime (like it was already done for -fsanitize=vptr). * Add tests. 
Differential Revision: https://reviews.llvm.org/D61479 llvm-svn: 366186 --- clang/docs/UndefinedBehaviorSanitizer.rst | 4 +- clang/lib/CodeGen/CGExpr.cpp | 2 +- clang/lib/CodeGen/CodeGenFunction.h | 2 +- clang/lib/Driver/SanitizerArgs.cpp | 3 +- clang/test/CodeGen/ubsan-function.cpp | 22 ++++++ clang/test/Driver/fsanitize.c | 5 +- compiler-rt/lib/ubsan/ubsan_handlers_cxx.cc | 15 ++--- compiler-rt/lib/ubsan/ubsan_handlers_cxx.h | 15 +++-- compiler-rt/lib/ubsan/ubsan_interface.inc | 4 +- .../TestCases/TypeCheck/Function/function.cpp | 67 ++++++++++++++++++- 10 files changed, 113 insertions(+), 26 deletions(-) create mode 100644 clang/test/CodeGen/ubsan-function.cpp diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst index 7a4eaf4f60d5d..38cd3645bceac 100644 --- a/clang/docs/UndefinedBehaviorSanitizer.rst +++ b/clang/docs/UndefinedBehaviorSanitizer.rst @@ -205,8 +205,8 @@ Minimal Runtime There is a minimal UBSan runtime available suitable for use in production environments. This runtime has a small attack surface. It only provides very -basic issue logging and deduplication, and does not support ``-fsanitize=vptr`` -checking. +basic issue logging and deduplication, and does not support +``-fsanitize=function`` and ``-fsanitize=vptr`` checking. To use the minimal runtime, add ``-fsanitize-minimal-runtime`` to the clang command line options. 
For example, if you're used to compiling with diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 8340f48abcb64..4d19a12e5cb05 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2927,7 +2927,7 @@ enum class CheckRecoverableKind { static CheckRecoverableKind getRecoverableKind(SanitizerMask Kind) { assert(Kind.countPopulation() == 1); - if (Kind == SanitizerKind::Vptr) + if (Kind == SanitizerKind::Function || Kind == SanitizerKind::Vptr) return CheckRecoverableKind::AlwaysRecoverable; else if (Kind == SanitizerKind::Return || Kind == SanitizerKind::Unreachable) return CheckRecoverableKind::Unrecoverable; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index a51a9711ff170..bd9e14206a09e 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -114,7 +114,7 @@ enum TypeEvaluationKind { SANITIZER_CHECK(DivremOverflow, divrem_overflow, 0) \ SANITIZER_CHECK(DynamicTypeCacheMiss, dynamic_type_cache_miss, 0) \ SANITIZER_CHECK(FloatCastOverflow, float_cast_overflow, 0) \ - SANITIZER_CHECK(FunctionTypeMismatch, function_type_mismatch, 0) \ + SANITIZER_CHECK(FunctionTypeMismatch, function_type_mismatch, 1) \ SANITIZER_CHECK(ImplicitConversion, implicit_conversion, 0) \ SANITIZER_CHECK(InvalidBuiltin, invalid_builtin, 0) \ SANITIZER_CHECK(LoadInvalidValue, load_invalid_value, 0) \ diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index 9132faa917646..6b6a9feec42c5 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -31,7 +31,8 @@ static const SanitizerMask NeedsUbsanRt = static const SanitizerMask NeedsUbsanCxxRt = SanitizerKind::Vptr | SanitizerKind::CFI; static const SanitizerMask NotAllowedWithTrap = SanitizerKind::Vptr; -static const SanitizerMask NotAllowedWithMinimalRuntime = SanitizerKind::Vptr; +static const SanitizerMask NotAllowedWithMinimalRuntime = + 
SanitizerKind::Function | SanitizerKind::Vptr; static const SanitizerMask RequiresPIE = SanitizerKind::DataFlow | SanitizerKind::HWAddress | SanitizerKind::Scudo; static const SanitizerMask NeedsUnwindTables = diff --git a/clang/test/CodeGen/ubsan-function.cpp b/clang/test/CodeGen/ubsan-function.cpp new file mode 100644 index 0000000000000..749e6214242cf --- /dev/null +++ b/clang/test/CodeGen/ubsan-function.cpp @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all | FileCheck %s + +// CHECK-LABEL: define void @_Z3funv() #0 prologue <{ i32, i32 }> <{ i32 846595819, i32 trunc (i64 sub (i64 ptrtoint (i8** @0 to i64), i64 ptrtoint (void ()* @_Z3funv to i64)) to i32) }> { +void fun() {} + +// CHECK-LABEL: define void @_Z6callerPFvvE(void ()* %f) +// CHECK: getelementptr <{ i32, i32 }>, <{ i32, i32 }>* {{.*}}, i32 0, i32 0, !nosanitize +// CHECK: load i32, i32* {{.*}}, align {{.*}}, !nosanitize +// CHECK: icmp eq i32 {{.*}}, 846595819, !nosanitize +// CHECK: br i1 {{.*}}, label %[[LABEL1:.*]], label %[[LABEL4:.*]], !nosanitize +// CHECK: [[LABEL1]]: +// CHECK: getelementptr <{ i32, i32 }>, <{ i32, i32 }>* {{.*}}, i32 0, i32 1, !nosanitize +// CHECK: load i32, i32* {{.*}}, align {{.*}}, !nosanitize +// CHECK: icmp eq i8* {{.*}}, bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*), !nosanitize +// CHECK: br i1 {{.*}}, label %[[LABEL3:.*]], label %[[LABEL2:[^,]*]], {{.*}}!nosanitize +// CHECK: [[LABEL2]]: +// CHECK: call void @__ubsan_handle_function_type_mismatch_v1_abort(i8* {{.*}}, i64 {{.*}}, i64 {{.*}}, i64 {{.*}}) #{{.*}}, !nosanitize +// CHECK-NOT: unreachable +// CHECK: br label %[[LABEL3]], !nosanitize +// CHECK: [[LABEL3]]: +// CHECK: br label %[[LABEL4]], !nosanitize +void caller(void (*f)()) { f(); } diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c index 01367c7e67881..2896eda5aaa0f 100644 --- a/clang/test/Driver/fsanitize.c +++ b/clang/test/Driver/fsanitize.c @@ -759,9 
+759,12 @@ // CHECK-TSAN-MINIMAL: error: invalid argument '-fsanitize-minimal-runtime' not allowed with '-fsanitize=thread' // RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined -fsanitize-minimal-runtime %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-MINIMAL -// CHECK-UBSAN-MINIMAL: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|builtin|returns-nonnull-attribute|nonnull-attribute|function),?){18}"}} +// CHECK-UBSAN-MINIMAL: "-fsanitize={{((signed-integer-overflow|integer-divide-by-zero|shift-base|shift-exponent|unreachable|return|vla-bound|alignment|null|pointer-overflow|float-cast-overflow|array-bounds|enum|bool|builtin|returns-nonnull-attribute|nonnull-attribute),?){17}"}} // CHECK-UBSAN-MINIMAL: "-fsanitize-minimal-runtime" +// RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined -fsanitize=function -fsanitize-minimal-runtime %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION-MINIMAL +// CHECK-UBSAN-FUNCTION-MINIMAL: error: invalid argument '-fsanitize=function' not allowed with '-fsanitize-minimal-runtime' + // RUN: %clang -target x86_64-linux-gnu -fsanitize=undefined -fsanitize=vptr -fsanitize-minimal-runtime %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-VPTR-MINIMAL // CHECK-UBSAN-VPTR-MINIMAL: error: invalid argument '-fsanitize=vptr' not allowed with '-fsanitize-minimal-runtime' diff --git a/compiler-rt/lib/ubsan/ubsan_handlers_cxx.cc b/compiler-rt/lib/ubsan/ubsan_handlers_cxx.cc index 839bba3691f91..9c324cc19a11f 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers_cxx.cc +++ b/compiler-rt/lib/ubsan/ubsan_handlers_cxx.cc @@ -185,18 +185,17 @@ static bool handleFunctionTypeMismatch(FunctionTypeMismatchData *Data, return true; } -void __ubsan_handle_function_type_mismatch(FunctionTypeMismatchData *Data, - ValueHandle Function, - ValueHandle calleeRTTI, - ValueHandle fnRTTI) { 
+void __ubsan_handle_function_type_mismatch_v1(FunctionTypeMismatchData *Data, + ValueHandle Function, + ValueHandle calleeRTTI, + ValueHandle fnRTTI) { GET_REPORT_OPTIONS(false); handleFunctionTypeMismatch(Data, Function, calleeRTTI, fnRTTI, Opts); } -void __ubsan_handle_function_type_mismatch_abort(FunctionTypeMismatchData *Data, - ValueHandle Function, - ValueHandle calleeRTTI, - ValueHandle fnRTTI) { +void __ubsan_handle_function_type_mismatch_v1_abort( + FunctionTypeMismatchData *Data, ValueHandle Function, + ValueHandle calleeRTTI, ValueHandle fnRTTI) { GET_REPORT_OPTIONS(true); if (handleFunctionTypeMismatch(Data, Function, calleeRTTI, fnRTTI, Opts)) Die(); diff --git a/compiler-rt/lib/ubsan/ubsan_handlers_cxx.h b/compiler-rt/lib/ubsan/ubsan_handlers_cxx.h index be2345dc166eb..f7b9fc54f4724 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers_cxx.h +++ b/compiler-rt/lib/ubsan/ubsan_handlers_cxx.h @@ -40,14 +40,15 @@ struct FunctionTypeMismatchData { }; extern "C" SANITIZER_INTERFACE_ATTRIBUTE void -__ubsan_handle_function_type_mismatch(FunctionTypeMismatchData *Data, - ValueHandle Val, ValueHandle calleeRTTI, - ValueHandle fnRTTI); +__ubsan_handle_function_type_mismatch_v1(FunctionTypeMismatchData *Data, + ValueHandle Val, + ValueHandle calleeRTTI, + ValueHandle fnRTTI); extern "C" SANITIZER_INTERFACE_ATTRIBUTE void -__ubsan_handle_function_type_mismatch_abort(FunctionTypeMismatchData *Data, - ValueHandle Val, - ValueHandle calleeRTTI, - ValueHandle fnRTTI); +__ubsan_handle_function_type_mismatch_v1_abort(FunctionTypeMismatchData *Data, + ValueHandle Val, + ValueHandle calleeRTTI, + ValueHandle fnRTTI); } #endif // UBSAN_HANDLERS_H diff --git a/compiler-rt/lib/ubsan/ubsan_interface.inc b/compiler-rt/lib/ubsan/ubsan_interface.inc index 3eb07b7b9fe35..1e44bc2171ded 100644 --- a/compiler-rt/lib/ubsan/ubsan_interface.inc +++ b/compiler-rt/lib/ubsan/ubsan_interface.inc @@ -21,8 +21,8 @@ INTERFACE_FUNCTION(__ubsan_handle_dynamic_type_cache_miss) 
INTERFACE_FUNCTION(__ubsan_handle_dynamic_type_cache_miss_abort) INTERFACE_FUNCTION(__ubsan_handle_float_cast_overflow) INTERFACE_FUNCTION(__ubsan_handle_float_cast_overflow_abort) -INTERFACE_FUNCTION(__ubsan_handle_function_type_mismatch) -INTERFACE_FUNCTION(__ubsan_handle_function_type_mismatch_abort) +INTERFACE_FUNCTION(__ubsan_handle_function_type_mismatch_v1) +INTERFACE_FUNCTION(__ubsan_handle_function_type_mismatch_v1_abort) INTERFACE_FUNCTION(__ubsan_handle_implicit_conversion) INTERFACE_FUNCTION(__ubsan_handle_implicit_conversion_abort) INTERFACE_FUNCTION(__ubsan_handle_invalid_builtin) diff --git a/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp b/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp index 31baa2af8ca9d..07402fdcd7085 100644 --- a/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp +++ b/compiler-rt/test/ubsan/TestCases/TypeCheck/Function/function.cpp @@ -1,11 +1,53 @@ -// RUN: %clangxx -std=c++17 -fsanitize=function %s -O3 -g -o %t -// RUN: %run %t 2>&1 | FileCheck %s +// RUN: %clangxx -DDETERMINE_UNIQUE %s -o %t-unique +// RUN: %clangxx -std=c++17 -fsanitize=function %s -O3 -g -DSHARED_LIB -fPIC -shared -o %t-so.so +// RUN: %clangxx -std=c++17 -fsanitize=function %s -O3 -g -o %t %t-so.so +// RUN: %run %t 2>&1 | FileCheck %s --check-prefix=CHECK $(%run %t-unique UNIQUE) // Verify that we can disable symbolization if needed: -// RUN: %env_ubsan_opts=symbolize=0 %run %t 2>&1 | FileCheck %s --check-prefix=NOSYM +// RUN: %env_ubsan_opts=symbolize=0 %run %t 2>&1 | FileCheck %s --check-prefix=NOSYM $(%run %t-unique NOSYM-UNIQUE) // XFAIL: windows-msvc // Unsupported function flag // UNSUPPORTED: openbsd +#ifdef DETERMINE_UNIQUE + +#include + +#include "../../../../../lib/sanitizer_common/sanitizer_platform.h" + +int main(int, char **argv) { + if (!SANITIZER_NON_UNIQUE_TYPEINFO) + std::cout << "--check-prefix=" << argv[1]; +} + +#else + +struct Shared {}; +using FnShared = void (*)(Shared *); +FnShared 
getShared(); + +struct __attribute__((visibility("hidden"))) Hidden {}; +using FnHidden = void (*)(Hidden *); +FnHidden getHidden(); + +namespace { +struct Private {}; +} // namespace +using FnPrivate = void (*)(void *); +FnPrivate getPrivate(); + +#ifdef SHARED_LIB + +void fnShared(Shared *) {} +FnShared getShared() { return fnShared; } + +void fnHidden(Hidden *) {} +FnHidden getHidden() { return fnHidden; } + +void fnPrivate(Private *) {} +FnPrivate getPrivate() { return reinterpret_cast(fnPrivate); } + +#else + #include void f() {} @@ -64,12 +106,31 @@ void check_noexcept_calls() { p2(0); } +void check_cross_dso() { + getShared()(nullptr); + + // UNIQUE: function.cpp:[[@LINE+2]]:3: runtime error: call to function fnHidden(Hidden*) through pointer to incorrect function type 'void (*)(Hidden *)' + // NOSYM-UNIQUE: function.cpp:[[@LINE+1]]:3: runtime error: call to function (unknown) through pointer to incorrect function type 'void (*)(Hidden *)' + getHidden()(nullptr); + + // TODO: Unlike GCC, Clang fails to prefix the typeinfo name for the function + // type with "*", so this erroneously only fails for "*UNIQUE": + // UNIQUE: function.cpp:[[@LINE+2]]:3: runtime error: call to function fnPrivate((anonymous namespace)::Private*) through pointer to incorrect function type 'void (*)((anonymous namespace)::Private *)' + // NOSYM-UNIQUE: function.cpp:[[@LINE+1]]:3: runtime error: call to function (unknown) through pointer to incorrect function type 'void (*)((anonymous namespace)::Private *)' + reinterpret_cast(getPrivate())(nullptr); +} + int main(void) { make_valid_call(); make_invalid_call(); check_noexcept_calls(); + check_cross_dso(); // Check that no more errors will be printed. 
// CHECK-NOT: runtime error: call to function // NOSYM-NOT: runtime error: call to function make_invalid_call(); } + +#endif + +#endif From d0ac1888aab490589788bd51a9f44f7745dc5819 Mon Sep 17 00:00:00 2001 From: Jan Kratochvil Date: Tue, 16 Jul 2019 06:34:44 +0000 Subject: [PATCH 214/451] [lldb] Handle EOF from `lldb-vscode` Sometimes (when running lldb-vscode under strace) I get: read(0, "", 16) = 0 read(0, "", 16) = 0 read(0, "", 16) = 0 ... With this patch testcases finish properly even with strace: read(0, "", 16) = 0 futex(0x1346508, FUTEX_WAKE_PRIVATE, 2147483647) = 0 stat("", 0x7ffe8f2634c8) = -1 ENOENT (No such file or directory) --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_KILLED, si_pid=9124, si_uid=1001, si_status=SIGINT, si_utime=1, si_stime=0} --- close(4) = 0 exit_group(0) = ? +++ exited with 0 +++ Differential Revision: https://reviews.llvm.org/D64698 llvm-svn: 366187 --- lldb/tools/lldb-vscode/IOStream.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lldb/tools/lldb-vscode/IOStream.cpp b/lldb/tools/lldb-vscode/IOStream.cpp index e07ae079f7ede..4b11b90b4c2e8 100644 --- a/lldb/tools/lldb-vscode/IOStream.cpp +++ b/lldb/tools/lldb-vscode/IOStream.cpp @@ -101,6 +101,11 @@ bool InputStream::read_full(std::ofstream *log, size_t length, else bytes_read = ::read(descriptor.m_fd, ptr, length); + if (bytes_read == 0) { + if (log) + *log << "End of file (EOF) reading from input file.\n"; + return false; + } if (bytes_read < 0) { int reason = 0; #if defined(_WIN32) From 860f7ec05871d36b519ba31295670963f2fd3f95 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 16 Jul 2019 06:53:06 +0000 Subject: [PATCH 215/451] [DWARF] Simplify DWARFAttribute. NFC. The first argument in the constructor was ignored, and the remaining arguments were always passed as their defaults. 
Differential Revision: https://reviews.llvm.org/D64407 llvm-svn: 366188 --- llvm/include/llvm/DebugInfo/DWARF/DWARFAttribute.h | 12 +----------- llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 4 ++-- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAttribute.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAttribute.h index 96e622ccc0331..c8ad19ad6bf64 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAttribute.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAttribute.h @@ -27,13 +27,10 @@ struct DWARFAttribute { /// The debug info/types section byte size of the data for this attribute. uint32_t ByteSize = 0; /// The attribute enumeration of this attribute. - dwarf::Attribute Attr; + dwarf::Attribute Attr = dwarf::Attribute(0); /// The form and value for this attribute. DWARFFormValue Value; - DWARFAttribute(uint32_t O, dwarf::Attribute A = dwarf::Attribute(0), - dwarf::Form F = dwarf::Form(0)) : Attr(A), Value(F) {} - bool isValid() const { return Offset != 0 && Attr != dwarf::Attribute(0); } @@ -45,13 +42,6 @@ struct DWARFAttribute { /// Identifies DWARF attributes that may contain a reference to a /// DWARF expression. 
static bool mayHaveLocationDescription(dwarf::Attribute Attr); - - void clear() { - Offset = 0; - ByteSize = 0; - Attr = dwarf::Attribute(0); - Value = DWARFFormValue(); - } }; } // end namespace llvm diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 6212842983669..d638dc4239f47 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -663,7 +663,7 @@ iterator_range DWARFDie::attributes() const { } DWARFDie::attribute_iterator::attribute_iterator(DWARFDie D, bool End) - : Die(D), AttrValue(0), Index(0) { + : Die(D), Index(0) { auto AbbrDecl = Die.getAbbreviationDeclarationPtr(); assert(AbbrDecl && "Must have abbreviation declaration"); if (End) { @@ -693,7 +693,7 @@ void DWARFDie::attribute_iterator::updateForIndex( AttrValue.ByteSize = ParseOffset - AttrValue.Offset; } else { assert(Index == NumAttrs && "Indexes should be [0, NumAttrs) only"); - AttrValue.clear(); + AttrValue = {}; } } From 74c350af2181cf21085f69bd2d9bcf4d6d2dc03c Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 16 Jul 2019 06:56:10 +0000 Subject: [PATCH 216/451] [DWARF] Fix an incorrect format specifier. This adjusts the format specifier because PCOffset is uint16_t. 
Differential Revision: https://reviews.llvm.org/D64620 llvm-svn: 366189 --- llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 8a621084710e4..77b4688c23465 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -758,7 +758,7 @@ Error DWARFDebugLine::LineTable::parse( State.Row.Address.Address += PCOffset; if (OS) *OS - << format(" (0x%16.16" PRIx64 ")", PCOffset); + << format(" (0x%4.4" PRIx16 ")", PCOffset); } break; From f48bc0181232be2499afe84f478bcda5699931af Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 16 Jul 2019 07:01:08 +0000 Subject: [PATCH 217/451] [DWARF] Fix the reserved values for unit length in DWARFDebugLine. The DWARF3 documentation had inconsistency concerning the reserved range for unit length values. The issue was fixed in DWARF4. Differential Revision: https://reviews.llvm.org/D64622 llvm-svn: 366190 --- llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 4 ++-- .../unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 77b4688c23465..3ee5652a0eb22 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -304,7 +304,7 @@ Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData, if (TotalLength == UINT32_MAX) { FormParams.Format = dwarf::DWARF64; TotalLength = DebugLineData.getU64(OffsetPtr); - } else if (TotalLength >= 0xffffff00) { + } else if (TotalLength >= 0xfffffff0) { return createStringError(errc::invalid_argument, "parsing line table prologue at offset 0x%8.8" PRIx64 " unsupported reserved unit length found of value 0x%8.8" PRIx64, @@ -1091,7 +1091,7 @@ 
DWARFDebugLine::SectionParser::SectionParser(DWARFDataExtractor &Data, } bool DWARFDebugLine::Prologue::totalLengthIsValid() const { - return TotalLength == 0xffffffff || TotalLength < 0xffffff00; + return TotalLength == 0xffffffff || TotalLength < 0xfffffff0; } DWARFDebugLine::LineTable DWARFDebugLine::SectionParser::parseNext( diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp index fef8dc50fb467..ce7c252d83ee8 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp @@ -291,13 +291,13 @@ TEST_F(DebugLineBasicFixture, ErrorForReservedLength) { return; LineTable < = Gen->addLineTable(); - LT.setCustomPrologue({{0xffffff00, LineTable::Long}}); + LT.setCustomPrologue({{0xfffffff0, LineTable::Long}}); generate(); checkGetOrParseLineTableEmitsError( "parsing line table prologue at offset 0x00000000 unsupported reserved " - "unit length found of value 0xffffff00"); + "unit length found of value 0xfffffff0"); } TEST_F(DebugLineBasicFixture, ErrorForLowVersion) { @@ -532,7 +532,7 @@ TEST_F(DebugLineBasicFixture, ParserMovesToEndForBadLengthWhenParsing) { return; LineTable < = Gen->addLineTable(); - LT.setCustomPrologue({{0xffffff00, LineTable::Long}}); + LT.setCustomPrologue({{0xfffffff0, LineTable::Long}}); Gen->addLineTable(); generate(); @@ -544,7 +544,7 @@ TEST_F(DebugLineBasicFixture, ParserMovesToEndForBadLengthWhenParsing) { EXPECT_FALSE(Recoverable); checkError("parsing line table prologue at offset 0x00000000 unsupported " - "reserved unit length found of value 0xffffff00", + "reserved unit length found of value 0xfffffff0", std::move(Unrecoverable)); } @@ -553,7 +553,7 @@ TEST_F(DebugLineBasicFixture, ParserMovesToEndForBadLengthWhenSkipping) { return; LineTable < = Gen->addLineTable(); - LT.setCustomPrologue({{0xffffff00, LineTable::Long}}); + LT.setCustomPrologue({{0xfffffff0, LineTable::Long}}); Gen->addLineTable(); 
generate(); @@ -564,7 +564,7 @@ TEST_F(DebugLineBasicFixture, ParserMovesToEndForBadLengthWhenSkipping) { EXPECT_TRUE(Parser.done()); checkError("parsing line table prologue at offset 0x00000000 unsupported " - "reserved unit length found of value 0xffffff00", + "reserved unit length found of value 0xfffffff0", std::move(Unrecoverable)); } From a54c46674efbf045d661831d727d4c48be26a7d1 Mon Sep 17 00:00:00 2001 From: Zi Xuan Wu Date: Tue, 16 Jul 2019 07:54:47 +0000 Subject: [PATCH 218/451] [NFC][PowerPC] Add test case for D64195 llvm-svn: 366191 --- .../CodeGen/PowerPC/float-load-store-pair.ll | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/float-load-store-pair.ll diff --git a/llvm/test/CodeGen/PowerPC/float-load-store-pair.ll b/llvm/test/CodeGen/PowerPC/float-load-store-pair.ll new file mode 100644 index 0000000000000..6a8bd8e7a57b5 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/float-load-store-pair.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s -mcpu=pwr9 -mtriple=powerpc64le-ibm-linux| FileCheck %s + +; This file verifies that for a given floating point load / store pair, +; if the load value isn't used by any other operations, +; then consider transforming the pair to integer load / store operations + +@a1 = local_unnamed_addr global double 0.000000e+00, align 8 +@a2 = local_unnamed_addr global double 0.000000e+00, align 8 +@a3 = local_unnamed_addr global double 0.000000e+00, align 8 +@a4 = local_unnamed_addr global double 0.000000e+00, align 8 +@a5 = local_unnamed_addr global double 0.000000e+00, align 8 +@a6 = local_unnamed_addr global double 0.000000e+00, align 8 +@a7 = local_unnamed_addr global double 0.000000e+00, align 8 +@a8 = local_unnamed_addr global double 0.000000e+00, align 8 +@a9 = local_unnamed_addr global double 0.000000e+00, align 8 +@a10 = local_unnamed_addr global double 0.000000e+00, align 8 +@a11 
= local_unnamed_addr global double 0.000000e+00, align 8 +@a12 = local_unnamed_addr global double 0.000000e+00, align 8 +@a13 = local_unnamed_addr global double 0.000000e+00, align 8 +@a14 = local_unnamed_addr global double 0.000000e+00, align 8 +@a15 = local_unnamed_addr global double 0.000000e+00, align 8 +@a16 = local_unnamed_addr global ppc_fp128 0xM00000000000000000000000000000000, align 16 +@a17 = local_unnamed_addr global fp128 0xL00000000000000000000000000000000, align 16 + +; Because this test function is trying to pass float argument by stack, +; so the fpr is only used to load/store float argument +define signext i32 @test() { +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: std 0, 16(1) +; CHECK-NEXT: stdu 1, -192(1) +; CHECK-NEXT: .cfi_def_cfa_offset 192 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: addis 3, 2, a1@toc@ha +; CHECK-NEXT: lfd 1, a1@toc@l(3) +; CHECK-NEXT: addis 3, 2, a2@toc@ha +; CHECK-NEXT: lfd 2, a2@toc@l(3) +; CHECK-NEXT: addis 3, 2, a3@toc@ha +; CHECK-NEXT: lfd 3, a3@toc@l(3) +; CHECK-NEXT: addis 3, 2, a4@toc@ha +; CHECK-NEXT: lfd 4, a4@toc@l(3) +; CHECK-NEXT: addis 3, 2, a5@toc@ha +; CHECK-NEXT: lfd 5, a5@toc@l(3) +; CHECK-NEXT: addis 3, 2, a6@toc@ha +; CHECK-NEXT: lfd 6, a6@toc@l(3) +; CHECK-NEXT: addis 3, 2, a7@toc@ha +; CHECK-NEXT: lfd 7, a7@toc@l(3) +; CHECK-NEXT: addis 3, 2, a8@toc@ha +; CHECK-NEXT: lfd 8, a8@toc@l(3) +; CHECK-NEXT: addis 3, 2, a9@toc@ha +; CHECK-NEXT: lfd 9, a9@toc@l(3) +; CHECK-NEXT: addis 3, 2, a10@toc@ha +; CHECK-NEXT: lfd 10, a10@toc@l(3) +; CHECK-NEXT: addis 3, 2, a11@toc@ha +; CHECK-NEXT: lfd 11, a11@toc@l(3) +; CHECK-NEXT: addis 3, 2, a12@toc@ha +; CHECK-NEXT: lfd 12, a12@toc@l(3) +; CHECK-NEXT: addis 3, 2, a13@toc@ha +; CHECK-NEXT: lfd 13, a13@toc@l(3) +; CHECK-NEXT: addis 3, 2, a14@toc@ha +; CHECK-NEXT: lfd 0, a14@toc@l(3) +; CHECK-NEXT: addis 3, 2, a15@toc@ha +; CHECK-NEXT: addis 4, 2, a17@toc@ha +; CHECK-NEXT: addi 4, 4, a17@toc@l +; CHECK-NEXT: lxsd 2, a15@toc@l(3) +; 
CHECK-NEXT: addis 3, 2, a16@toc@ha +; CHECK-NEXT: addi 3, 3, a16@toc@l +; CHECK-NEXT: lxvx 36, 0, 4 +; CHECK-NEXT: lxvx 35, 0, 3 +; CHECK-NEXT: li 3, 168 +; CHECK-NEXT: stxvx 36, 1, 3 +; CHECK-NEXT: li 3, 152 +; CHECK-NEXT: stxvx 35, 1, 3 +; CHECK-NEXT: stxsd 2, 144(1) +; CHECK-NEXT: stfd 0, 136(1) +; CHECK-NEXT: bl _Z3fooddddddddddddddd +; CHECK-NEXT: nop +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: addi 1, 1, 192 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +%1 = load double, double* @a1, align 8 +%2 = load double, double* @a2, align 8 +%3 = load double, double* @a3, align 8 +%4 = load double, double* @a4, align 8 +%5 = load double, double* @a5, align 8 +%6 = load double, double* @a6, align 8 +%7 = load double, double* @a7, align 8 +%8 = load double, double* @a8, align 8 +%9 = load double, double* @a9, align 8 +%10 = load double, double* @a10, align 8 +%11 = load double, double* @a11, align 8 +%12 = load double, double* @a12, align 8 +%13 = load double, double* @a13, align 8 +%14 = load double, double* @a14, align 8 +%15 = load double, double* @a15, align 8 +%16 = load ppc_fp128, ppc_fp128* @a16, align 16 +%17 = load fp128, fp128* @a17, align 16 +tail call void @_Z3fooddddddddddddddd(double %1, double %2, double %3, double %4, double %5, double %6, double %7, double %8, double %9, double %10, double %11, double %12, double %13, double %14, double %15, ppc_fp128 %16, fp128 %17) +ret i32 0 +} + +declare void @_Z3fooddddddddddddddd(double, double, double, double, double, double, double, double, double, double, double, double, double, double, double, ppc_fp128, fp128) From 33fdf82dda0a687133016cc41cffd4ece6693d69 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 16 Jul 2019 08:08:17 +0000 Subject: [PATCH 219/451] [WebAssembly] Rename variale references in comments after VariableName -> variableName change llvm-svn: 366192 --- lld/wasm/InputFiles.cpp | 2 +- lld/wasm/MarkLive.cpp | 4 ++-- lld/wasm/SymbolTable.h | 2 +- lld/wasm/SyntheticSections.cpp 
| 4 ++-- lld/wasm/Writer.cpp | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp index b5233cf09ed00..b4945469f931c 100644 --- a/lld/wasm/InputFiles.cpp +++ b/lld/wasm/InputFiles.cpp @@ -328,7 +328,7 @@ void ObjFile::parse(bool ignoreComdats) { for (const WasmEvent &e : wasmObj->events()) events.emplace_back(make(types[e.Type.SigIndex], e, this)); - // Populate `Symbols` based on the WasmSymbols in the object. + // Populate `Symbols` based on the symbols in the object. symbols.reserve(wasmObj->getNumberOfSymbols()); for (const SymbolRef &sym : wasmObj->symbols()) { const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl()); diff --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp index 703daf27e01cf..9399156b748ce 100644 --- a/lld/wasm/MarkLive.cpp +++ b/lld/wasm/MarkLive.cpp @@ -46,9 +46,9 @@ void lld::wasm::markLive() { if (InputChunk *chunk = sym->getChunk()) q.push_back(chunk); - // The ctor functions are all referenced by the synthetic CallCtors + // The ctor functions are all referenced by the synthetic callCtors // function. However, this function does not contain relocations so we - // have to manually mark the ctors as live if CallCtors itself is live. + // have to manually mark the ctors as live if callCtors itself is live. if (sym == WasmSym::callCtors) { if (config->passiveSegments) enqueue(WasmSym::initMemory); diff --git a/lld/wasm/SymbolTable.h b/lld/wasm/SymbolTable.h index 33f02ddaf9101..530d5e864103d 100644 --- a/lld/wasm/SymbolTable.h +++ b/lld/wasm/SymbolTable.h @@ -98,7 +98,7 @@ class SymbolTable { InputFunction *replaceWithUnreachable(Symbol *sym, const WasmSignature &sig, StringRef debugName); - // Maps symbol names to index into the SymVector. -1 means that symbols + // Maps symbol names to index into the symVector. -1 means that symbols // is to not yet in the vector but it should have tracing enabled if it is // ever added. 
llvm::DenseMap symMap; diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index 020d2c0b99265..6d5d14ff40e64 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -474,8 +474,8 @@ void NameSection::writeBody() { SubSection sub(WASM_NAMES_FUNCTION); writeUleb128(sub.os, numNames(), "name count"); - // Names must appear in function index order. As it happens ImportedSymbols - // and InputFunctions are numbered in order with imported functions coming + // Names must appear in function index order. As it happens importedSymbols + // and inputFunctions are numbered in order with imported functions coming // first. for (const Symbol *s : out.importSec->importedSymbols) { if (auto *f = dyn_cast(s)) { diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 5d81f2305c9d9..77a29a2d99ef4 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -191,7 +191,7 @@ void Writer::writeSections() { // to each of the input data sections as well as the explicit stack region. // The default memory layout is as follows, from low to high. 
// -// - initialized data (starting at Config->GlobalBase) +// - initialized data (starting at Config->globalBase) // - BSS data (not currently implemented in llvm) // - explicit stack (Config->ZStackSize) // - heap start / unallocated From 2e2038b6470d4fdcdfd29bd111e67f12f688cef0 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 16 Jul 2019 08:26:38 +0000 Subject: [PATCH 220/451] [COFF] Rename variale references in comments after VariableName -> variableName change llvm-svn: 366193 --- lld/COFF/Chunks.cpp | 8 ++++---- lld/COFF/Config.h | 2 +- lld/COFF/DLL.cpp | 2 +- lld/COFF/Driver.cpp | 8 ++++---- lld/COFF/DriverUtils.cpp | 2 +- lld/COFF/InputFiles.cpp | 8 ++++---- lld/COFF/InputFiles.h | 2 +- lld/COFF/PDB.cpp | 6 +++--- lld/COFF/SymbolTable.cpp | 2 +- lld/COFF/SymbolTable.h | 2 +- lld/COFF/Writer.cpp | 6 +++--- 11 files changed, 24 insertions(+), 24 deletions(-) diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index 374751ad510ac..0e43d2b478b40 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -31,10 +31,10 @@ namespace coff { SectionChunk::SectionChunk(ObjFile *f, const coff_section *h) : Chunk(SectionKind), file(f), header(h), repl(this) { - // Initialize Relocs. + // Initialize relocs. setRelocs(file->getCOFFObj()->getRelocations(header)); - // Initialize SectionName. + // Initialize sectionName. StringRef sectionName; if (Expected e = file->getCOFFObj()->getSectionName(header)) sectionName = *e; @@ -218,7 +218,7 @@ void applyArm64Addr(uint8_t *off, uint64_t s, uint64_t p, int shift) { // Update the immediate field in a AARCH64 ldr, str, and add instruction. // Optionally limit the range of the written immediate by one or more bits -// (RangeLimit). +// (rangeLimit). 
void applyArm64Imm(uint8_t *off, uint64_t imm, uint32_t rangeLimit) { uint32_t orig = read32le(off); imm += (orig >> 10) & 0xFFF; @@ -561,7 +561,7 @@ void SectionChunk::getRuntimePseudoRelocs( toString(file)); continue; } - // SizeInBits is used to initialize the Flags field; currently no + // sizeInBits is used to initialize the Flags field; currently no // other flags are defined. res.emplace_back( RuntimePseudoReloc(target, this, rel.VirtualAddress, sizeInBits)); diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index f7c8097147654..e378b6fc72484 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -49,7 +49,7 @@ struct Export { // If an export is a form of /export:foo=dllname.bar, that means // that foo should be exported as an alias to bar in the DLL. - // ForwardTo is set to "dllname.bar" part. Usually empty. + // forwardTo is set to "dllname.bar" part. Usually empty. StringRef forwardTo; StringChunk *forwardChunk = nullptr; diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp index 932e7d64553ec..40d1f463aa3ff 100644 --- a/lld/COFF/DLL.cpp +++ b/lld/COFF/DLL.cpp @@ -567,7 +567,7 @@ void IdataContents::create() { // Create .idata contents for each DLL. for (std::vector &syms : v) { // Create lookup and address tables. If they have external names, - // we need to create HintName chunks to store the names. + // we need to create hintName chunks to store the names. // If they don't (if they are import-by-ordinals), we store only // ordinal values to the table. 
size_t base = lookups.size(); diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index bb5c0bb711b99..6cfd83ab96b6c 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -936,7 +936,7 @@ static void findKeepUniqueSections() { } } -// link.exe replaces each %foo% in AltPath with the contents of environment +// link.exe replaces each %foo% in altPath with the contents of environment // variable foo, and adds the two magic env vars _PDB (expands to the basename // of pdb's output path) and _EXT (expands to the extension of the output // binary). @@ -952,9 +952,9 @@ static void parsePDBAltPath(StringRef altPath) { binaryExtension = binaryExtension.substr(1); // %_EXT% does not include '.'. // Invariant: - // +--------- Cursor ('a...' might be the empty string). - // | +----- FirstMark - // | | +- SecondMark + // +--------- cursor ('a...' might be the empty string). + // | +----- firstMark + // | | +- secondMark // v v v // a...%...%... size_t cursor = 0; diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp index edc3b3707c7b6..4360ac23b2622 100644 --- a/lld/COFF/DriverUtils.cpp +++ b/lld/COFF/DriverUtils.cpp @@ -345,7 +345,7 @@ class TemporaryFile { // so it is safe to remove the file immediately after this function // is called (you cannot remove an opened file on Windows.) std::unique_ptr getMemoryBuffer() { - // IsVolatileSize=true forces MemoryBuffer to not use mmap(). + // IsVolatile=true forces MemoryBuffer to not use mmap(). return CHECK(MemoryBuffer::getFile(path, /*FileSize=*/-1, /*RequiresNullTerminator=*/false, /*IsVolatile=*/true), diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index 9936a6f69ffed..c00d5c5b494ef 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -556,8 +556,8 @@ Optional ObjFile::createDefined( // The second symbol entry has the name of the comdat symbol, called the // "comdat leader". 
// When this function is called for the first symbol entry of a comdat, - // it sets ComdatDefs and returns None, and when it's called for the second - // symbol entry it reads ComdatDefs and then sets it back to nullptr. + // it sets comdatDefs and returns None, and when it's called for the second + // symbol entry it reads comdatDefs and then sets it back to nullptr. // Handle comdat leader. if (const coff_aux_section_definition *def = comdatDefs[sectionNumber]) { @@ -626,7 +626,7 @@ ArrayRef ObjFile::getDebugSection(StringRef secName) { // even if the TU was compiled with no debug info. At least two records are // always there. S_OBJNAME stores a 32-bit signature, which is loaded into the // PCHSignature member. S_COMPILE3 stores compile-time cmd-line flags. This is -// currently used to initialize the HotPatchable member. +// currently used to initialize the hotPatchable member. void ObjFile::initializeFlags() { ArrayRef data = getDebugSection(".debug$S"); if (data.empty()) @@ -764,7 +764,7 @@ void ImportFile::parse() { impSym = symtab->addImportData(impName, this); // If this was a duplicate, we logged an error but may continue; - // in this case, ImpSym is nullptr. + // in this case, impSym is nullptr. if (!impSym) return; diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h index 6c84ceec13dbd..dfad9814a397d 100644 --- a/lld/COFF/InputFiles.h +++ b/lld/COFF/InputFiles.h @@ -119,7 +119,7 @@ class ObjFile : public InputFile { ArrayRef getDebugSection(StringRef secName); - // Returns a Symbol object for the SymbolIndex'th symbol in the + // Returns a Symbol object for the symbolIndex'th symbol in the // underlying object file. 
Symbol *getSymbol(uint32_t symbolIndex) { return symbols[symbolIndex]; diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index d024f7573f3da..a55e5136e040b 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -119,7 +119,7 @@ class PDBLinker { /// /// If the object does not use a type server PDB (compiled with /Z7), we merge /// all the type and item records from the .debug$S stream and fill in the - /// caller-provided ObjectIndexMap. + /// caller-provided objectIndexMap. Expected mergeDebugT(ObjFile *file, CVIndexMap *objectIndexMap); @@ -683,7 +683,7 @@ static void translateIdSymbols(MutableArrayRef &recordData, TypeIndex *ti = reinterpret_cast(content.data() + refs[0].Offset); - // `TI` is the index of a FuncIdRecord or MemberFuncIdRecord which lives in + // `ti` is the index of a FuncIdRecord or MemberFuncIdRecord which lives in // the IPI stream, whose `FunctionType` member refers to the TPI stream. // Note that LF_FUNC_ID and LF_MEMFUNC_ID have the same record layout, and // in both cases we just need the second type index. @@ -1729,7 +1729,7 @@ static bool findLineTable(const SectionChunk *c, uint32_t addr, if (dbgC->getSectionName() != ".debug$S") continue; - // Build a mapping of SECREL relocations in DbgC that refer to C. + // Build a mapping of SECREL relocations in dbgC that refer to `c`. DenseMap secrels; for (const coff_relocation &r : dbgC->getRelocs()) { if (r.Type != secrelReloc) diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 2173c10c1ca56..0aff164ee5677 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -192,7 +192,7 @@ bool SymbolTable::handleMinGWAutomaticImport(Symbol *sym, StringRef name) { // Replace the reference directly to a variable with a reference // to the import address table instead. 
This obviously isn't right, - // but we mark the symbol as IsRuntimePseudoReloc, and a later pass + // but we mark the symbol as isRuntimePseudoReloc, and a later pass // will add runtime pseudo relocations for every relocation against // this Symbol. The runtime pseudo relocation framework expects the // reference itself to point at the IAT entry. diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h index 75bd3933547b0..88f47cbe9e78a 100644 --- a/lld/COFF/SymbolTable.h +++ b/lld/COFF/SymbolTable.h @@ -112,7 +112,7 @@ class SymbolTable { private: /// Inserts symbol if not already present. std::pair insert(StringRef name); - /// Same as insert(Name), but also sets IsUsedInRegularObj. + /// Same as insert(Name), but also sets isUsedInRegularObj. std::pair insert(StringRef name, InputFile *f); std::vector getSymsWithPrefix(StringRef prefix); diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 46b1af0934ab3..e4b35a5f8beb9 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -335,7 +335,7 @@ void OutputSection::addContributingPartialSection(PartialSection *sec) { } // namespace lld // Check whether the target address S is in range from a relocation -// of type RelType at address P. +// of type relType at address P. static bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) { if (config->machine == ARMNT) { int64_t diff = AbsoluteDifference(s, p + 4) + margin; @@ -427,7 +427,7 @@ static bool createThunks(OutputSection *os, int margin) { // The estimate of the source address P should be pretty accurate, // but we don't know whether the target Symbol address should be - // offset by ThunkSize or not (or by some of ThunksSize but not all of + // offset by thunksSize or not (or by some of thunksSize but not all of // it), giving us some uncertainty once we have added one thunk. 
uint64_t p = sc->getRVA() + rel.VirtualAddress + thunksSize; @@ -1626,7 +1626,7 @@ void Writer::markSymbolsForRVATable(ObjFile *file, } // Replace the absolute table symbol with a synthetic symbol pointing to -// TableChunk so that we can emit base relocations for it and resolve section +// tableChunk so that we can emit base relocations for it and resolve section // relative relocations. void Writer::maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym, StringRef countSym) { From a3077526277c10bda5395ceebdb6963f9c253651 Mon Sep 17 00:00:00 2001 From: Serge Guelton Date: Tue, 16 Jul 2019 08:56:47 +0000 Subject: [PATCH 221/451] [clang-scan-view] Force utf-8 when handling report (python2 only) Original patch by random human Differential Revision: https://reviews.llvm.org/D64129 llvm-svn: 366194 --- clang/tools/scan-view/share/ScanView.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/tools/scan-view/share/ScanView.py b/clang/tools/scan-view/share/ScanView.py index c40366b2e8490..a6cc7692ffe00 100644 --- a/clang/tools/scan-view/share/ScanView.py +++ b/clang/tools/scan-view/share/ScanView.py @@ -764,11 +764,11 @@ def send_patched_file(self, path, ctype): variables['report'] = m.group(2) try: - f = open(path,'r') + f = open(path,'rb') except IOError: return self.send_404() fs = os.fstat(f.fileno()) - data = f.read() + data = f.read().decode('utf-8') for a,b in kReportReplacements: data = a.sub(b % variables, data) return self.send_string(data, ctype, mtime=fs.st_mtime) From a3e26d1a6cdfb5a3a97750863abb31e1e3fdd66b Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Tue, 16 Jul 2019 09:15:01 +0000 Subject: [PATCH 222/451] [NFC] Test commit: add full stop at end of comment llvm-svn: 366195 --- llvm/lib/Target/ARM/ARMInstrVFP.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index 93c27e4630053..a0dd25de07eea 100644 --- 
a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -857,7 +857,7 @@ multiclass vcvt_inst rm, let Inst{17-16} = rm; - // Encode instruction operands + // Encode instruction operands. let Inst{3-0} = Dm{3-0}; let Inst{5} = Dm{4}; let Inst{8} = 1; From c5a2d7470e10576684bc9a74626d96db8ff069f1 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Tue, 16 Jul 2019 09:27:02 +0000 Subject: [PATCH 223/451] [lldb] Rename Options.inc to CommandOptions.inc [NFC] It seems having two Options.inc files in the same project is giving our custom Xcode project a hard time. This patch renames the new Options.inc to CommandOptions.inc to prevent this conflict. llvm-svn: 366196 --- lldb/source/Commands/CMakeLists.txt | 2 +- lldb/source/Commands/CommandObjectBreakpoint.cpp | 2 +- lldb/source/Commands/CommandObjectHelp.cpp | 2 +- lldb/source/Commands/CommandObjectSettings.cpp | 6 +++--- lldb/source/Commands/CommandObjectTarget.cpp | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lldb/source/Commands/CMakeLists.txt b/lldb/source/Commands/CMakeLists.txt index f3f96af7dab02..657da8caaf361 100644 --- a/lldb/source/Commands/CMakeLists.txt +++ b/lldb/source/Commands/CMakeLists.txt @@ -1,4 +1,4 @@ -lldb_tablegen(Options.inc -gen-lldb-option-defs +lldb_tablegen(CommandOptions.inc -gen-lldb-option-defs SOURCE Options.td TARGET LLDBOptionsGen) diff --git a/lldb/source/Commands/CommandObjectBreakpoint.cpp b/lldb/source/Commands/CommandObjectBreakpoint.cpp index a661ffc62512c..c33f3834cb13b 100644 --- a/lldb/source/Commands/CommandObjectBreakpoint.cpp +++ b/lldb/source/Commands/CommandObjectBreakpoint.cpp @@ -1249,7 +1249,7 @@ static constexpr OptionDefinition g_breakpoint_list_options[] = { // FIXME: We need to add an "internal" command, and then add this sort of // thing to it. But I need to see it for now, and don't want to wait. 
#define LLDB_OPTIONS_breakpoint_list -#include "Options.inc" +#include "CommandOptions.inc" }; #pragma mark List diff --git a/lldb/source/Commands/CommandObjectHelp.cpp b/lldb/source/Commands/CommandObjectHelp.cpp index ad53e03121f0d..ab557919d0a06 100644 --- a/lldb/source/Commands/CommandObjectHelp.cpp +++ b/lldb/source/Commands/CommandObjectHelp.cpp @@ -67,7 +67,7 @@ CommandObjectHelp::~CommandObjectHelp() = default; static constexpr OptionDefinition g_help_options[] = { #define LLDB_OPTIONS_help -#include "Options.inc" +#include "CommandOptions.inc" }; llvm::ArrayRef diff --git a/lldb/source/Commands/CommandObjectSettings.cpp b/lldb/source/Commands/CommandObjectSettings.cpp index 057c5de619cfa..55a0002c59973 100644 --- a/lldb/source/Commands/CommandObjectSettings.cpp +++ b/lldb/source/Commands/CommandObjectSettings.cpp @@ -23,7 +23,7 @@ using namespace lldb_private; static constexpr OptionDefinition g_settings_set_options[] = { #define LLDB_OPTIONS_settings_set -#include "Options.inc" +#include "CommandOptions.inc" }; class CommandObjectSettingsSet : public CommandObjectRaw { @@ -312,7 +312,7 @@ class CommandObjectSettingsShow : public CommandObjectParsed { static constexpr OptionDefinition g_settings_write_options[] = { #define LLDB_OPTIONS_settings_write -#include "Options.inc" +#include "CommandOptions.inc" }; class CommandObjectSettingsWrite : public CommandObjectParsed { @@ -435,7 +435,7 @@ class CommandObjectSettingsWrite : public CommandObjectParsed { static constexpr OptionDefinition g_settings_read_options[] = { #define LLDB_OPTIONS_settings_read -#include "Options.inc" +#include "CommandOptions.inc" }; class CommandObjectSettingsRead : public CommandObjectParsed { diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index e8720157ab4cb..e913a28501f23 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -1968,7 +1968,7 @@ static constexpr 
OptionEnumValueElement g_sort_option_enumeration[] = { static constexpr OptionDefinition g_target_modules_dump_symtab_options[] = { #define LLDB_OPTIONS_target_modules_dump_symtab -#include "Options.inc" +#include "CommandOptions.inc" }; class CommandObjectTargetModulesDumpSymtab From eb72138340ce36f3bdd29658eb2ff730cbaa25d7 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Tue, 16 Jul 2019 09:27:39 +0000 Subject: [PATCH 224/451] [AArch64] Implement __jcvt intrinsic from Armv8.3-A The jcvt intrinsic defined in ACLE [1] is available when ARM_FEATURE_JCVT is defined. This change introduces the AArch64 intrinsic, wires it up to the instruction and a new clang builtin function. The __ARM_FEATURE_JCVT macro is now defined when an Armv8.3-A or higher target is used. I've implemented the target detection logic in Clang so that this feature is enabled for architectures from armv8.3-a onwards (so -march=armv8.4-a also enables this, for example). make check-all didn't show any new failures. [1] https://developer.arm.com/docs/101028/latest/data-processing-intrinsics Differential Revision: https://reviews.llvm.org/D64495 llvm-svn: 366197 --- clang/include/clang/Basic/BuiltinsAArch64.def | 2 + clang/lib/Basic/Targets/AArch64.cpp | 37 +++++++++++++++++++ clang/lib/Basic/Targets/AArch64.h | 6 +++ clang/lib/CodeGen/CGBuiltin.cpp | 8 ++++ clang/lib/Headers/arm_acle.h | 8 ++++ clang/test/CodeGen/arm_acle.c | 11 ++++++ clang/test/CodeGen/builtins-arm64.c | 6 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 4 +- llvm/test/CodeGen/AArch64/fjcvtzs.ll | 10 +++++ llvm/utils/git-svn/git-llvm | 2 +- 11 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/fjcvtzs.ll diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index 5ba03da4a7a05..7701ad98f4832 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ 
b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -65,6 +65,8 @@ BUILTIN(__builtin_arm_dmb, "vUi", "nc") BUILTIN(__builtin_arm_dsb, "vUi", "nc") BUILTIN(__builtin_arm_isb, "vUi", "nc") +BUILTIN(__builtin_arm_jcvt, "Zid", "nc") + // Prefetch BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc") diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 6011ae17b78ea..a02530ad06756 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -118,6 +118,28 @@ void AArch64TargetInfo::getTargetDefinesARMV82A(const LangOptions &Opts, getTargetDefinesARMV81A(Opts, Builder); } +void AArch64TargetInfo::getTargetDefinesARMV83A(const LangOptions &Opts, + MacroBuilder &Builder) const { + Builder.defineMacro("__ARM_FEATURE_JCVT", "1"); + // Also include the Armv8.2 defines + getTargetDefinesARMV82A(Opts, Builder); +} + +void AArch64TargetInfo::getTargetDefinesARMV84A(const LangOptions &Opts, + MacroBuilder &Builder) const { + // Also include the Armv8.3 defines + // FIXME: Armv8.4 makes some extensions mandatory. Handle them here. + getTargetDefinesARMV83A(Opts, Builder); +} + +void AArch64TargetInfo::getTargetDefinesARMV85A(const LangOptions &Opts, + MacroBuilder &Builder) const { + // Also include the Armv8.4 defines + // FIXME: Armv8.5 makes some extensions mandatory. Handle them here. + getTargetDefinesARMV84A(Opts, Builder); +} + + void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { // Target identification. 
@@ -209,6 +231,15 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, case llvm::AArch64::ArchKind::ARMV8_2A: getTargetDefinesARMV82A(Opts, Builder); break; + case llvm::AArch64::ArchKind::ARMV8_3A: + getTargetDefinesARMV83A(Opts, Builder); + break; + case llvm::AArch64::ArchKind::ARMV8_4A: + getTargetDefinesARMV84A(Opts, Builder); + break; + case llvm::AArch64::ArchKind::ARMV8_5A: + getTargetDefinesARMV85A(Opts, Builder); + break; } // All of the __sync_(bool|val)_compare_and_swap_(1|2|4|8) builtins work. @@ -256,6 +287,12 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, ArchKind = llvm::AArch64::ArchKind::ARMV8_1A; if (Feature == "+v8.2a") ArchKind = llvm::AArch64::ArchKind::ARMV8_2A; + if (Feature == "+v8.3a") + ArchKind = llvm::AArch64::ArchKind::ARMV8_3A; + if (Feature == "+v8.4a") + ArchKind = llvm::AArch64::ArchKind::ARMV8_4A; + if (Feature == "+v8.5a") + ArchKind = llvm::AArch64::ArchKind::ARMV8_5A; if (Feature == "+fullfp16") HasFullFP16 = 1; if (Feature == "+dotprod") diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index 0241b585c4d13..de0aed78e037e 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -59,6 +59,12 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { MacroBuilder &Builder) const; void getTargetDefinesARMV82A(const LangOptions &Opts, MacroBuilder &Builder) const; + void getTargetDefinesARMV83A(const LangOptions &Opts, + MacroBuilder &Builder) const; + void getTargetDefinesARMV84A(const LangOptions &Opts, + MacroBuilder &Builder) const; + void getTargetDefinesARMV85A(const LangOptions &Opts, + MacroBuilder &Builder) const; void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c58d1018fa0ec..acaa81ae8a9a6 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6977,6 
+6977,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } + if (BuiltinID == AArch64::BI__builtin_arm_jcvt) { + assert((getContext().getTypeSize(E->getType()) == 32) && + "__jcvt of unusual size!"); + llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg); + } + if (BuiltinID == AArch64::BI__clear_cache) { assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); const FunctionDecl *FD = E->getDirectCallee(); diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h index 08d65fa0d0696..096cc261af2c6 100644 --- a/clang/lib/Headers/arm_acle.h +++ b/clang/lib/Headers/arm_acle.h @@ -597,6 +597,14 @@ __crc32cd(uint32_t __a, uint64_t __b) { } #endif +/* Armv8.3-A Javascript conversion intrinsic */ +#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_JCVT) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +__jcvt(double __a) { + return __builtin_arm_jcvt(__a); +} +#endif + /* 10.1 Special register intrinsics */ #define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg) #define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg) diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c index e8a744372de2b..beca937350678 100644 --- a/clang/test/CodeGen/arm_acle.c +++ b/clang/test/CodeGen/arm_acle.c @@ -2,6 +2,9 @@ // RUN: %clang_cc1 -ffreestanding -triple armv8-eabi -target-cpu cortex-a57 -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=ARM -check-prefix=AArch32 -check-prefix=ARM-NEWPM -check-prefix=AArch32-NEWPM // RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +neon -target-feature +crc -target-feature +crypto -O2 -fno-experimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=ARM -check-prefix=AArch64 -check-prefix=ARM-LEGACY 
-check-prefix=AArch64-LEGACY // RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +neon -target-feature +crc -target-feature +crypto -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=ARM -check-prefix=AArch64 -check-prefix=ARM-NEWPM -check-prefix=AArch64-NEWPM +// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +v8.3a -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=AArch64-v8_3 +// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +v8.4a -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=AArch64-v8_3 +// RUN: %clang_cc1 -ffreestanding -triple aarch64-eabi -target-cpu cortex-a57 -target-feature +v8.5a -O2 -fexperimental-new-pass-manager -S -emit-llvm -o - %s | FileCheck %s -check-prefix=AArch64-v8_3 #include @@ -823,3 +826,11 @@ void test_wsrp(void *v) { // AArch64: ![[M0]] = !{!"1:2:3:4:5"} // AArch64: ![[M1]] = !{!"sysreg"} + +// AArch64-v8_3-LABEL: @test_jcvt( +// AArch64-v8_3: call i32 @llvm.aarch64.fjcvtzs +#ifdef __ARM_64BIT_STATE +int32_t test_jcvt(double v) { + return __jcvt(v); +} +#endif diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c index f164c2f6f3647..5ec63fba82b5b 100644 --- a/clang/test/CodeGen/builtins-arm64.c +++ b/clang/test/CodeGen/builtins-arm64.c @@ -58,6 +58,12 @@ void prefetch() { // CHECK: call {{.*}} @llvm.prefetch(i8* null, i32 0, i32 3, i32 0) } +int32_t jcvt(double v) { + //CHECK-LABEL: @jcvt( + //CHECK: call i32 @llvm.aarch64.fjcvtzs + return __builtin_arm_jcvt(v); +} + __typeof__(__builtin_arm_rsr("1:2:3:4:5")) rsr(void); uint32_t rsr() { diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 720a7bdde2375..7616d6a90c1bc 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ 
b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -31,6 +31,8 @@ def int_aarch64_sdiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, def int_aarch64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; +def int_aarch64_fjcvtzs : Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + //===----------------------------------------------------------------------===// // HINT diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 41497a6c4fbc0..897b3ebb3847f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -717,7 +717,9 @@ let Predicates = [HasPA] in { // v8.3a floating point conversion for javascript let Predicates = [HasJS, HasFPARMv8] in def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, - "fjcvtzs", []> { + "fjcvtzs", + [(set GPR32:$Rd, + (int_aarch64_fjcvtzs FPR64:$Rn))]> { let Inst{31} = 0; } // HasJS, HasFPARMv8 diff --git a/llvm/test/CodeGen/AArch64/fjcvtzs.ll b/llvm/test/CodeGen/AArch64/fjcvtzs.ll new file mode 100644 index 0000000000000..017694dcd7b19 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fjcvtzs.ll @@ -0,0 +1,10 @@ +; RUN: llc -mtriple=arm64-eabi -mattr=+jsconv -o - %s | FileCheck %s + +define i32 @test_jcvt(double %v) { +; CHECK-LABEL: test_jcvt: +; CHECK: fjcvtzs w0, d0 + %val = call i32 @llvm.aarch64.fjcvtzs(double %v) + ret i32 %val +} + +declare i32 @llvm.aarch64.fjcvtzs(double) diff --git a/llvm/utils/git-svn/git-llvm b/llvm/utils/git-svn/git-llvm index 289898d15b5fb..13c49b5fdf0c6 100755 --- a/llvm/utils/git-svn/git-llvm +++ b/llvm/utils/git-svn/git-llvm @@ -372,7 +372,7 @@ def svn_push_one_rev(svn_repo, rev, git_to_svn_mapping, dry_run): # Now we're ready to commit. 
commit_msg = git('show', '--pretty=%B', '--quiet', rev) if not dry_run: - commit_args = ['commit', '-m', commit_msg] + commit_args = ['commit', '-m', commit_msg, '--username', 'ktkachov'] if '--force-interactive' in svn(svn_repo, 'commit', '--help'): commit_args.append('--force-interactive') log(svn(svn_repo, *commit_args)) From 1781c28a0d3433b0608f504e45660f8511ba7742 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 16 Jul 2019 10:14:53 +0000 Subject: [PATCH 225/451] Remove username from git-llvm script, erroneously added in 366197 llvm-svn: 366198 --- llvm/utils/git-svn/git-llvm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/git-svn/git-llvm b/llvm/utils/git-svn/git-llvm index 13c49b5fdf0c6..289898d15b5fb 100755 --- a/llvm/utils/git-svn/git-llvm +++ b/llvm/utils/git-svn/git-llvm @@ -372,7 +372,7 @@ def svn_push_one_rev(svn_repo, rev, git_to_svn_mapping, dry_run): # Now we're ready to commit. commit_msg = git('show', '--pretty=%B', '--quiet', rev) if not dry_run: - commit_args = ['commit', '-m', commit_msg, '--username', 'ktkachov'] + commit_args = ['commit', '-m', commit_msg] if '--force-interactive' in svn(svn_repo, 'commit', '--help'): commit_args.append('--force-interactive') log(svn(svn_repo, *commit_args)) From 06377ae2e585fd4df695f973cd8ee6b3f76bfe5f Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 16 Jul 2019 10:17:06 +0000 Subject: [PATCH 226/451] [clangd] Don't rebuild background index until we indexed one TU per thread. Summary: This increases the odds that the boosted file (cpp file matching header) will be ready. (It always enqueues first, so it'll be present unless another thread indexes *two* files before the first thread indexes one.) 
Reviewers: kadircet Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64682 llvm-svn: 366199 --- clang-tools-extra/clangd/index/Background.cpp | 2 +- clang-tools-extra/clangd/index/BackgroundRebuild.h | 14 ++++++++------ .../clangd/unittests/BackgroundIndexTests.cpp | 10 +++++----- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp index 458e6fc355f82..23445e16b2f3f 100644 --- a/clang-tools-extra/clangd/index/Background.cpp +++ b/clang-tools-extra/clangd/index/Background.cpp @@ -127,7 +127,7 @@ BackgroundIndex::BackgroundIndex( BackgroundIndexStorage::Factory IndexStorageFactory, size_t ThreadPoolSize) : SwapIndex(llvm::make_unique()), FSProvider(FSProvider), CDB(CDB), BackgroundContext(std::move(BackgroundContext)), - Rebuilder(this, &IndexedSymbols), + Rebuilder(this, &IndexedSymbols, ThreadPoolSize), IndexStorageFactory(std::move(IndexStorageFactory)), CommandsChanged( CDB.watch([&](const std::vector &ChangedFiles) { diff --git a/clang-tools-extra/clangd/index/BackgroundRebuild.h b/clang-tools-extra/clangd/index/BackgroundRebuild.h index 5a6227e8baab3..f660957f62419 100644 --- a/clang-tools-extra/clangd/index/BackgroundRebuild.h +++ b/clang-tools-extra/clangd/index/BackgroundRebuild.h @@ -16,6 +16,7 @@ #include "index/FileIndex.h" #include "index/Index.h" +#include "llvm/Support/Threading.h" namespace clang { namespace clangd { @@ -45,12 +46,9 @@ namespace clangd { // This class is exposed in the header so it can be tested. class BackgroundIndexRebuilder { public: - // Thresholds for rebuilding as TUs get indexed. 
- static constexpr unsigned TUsBeforeFirstBuild = 5; - static constexpr unsigned TUsBeforeRebuild = 100; - - BackgroundIndexRebuilder(SwapIndex *Target, FileSymbols *Source) - : Target(Target), Source(Source) {} + BackgroundIndexRebuilder(SwapIndex *Target, FileSymbols *Source, + unsigned Threads) + : TUsBeforeFirstBuild(Threads), Target(Target), Source(Source) {} // Called to indicate a TU has been indexed. // May rebuild, if enough TUs have been indexed. @@ -71,6 +69,10 @@ class BackgroundIndexRebuilder { // Ensures we won't start any more rebuilds. void shutdown(); + // Thresholds for rebuilding as TUs get indexed. + const unsigned TUsBeforeFirstBuild; // Typically one per worker thread. + const unsigned TUsBeforeRebuild = 100; + private: // Run Check under the lock, and rebuild if it returns true. void maybeRebuild(const char *Reason, std::function Check); diff --git a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp index 15d064a995ca8..79e081bd67893 100644 --- a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp @@ -575,7 +575,8 @@ TEST_F(BackgroundIndexTest, CmdLineHash) { class BackgroundIndexRebuilderTest : public testing::Test { protected: BackgroundIndexRebuilderTest() - : Target(llvm::make_unique()), Rebuilder(&Target, &Source) { + : Target(llvm::make_unique()), + Rebuilder(&Target, &Source, /*Threads=*/10) { // Prepare FileSymbols with TestSymbol in it, for checkRebuild. 
TestSymbol.ID = SymbolID("foo"); } @@ -610,11 +611,10 @@ class BackgroundIndexRebuilderTest : public testing::Test { }; TEST_F(BackgroundIndexRebuilderTest, IndexingTUs) { - for (unsigned I = 0; I < BackgroundIndexRebuilder::TUsBeforeFirstBuild - 1; - ++I) + for (unsigned I = 0; I < Rebuilder.TUsBeforeFirstBuild - 1; ++I) EXPECT_FALSE(checkRebuild([&] { Rebuilder.indexedTU(); })); EXPECT_TRUE(checkRebuild([&] { Rebuilder.indexedTU(); })); - for (unsigned I = 0; I < BackgroundIndexRebuilder::TUsBeforeRebuild - 1; ++I) + for (unsigned I = 0; I < Rebuilder.TUsBeforeRebuild - 1; ++I) EXPECT_FALSE(checkRebuild([&] { Rebuilder.indexedTU(); })); EXPECT_TRUE(checkRebuild([&] { Rebuilder.indexedTU(); })); } @@ -640,7 +640,7 @@ TEST_F(BackgroundIndexRebuilderTest, LoadingShards) { // No rebuilding for indexed files while loading. Rebuilder.startLoading(); - for (unsigned I = 0; I < 3 * BackgroundIndexRebuilder::TUsBeforeRebuild; ++I) + for (unsigned I = 0; I < 3 * Rebuilder.TUsBeforeRebuild; ++I) EXPECT_FALSE(checkRebuild([&] { Rebuilder.indexedTU(); })); // But they get indexed when we're done, even if no shards were loaded. EXPECT_TRUE(checkRebuild([&] { Rebuilder.doneLoading(); })); From 0afffab0d1ea1a8eeccb2d32b976bbecacd4178a Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 16 Jul 2019 10:30:21 +0000 Subject: [PATCH 227/451] [SemaTemplate] Fix uncorrected typos after pack expansion Summary: This case is particularly important for clangd, as it is triggered after inserting the snippet for variadic functions. 
Reviewers: kadircet, ilya-biryukov Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64677 llvm-svn: 366200 --- clang/lib/Sema/SemaTemplateVariadic.cpp | 1 + clang/test/SemaTemplate/typo-variadic.cpp | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 clang/test/SemaTemplate/typo-variadic.cpp diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp index 9b23624a9a81f..d97626551a41f 100644 --- a/clang/lib/Sema/SemaTemplateVariadic.cpp +++ b/clang/lib/Sema/SemaTemplateVariadic.cpp @@ -619,6 +619,7 @@ ExprResult Sema::CheckPackExpansion(Expr *Pattern, SourceLocation EllipsisLoc, if (!Pattern->containsUnexpandedParameterPack()) { Diag(EllipsisLoc, diag::err_pack_expansion_without_parameter_packs) << Pattern->getSourceRange(); + CorrectDelayedTyposInExpr(Pattern); return ExprError(); } diff --git a/clang/test/SemaTemplate/typo-variadic.cpp b/clang/test/SemaTemplate/typo-variadic.cpp new file mode 100644 index 0000000000000..c9b777aebbe91 --- /dev/null +++ b/clang/test/SemaTemplate/typo-variadic.cpp @@ -0,0 +1,2 @@ +// RUN: %clang_cc1 -fsyntax-only %s -verify +int x = m(s...); // expected-error{{pack expansion does not}} expected-error{{undeclared identifier}} From 971ac4ca2cb8172a1c8f59c7703680b41aa7a5f5 Mon Sep 17 00:00:00 2001 From: Owen Reynolds Date: Tue, 16 Jul 2019 11:02:11 +0000 Subject: [PATCH 228/451] Reapply [llvm-ar][test] Add to MRI test coverage This reapplies 363232 without mri-utf8.test due to failing on Darwin. 
Differential Revision: https://reviews.llvm.org/D63197 llvm-svn: 366201 --- llvm/test/tools/llvm-ar/mri-addlib.test | 45 +++++++++++++++++++++ llvm/test/tools/llvm-ar/mri-addmod.test | 27 +++++++++++++ llvm/test/tools/llvm-ar/mri-comments.test | 19 +++++++++ llvm/test/tools/llvm-ar/mri-end.test | 48 +++++++++++++++++++++++ 4 files changed, 139 insertions(+) create mode 100644 llvm/test/tools/llvm-ar/mri-addlib.test create mode 100644 llvm/test/tools/llvm-ar/mri-addmod.test create mode 100644 llvm/test/tools/llvm-ar/mri-comments.test create mode 100644 llvm/test/tools/llvm-ar/mri-end.test diff --git a/llvm/test/tools/llvm-ar/mri-addlib.test b/llvm/test/tools/llvm-ar/mri-addlib.test new file mode 100644 index 0000000000000..c297653e2abb1 --- /dev/null +++ b/llvm/test/tools/llvm-ar/mri-addlib.test @@ -0,0 +1,45 @@ +## Test the ADDLIB MRI command. + +# RUN: rm -rf %t && mkdir -p %t +# RUN: yaml2obj %s -o %t/f.o +# RUN: llvm-ar r %t/f.a %t/f.o + +## Merge contents of archives. +# RUN: echo "CREATE %t/addlib.a" > %t/addlib.mri +# RUN: echo "ADDLIB %t/f.a" >> %t/addlib.mri +# RUN: echo "SAVE" >> %t/addlib.mri +# RUN: llvm-ar -M < %t/addlib.mri +# RUN: llvm-nm --print-armap %t/addlib.a | FileCheck --check-prefix=SYMS %s +# RUN: llvm-ar t %t/addlib.a | FileCheck --check-prefix=FILES %s + +# SYMS: f in {{.*}} +# FILES: f.o + +## ADDLIB with non-archive file. +# RUN: echo "CREATE %t/badlib.a" > %t/badlib.mri +# RUN: echo "ADDLIB %s" >> %t/badlib.mri +# RUN: echo "SAVE" >> %t/badlib.mri +# RUN: not llvm-ar -M < %t/badlib.mri 2>&1 | FileCheck --check-prefix=PARSE %s +# RUN: not ls %t/badlib.a + +# PARSE: Could not parse library + +## No create command. 
+# RUN: echo "ADDLIB %t/f.a" > %t/nocreate.mri +# RUN: echo "SAVE" >> %t/nocreate.mri +# RUN: not llvm-ar -M < %t/nocreate.mri + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: f + Binding: STB_GLOBAL + Section: .text +... diff --git a/llvm/test/tools/llvm-ar/mri-addmod.test b/llvm/test/tools/llvm-ar/mri-addmod.test new file mode 100644 index 0000000000000..2b6e4dc3adbee --- /dev/null +++ b/llvm/test/tools/llvm-ar/mri-addmod.test @@ -0,0 +1,27 @@ +## Test the ADDMOD MRI command. + +# RUN: rm -rf %t && mkdir -p %t +# RUN: yaml2obj %s -o %t/f.o + +# RUN: echo "CREATE %t/addmod.a" > %t/addmod.mri +# RUN: echo "ADDMOD %t/f.o" >> %t/addmod.mri +# RUN: echo "SAVE" >> %t/addmod.mri +# RUN: llvm-ar -M < %t/addmod.mri +# RUN: llvm-nm --print-armap %t/addmod.a | FileCheck %s + +# CHECK: f in f.o + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: f + Binding: STB_GLOBAL + Section: .text +... diff --git a/llvm/test/tools/llvm-ar/mri-comments.test b/llvm/test/tools/llvm-ar/mri-comments.test new file mode 100644 index 0000000000000..f5fa61768aeb0 --- /dev/null +++ b/llvm/test/tools/llvm-ar/mri-comments.test @@ -0,0 +1,19 @@ +# Test different MRI comment formats and white space. 
+ +RUN: rm -rf %t && mkdir -p %t +RUN: yaml2obj %S/Inputs/elf.yaml -o %t/elf.o + +RUN: echo "create %t/mri.ar;comment" > %t/script.mri +RUN: echo "addmod %t/elf.o * comment" >> %t/script.mri +RUN: echo "; comment" >> %t/script.mri +RUN: echo " ;comment" >> %t/script.mri +RUN: echo "* comment" >> %t/script.mri +RUN: echo " *comment" >> %t/script.mri +RUN: echo "" >> %t/script.mri +RUN: echo " " >> %t/script.mri +RUN: echo " save" >> %t/script.mri + +RUN: llvm-ar -M < %t/script.mri +RUN: llvm-ar t %t/mri.ar | FileCheck %s + +CHECK: elf.o diff --git a/llvm/test/tools/llvm-ar/mri-end.test b/llvm/test/tools/llvm-ar/mri-end.test new file mode 100644 index 0000000000000..db4abce7d180e --- /dev/null +++ b/llvm/test/tools/llvm-ar/mri-end.test @@ -0,0 +1,48 @@ +# The END MRI command is optional. Scripts that omit +# or include END should be handled by llvm-ar. +RUN: rm -rf %t && mkdir -p %t + +# Empty File +RUN: yaml2obj %S/Inputs/elf.yaml -o %t/elf.o + +RUN: touch %t/empty.mri +RUN: llvm-ar -M < %t/empty.mri + +RUN: echo "END" > %t/empty-end.mri +RUN: llvm-ar -M < %t/empty-end.mri + +# Comment only +RUN: echo "; a comment" > %t/comment.mri +RUN: llvm-ar -M < %t/comment.mri + +RUN: echo "; a comment" > %t/comment-end.mri +RUN: echo "END" > %t/comment-end.mri +RUN: llvm-ar -M < %t/comment-end.mri + +# Without Save +RUN: echo "create %t/mri.ar" > %t/no-save.mri +RUN: echo "addmod %t/elf.o" >> %t/no-save.mri +RUN: llvm-ar -M < %t/no-save.mri +RUN: test ! -e %t/mri.ar + +RUN: echo "create %t/mri.ar" > %t/no-save-end.mri +RUN: echo "addmod %t/elf.o" >> %t/no-save-end.mri +RUN: echo "END" > %t/no-save-end.mri +RUN: llvm-ar -M < %t/no-save-end.mri +RUN: test ! 
-e %t/mri.ar + +# With Save +RUN: echo "create %t/mri.ar" > %t/save.mri +RUN: echo "addmod %t/elf.o" >> %t/save.mri +RUN: echo "save" >> %t/save.mri +RUN: llvm-ar -M < %t/save.mri +RUN: llvm-ar t %t/mri.ar | FileCheck %s + +RUN: echo "create %t/mri.ar" > %t/save-end.mri +RUN: echo "addmod %t/elf.o" >> %t/save-end.mri +RUN: echo "save" >> %t/save-end.mri +RUN: echo "END" > %t/no-save-end.mri +RUN: llvm-ar -M < %t/save-end.mri +RUN: llvm-ar t %t/mri.ar | FileCheck %s + +CHECK: elf.o From a5dc9c98352c396c0114403486fbd47e092d084a Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Tue, 16 Jul 2019 11:06:43 +0000 Subject: [PATCH 229/451] [Driver] Don't pass --dynamic-linker to ld on Solaris I noticed that clang currently passes --dynamic-linker to ld. This has been the case since Solaris 11 support was added initially back in 2012 by David Chisnall (r150580). I couldn't find any patch submission, let alone a justification, for this, and it seems completely useless: --dynamic-linker is a gld compatibility form of the option, the native option being -I. First of all, however, the dynamic linker passed is simply the default, so there's no reason at all to specify it in the first place. This patch removes passing the option and adjusts the affected testcase accordingly. Tested on x86_64-pc-solaris2.11 and sparcv9-sun-solaris2.11. 
Differential Revision: https://reviews.llvm.org/D64493 llvm-svn: 366202 --- clang/lib/Driver/ToolChains/Solaris.cpp | 4 ---- clang/test/Driver/Inputs/solaris_sparc_tree/usr/lib/ld.so.1 | 0 .../Driver/Inputs/solaris_sparc_tree/usr/lib/sparcv9/ld.so.1 | 0 .../test/Driver/Inputs/solaris_x86_tree/usr/lib/amd64/ld.so.1 | 0 clang/test/Driver/Inputs/solaris_x86_tree/usr/lib/ld.so.1 | 0 clang/test/Driver/solaris-ld.c | 4 ---- 6 files changed, 8 deletions(-) delete mode 100644 clang/test/Driver/Inputs/solaris_sparc_tree/usr/lib/ld.so.1 delete mode 100644 clang/test/Driver/Inputs/solaris_sparc_tree/usr/lib/sparcv9/ld.so.1 delete mode 100644 clang/test/Driver/Inputs/solaris_x86_tree/usr/lib/amd64/ld.so.1 delete mode 100644 clang/test/Driver/Inputs/solaris_x86_tree/usr/lib/ld.so.1 diff --git a/clang/lib/Driver/ToolChains/Solaris.cpp b/clang/lib/Driver/ToolChains/Solaris.cpp index c65b783739598..38f24d4cf7e74 100644 --- a/clang/lib/Driver/ToolChains/Solaris.cpp +++ b/clang/lib/Driver/ToolChains/Solaris.cpp @@ -65,10 +65,6 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-Bdynamic"); if (Args.hasArg(options::OPT_shared)) { CmdArgs.push_back("-shared"); - } else { - CmdArgs.push_back("--dynamic-linker"); - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("ld.so.1"))); } // libpthread has been folded into libc since Solaris 10, no need to do diff --git a/clang/test/Driver/Inputs/solaris_sparc_tree/usr/lib/ld.so.1 b/clang/test/Driver/Inputs/solaris_sparc_tree/usr/lib/ld.so.1 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/clang/test/Driver/Inputs/solaris_sparc_tree/usr/lib/sparcv9/ld.so.1 b/clang/test/Driver/Inputs/solaris_sparc_tree/usr/lib/sparcv9/ld.so.1 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/clang/test/Driver/Inputs/solaris_x86_tree/usr/lib/amd64/ld.so.1 b/clang/test/Driver/Inputs/solaris_x86_tree/usr/lib/amd64/ld.so.1 deleted file mode 100644 index 
e69de29bb2d1d..0000000000000 diff --git a/clang/test/Driver/Inputs/solaris_x86_tree/usr/lib/ld.so.1 b/clang/test/Driver/Inputs/solaris_x86_tree/usr/lib/ld.so.1 deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/clang/test/Driver/solaris-ld.c b/clang/test/Driver/solaris-ld.c index 2fc5c91272aa0..59d03c4d82253 100644 --- a/clang/test/Driver/solaris-ld.c +++ b/clang/test/Driver/solaris-ld.c @@ -11,7 +11,6 @@ // CHECK-LD-SPARC32: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "sparc-sun-solaris2.11" // CHECK-LD-SPARC32-SAME: "-isysroot" "[[SYSROOT:[^"]+]]" // CHECK-LD-SPARC32: "{{.*}}ld{{(.exe)?}}" -// CHECK-LD-SPARC32-SAME: "--dynamic-linker" "[[SYSROOT]]/usr/lib{{/|\\\\}}ld.so.1" // CHECK-LD-SPARC32-SAME: "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2{{/|\\\\}}crt1.o" // CHECK-LD-SPARC32-SAME: "[[SYSROOT]]/usr/lib{{/|\\\\}}crti.o" // CHECK-LD-SPARC32-SAME: "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2{{/|\\\\}}crtbegin.o" @@ -35,7 +34,6 @@ // CHECK-LD-SPARC64: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "sparcv9-sun-solaris2.11" // CHECK-LD-SPARC64-SAME: "-isysroot" "[[SYSROOT:[^"]+]]" // CHECK-LD-SPARC64: "{{.*}}ld{{(.exe)?}}" -// CHECK-LD-SPARC64-SAME: "--dynamic-linker" "[[SYSROOT]]/usr/lib/sparcv9{{/|\\\\}}ld.so.1" // CHECK-LD-SPARC64-SAME: "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/sparcv9{{/|\\\\}}crt1.o" // CHECK-LD-SPARC64-SAME: "[[SYSROOT]]/usr/lib/sparcv9{{/|\\\\}}crti.o" // CHECK-LD-SPARC64-SAME: "[[SYSROOT]]/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/sparcv9{{/|\\\\}}crtbegin.o" @@ -59,7 +57,6 @@ // CHECK-LD-X32: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "i386-pc-solaris2.11" // CHECK-LD-X32-SAME: "-isysroot" "[[SYSROOT:[^"]+]]" // CHECK-LD-X32: "{{.*}}ld{{(.exe)?}}" -// CHECK-LD-X32-SAME: "--dynamic-linker" "[[SYSROOT]]/usr/lib{{/|\\\\}}ld.so.1" // CHECK-LD-X32-SAME: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt1.o" // CHECK-LD-X32-SAME: "[[SYSROOT]]/usr/lib{{/|\\\\}}crti.o" // CHECK-LD-X32-SAME: 
"[[SYSROOT]]/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4{{/|\\\\}}crtbegin.o" @@ -83,7 +80,6 @@ // CHECK-LD-X64: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "x86_64-pc-solaris2.11" // CHECK-LD-X64-SAME: "-isysroot" "[[SYSROOT:[^"]+]]" // CHECK-LD-X64: "{{.*}}ld{{(.exe)?}}" -// CHECK-LD-X64-SAME: "--dynamic-linker" "[[SYSROOT]]/usr/lib/amd64{{/|\\\\}}ld.so.1" // CHECK-LD-X64-SAME: "[[SYSROOT]]/usr/lib/amd64{{/|\\\\}}crt1.o" // CHECK-LD-X64-SAME: "[[SYSROOT]]/usr/lib/amd64{{/|\\\\}}crti.o" // CHECK-LD-X64-SAME: "[[SYSROOT]]/usr/gcc/4.9/lib/gcc/i386-pc-solaris2.11/4.9.4/amd64{{/|\\\\}}crtbegin.o" From a1370877d7f4621a4243f0ae1c22dbbfd6483f35 Mon Sep 17 00:00:00 2001 From: George Rimar Date: Tue, 16 Jul 2019 11:07:30 +0000 Subject: [PATCH 230/451] [Object/llvm-readelf/llvm-readobj] - Improve error reporting when e_shstrndx is broken. When e_shstrndx is broken, it is impossible to get a section name. In this patch I improved the error message we show and added tests for Object and for llvm-readelf/llvm-readobj Message was changed in two places: 1) llvm-readelf/llvm-readobj previously used a code from Object/ELF.h, now they have a modified version of it (it has less checks and allows dumping broken things). 2) Code in Object/ELF.h is still used for generic cases. 
Differential revision: https://reviews.llvm.org/D64714 llvm-svn: 366203 --- llvm/include/llvm/Object/ELF.h | 5 ++-- llvm/test/Object/invalid.test | 16 ++++++++++++ .../llvm-readobj/elf-invalid-shstrndx.test | 26 +++++++++++++++++++ llvm/tools/llvm-readobj/ELFDumper.cpp | 16 +++++++++--- llvm/tools/llvm-readobj/llvm-readobj.cpp | 13 +++++----- llvm/tools/llvm-readobj/llvm-readobj.h | 1 + 6 files changed, 65 insertions(+), 12 deletions(-) create mode 100644 llvm/test/tools/llvm-readobj/elf-invalid-shstrndx.test diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h index 7bc6dc4620c7d..cf8e4529bad96 100644 --- a/llvm/include/llvm/Object/ELF.h +++ b/llvm/include/llvm/Object/ELF.h @@ -466,9 +466,10 @@ ELFFile::getSectionStringTable(Elf_Shdr_Range Sections) const { if (!Index) // no section string table. return ""; + // TODO: Test a case when the sh_link of the section with index 0 is broken. if (Index >= Sections.size()) - // TODO: this error is untested. - return createError("invalid section index"); + return createError("section header string table index " + Twine(Index) + + " does not exist"); return getStringTable(&Sections[Index]); } diff --git a/llvm/test/Object/invalid.test b/llvm/test/Object/invalid.test index 97ebb9f857393..5723c4aef3387 100644 --- a/llvm/test/Object/invalid.test +++ b/llvm/test/Object/invalid.test @@ -536,3 +536,19 @@ ProgramHeaders: FileSize: 0xffff0000 Sections: - Section: .dynamic + +# RUN: yaml2obj --docnum=25 %s -o %t25 +# RUN: not obj2yaml 2>&1 %t25 | FileCheck %s -DFILE=%t25 --check-prefix=INVALID-SHSTRNDX + +# INVALID-SHSTRNDX: Error reading file: [[FILE]]: section header string table index 255 does not exist + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + SHStrNdx: 0xFF +Sections: + - Name: .foo + Type: SHT_PROGBITS diff --git a/llvm/test/tools/llvm-readobj/elf-invalid-shstrndx.test b/llvm/test/tools/llvm-readobj/elf-invalid-shstrndx.test new file mode 
100644 index 0000000000000..6dc3b11dfd283 --- /dev/null +++ b/llvm/test/tools/llvm-readobj/elf-invalid-shstrndx.test @@ -0,0 +1,26 @@ +# RUN: yaml2obj %s -o %t +# RUN: not llvm-readelf --headers -S 2>&1 %t | FileCheck %s -DFILE=%t --check-prefix=GNU +# RUN: not llvm-readobj --headers -S 2>&1 %t | FileCheck %s -DFILE=%t --check-prefix=LLVM + +# GNU: ELF Header: +# GNU: Section header string table index: 255 +# GNU-NEXT: There are 4 section headers, starting at offset 0x40: +# GNU: Section Headers: +# GNU-NEXT: [Nr] Name +# GNU-EMPTY: +# GNU-NEXT: error: '[[FILE]]': section header string table index 255 does not exist + +# LLVM: ElfHeader { +# LLVM: StringTableSectionIndex: 255 +# LLVM-NEXT: } +# LLVM-NEXT: Sections [ +# LLVM-EMPTY: +# LLVM-NEXT: error: '[[FILE]]': section header string table index 255 does not exist + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + SHStrNdx: 0xFF diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 589199c009f63..4e1cb7d544e79 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -183,6 +183,8 @@ template class ELFDumper : public ObjDumper { void printELFLinkerOptions() override; + const object::ELFObjectFile *getElfObject() const { return ObjF; }; + private: std::unique_ptr> ELFDumperStyle; @@ -3009,15 +3011,19 @@ static std::string getSectionTypeString(unsigned Arch, unsigned Type) { template static StringRef getSectionName(const typename ELFT::Shdr &Sec, - const ELFFile &Obj, + const ELFObjectFile &ElfObj, ArrayRef Sections) { + const ELFFile &Obj = *ElfObj.getELFFile(); uint32_t Index = Obj.getHeader()->e_shstrndx; if (Index == ELF::SHN_XINDEX) Index = Sections[0].sh_link; if (!Index) // no section string table. return ""; + // TODO: Test a case when the sh_link of the section with index 0 is broken. 
if (Index >= Sections.size()) - reportError("invalid section index"); + reportError(ElfObj.getFileName(), + createError("section header string table index " + + Twine(Index) + " does not exist")); StringRef Data = toStringRef(unwrapOrError( Obj.template getSectionContentsAsArray(&Sections[Index]))); return unwrapOrError(Obj.getSectionName(&Sec, Data)); @@ -3040,10 +3046,11 @@ void GNUStyle::printSectionHeaders(const ELFO *Obj) { printField(F); OS << "\n"; + const ELFObjectFile *ElfObj = this->dumper()->getElfObject(); size_t SectionIndex = 0; for (const Elf_Shdr &Sec : Sections) { Fields[0].Str = to_string(SectionIndex); - Fields[1].Str = getSectionName(Sec, *Obj, Sections); + Fields[1].Str = getSectionName(Sec, *ElfObj, Sections); Fields[2].Str = getSectionTypeString(Obj->getHeader()->e_machine, Sec.sh_type); Fields[3].Str = @@ -4590,8 +4597,9 @@ void LLVMStyle::printSectionHeaders(const ELFO *Obj) { int SectionIndex = -1; ArrayRef Sections = unwrapOrError(Obj->sections()); + const ELFObjectFile *ElfObj = this->dumper()->getElfObject(); for (const Elf_Shdr &Sec : Sections) { - StringRef Name = getSectionName(Sec, *Obj, Sections); + StringRef Name = getSectionName(Sec, *ElfObj, Sections); DictScope SectionD(W, "Section"); W.printNumber("Index", ++SectionIndex); W.printNumber("Name", Name, Sec.sh_name); diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp index b6d0493af700d..1bd5bb74bf290 100644 --- a/llvm/tools/llvm-readobj/llvm-readobj.cpp +++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp @@ -371,11 +371,18 @@ namespace opts { namespace llvm { LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg) { + fouts().flush(); errs() << "\n"; WithColor::error(errs()) << Msg << "\n"; exit(1); } +void reportError(StringRef Input, Error Err) { + if (Input == "-") + Input = ""; + error(createFileError(Input, std::move(Err))); +} + void reportWarning(Twine Msg) { fouts().flush(); errs() << "\n"; @@ -403,12 +410,6 @@ void 
error(std::error_code EC) { } // namespace llvm -static void reportError(StringRef Input, Error Err) { - if (Input == "-") - Input = ""; - error(createFileError(Input, std::move(Err))); -} - static void reportError(StringRef Input, std::error_code EC) { reportError(Input, errorCodeToError(EC)); } diff --git a/llvm/tools/llvm-readobj/llvm-readobj.h b/llvm/tools/llvm-readobj/llvm-readobj.h index ac8ced6121f49..0e02da4cb847a 100644 --- a/llvm/tools/llvm-readobj/llvm-readobj.h +++ b/llvm/tools/llvm-readobj/llvm-readobj.h @@ -22,6 +22,7 @@ namespace llvm { // Various helper functions. LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg); + void reportError(StringRef Input, Error Err); void reportWarning(Twine Msg); void warn(llvm::Error Err); void error(std::error_code EC); From eea828054d37ac2c7a025384c06eb603e5c0e189 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 16 Jul 2019 11:59:17 +0000 Subject: [PATCH 231/451] Document the LLVM_ENABLE_BINDINGS option llvm-svn: 366204 --- llvm/docs/CMake.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst index 507a85d09526d..f9b384458db38 100644 --- a/llvm/docs/CMake.rst +++ b/llvm/docs/CMake.rst @@ -598,6 +598,12 @@ LLVM-specific variables **LLVM_USE_NEWPM**:BOOL If enabled, use the experimental new pass manager. +**LLVM_USE_NEWPM**:BOOL + If enabled, use the experimental new pass manager. + +**LLVM_ENABLE_BINDINGS**:BOOL + If disabled, do not try to build the OCaml and go bindings. 
+ CMake Caches ============ From 116e58e2929dd32fa7e1e02aa8519085644ccf38 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 16 Jul 2019 12:05:54 +0000 Subject: [PATCH 232/451] remove a duplicate declaration llvm-svn: 366205 --- llvm/docs/CMake.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst index f9b384458db38..91fb5282206f2 100644 --- a/llvm/docs/CMake.rst +++ b/llvm/docs/CMake.rst @@ -598,9 +598,6 @@ LLVM-specific variables **LLVM_USE_NEWPM**:BOOL If enabled, use the experimental new pass manager. -**LLVM_USE_NEWPM**:BOOL - If enabled, use the experimental new pass manager. - **LLVM_ENABLE_BINDINGS**:BOOL If disabled, do not try to build the OCaml and go bindings. From 3a6aaa4391f058201d8cec737ab2e6903ef65892 Mon Sep 17 00:00:00 2001 From: Owen Reynolds Date: Tue, 16 Jul 2019 12:53:59 +0000 Subject: [PATCH 233/451] Reapply [llvm-ar][test] Increase llvm-ar test coverage This reapplies 365316 without extract.test due to failing on Darwin. 
Differential Revision: https://reviews.llvm.org/D63935 llvm-svn: 366206 --- llvm/test/tools/llvm-ar/create.test | 14 ++++ .../tools/llvm-ar/dash-before-letter.test | 12 ++++ llvm/test/tools/llvm-ar/default-add.test | 22 ++---- llvm/test/tools/llvm-ar/delete.test | 67 ++++++++++++++++++ llvm/test/tools/llvm-ar/insert-after.test | 59 ++++++++++++++++ llvm/test/tools/llvm-ar/insert-before.test | 59 ++++++++++++++++ llvm/test/tools/llvm-ar/move-after.test | 59 ++++++++++++++++ llvm/test/tools/llvm-ar/move-before.test | 69 ++++++++++++++++++ llvm/test/tools/llvm-ar/symtab.test | 70 +++++++++++++++++++ 9 files changed, 413 insertions(+), 18 deletions(-) create mode 100644 llvm/test/tools/llvm-ar/create.test create mode 100644 llvm/test/tools/llvm-ar/dash-before-letter.test create mode 100644 llvm/test/tools/llvm-ar/delete.test create mode 100644 llvm/test/tools/llvm-ar/insert-after.test create mode 100644 llvm/test/tools/llvm-ar/insert-before.test create mode 100644 llvm/test/tools/llvm-ar/move-after.test create mode 100644 llvm/test/tools/llvm-ar/move-before.test create mode 100644 llvm/test/tools/llvm-ar/symtab.test diff --git a/llvm/test/tools/llvm-ar/create.test b/llvm/test/tools/llvm-ar/create.test new file mode 100644 index 0000000000000..7e4951da5fa2b --- /dev/null +++ b/llvm/test/tools/llvm-ar/create.test @@ -0,0 +1,14 @@ +## Test the creation warning and suppression of that warning. 
+ +RUN: touch %t1.txt +RUN: touch %t2.txt + +RUN: rm -f %t.warning.ar +RUN: llvm-ar r %t.warning.ar %t1.txt %t2.txt 2>&1 \ +RUN: | FileCheck %s -DOUTPUT=%t.warning.ar + +CHECK: warning: creating [[OUTPUT]] + +RUN: rm -f %t.supressed.ar +RUN: llvm-ar cr %t.supressed.ar %t1.txt %t2.txt 2>&1 \ +RUN: | FileCheck --allow-empty /dev/null --implicit-check-not={{.}} diff --git a/llvm/test/tools/llvm-ar/dash-before-letter.test b/llvm/test/tools/llvm-ar/dash-before-letter.test new file mode 100644 index 0000000000000..3002a6de7ef90 --- /dev/null +++ b/llvm/test/tools/llvm-ar/dash-before-letter.test @@ -0,0 +1,12 @@ +# Test the use of dash before key letters. + +RUN: touch %t1.txt +RUN: touch %t2.txt + +RUN: rm -f %t.ar +RUN: llvm-ar s -cr %t.ar %t1.txt +RUN: llvm-ar -r -s %t.ar %t2.txt -s +RUN: llvm-ar -t %t.ar | FileCheck %s + +CHECK: 1.txt +CHECK-NEXT: 2.txt diff --git a/llvm/test/tools/llvm-ar/default-add.test b/llvm/test/tools/llvm-ar/default-add.test index 460965ac30582..e1ed69669442a 100644 --- a/llvm/test/tools/llvm-ar/default-add.test +++ b/llvm/test/tools/llvm-ar/default-add.test @@ -1,34 +1,20 @@ RUN: yaml2obj %S/Inputs/macho.yaml -o %t-macho.o RUN: yaml2obj %S/Inputs/coff.yaml -o %t-coff.o +RUN: yaml2obj %S/Inputs/elf.yaml -o %t-elf.o RUN: rm -f %t.ar RUN: llvm-ar crs %t.ar %t-macho.o RUN: grep -q __.SYMDEF %t.ar -Test that an option string prefixed by a dash works. -RUN: llvm-ar -crs %t.ar %t-coff.o -RUN: grep -q __.SYMDEF %t.ar RUN: rm -f %t.ar RUN: llvm-ar crs %t.ar %t-coff.o RUN: not grep -q __.SYMDEF %t.ar RUN: llvm-ar crs %t.ar %t-macho.o RUN: not grep -q __.SYMDEF %t.ar - -RUN: rm -f %t.ar -Test that multiple dashed options works. -RUN: llvm-ar -c -r -s %t.ar %t-macho.o -RUN: grep -q __.SYMDEF %t.ar -Test with duplicated options. -RUN: llvm-ar -c -r -s -c -s %t.ar %t-coff.o -RUN: grep -q __.SYMDEF %t.ar +RUN: llvm-ar crs %t.ar %t-elf.o +RUN: not grep -q __.SYMDEF %t.ar RUN: rm -f %t.ar Test with the options in a different order. 
RUN: llvm-ar rsc %t.ar %t-macho.o -RUN: grep -q __.SYMDEF %t.ar -Test with options everywhere. -RUN: llvm-ar rsc -cs -sc %t.ar %t-coff.o -cs -sc -RUN: grep -q __.SYMDEF %t.ar - -Ensure that we select the existing format when updating. - +RUN: grep -q __.SYMDEF %t.ar \ No newline at end of file diff --git a/llvm/test/tools/llvm-ar/delete.test b/llvm/test/tools/llvm-ar/delete.test new file mode 100644 index 0000000000000..d5ab797664173 --- /dev/null +++ b/llvm/test/tools/llvm-ar/delete.test @@ -0,0 +1,67 @@ +## Test the deletion of members and that symbols are removed from the symbol table. + +# RUN: yaml2obj %s -o %t-delete.o --docnum=1 +# RUN: yaml2obj %s -o %t-keep.o --docnum=2 +# RUN: touch %t1.txt +# RUN: touch %t2.txt + +## Add file: +# RUN: rm -f %t.a +# RUN: llvm-ar rc %t.a %t1.txt %t-delete.o %t-keep.o %t2.txt +# RUN: llvm-nm --print-armap %t.a \ +# RUN: | FileCheck %s --check-prefix=SYMBOL-ADDED +# RUN: llvm-ar t %t.a | FileCheck %s --check-prefix=FILE-ADDED + +# SYMBOL-ADDED: symbol1 +# SYMBOL-ADDED-NEXT: symbol2 + +# FILE-ADDED: 1.txt +# FILE-ADDED-NEXT: delete.o +# FILE-ADDED-NEXT: keep.o +# FILE-ADDED-NEXT: 2.txt + +## Delete file that is not a member: +# RUN: cp %t.a %t-archive-copy.a +# RUN: llvm-ar d %t.a t/missing.o +# RUN: cmp %t.a %t-archive-copy.a + +## Delete file: +# RUN: llvm-ar d %t.a %t-delete.o +# RUN: llvm-nm --print-armap %t.a \ +# RUN: | FileCheck %s --check-prefix=SYMBOL-DELETED --implicit-check-not symbol1 +# RUN: llvm-ar t %t.a \ +# RUN: | FileCheck %s --check-prefix=FILE-DELETED --implicit-check-not delete.o + +# SYMBOL-DELETED: symbol2 + +# FILE-DELETED: 1.txt +# FILE-DELETED-NEXT: keep.o +# FILE-DELETED-NEXT: 2.txt + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol1 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 
+Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol2 + Binding: STB_GLOBAL + Section: .text diff --git a/llvm/test/tools/llvm-ar/insert-after.test b/llvm/test/tools/llvm-ar/insert-after.test new file mode 100644 index 0000000000000..cd8ee9409c6b4 --- /dev/null +++ b/llvm/test/tools/llvm-ar/insert-after.test @@ -0,0 +1,59 @@ +## Test inserting files after a file. + +RUN: touch %t1.txt +RUN: touch %t2.txt +RUN: touch %t3.txt +RUN: touch %t4.txt + +# Insert one file: +RUN: rm -f %t-one.a +RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt +RUN: llvm-ar ra %t1.txt %t-one.a %t3.txt +RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE + +ONE: 1.txt +ONE-NEXT: 3.txt +ONE-NEXT: 2.txt + +# Insert file at back: +RUN: rm -f %t-back.a +RUN: llvm-ar rc %t-back.a %t1.txt %t2.txt +RUN: llvm-ar ra %t2.txt %t-back.a %t3.txt +RUN: llvm-ar t %t-back.a | FileCheck %s --check-prefix=BACK + +BACK: 1.txt +BACK-NEXT: 2.txt +BACK-NEXT: 3.txt + +# Insert multiple files: +RUN: rm -f %t-multiple.a +RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt +RUN: llvm-ar ra %t1.txt %t-multiple.a %t4.txt %t3.txt +RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE + +MULTIPLE: 1.txt +MULTIPLE-NEXT: 4.txt +MULTIPLE-NEXT: 3.txt +MULTIPLE-NEXT: 2.txt + +# Insert after invalid file: +RUN: rm -f %t-invalid.a +RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt +RUN: not llvm-ar ra invalid.txt %t-invalid.a %t2.txt 2>&1 \ +RUN: | FileCheck %s --check-prefix=ERROR +RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID + +ERROR: error: Insertion point not found. 
+INVALID: 1.txt +INVALID-NEXT: 2.txt +INVALID-NEXT: 3.txt + +# Insert file at the same position: +RUN: rm -f %t-position.a +RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt +RUN: llvm-ar ra %t1.txt %t-position.a %t2.txt +RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION + +POSITION: 1.txt +POSITION-NEXT: 2.txt +POSITION-NEXT: 3.txt diff --git a/llvm/test/tools/llvm-ar/insert-before.test b/llvm/test/tools/llvm-ar/insert-before.test new file mode 100644 index 0000000000000..61505d8162aad --- /dev/null +++ b/llvm/test/tools/llvm-ar/insert-before.test @@ -0,0 +1,59 @@ +## Test inserting files before a file. + +RUN: touch %t1.txt +RUN: touch %t2.txt +RUN: touch %t3.txt +RUN: touch %t4.txt + +# Insert one file: +RUN: rm -f %t-one.a +RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt +RUN: llvm-ar rb %t2.txt %t-one.a %t3.txt +RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE + +ONE: 1.txt +ONE-NEXT: 3.txt +ONE-NEXT: 2.txt + +# Insert file at front: +RUN: rm -f %t-front.a +RUN: llvm-ar rc %t-front.a %t1.txt %t2.txt +RUN: llvm-ar rb %t1.txt %t-front.a %t3.txt +RUN: llvm-ar t %t-front.a | FileCheck %s --check-prefix=FRONT + +FRONT: 3.txt +FRONT-NEXT: 1.txt +FRONT-NEXT: 2.txt + +# Insert multiple files: +RUN: rm -f %t-multiple.a +RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt +RUN: llvm-ar rb %t2.txt %t-multiple.a %t4.txt %t3.txt +RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE + +MULTIPLE: 1.txt +MULTIPLE-NEXT: 4.txt +MULTIPLE-NEXT: 3.txt +MULTIPLE-NEXT: 2.txt + +# Insert before an invalid file: +RUN: rm -f %t-invalid.a +RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt +RUN: not llvm-ar rb invalid.txt %t-invalid.a %t2.txt 2>&1 \ +RUN: | FileCheck %s --check-prefix=ERROR +RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID + +ERROR: error: Insertion point not found. 
+INVALID: 1.txt +INVALID-NEXT: 2.txt +INVALID-NEXT: 3.txt + +# Insert file at the same position: +RUN: rm -f %t-position.a +RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt +RUN: llvm-ar rb %t3.txt %t-position.a %t2.txt +RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION + +POSITION: 1.txt +POSITION-NEXT: 2.txt +POSITION-NEXT: 3.txt diff --git a/llvm/test/tools/llvm-ar/move-after.test b/llvm/test/tools/llvm-ar/move-after.test new file mode 100644 index 0000000000000..fb61f2aa118f6 --- /dev/null +++ b/llvm/test/tools/llvm-ar/move-after.test @@ -0,0 +1,59 @@ +## Test moving files after a file. + +RUN: touch %t1.txt +RUN: touch %t2.txt +RUN: touch %t3.txt +RUN: touch %t4.txt + +# Move one file: +RUN: rm -f %t-one.ar +RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt %t3.txt +RUN: llvm-ar ma %t1.txt %t-one.a %t3.txt +RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE + +ONE: 1.txt +ONE-NEXT: 3.txt +ONE-NEXT: 2.txt + +# Move file to back: +RUN: rm -f %t-back.ar +RUN: llvm-ar rc %t-back.a %t1.txt %t2.txt %t3.txt +RUN: llvm-ar ma %t2.txt %t-back.a %t1.txt +RUN: llvm-ar t %t-back.a | FileCheck %s --check-prefix=BACK + +BACK: 2.txt +BACK-NEXT: 1.txt +BACK-NEXT: 3.txt + +# Move multiple files: +RUN: rm -f %t-multiple.ar +RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt %t3.txt %t4.txt +RUN: llvm-ar ma %t1.txt %t-multiple.a %t4.txt %t3.txt +RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE + +MULTIPLE: 1.txt +MULTIPLE-NEXT: 3.txt +MULTIPLE-NEXT: 4.txt +MULTIPLE-NEXT: 2.txt + +# Move after invalid file: +RUN: rm -f %t-invalid.ar +RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt +RUN: not llvm-ar ma invalid.txt %t-invalid.a %t2.txt 2>&1 \ +RUN: | FileCheck %s --check-prefix=ERROR +RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID + +ERROR: error: Insertion point not found. 
+INVALID: 1.txt +INVALID-NEXT: 2.txt +INVALID-NEXT: 3.txt + +# Move file to the same position: +RUN: rm -f %t-position.ar +RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt +RUN: llvm-ar ma %t1.txt %t-position.a %t2.txt +RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION + +POSITION: 1.txt +POSITION-NEXT: 2.txt +POSITION-NEXT: 3.txt diff --git a/llvm/test/tools/llvm-ar/move-before.test b/llvm/test/tools/llvm-ar/move-before.test new file mode 100644 index 0000000000000..b5fefec29557a --- /dev/null +++ b/llvm/test/tools/llvm-ar/move-before.test @@ -0,0 +1,69 @@ +## Test moving files before a file. + +RUN: touch %t1.txt +RUN: touch %t2.txt +RUN: touch %t3.txt +RUN: touch %t4.txt + +# Move one file: +RUN: rm -f %t-one.ar +RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt %t3.txt +RUN: llvm-ar mb %t2.txt %t-one.a %t3.txt +RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE + +ONE: 1.txt +ONE-NEXT: 3.txt +ONE-NEXT: 2.txt + +# Move file to front: +RUN: rm -f %t-front.ar +RUN: llvm-ar rc %t-front.ar %t1.txt %t2.txt %t3.txt +RUN: llvm-ar mb %t1.txt %t-front.ar %t3.txt +RUN: llvm-ar t %t-front.ar | FileCheck %s --check-prefix=FRONT + +FRONT: 3.txt +FRONT-NEXT: 1.txt +FRONT-NEXT: 2.txt + +# Move multiple files: +RUN: rm -f %t-multiple.ar +RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt %t3.txt %t4.txt +RUN: llvm-ar mb %t2.txt %t-multiple.a %t4.txt %t3.txt +RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE + +MULTIPLE: 1.txt +MULTIPLE-NEXT: 3.txt +MULTIPLE-NEXT: 4.txt +MULTIPLE-NEXT: 2.txt + +# Move before invalid file: +RUN: rm -f %t-invalid.ar +RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt +RUN: not llvm-ar mb invalid.txt %t-invalid.a %t2.txt 2>&1 \ +RUN: | FileCheck %s --check-prefix=ERROR +RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID + +ERROR: error: Insertion point not found. 
+INVALID: 1.txt +INVALID-NEXT: 2.txt +INVALID-NEXT: 3.txt + +# Move file to the same position: +RUN: rm -f %t-position.ar +RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt +RUN: llvm-ar mb %t3.txt %t-position.a %t2.txt +RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION + +POSITION: 1.txt +POSITION-NEXT: 2.txt +POSITION-NEXT: 3.txt + +# Move file before itself: +RUN: rm -f %t-same.ar +RUN: llvm-ar rc %t-same.ar %t1.txt %t2.txt %t3.txt +RUN: llvm-ar mb %t2.txt %t-same.ar %t2.txt +RUN: llvm-ar t %t-same.ar | FileCheck %s --check-prefix=SAME + +SAME: 1.txt +SAME-NEXT: 2.txt +SAME-NEXT: 3.txt diff --git a/llvm/test/tools/llvm-ar/symtab.test b/llvm/test/tools/llvm-ar/symtab.test new file mode 100644 index 0000000000000..e59a468d3f2f0 --- /dev/null +++ b/llvm/test/tools/llvm-ar/symtab.test @@ -0,0 +1,70 @@ +## Test the s and S modifiers. Build and do not build a symbol table. + +# RUN: yaml2obj %s -o %t.o +# RUN: touch %t-other.txt + +## Default: +# RUN: rm -f %t-default.a +# RUN: llvm-ar rc %t-default.a %t.o +# RUN: llvm-nm --print-armap %t-default.a \ +# RUN: | FileCheck %s --check-prefix=SYMTAB + +## Use a modifier: +# RUN: rm -f %t-symtab.a +# RUN: llvm-ar rcs %t-symtab.a %t.o +# RUN: llvm-nm --print-armap %t-symtab.a \ +# RUN: | FileCheck %s --check-prefix=SYMTAB + +# RUN: rm -f %t-no-symtab.a +# RUN: llvm-ar rcS %t-no-symtab.a %t.o +# RUN: llvm-nm --print-armap %t-no-symtab.a \ +# RUN: | FileCheck %s --check-prefix=NO-SYMTAB + +## Use both modifiers: +# RUN: rm -f %t-symtab-last.a +# RUN: llvm-ar rcSs %t-symtab-last.a %t.o +# RUN: llvm-nm --print-armap %t-symtab-last.a \ +# RUN: | FileCheck %s --check-prefix=SYMTAB + +# RUN: rm -f %t-no-symtab-last.a +# RUN: llvm-ar rcsS %t-no-symtab-last.a %t.o +# RUN: llvm-nm --print-armap %t-no-symtab-last.a \ +# RUN: | FileCheck %s --check-prefix=NO-SYMTAB + +## Use an existing archive: +# RUN: rm -f %t-to-symtab.a +# RUN: llvm-ar rcS %t-to-symtab.a %t.o +# RUN: llvm-ar rs %t-to-symtab.a %t-other.txt +# RUN: 
llvm-nm --print-armap %t-to-symtab.a \ +# RUN: | FileCheck %s --check-prefix=SYMTAB + +# RUN: llvm-ar rs %t-to-symtab.a %t-other.txt +# RUN: llvm-nm --print-armap %t-to-symtab.a \ +# RUN: | FileCheck %s --check-prefix=SYMTAB + +# RUN: rm -f %t-to-no-symtab.a +# RUN: llvm-ar rcs %t-to-no-symtab.a %t.o +# RUN: llvm-ar rS %t-to-no-symtab.a %t-other.txt +# RUN: llvm-nm --print-armap %t-to-no-symtab.a \ +# RUN: | FileCheck %s --check-prefix=NO-SYMTAB + +# RUN: llvm-ar rS %t-to-no-symtab.a %t-other.txt +# RUN: llvm-nm --print-armap %t-to-no-symtab.a \ +# RUN: | FileCheck %s --check-prefix=NO-SYMTAB + +# SYMTAB: symbol in +# NO-SYMTAB-NOT: symbol in + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol + Binding: STB_GLOBAL + Section: .text From aae7553b3463558c2f7ad40de1b60b989fea0f05 Mon Sep 17 00:00:00 2001 From: Johan Vikstrom Date: Tue, 16 Jul 2019 13:23:12 +0000 Subject: [PATCH 234/451] [clangd] Added highlighting for the targets in typedefs and using. Summary: In `typedef int A` the `A` was not highlighted previously. This patch gives `A` the same kind of highlighting that the underlying type has (class/enum) (which in this example is no special highlighting because builtins are not handled yet) Will add highlightings for built ins in another patch. 
Reviewers: hokein, sammccall, ilya-biryukov Subscribers: MaskRay, jkorous, arphaman, kadircet, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D64754 llvm-svn: 366207 --- .../clangd/SemanticHighlighting.cpp | 16 +++++++++++++--- .../unittests/SemanticHighlightingTests.cpp | 15 ++++++++++++++- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp index d64472d8fdb1d..b6b1c8d8a2cf8 100644 --- a/clang-tools-extra/clangd/SemanticHighlighting.cpp +++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp @@ -93,6 +93,12 @@ class HighlightingTokenCollector return true; } + bool VisitTypedefNameDecl(TypedefNameDecl *TD) { + if(const auto *TSI = TD->getTypeSourceInfo()) + addTypeLoc(TD->getLocation(), TSI->getTypeLoc()); + return true; + } + bool VisitTypeLoc(TypeLoc &TL) { // This check is for not getting two entries when there are anonymous // structs. It also makes us not highlight certain namespace qualifiers @@ -101,9 +107,7 @@ class HighlightingTokenCollector if (TL.getTypeLocClass() == TypeLoc::TypeLocClass::Elaborated) return true; - if (const Type *TP = TL.getTypePtr()) - if (const TagDecl *TD = TP->getAsTagDecl()) - addToken(TL.getBeginLoc(), TD); + addTypeLoc(TL.getBeginLoc(), TL); return true; } @@ -118,6 +122,12 @@ class HighlightingTokenCollector } private: + void addTypeLoc(SourceLocation Loc, const TypeLoc &TL) { + if (const Type *TP = TL.getTypePtr()) + if (const TagDecl *TD = TP->getAsTagDecl()) + addToken(Loc, TD); + } + void addToken(SourceLocation Loc, const NamedDecl *D) { if (D->getDeclName().isIdentifier() && D->getName().empty()) // Don't add symbols that don't have any length. 
diff --git a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp index 3a1b1c3e7057f..f69e336253ca7 100644 --- a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp +++ b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp @@ -90,7 +90,7 @@ TEST(SemanticHighlighting, GetsCorrectTokens) { typename T::A* $Field[[D]]; }; $Namespace[[abc]]::$Class[[A]] $Variable[[AA]]; - typedef $Namespace[[abc]]::$Class[[A]] AAA; + typedef $Namespace[[abc]]::$Class[[A]] $Class[[AAA]]; struct $Class[[B]] { $Class[[B]](); ~$Class[[B]](); @@ -173,6 +173,19 @@ TEST(SemanticHighlighting, GetsCorrectTokens) { } int $Variable[[B]]; $Class[[AA]] $Variable[[A]]{$Variable[[B]]}; + )cpp", + R"cpp( + namespace $Namespace[[a]] { + struct $Class[[A]] {}; + } + typedef $Namespace[[a]]::$Class[[A]] $Class[[B]]; + using $Class[[BB]] = $Namespace[[a]]::$Class[[A]]; + enum class $Enum[[E]] {}; + typedef $Enum[[E]] $Enum[[C]]; + typedef $Enum[[C]] $Enum[[CC]]; + using $Enum[[CD]] = $Enum[[CC]]; + $Enum[[CC]] $Function[[f]]($Class[[B]]); + $Enum[[CD]] $Function[[f]]($Class[[BB]]); )cpp"}; for (const auto &TestCase : TestCases) { checkHighlightings(TestCase); From 58864fad39bf291a233bd64309546682be91ce7a Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 16 Jul 2019 13:45:10 +0000 Subject: [PATCH 235/451] [pstl] Fix compilation with TBB backend Some types were not using the right namespace qualification. 
llvm-svn: 366208 --- .../include/pstl/internal/parallel_backend_tbb.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pstl/include/pstl/internal/parallel_backend_tbb.h b/pstl/include/pstl/internal/parallel_backend_tbb.h index a32fd995c750b..8ac385e3992db 100644 --- a/pstl/include/pstl/internal/parallel_backend_tbb.h +++ b/pstl/include/pstl/internal/parallel_backend_tbb.h @@ -191,7 +191,7 @@ _Tp __parallel_transform_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, _Up __u, _Tp __init, _Cp __combine, _Rp __brick_reduce) { - __par_backend::__par_trans_red_body<_Index, _Up, _Tp, _Cp, _Rp> __body(__u, __init, __combine, __brick_reduce); + __tbb_backend::__par_trans_red_body<_Index, _Up, _Tp, _Cp, _Rp> __body(__u, __init, __combine, __brick_reduce); // The grain size of 3 is used in order to provide mininum 2 elements for each body tbb::this_task_arena::isolate( [__first, __last, &__body]() { tbb::parallel_reduce(tbb::blocked_range<_Index>(__first, __last, 3), __body); }); @@ -305,9 +305,9 @@ __upsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize { _Index __k = __split(__m); tbb::parallel_invoke( - [=] { __par_backend::__upsweep(__i, __k, __tilesize, __r, __tilesize, __reduce, __combine); }, + [=] { __tbb_backend::__upsweep(__i, __k, __tilesize, __r, __tilesize, __reduce, __combine); }, [=] { - __par_backend::__upsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, __reduce, __combine); + __tbb_backend::__upsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, __reduce, __combine); }); if (__m == 2 * __k) __r[__m - 1] = __combine(__r[__k - 1], __r[__m - 1]); @@ -325,11 +325,11 @@ __downsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsi { const _Index __k = __split(__m); tbb::parallel_invoke( - [=] { __par_backend::__downsweep(__i, __k, __tilesize, __r, __tilesize, __initial, __combine, __scan); }, + [=] { __tbb_backend::__downsweep(__i, __k, __tilesize, __r, 
__tilesize, __initial, __combine, __scan); }, // Assumes that __combine never throws. //TODO: Consider adding a requirement for user functors to be constant. [=, &__combine] { - __par_backend::__downsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, + __tbb_backend::__downsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, __combine(__initial, __r[__k - 1]), __combine, __scan); }); } @@ -363,7 +363,7 @@ __parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __redu _Index __m = (__n - 1) / __tilesize; __buffer<_Tp> __buf(__m + 1); _Tp* __r = __buf.get(); - __par_backend::__upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce, + __tbb_backend::__upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce, __combine); // When __apex is a no-op and __combine has no side effects, a good optimizer @@ -375,7 +375,7 @@ __parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __redu while ((__k &= __k - 1)) __t = __combine(__r[__k - 1], __t); __apex(__combine(__initial, __t)); - __par_backend::__downsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __initial, + __tbb_backend::__downsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __initial, __combine, __scan); return; } @@ -874,7 +874,7 @@ template ::execute() { - typedef __merge_task<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, __serial_destroy, + typedef __merge_task<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, __utils::__serial_destroy, __utils::__serial_move_merge> _MergeTaskType; From 8e482eb5cbb8b6839f9de701a43f1e2257ea3b34 Mon Sep 17 00:00:00 2001 From: Owen Reynolds Date: Tue, 16 Jul 2019 14:25:37 +0000 Subject: [PATCH 236/451] [llvm-ar][test] Add to llvm-ar test coverage This change adds tests to cover existing llvm-ar functionality. print.test is omitted due to failing on Darwin. 
Differential Revision: https://reviews.llvm.org/D64330 llvm-svn: 366209 --- llvm/test/tools/llvm-ar/move.test | 129 ++++++++++++++++++ llvm/test/tools/llvm-ar/quick-append.test | 93 +++++++++++++ .../test/tools/llvm-ar/read-only-archive.test | 30 ++++ 3 files changed, 252 insertions(+) create mode 100644 llvm/test/tools/llvm-ar/move.test create mode 100644 llvm/test/tools/llvm-ar/quick-append.test create mode 100644 llvm/test/tools/llvm-ar/read-only-archive.test diff --git a/llvm/test/tools/llvm-ar/move.test b/llvm/test/tools/llvm-ar/move.test new file mode 100644 index 0000000000000..c5028f52e405c --- /dev/null +++ b/llvm/test/tools/llvm-ar/move.test @@ -0,0 +1,129 @@ +## Test the move command without modifiers moves members to the end + +# RUN: rm -rf %t && mkdir -p %t +# RUN: yaml2obj %s -o %t/1.o --docnum=1 +# RUN: yaml2obj %s -o %t/2.o --docnum=2 +# RUN: yaml2obj %s -o %t/3.o --docnum=3 + +## Move single member: +# RUN: llvm-ar rc %t/single.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar m %t/single.a %t/1.o +# RUN: llvm-ar t %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE --match-full-lines --implicit-check-not {{.}} + +# SINGLE: 2.o +# SINGLE-NEXT: 3.o +# SINGLE-NEXT: 1.o + +# RUN: llvm-nm --print-armap %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE-SYM + +# SINGLE-SYM: symbol2 +# SINGLE-SYM-NEXT: symbol3 +# SINGLE-SYM-NEXT: symbol1 + +## Move multiple members: +# RUN: llvm-ar rc %t/multiple.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar m %t/multiple.a %t/1.o %t/2.o +# RUN: llvm-ar t %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE --match-full-lines --implicit-check-not {{.}} + +# MULTIPLE: 3.o +# MULTIPLE-NEXT: 1.o +# MULTIPLE-NEXT: 2.o + +# RUN: llvm-nm --print-armap %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM + +# MULTIPLE-SYM: symbol3 +# MULTIPLE-SYM-NEXT: symbol1 +# MULTIPLE-SYM-NEXT: symbol2 + +## Move same member: +# RUN: llvm-ar rc %t/same.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar m %t/same.a %t/1.o %t/1.o +# 
RUN: llvm-ar t %t/same.a \ +# RUN: | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o + +# SAME: 2.o +# SAME-NEXT: 3.o +# SAME-NEXT: 1.o + +# RUN: llvm-nm --print-armap %t/same.a \ +# RUN: | FileCheck %s --check-prefix=SAME-SYM + +# SAME-SYM: symbol2 +# SAME-SYM-NEXT: symbol3 +# SAME-SYM-NEXT: symbol1 + +## Move without member: +# RUN: llvm-ar rc %t/without.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar m %t/without.a +# RUN: llvm-ar t %t/without.a \ +# RUN: | FileCheck %s --match-full-lines --check-prefix=WITHOUT --implicit-check-not {{.}} + +# WITHOUT: 1.o +# WITHOUT-NEXT: 2.o +# WITHOUT-NEXT: 3.o + +# RUN: llvm-nm --print-armap %t/without.a \ +# RUN: | FileCheck %s --check-prefix=WITHOUT-SYM + +# WITHOUT-SYM: symbol1 +# WITHOUT-SYM-NEXT: symbol2 +# WITHOUT-SYM-NEXT: symbol3 + +## No archive: +# RUN: not llvm-ar m 2>&1 \ +# RUN: | FileCheck %s --check-prefix=NO-ARCHIVE + +# NO-ARCHIVE: error: An archive name must be specified. + +## Member does not exist: +# RUN: llvm-ar rc %t/missing.a %t/1.o %t/2.o %t/3.o +# RUN: not llvm-ar m %t/missing.a %t/missing.txt 2>&1 \ +# RUN: | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt + +# MISSING-FILE: error: [[FILE]]: {{[nN]}}o such file or directory. 
+ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol1 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol2 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol3 + Binding: STB_GLOBAL + Section: .text diff --git a/llvm/test/tools/llvm-ar/quick-append.test b/llvm/test/tools/llvm-ar/quick-append.test new file mode 100644 index 0000000000000..ef2e5f74cc808 --- /dev/null +++ b/llvm/test/tools/llvm-ar/quick-append.test @@ -0,0 +1,93 @@ +## Test quick append + +# RUN: rm -rf %t && mkdir -p %t +# RUN: yaml2obj %s -o %t/1.o --docnum=1 +# RUN: yaml2obj %s -o %t/2.o --docnum=2 + +## Append single member: +# RUN: llvm-ar qc %t/single.a %t/1.o +# RUN: llvm-ar t %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE --match-full-lines --implicit-check-not {{.}} + +# SINGLE: 1.o + +# RUN: llvm-nm --print-armap %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE-SYM + +# SINGLE-SYM: symbol1 + +## Append multiple members: +# RUN: llvm-ar qc %t/multiple.a %t/1.o %t/2.o +# RUN: llvm-ar t %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE --match-full-lines --implicit-check-not {{.}} + +# MULTIPLE: 1.o +# MULTIPLE-NEXT: 2.o + +# RUN: llvm-nm --print-armap %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM + +# MULTIPLE-SYM: symbol1 +# MULTIPLE-SYM-NEXT: symbol2 + +## Append same member: +# RUN: llvm-ar qc %t/same.a %t/1.o %t/1.o +# RUN: llvm-ar t %t/same.a \ +# RUN: | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o --match-full-lines --implicit-check-not {{.}} + +# SAME: 1.o +# SAME-NEXT: 1.o + +# RUN: 
llvm-nm --print-armap %t/same.a \ +# RUN: | FileCheck %s --check-prefix=SAME-SYM + +# SAME-SYM: symbol1 +# SAME-SYM-NEXT: symbol1 + +## Append without member: +# RUN: llvm-ar qc %t/without.a +# RUN: llvm-ar t %t/without.a \ +# RUN: | FileCheck /dev/null --allow-empty --implicit-check-not={{.}} + +# RUN: llvm-nm --print-armap %t/without.a \ +# RUN: | FileCheck /dev/null --allow-empty --implicit-check-not={{.}} + +## No archive: +# RUN: not llvm-ar qc 2>&1 \ +# RUN: | FileCheck %s --check-prefix=NO-ARCHIVE + +# NO-ARCHIVE: error: An archive name must be specified. + +## Member does not exist: +# RUN: not llvm-ar qc %t/missing.a %t/missing.txt 2>&1 \ +# RUN: | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt + +# MISSING-FILE: error: [[FILE]]: {{[nN]}}o such file or directory. + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol1 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol2 + Binding: STB_GLOBAL + Section: .text diff --git a/llvm/test/tools/llvm-ar/read-only-archive.test b/llvm/test/tools/llvm-ar/read-only-archive.test new file mode 100644 index 0000000000000..f4adea20218c0 --- /dev/null +++ b/llvm/test/tools/llvm-ar/read-only-archive.test @@ -0,0 +1,30 @@ +## Test that read-only archives cannot be edited + +# RUN: rm -rf %t && mkdir -p %t +# RUN: touch %t/1.txt +# RUN: touch %t/2.txt +# RUN: touch %t/3.txt + +# RUN: llvm-ar rc %t/archive.a %t/1.txt %t/2.txt + +## Make read only: +# RUN: chmod 444 %t/archive.a + +# RUN: not llvm-ar r %t/archive.a %t/3.txt \ +# RUN: FileCheck %s --check-prefix=ERROR -DFILE=%t/archive.a + +# ERROR: error: [[FILE]]: no such file or directory. 
+ +# RUN: not llvm-ar q %t/archive.a %t/3.txt \ +# RUN: FileCheck %s --check-prefix=ERROR -DFILE=%t/archive.a + +# RUN: not llvm-ar m %t/archive.a %t/1.txt \ +# RUN: FileCheck %s --check-prefix=ERROR-2 -DFILE=%t/archive.a + +# ERROR-2: error: [[FILE]]: permission denied. + +# RUN: llvm-ar t %t/archive.a \ +# RUN: | FileCheck %s --check-prefix=ARCHIVE --match-full-lines --implicit-check-not {{.}} + +# ARCHIVE: 1.txt +# ARCHIVE-NEXT: 2.txt From 22c4a147a96447b38ce90e59b27c33079f1aa203 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 14:28:30 +0000 Subject: [PATCH 237/451] AMDGPU/GlobalISel: Fix test failures in release build Apparently the check for legal instructions during instruction select does not happen without an asserts build, so these would successfully select in release, and fail in debug. Make s16 and/or/xor legal. These can just be selected directly to the 32-bit operation, as is already done in SelectionDAG, so just make them legal. llvm-svn: 366210 --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 7 +- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 2 +- .../AMDGPU/GlobalISel/inst-select-and.mir | 21 +- .../AMDGPU/GlobalISel/inst-select-or.mir | 21 +- .../AMDGPU/GlobalISel/inst-select-xor.mir | 21 +- .../AMDGPU/GlobalISel/legalize-and.mir | 10 +- .../AMDGPU/GlobalISel/legalize-ashr.mir | 52 ++-- .../AMDGPU/GlobalISel/legalize-fcopysign.mir | 246 +++++++-------- .../AMDGPU/GlobalISel/legalize-lshr.mir | 48 ++- .../GlobalISel/legalize-merge-values.mir | 286 +++++++++--------- .../CodeGen/AMDGPU/GlobalISel/legalize-or.mir | 20 +- .../AMDGPU/GlobalISel/legalize-shl.mir | 62 ++-- .../AMDGPU/GlobalISel/legalize-umax.mir | 28 +- .../AMDGPU/GlobalISel/legalize-umin.mir | 28 +- .../AMDGPU/GlobalISel/legalize-xor.mir | 20 +- 15 files changed, 406 insertions(+), 466 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index fd5ee293d6cfb..aa634e881d870 100644 --- 
a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -291,10 +291,13 @@ bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const { // TODO: Should this allow an SCC bank result, and produce a copy from SCC for // the result? if (DstRB->getID() == AMDGPU::SGPRRegBankID) { - const TargetRegisterClass *RC - = TRI.getConstrainedRegClassForOperand(Dst, MRI); unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32); I.setDesc(TII.get(InstOpc)); + + const TargetRegisterClass *RC + = TRI.getConstrainedRegClassForOperand(Dst, MRI); + if (!RC) + return false; return RBI.constrainGenericRegister(DstReg, *RC, MRI) && RBI.constrainGenericRegister(Src0.getReg(), *RC, MRI) && RBI.constrainGenericRegister(Src1.getReg(), *RC, MRI); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 3d1f7f404c918..3cf4fbc752493 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -213,7 +213,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // Report legal for any types we can handle anywhere. For the cases only legal // on the SALU, RegBankSelect will be able to re-legalize. 
getActionDefinitionsBuilder({G_AND, G_OR, G_XOR}) - .legalFor({S32, S1, S64, V2S32, V2S16, V4S16}) + .legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16}) .clampScalar(0, S32, S64) .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) .fewerElementsIf(vectorWiderThan(0, 32), fewerEltsToSize64Vector(0)) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir index f00452b5564ec..d3b877d72e28c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir @@ -117,20 +117,17 @@ body: | liveins: $sgpr0, $sgpr1 ; WAVE64-LABEL: name: and_s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 - ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; WAVE64: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; WAVE64: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; WAVE64: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[TRUNC1]] - ; WAVE64: S_ENDPGM 0, implicit [[AND]](s16) + ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1 + ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[COPY]], [[COPY1]] + ; WAVE64: S_ENDPGM 0, implicit [[S_AND_B32_]] ; WAVE32-LABEL: name: and_s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 - ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; WAVE32: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; WAVE32: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; WAVE32: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[TRUNC1]] - ; WAVE32: S_ENDPGM 0, implicit [[AND]](s16) + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1 + ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[COPY]], [[COPY1]] + ; WAVE32: S_ENDPGM 0, 
implicit [[S_AND_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir index 4e120e7c7dd19..d102761158159 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir @@ -117,20 +117,17 @@ body: | liveins: $sgpr0, $sgpr1 ; WAVE64-LABEL: name: or_s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 - ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; WAVE64: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; WAVE64: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; WAVE64: [[OR:%[0-9]+]]:sgpr(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; WAVE64: S_ENDPGM 0, implicit [[OR]](s16) + ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1 + ; WAVE64: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[COPY]], [[COPY1]] + ; WAVE64: S_ENDPGM 0, implicit [[S_OR_B32_]] ; WAVE32-LABEL: name: or_s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 - ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; WAVE32: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; WAVE32: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; WAVE32: [[OR:%[0-9]+]]:sgpr(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; WAVE32: S_ENDPGM 0, implicit [[OR]](s16) + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1 + ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 [[COPY]], [[COPY1]] + ; WAVE32: S_ENDPGM 0, implicit [[S_OR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir index 3035e022646f0..74555ab9940ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir @@ -117,20 +117,17 @@ body: | liveins: $sgpr0, $sgpr1 ; WAVE64-LABEL: name: xor_s16_sgpr_sgpr_sgpr ; WAVE64: liveins: $sgpr0, $sgpr1 - ; WAVE64: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE64: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; WAVE64: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; WAVE64: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; WAVE64: [[XOR:%[0-9]+]]:sgpr(s16) = G_XOR [[TRUNC]], [[TRUNC1]] - ; WAVE64: S_ENDPGM 0, implicit [[XOR]](s16) + ; WAVE64: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1 + ; WAVE64: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 [[COPY]], [[COPY1]] + ; WAVE64: S_ENDPGM 0, implicit [[S_XOR_B32_]] ; WAVE32-LABEL: name: xor_s16_sgpr_sgpr_sgpr ; WAVE32: liveins: $sgpr0, $sgpr1 - ; WAVE32: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; WAVE32: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 - ; WAVE32: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) - ; WAVE32: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) - ; WAVE32: [[XOR:%[0-9]+]]:sgpr(s16) = G_XOR [[TRUNC]], [[TRUNC1]] - ; WAVE32: S_ENDPGM 0, implicit [[XOR]](s16) + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr1 + ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0 = S_XOR_B32 [[COPY]], [[COPY1]] + ; WAVE32: S_ENDPGM 0, implicit [[S_XOR_B32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:sgpr(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir index e95e063212dd0..7b578420a6ea2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -156,11 +156,11 @@ body: | ; CHECK-LABEL: name: test_and_s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; CHECK: $vgpr0 = COPY [[COPY4]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir index 21f4ffa4a0ba9..6b5451935f40c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -223,11 +223,10 @@ body: | ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC1]] + ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[AND]](s16) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_ashr_s16_i8 @@ -235,11 +234,10 @@ body: 
| ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC1]] + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[AND]](s16) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 @@ -274,32 +272,30 @@ body: | ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]] ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC1]](s16) - ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC1]](s16) - ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[TRUNC]](s16) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC2]](s16) + ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC2]](s16) + 
; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[AND]](s16) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_ashr_i8_i8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]] ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC1]](s16) - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC1]](s16) - ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[TRUNC]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC2]](s16) + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC2]](s16) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[AND]](s16) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir index 5d590ff17b4dc..d676f6711c620 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir @@ -12,48 +12,42 @@ body: | ; SI-LABEL: name: test_copysign_s16_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY4]] - ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]] - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; SI: $vgpr0 = COPY [[COPY7]](s32) + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) + ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]] + ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]] + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_copysign_s16_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY4]] - ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]] - ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; VI: $vgpr0 = COPY [[COPY7]](s32) + ; VI: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]] + ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]] + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_copysign_s16_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY4]] - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]] - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; GFX9: $vgpr0 = COPY [[COPY7]](s32) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]] + ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]] + ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -252,57 +246,51 @@ body: | ; SI-LABEL: name: test_copysign_s16_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 32767 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) + ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]] ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]] - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[COPY7]] - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; SI: $vgpr0 = COPY [[COPY8]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC1]] + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_copysign_s16_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]] ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], 
[[COPY5]] - ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[COPY7]] - ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; VI: $vgpr0 = COPY [[COPY8]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC1]] + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_copysign_s16_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]] - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[COPY7]] - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; GFX9: $vgpr0 = COPY [[COPY8]](s32) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC1]] + ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: $vgpr0 = COPY 
[[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -433,60 +421,54 @@ body: | ; SI-LABEL: name: test_copysign_s16_s64 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2 + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) + ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]] ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) - ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[COPY4]] - ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]] - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; SI: $vgpr0 = COPY [[COPY7]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) + ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC3]](s32) + ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) + ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[TRUNC1]] + ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_copysign_s16_s64 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; VI: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]] ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) - ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[COPY4]] - ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]] - ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; VI: $vgpr0 = COPY [[COPY7]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) + ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC3]](s32) + ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) + ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[TRUNC1]] + ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_copysign_s16_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s16) = 
G_AND [[TRUNC]], [[TRUNC2]] ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) - ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[COPY4]] - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY5]], [[COPY6]] - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; GFX9: $vgpr0 = COPY [[COPY7]](s32) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64) + ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC3]](s32) + ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s64) + ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[TRUNC1]] + ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[AND1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s64) = COPY $vgpr1_vgpr2 %2:_(s16) = G_TRUNC %0 @@ -906,57 +888,51 @@ body: | ; SI-LABEL: name: test_copysign_s16_s32_flags ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] + ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) + ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]] ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY 
[[C]](s32) - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]] - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) - ; SI: %14:_(s32) = nnan G_OR [[COPY6]], [[COPY7]] - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY %14(s32) - ; SI: $vgpr0 = COPY [[COPY8]](s32) + ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC1]] + ; SI: %3:_(s16) = nnan G_OR [[AND]], [[AND1]] + ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %3(s16) + ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_copysign_s16_s32_flags ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]] ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]] - ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) - ; VI: %14:_(s32) = nnan G_OR [[COPY6]], [[COPY7]] - ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY %14(s32) - ; VI: $vgpr0 = COPY [[COPY8]](s32) + ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC1]] + ; VI: %3:_(s16) = nnan G_OR [[AND]], [[AND1]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %3(s16) + ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; 
GFX9-LABEL: name: test_copysign_s16_s32_flags ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32767 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC2]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]] - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[AND]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[AND1]](s32) - ; GFX9: %14:_(s32) = nnan G_OR [[COPY6]], [[COPY7]] - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY %14(s32) - ; GFX9: $vgpr0 = COPY [[COPY8]](s32) + ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC1]] + ; GFX9: %3:_(s16) = nnan G_OR [[AND]], [[AND1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %3(s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir index 82d1c5a896b9f..48a3e4c288b33 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -219,11 +219,10 @@ body: | ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 
- ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC1]] + ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND]](s16) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_lshr_s16_i8 @@ -231,11 +230,10 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC1]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[AND]](s16) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 @@ -268,30 +266,24 @@ body: | ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY 
[[C]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]] - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC]](s16) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]] + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC]] + ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND1]], [[AND]](s16) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_lshr_i8_i8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC]](s16) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND1]], [[AND]](s16) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir index 
fc2ba600340a4..7438180111db7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir @@ -15,53 +15,51 @@ body: | ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C2]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C2]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[AND]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]] - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[COPY11]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]] - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C2]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[AND3]](s32) - ; CHECK: 
[[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C2]] - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[COPY15]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C3]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[AND1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC2]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC]] ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C1]] - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32) - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C2]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[AND6]](s32) - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C2]] - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[SHL2]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[COPY19]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C2]] + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C3]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[AND4]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC4]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[TRUNC]] ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK: 
[[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C1]] - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY7]](s32) - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C2]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[AND9]](s32) - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[COPY6]](s32) - ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C2]] - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[SHL3]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[COPY23]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C2]] + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY5]](s32) + ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C3]] + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[AND7]](s32) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC6]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; CHECK: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[TRUNC]] + ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C2]] + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY7]](s32) + ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C3]] + ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[AND10]](s32) + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND9]], [[TRUNC8]] + ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16) ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](p1) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -91,20 +89,22 @@ body: | ; CHECK-LABEL: name: test_merge_s16_s8_s8 ; CHECK: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 0 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C3]] - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C2]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C4]] + ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[AND]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]] - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[COPY3]] - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK: $vgpr0 = COPY [[COPY4]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C5]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[AND1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC2]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s8) = G_CONSTANT i8 0 %1:_(s8) = G_CONSTANT i8 1 %2:_(s16) = G_MERGE_VALUES %0, %1 @@ -160,31 +160,31 @@ body: | ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK: 
[[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C5]] - ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C4]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]] + ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C6]] + ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C6]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[AND]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]] - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[COPY3]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C5]] - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C6]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[AND3]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]] - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[COPY7]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) - ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16) + ; CHECK: 
[[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[AND1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC2]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[C2]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC]] + ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C8]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C6]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[AND4]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC4]] + ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16) ; CHECK: $vgpr0 = COPY [[MV]](s32) %0:_(s8) = G_CONSTANT i8 0 %1:_(s8) = G_CONSTANT i8 1 @@ -424,75 +424,71 @@ body: | ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C12]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C13]] - ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C12]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]] + ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C13]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C14]] + ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK: 
[[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C14]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[AND]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C14]] - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SHL]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[COPY3]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[OR]](s32) - ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C15]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C13]] - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C14]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[AND3]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C14]] - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[COPY7]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[OR1]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C15]] + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[AND1]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC2]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[C2]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC]] ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C16]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C13]] - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C14]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[AND6]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C14]] - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY 
[[SHL2]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[COPY11]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[OR2]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C16]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C14]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C15]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[AND4]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC4]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[C4]](s32) + ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[TRUNC]] ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C17]](s32) - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C13]] - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C14]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[AND9]](s32) - ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY14]], [[C14]] - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[SHL3]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[COPY15]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[OR3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C17]](s32) + ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C14]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C15]] + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[AND7]](s32) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[TRUNC6]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[C6]](s32) + ; CHECK: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[TRUNC]] ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY 
[[C18]](s32) - ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY16]], [[C13]] - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C14]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[AND12]](s32) - ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY18]], [[C14]] - ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[SHL4]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[COPY19]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[OR4]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C18]](s32) + ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C14]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C7]](s32) + ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C15]] + ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[AND10]](s32) + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND9]], [[TRUNC8]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[C8]](s32) + ; CHECK: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[TRUNC]] ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C19]](s32) - ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY20]], [[C13]] - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; CHECK: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C14]] - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND16]], [[AND15]](s32) - ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY22]], [[C14]] - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[SHL5]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND17]], [[COPY23]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[OR5]](s32) - ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16) + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C19]](s32) + ; 
CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C14]] + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C9]](s32) + ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C15]] + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[AND13]](s32) + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[TRUNC10]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[C10]](s32) + ; CHECK: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[TRUNC]] + ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C20]](s32) + ; CHECK: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C14]] + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C11]](s32) + ; CHECK: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C15]] + ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[AND16]](s32) + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) + ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND15]], [[TRUNC12]] + ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR]](s16), [[OR1]](s16), [[OR2]](s16), [[OR3]](s16), [[OR4]](s16), [[OR5]](s16) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(s8) = G_CONSTANT i8 0 %1:_(s8) = G_CONSTANT i8 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir index 88a15298c36f3..054686050a9a6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -156,11 +156,11 @@ body: | ; CHECK-LABEL: name: test_or_s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[COPY3]] - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK: $vgpr0 = COPY [[COPY4]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -179,11 +179,11 @@ body: | ; CHECK-LABEL: name: test_or_s24 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[COPY3]] - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR]](s32) - ; CHECK: $vgpr0 = COPY [[COPY4]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir index d767bfb2d7501..d661a25a76536 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -214,11 +214,10 @@ body: | ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC1]] + ; 
VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_shl_s16_i8 @@ -226,11 +225,10 @@ body: | ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC1]] + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 @@ -262,24 +260,22 @@ body: | ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]] + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND]](s16) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_shl_i8_i8 ; GFX9: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND]](s16) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 @@ -715,24 +711,22 @@ body: | ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16) + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]] + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND]](s16) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_shl_s7_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - 
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC]](s16) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND]](s16) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir index c533e00ad1a0d..cf17f35ebc735 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir @@ -93,28 +93,24 @@ body: | ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY3]] - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]] + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]] + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC]] + ; VI: 
[[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[AND]], [[AND1]] ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_umax_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY3]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; GFX9: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]] + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC]] + ; GFX9: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[AND]], [[AND1]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir index c33c99dc43c31..bbce1a7af3da2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir @@ -93,28 +93,24 @@ body: | ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY 
[[COPY1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY3]] - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]] + ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]] + ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC]] + ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[AND]], [[AND1]] ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_umin_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY3]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; GFX9: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]] + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[TRUNC]] + ; GFX9: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[AND]], [[AND1]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir index fae9ae8c9f56e..29e37ec43eb55 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -156,11 +156,11 @@ body: | ; CHECK-LABEL: name: test_xor_s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY3]] - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[XOR]](s32) - ; CHECK: $vgpr0 = COPY [[COPY4]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[TRUNC1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -179,11 +179,11 @@ body: | ; CHECK-LABEL: name: test_xor_s24 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[COPY3]] - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[XOR]](s32) - ; CHECK: $vgpr0 = COPY [[COPY4]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[TRUNC1]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 From 655cb4a2d702b70899f3ad384d047537e02698d8 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 16 Jul 2019 14:51:46 +0000 Subject: [PATCH 238/451] [OPENMP]Add support for analysis of if clauses. 
Summary: Added support for analysis of if clauses in the OpenMP directives to be able to check for the use of uninitialized variables. Reviewers: NoQ Subscribers: guansong, jfb, jdoerfert, caomhin, kkwli0, cfe-commits Tags: clang Differential Revision: https://reviews.llvm.org/D64646 llvm-svn: 366211 --- clang/include/clang/AST/OpenMPClause.h | 7 +- clang/lib/AST/OpenMPClause.cpp | 19 + clang/lib/Analysis/CFG.cpp | 5 +- clang/test/Analysis/cfg-openmp.cpp | 532 ++++++++++-------- clang/test/OpenMP/cancel_if_messages.cpp | 10 + .../distribute_parallel_for_if_messages.cpp | 7 + ...stribute_parallel_for_simd_if_messages.cpp | 7 + .../test/OpenMP/parallel_for_if_messages.cpp | 7 + .../OpenMP/parallel_for_simd_if_messages.cpp | 7 + clang/test/OpenMP/parallel_if_messages.cpp | 7 + .../OpenMP/parallel_sections_if_messages.cpp | 8 + clang/test/OpenMP/target_data_if_messages.cpp | 7 + .../OpenMP/target_enter_data_if_messages.cpp | 7 + .../OpenMP/target_exit_data_if_messages.cpp | 7 + clang/test/OpenMP/target_if_messages.cpp | 7 + .../target_parallel_for_if_messages.cpp | 7 + .../target_parallel_for_simd_if_messages.cpp | 7 + .../OpenMP/target_parallel_if_messages.cpp | 7 + clang/test/OpenMP/target_simd_if_messages.cpp | 7 + .../target_teams_distribute_if_messages.cpp | 7 + ...ms_distribute_parallel_for_if_messages.cpp | 7 + ...stribute_parallel_for_simd_if_messages.cpp | 8 + ...rget_teams_distribute_simd_if_messages.cpp | 7 + .../test/OpenMP/target_teams_if_messages.cpp | 7 + .../test/OpenMP/target_update_if_messages.cpp | 7 + clang/test/OpenMP/task_if_messages.cpp | 7 + ...ms_distribute_parallel_for_if_messages.cpp | 8 + ...stribute_parallel_for_simd_if_messages.cpp | 8 + 28 files changed, 497 insertions(+), 241 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index c6daf73a623bd..eadcc62a34575 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -501,11 +501,10 @@ class 
OMPIfClause : public OMPClause, public OMPClauseWithPreInit { return const_child_range(&Condition, &Condition + 1); } - child_range used_children() { - return child_range(child_iterator(), child_iterator()); - } + child_range used_children(); const_child_range used_children() const { - return const_child_range(const_child_iterator(), const_child_iterator()); + auto Children = const_cast<OMPIfClause *>(this)->used_children(); + return const_child_range(Children.begin(), Children.end()); } static bool classof(const OMPClause *T) { diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 41520b380276c..9d8a7ebc3023e 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -209,6 +209,25 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C) return nullptr; } +/// Gets the address of the original, non-captured, expression used in the +/// clause as the preinitializer. +static Stmt **getAddrOfExprAsWritten(Stmt *S) { + if (!S) + return nullptr; + if (auto *DS = dyn_cast<DeclStmt>(S)) { + assert(DS->isSingleDecl() && "Only single expression must be captured."); + if (auto *OED = dyn_cast<OMPCapturedExprDecl>(DS->getSingleDecl())) + return OED->getInitAddress(); + } + return nullptr; +} + +OMPClause::child_range OMPIfClause::used_children() { + if (Stmt **C = getAddrOfExprAsWritten(getPreInitStmt())) + return child_range(C, C + 1); + return child_range(&Condition, &Condition + 1); +} + OMPOrderedClause *OMPOrderedClause::Create(const ASTContext &C, Expr *Num, unsigned NumLoops, SourceLocation StartLoc, diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp index de89105a29257..0ed1e988a196a 100644 --- a/clang/lib/Analysis/CFG.cpp +++ b/clang/lib/Analysis/CFG.cpp @@ -4746,8 +4746,9 @@ CFGBlock *CFGBuilder::VisitOMPExecutableDirective(OMPExecutableDirective *D, // Reverse the elements to process them in natural order. Iterators are not // bidirectional, so we need to create temp vector. 
- for (Stmt *S : llvm::reverse(llvm::to_vector<8>( - OMPExecutableDirective::used_clauses_children(D->clauses())))) { + SmallVector Used( + OMPExecutableDirective::used_clauses_children(D->clauses())); + for (Stmt *S : llvm::reverse(Used)) { assert(S && "Expected non-null used-in-clause child."); if (CFGBlock *R = Visit(S)) B = R; diff --git a/clang/test/Analysis/cfg-openmp.cpp b/clang/test/Analysis/cfg-openmp.cpp index 2f734d14b0216..dd417bf408c8f 100644 --- a/clang/test/Analysis/cfg-openmp.cpp +++ b/clang/test/Analysis/cfg-openmp.cpp @@ -1,340 +1,402 @@ -// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG %s 2>&1 -fopenmp | FileCheck %s +// RUN: %clang_analyze_cc1 -analyzer-checker=debug.DumpCFG %s 2>&1 -fopenmp -fopenmp-version=45 | FileCheck %s // CHECK-LABEL: void xxx(int argc) void xxx(int argc) { // CHECK: [B1] // CHECK-NEXT: 1: int x; - int x; -// CHECK-NEXT: 2: x -// CHECK-NEXT: 3: [B1.2] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 4: argc -// CHECK-NEXT: 5: [B1.4] = [B1.3] -// CHECK-NEXT: 6: #pragma omp atomic read -// CHECK-NEXT: [B1.5]; +// CHECK-NEXT: 2: int cond; + int x, cond; +// CHECK-NEXT: [[#ATOM:]]: x +// CHECK-NEXT: [[#ATOM+1]]: [B1.[[#ATOM]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#ATOM+2]]: argc +// CHECK-NEXT: [[#ATOM+3]]: [B1.[[#ATOM+2]]] = [B1.[[#ATOM+1]]] +// CHECK-NEXT: [[#ATOM+4]]: #pragma omp atomic read +// CHECK-NEXT: [B1.[[#ATOM+3]]]; #pragma omp atomic read argc = x; -// CHECK-NEXT: 7: x -// CHECK-NEXT: 8: [B1.7] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 9: argc -// CHECK-NEXT: 10: [B1.9] = [B1.8] -// CHECK-NEXT: 11: #pragma omp critical -// CHECK-NEXT: [B1.10]; +// CHECK-NEXT: [[#CRIT:]]: x +// CHECK-NEXT: [[#CRIT+1]]: [B1.[[#CRIT]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#CRIT+2]]: argc +// CHECK-NEXT: [[#CRIT+3]]: [B1.[[#CRIT+2]]] = [B1.[[#CRIT+1]]] +// CHECK-NEXT: [[#CRIT+4]]: #pragma omp critical +// CHECK-NEXT: [B1.[[#CRIT+3]]]; #pragma omp critical argc = 
x; -// CHECK-NEXT: 12: x -// CHECK-NEXT: 13: [B1.12] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 14: argc -// CHECK-NEXT: 15: [B1.14] = [B1.13] -// CHECK-NEXT: 16: #pragma omp distribute parallel for +// CHECK-NEXT: [[#DPF:]]: x +// CHECK-NEXT: [[#DPF+1]]: [B1.[[#DPF]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#DPF+2]]: argc +// CHECK-NEXT: [[#DPF+3]]: [B1.[[#DPF+2]]] = [B1.[[#DPF+1]]] +// CHECK-NEXT: [[#DPF+4]]: cond +// CHECK-NEXT: [[#DPF+5]]: [B1.[[#DPF+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#DPF+6]]: [B1.[[#DPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#DPF+7]]: #pragma omp distribute parallel for if(parallel: cond) // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.15]; -#pragma omp distribute parallel for +// CHECK-NEXT: [B1.[[#DPF+3]]]; +#pragma omp distribute parallel for if(parallel:cond) for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 17: x -// CHECK-NEXT: 18: [B1.17] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 19: argc -// CHECK-NEXT: 20: [B1.19] = [B1.18] -// CHECK-NEXT: 21: #pragma omp distribute parallel for simd +// CHECK-NEXT: [[#DPFS:]]: x +// CHECK-NEXT: [[#DPFS+1]]: [B1.[[#DPFS]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#DPFS+2]]: argc +// CHECK-NEXT: [[#DPFS+3]]: [B1.[[#DPFS+2]]] = [B1.[[#DPFS+1]]] +// CHECK-NEXT: [[#DPFS+4]]: cond +// CHECK-NEXT: [[#DPFS+5]]: [B1.[[#DPFS+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#DPFS+6]]: [B1.[[#DPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#DPFS+7]]: #pragma omp distribute parallel for simd if(cond) // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.20]; -#pragma omp distribute parallel for simd +// CHECK-NEXT: [B1.[[#DPFS+3]]]; +#pragma omp distribute parallel for simd if(cond) for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 22: x -// CHECK-NEXT: 23: [B1.22] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 
24: argc -// CHECK-NEXT: 25: [B1.24] = [B1.23] -// CHECK-NEXT: 26: #pragma omp distribute simd +// CHECK-NEXT: [[#DS:]]: x +// CHECK-NEXT: [[#DS+1]]: [B1.[[#DS]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#DS+2]]: argc +// CHECK-NEXT: [[#DS+3]]: [B1.[[#DS+2]]] = [B1.[[#DS+1]]] +// CHECK-NEXT: [[#DS+4]]: #pragma omp distribute simd // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.25]; +// CHECK-NEXT: [B1.[[#DS+3]]]; #pragma omp distribute simd for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 27: x -// CHECK-NEXT: 28: [B1.27] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 29: argc -// CHECK-NEXT: 30: [B1.29] = [B1.28] -// CHECK-NEXT: 31: #pragma omp for +// CHECK-NEXT: [[#FOR:]]: x +// CHECK-NEXT: [[#FOR+1]]: [B1.[[#FOR]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#FOR+2]]: argc +// CHECK-NEXT: [[#FOR+3]]: [B1.[[#FOR+2]]] = [B1.[[#FOR+1]]] +// CHECK-NEXT: [[#FOR+4]]: #pragma omp for // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.30]; +// CHECK-NEXT: [B1.[[#FOR+3]]]; #pragma omp for for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 32: x -// CHECK-NEXT: 33: [B1.32] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 34: argc -// CHECK-NEXT: 35: [B1.34] = [B1.33] -// CHECK-NEXT: 36: #pragma omp for simd +// CHECK-NEXT: [[#FS:]]: x +// CHECK-NEXT: [[#FS+1]]: [B1.[[#FS]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#FS+2]]: argc +// CHECK-NEXT: [[#FS+3]]: [B1.[[#FS+2]]] = [B1.[[#FS+1]]] +// CHECK-NEXT: [[#FS+4]]: #pragma omp for simd // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.35]; +// CHECK-NEXT: [B1.[[#FS+3]]]; #pragma omp for simd for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 37: x -// CHECK-NEXT: 38: [B1.37] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 39: argc -// CHECK-NEXT: 40: [B1.39] = [B1.38] -// CHECK-NEXT: 41: #pragma omp master -// CHECK-NEXT: [B1.40]; +// CHECK-NEXT: [[#MASTER:]]: x +// CHECK-NEXT: [[#MASTER+1]]: 
[B1.[[#MASTER]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#MASTER+2]]: argc +// CHECK-NEXT: [[#MASTER+3]]: [B1.[[#MASTER+2]]] = [B1.[[#MASTER+1]]] +// CHECK-NEXT: [[#MASTER+4]]: #pragma omp master +// CHECK-NEXT: [B1.[[#MASTER+3]]]; #pragma omp master argc = x; -// CHECK-NEXT: 42: x -// CHECK-NEXT: 43: [B1.42] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 44: argc -// CHECK-NEXT: 45: [B1.44] = [B1.43] -// CHECK-NEXT: 46: #pragma omp ordered -// CHECK-NEXT: [B1.45]; -// CHECK-NEXT: 47: #pragma omp for ordered +// CHECK-NEXT: [[#ORD:]]: x +// CHECK-NEXT: [[#ORD+1]]: [B1.[[#ORD]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#ORD+2]]: argc +// CHECK-NEXT: [[#ORD+3]]: [B1.[[#ORD+2]]] = [B1.[[#ORD+1]]] +// CHECK-NEXT: [[#ORD+4]]: #pragma omp ordered +// CHECK-NEXT: [B1.[[#ORD+3]]]; +// CHECK-NEXT: [[#ORD+5]]: #pragma omp for ordered // CHECK-NEXT: for (int i = 0; i < 10; ++i) { -// CHECK-NEXT:[B1.46] } +// CHECK-NEXT:[B1.[[#ORD+4]]] } #pragma omp for ordered for (int i = 0; i < 10; ++i) { #pragma omp ordered argc = x; } -// CHECK-NEXT: 48: x -// CHECK-NEXT: 49: [B1.48] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 50: argc -// CHECK-NEXT: 51: [B1.50] = [B1.49] -// CHECK-NEXT: 52: #pragma omp parallel for +// CHECK-NEXT: [[#PF:]]: x +// CHECK-NEXT: [[#PF+1]]: [B1.[[#PF]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#PF+2]]: argc +// CHECK-NEXT: [[#PF+3]]: [B1.[[#PF+2]]] = [B1.[[#PF+1]]] +// CHECK-NEXT: [[#PF+4]]: cond +// CHECK-NEXT: [[#PF+5]]: [B1.[[#PF+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#PF+6]]: [B1.[[#PF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#PF+7]]: #pragma omp parallel for if(cond) // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.51]; -#pragma omp parallel for +// CHECK-NEXT: [B1.[[#PF+3]]]; +#pragma omp parallel for if(cond) for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 53: x -// CHECK-NEXT: 54: [B1.53] (ImplicitCastExpr, 
LValueToRValue, int) -// CHECK-NEXT: 55: argc -// CHECK-NEXT: 56: [B1.55] = [B1.54] -// CHECK-NEXT: 57: #pragma omp parallel for simd +// CHECK-NEXT: [[#PFS:]]: x +// CHECK-NEXT: [[#PFS+1]]: [B1.[[#PFS]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#PFS+2]]: argc +// CHECK-NEXT: [[#PFS+3]]: [B1.[[#PFS+2]]] = [B1.[[#PFS+1]]] +// CHECK-NEXT: [[#PFS+4]]: cond +// CHECK-NEXT: [[#PFS+5]]: [B1.[[#PFS+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#PFS+6]]: [B1.[[#PFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#PFS+7]]: #pragma omp parallel for simd if(cond) // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.56]; -#pragma omp parallel for simd +// CHECK-NEXT: [B1.[[#PFS+3]]]; +#pragma omp parallel for simd if(cond) for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 58: x -// CHECK-NEXT: 59: [B1.58] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 60: argc -// CHECK-NEXT: 61: [B1.60] = [B1.59] -// CHECK-NEXT: 62: #pragma omp parallel -// CHECK-NEXT: [B1.61]; -#pragma omp parallel +// CHECK-NEXT: [[#PAR:]]: x +// CHECK-NEXT: [[#PAR+1]]: [B1.[[#PAR]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#PAR+2]]: argc +// CHECK-NEXT: [[#PAR+3]]: [B1.[[#PAR+2]]] = [B1.[[#PAR+1]]] +// CHECK-NEXT: [[#PAR+4]]: cond +// CHECK-NEXT: [[#PAR+5]]: [B1.[[#PAR+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#PAR+6]]: [B1.[[#PAR+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#PAR+7]]: #pragma omp parallel if(cond) +// CHECK-NEXT: [B1.[[#PAR+3]]]; +#pragma omp parallel if(cond) argc = x; -// CHECK-NEXT: 63: x -// CHECK-NEXT: 64: [B1.63] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 65: argc -// CHECK-NEXT: 66: [B1.65] = [B1.64] -// CHECK-NEXT: 67: #pragma omp parallel sections +// CHECK-NEXT: [[#PSECT:]]: x +// CHECK-NEXT: [[#PSECT+1]]: [B1.[[#PSECT]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#PSECT+2]]: argc +// CHECK-NEXT: [[#PSECT+3]]: 
[B1.[[#PSECT+2]]] = [B1.[[#PSECT+1]]] +// CHECK-NEXT: [[#PSECT+4]]: cond +// CHECK-NEXT: [[#PSECT+5]]: [B1.[[#PSECT+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#PSECT+6]]: [B1.[[#PSECT+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#PSECT+7]]: #pragma omp parallel sections if(cond) // CHECK-NEXT: { -// CHECK-NEXT: [B1.66]; +// CHECK-NEXT: [B1.[[#PSECT+3]]]; // CHECK-NEXT: } -#pragma omp parallel sections +#pragma omp parallel sections if(cond) { argc = x; } -// CHECK-NEXT: 68: x -// CHECK-NEXT: 69: [B1.68] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 70: argc -// CHECK-NEXT: 71: [B1.70] = [B1.69] -// CHECK-NEXT: 72: #pragma omp simd +// CHECK-NEXT: [[#SIMD:]]: x +// CHECK-NEXT: [[#SIMD+1]]: [B1.[[#SIMD]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#SIMD+2]]: argc +// CHECK-NEXT: [[#SIMD+3]]: [B1.[[#SIMD+2]]] = [B1.[[#SIMD+1]]] +// CHECK-NEXT: [[#SIMD+4]]: #pragma omp simd // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.71]; +// CHECK-NEXT: [B1.[[#SIMD+3]]]; #pragma omp simd for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 73: x -// CHECK-NEXT: 74: [B1.73] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 75: argc -// CHECK-NEXT: 76: [B1.75] = [B1.74] -// CHECK-NEXT: 77: #pragma omp single -// CHECK-NEXT: [B1.76]; +// CHECK-NEXT: [[#SINGLE:]]: x +// CHECK-NEXT: [[#SINGLE+1]]: [B1.[[#SINGLE]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#SINGLE+2]]: argc +// CHECK-NEXT: [[#SINGLE+3]]: [B1.[[#SINGLE+2]]] = [B1.[[#SINGLE+1]]] +// CHECK-NEXT: [[#SINGLE+4]]: #pragma omp single +// CHECK-NEXT: [B1.[[#SINGLE+3]]]; #pragma omp single argc = x; -// CHECK-NEXT: 78: x -// CHECK-NEXT: 79: [B1.78] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 80: argc -// CHECK-NEXT: 81: [B1.80] = [B1.79] -// CHECK-NEXT: 82: #pragma omp target depend(in : argc) -// CHECK-NEXT: [B1.81]; +// CHECK-NEXT: [[#TARGET:]]: x +// CHECK-NEXT: [[#TARGET+1]]: [B1.[[#TARGET]]] 
(ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TARGET+2]]: argc +// CHECK-NEXT: [[#TARGET+3]]: [B1.[[#TARGET+2]]] = [B1.[[#TARGET+1]]] +// CHECK-NEXT: [[#TARGET+4]]: cond +// CHECK-NEXT: [[#TARGET+5]]: [B1.[[#TARGET+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TARGET+6]]: [B1.[[#TARGET+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#TARGET+7]]: #pragma omp target depend(in : argc) if(cond) +// CHECK-NEXT: [B1.[[#TARGET+3]]]; #pragma omp target depend(in \ - : argc) + : argc) if(cond) argc = x; -// CHECK-NEXT: 83: x -// CHECK-NEXT: 84: [B1.83] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 85: argc -// CHECK-NEXT: 86: [B1.85] = [B1.84] -// CHECK-NEXT: 87: #pragma omp target parallel for +// CHECK-NEXT: [[#TPF:]]: x +// CHECK-NEXT: [[#TPF+1]]: [B1.[[#TPF]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TPF+2]]: argc +// CHECK-NEXT: [[#TPF+3]]: [B1.[[#TPF+2]]] = [B1.[[#TPF+1]]] +// CHECK-NEXT: [[#TPF+4]]: cond +// CHECK-NEXT: [[#TPF+5]]: [B1.[[#TPF+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TPF+6]]: [B1.[[#TPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#TPF+7]]: #pragma omp target parallel for if(parallel: cond) // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.86]; -#pragma omp target parallel for +// CHECK-NEXT: [B1.[[#TPF+3]]]; +#pragma omp target parallel for if(parallel:cond) for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 88: x -// CHECK-NEXT: 89: [B1.88] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 90: argc -// CHECK-NEXT: 91: [B1.90] = [B1.89] -// CHECK-NEXT: 92: #pragma omp target parallel for simd +// CHECK-NEXT: [[#TPFS:]]: x +// CHECK-NEXT: [[#TPFS+1]]: [B1.[[#TPFS]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TPFS+2]]: argc +// CHECK-NEXT: [[#TPFS+3]]: [B1.[[#TPFS+2]]] = [B1.[[#TPFS+1]]] +// CHECK-NEXT: [[#TPFS+4]]: cond +// CHECK-NEXT: [[#TPFS+5]]: [B1.[[#TPFS+4]]] (ImplicitCastExpr, 
LValueToRValue, int) +// CHECK-NEXT: [[#TPFS+6]]: [B1.[[#TPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#TPFS+7]]: #pragma omp target parallel for simd if(target: cond) // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.91]; -#pragma omp target parallel for simd +// CHECK-NEXT: [B1.[[#TPFS+3]]]; +#pragma omp target parallel for simd if(target:cond) for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 93: x -// CHECK-NEXT: 94: [B1.93] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 95: argc -// CHECK-NEXT: 96: [B1.95] = [B1.94] -// CHECK-NEXT: 97: #pragma omp target parallel -// CHECK-NEXT: [B1.96]; -#pragma omp target parallel +// CHECK-NEXT: [[#TP:]]: x +// CHECK-NEXT: [[#TP+1]]: [B1.[[#TP]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TP+2]]: argc +// CHECK-NEXT: [[#TP+3]]: [B1.[[#TP+2]]] = [B1.[[#TP+1]]] +// CHECK-NEXT: [[#TP+4]]: cond +// CHECK-NEXT: [[#TP+5]]: [B1.[[#TP+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TP+6]]: [B1.[[#TP+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#TP+7]]: #pragma omp target parallel if(cond) +// CHECK-NEXT: [B1.[[#TP+3]]]; +#pragma omp target parallel if(cond) argc = x; -// CHECK-NEXT: 98: x -// CHECK-NEXT: 99: [B1.98] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 100: argc -// CHECK-NEXT: 101: [B1.100] = [B1.99] -// CHECK-NEXT: 102: #pragma omp target simd +// CHECK-NEXT: [[#TSIMD:]]: x +// CHECK-NEXT: [[#TSIMD+1]]: [B1.[[#TSIMD]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TSIMD+2]]: argc +// CHECK-NEXT: [[#TSIMD+3]]: [B1.[[#TSIMD+2]]] = [B1.[[#TSIMD+1]]] +// CHECK-NEXT: [[#TSIMD+4]]: cond +// CHECK-NEXT: [[#TSIMD+5]]: [B1.[[#TSIMD+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TSIMD+6]]: [B1.[[#TSIMD+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#TSIMD+7]]: #pragma omp target simd if(cond) // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.101]; 
-#pragma omp target simd +// CHECK-NEXT: [B1.[[#TSIMD+3]]]; +#pragma omp target simd if(cond) for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 103: x -// CHECK-NEXT: 104: [B1.103] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 105: argc -// CHECK-NEXT: 106: [B1.105] = [B1.104] -// CHECK-NEXT: 107: #pragma omp target teams distribute +// CHECK-NEXT: [[#TTD:]]: x +// CHECK-NEXT: [[#TTD+1]]: [B1.[[#TTD]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TTD+2]]: argc +// CHECK-NEXT: [[#TTD+3]]: [B1.[[#TTD+2]]] = [B1.[[#TTD+1]]] +// CHECK-NEXT: [[#TTD+4]]: cond +// CHECK-NEXT: [[#TTD+5]]: [B1.[[#TTD+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TTD+6]]: [B1.[[#TTD+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#TTD+7]]: #pragma omp target teams distribute if(cond) // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.106]; -#pragma omp target teams distribute +// CHECK-NEXT: [B1.[[#TTD+3]]]; +#pragma omp target teams distribute if(cond) for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 108: x -// CHECK-NEXT: 109: [B1.108] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 110: argc -// CHECK-NEXT: 111: [B1.110] = [B1.109] -// CHECK-NEXT: 112: #pragma omp target teams distribute parallel for +// CHECK-NEXT: [[#TTDPF:]]: x +// CHECK-NEXT: [[#TTDPF+1]]: [B1.[[#TTDPF]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TTDPF+2]]: argc +// CHECK-NEXT: [[#TTDPF+3]]: [B1.[[#TTDPF+2]]] = [B1.[[#TTDPF+1]]] +// CHECK-NEXT: [[#TTDPF+4]]: cond +// CHECK-NEXT: [[#TTDPF+5]]: [B1.[[#TTDPF+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TTDPF+6]]: [B1.[[#TTDPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#TTDPF+7]]: #pragma omp target teams distribute parallel for if(cond) // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.111]; -#pragma omp target teams distribute parallel for +// CHECK-NEXT: [B1.[[#TTDPF+3]]]; +#pragma omp target teams 
distribute parallel for if(cond) for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 113: x -// CHECK-NEXT: 114: [B1.113] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 115: argc -// CHECK-NEXT: 116: [B1.115] = [B1.114] -// CHECK-NEXT: 117: #pragma omp target teams distribute parallel for simd +// CHECK-NEXT: [[#TTDPFS:]]: x +// CHECK-NEXT: [[#TTDPFS+1]]: [B1.[[#TTDPFS]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TTDPFS+2]]: argc +// CHECK-NEXT: [[#TTDPFS+3]]: [B1.[[#TTDPFS+2]]] = [B1.[[#TTDPFS+1]]] +// CHECK-NEXT: [[#TTDPFS+4]]: cond +// CHECK-NEXT: [[#TTDPFS+5]]: [B1.[[#TTDPFS+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TTDPFS+6]]: [B1.[[#TTDPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#TTDPFS+7]]: #pragma omp target teams distribute parallel for simd if(parallel: cond) // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.116]; -#pragma omp target teams distribute parallel for simd +// CHECK-NEXT: [B1.[[#TTDPFS+3]]]; +#pragma omp target teams distribute parallel for simd if(parallel:cond) for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 118: x -// CHECK-NEXT: 119: [B1.118] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 120: argc -// CHECK-NEXT: 121: [B1.120] = [B1.119] -// CHECK-NEXT: 122: #pragma omp target teams distribute simd +// CHECK-NEXT: [[#TTDS:]]: x +// CHECK-NEXT: [[#TTDS+1]]: [B1.[[#TTDS]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TTDS+2]]: argc +// CHECK-NEXT: [[#TTDS+3]]: [B1.[[#TTDS+2]]] = [B1.[[#TTDS+1]]] +// CHECK-NEXT: [[#TTDS+4]]: cond +// CHECK-NEXT: [[#TTDS+5]]: [B1.[[#TTDS+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TTDS+6]]: [B1.[[#TTDS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#TTDS+7]]: #pragma omp target teams distribute simd if(cond) // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.121]; -#pragma omp target teams distribute simd +// CHECK-NEXT: 
[B1.[[#TTDS+3]]]; +#pragma omp target teams distribute simd if(cond) for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 123: x -// CHECK-NEXT: 124: [B1.123] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 125: argc -// CHECK-NEXT: 126: [B1.125] = [B1.124] -// CHECK-NEXT: 127: #pragma omp target teams -// CHECK-NEXT: [B1.126]; -#pragma omp target teams +// CHECK-NEXT: [[#TT:]]: x +// CHECK-NEXT: [[#TT+1]]: [B1.[[#TT]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TT+2]]: argc +// CHECK-NEXT: [[#TT+3]]: [B1.[[#TT+2]]] = [B1.[[#TT+1]]] +// CHECK-NEXT: [[#TT+4]]: cond +// CHECK-NEXT: [[#TT+5]]: [B1.[[#TT+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TT+6]]: [B1.[[#TT+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#TT+7]]: #pragma omp target teams if(cond) +// CHECK-NEXT: [B1.[[#TT+3]]]; +#pragma omp target teams if(cond) argc = x; -// CHECK-NEXT: 128: #pragma omp target update to(x) -#pragma omp target update to(x) -// CHECK-NEXT: 129: x -// CHECK-NEXT: 130: [B1.129] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 131: argc -// CHECK-NEXT: 132: [B1.131] = [B1.130] +// CHECK-NEXT: [[#TU:]]: cond +// CHECK-NEXT: [[#TU+1]]: [B1.[[#TU]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TU+2]]: [B1.[[#TU+1]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#TU+3]]: #pragma omp target update to(x) if(target update: cond) +#pragma omp target update to(x) if(target update:cond) +// CHECK-NEXT: [[#TASK:]]: x +// CHECK-NEXT: [[#TASK+1]]: [B1.[[#TASK]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TASK+2]]: argc +// CHECK-NEXT: [[#TASK+3]]: [B1.[[#TASK+2]]] = [B1.[[#TASK+1]]] +// CHECK-NEXT: [[#TASK+4]]: cond +// CHECK-NEXT: [[#TASK+5]]: [B1.[[#TASK+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TASK+6]]: [B1.[[#TASK+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#TASK+7]]: #pragma omp task if(cond) +// CHECK-NEXT: [B1.[[#TASK+3]]]; 
+#pragma omp task if(cond) argc = x; -// CHECK-NEXT: 133: x -// CHECK-NEXT: 134: [B1.133] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 135: argc -// CHECK-NEXT: 136: [B1.135] = [B1.134] -// CHECK-NEXT: 137: #pragma omp task -// CHECK-NEXT: [B1.136]; -#pragma omp task - argc = x; -// CHECK-NEXT: 138: x -// CHECK-NEXT: 139: [B1.138] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 140: argc -// CHECK-NEXT: 141: [B1.140] = [B1.139] -// CHECK-NEXT: 142: #pragma omp taskgroup -// CHECK-NEXT: [B1.141]; +// CHECK-NEXT: [[#TG:]]: x +// CHECK-NEXT: [[#TG+1]]: [B1.[[#TG]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TG+2]]: argc +// CHECK-NEXT: [[#TG+3]]: [B1.[[#TG+2]]] = [B1.[[#TG+1]]] +// CHECK-NEXT: [[#TG+4]]: #pragma omp taskgroup +// CHECK-NEXT: [B1.[[#TG+3]]]; #pragma omp taskgroup argc = x; -// CHECK-NEXT: 143: x -// CHECK-NEXT: 144: [B1.143] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 145: argc -// CHECK-NEXT: 146: [B1.145] = [B1.144] -// CHECK-NEXT: 147: #pragma omp taskloop +// CHECK-NEXT: [[#TL:]]: x +// CHECK-NEXT: [[#TL+1]]: [B1.[[#TL]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TL+2]]: argc +// CHECK-NEXT: [[#TL+3]]: [B1.[[#TL+2]]] = [B1.[[#TL+1]]] +// CHECK-NEXT: [[#TL+4]]: cond +// CHECK-NEXT: [[#TL+5]]: [B1.[[#TL+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TL+6]]: [B1.[[#TL+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#TL+7]]: #pragma omp taskloop if(cond) // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.146]; -#pragma omp taskloop +// CHECK-NEXT: [B1.[[#TL+3]]]; +#pragma omp taskloop if(cond) for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 148: x -// CHECK-NEXT: 149: [B1.148] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 150: argc -// CHECK-NEXT: 151: [B1.150] = [B1.149] -// CHECK-NEXT: 152: #pragma omp taskloop simd +// CHECK-NEXT: [[#TLS:]]: x +// CHECK-NEXT: [[#TLS+1]]: [B1.[[#TLS]]] (ImplicitCastExpr, LValueToRValue, 
int) +// CHECK-NEXT: [[#TLS+2]]: argc +// CHECK-NEXT: [[#TLS+3]]: [B1.[[#TLS+2]]] = [B1.[[#TLS+1]]] +// CHECK-NEXT: [[#TLS+4]]: cond +// CHECK-NEXT: [[#TLS+5]]: [B1.[[#TLS+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TLS+6]]: [B1.[[#TLS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#TLS+7]]: #pragma omp taskloop simd if(cond) // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.151]; -#pragma omp taskloop simd +// CHECK-NEXT: [B1.[[#TLS+3]]]; +#pragma omp taskloop simd if(cond) for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT: 153: x -// CHECK-NEXT: 154: [B1.153] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 155: argc -// CHECK-NEXT: 156: [B1.155] = [B1.154] -// CHECK-NEXT: 157: #pragma omp teams distribute parallel for +// CHECK-NEXT: [[#TDPF:]]: x +// CHECK-NEXT: [[#TDPF+1]]: [B1.[[#TDPF]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TDPF+2]]: argc +// CHECK-NEXT: [[#TDPF+3]]: [B1.[[#TDPF+2]]] = [B1.[[#TDPF+1]]] +// CHECK-NEXT: [[#TDPF+4]]: cond +// CHECK-NEXT: [[#TDPF+5]]: [B1.[[#TDPF+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TDPF+6]]: [B1.[[#TDPF+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#TDPF+7]]: #pragma omp teams distribute parallel for if(cond) // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.156]; -// CHECK-NEXT: 158: #pragma omp target +// CHECK-NEXT: [B1.[[#TDPF+3]]]; +// CHECK-NEXT: [[#TDPF+8]]: #pragma omp target #pragma omp target -#pragma omp teams distribute parallel for +#pragma omp teams distribute parallel for if(cond) for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT:[B1.157] 159: x -// CHECK-NEXT: 160: [B1.159] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 161: argc -// CHECK-NEXT: 162: [B1.161] = [B1.160] -// CHECK-NEXT: 163: #pragma omp teams distribute parallel for simd +// CHECK-NEXT: [B1.[[#TDPF+7]]] [[#TDPFS:]]: x +// CHECK-NEXT: [[#TDPFS+1]]: [B1.[[#TDPFS]]] (ImplicitCastExpr, 
LValueToRValue, int) +// CHECK-NEXT: [[#TDPFS+2]]: argc +// CHECK-NEXT: [[#TDPFS+3]]: [B1.[[#TDPFS+2]]] = [B1.[[#TDPFS+1]]] +// CHECK-NEXT: [[#TDPFS+4]]: cond +// CHECK-NEXT: [[#TDPFS+5]]: [B1.[[#TDPFS+4]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TDPFS+6]]: [B1.[[#TDPFS+5]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) +// CHECK-NEXT: [[#TDPFS+7]]: #pragma omp teams distribute parallel for simd if(cond) // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.162]; -// CHECK-NEXT: 164: #pragma omp target +// CHECK-NEXT: [B1.[[#TDPFS+3]]]; +// CHECK-NEXT: [[#TDPFS+8]]: #pragma omp target #pragma omp target -#pragma omp teams distribute parallel for simd +#pragma omp teams distribute parallel for simd if(cond) for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT:[B1.163] 165: x -// CHECK-NEXT: 166: [B1.165] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 167: argc -// CHECK-NEXT: 168: [B1.167] = [B1.166] -// CHECK-NEXT: 169: #pragma omp teams distribute simd +// CHECK-NEXT: [B1.[[#TDPFS+7]]] [[#TDS:]]: x +// CHECK-NEXT: [[#TDS+1]]: [B1.[[#TDS]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: [[#TDS+2]]: argc +// CHECK-NEXT: [[#TDS+3]]: [B1.[[#TDS+2]]] = [B1.[[#TDS+1]]] +// CHECK-NEXT: [[#TDS+4]]: #pragma omp teams distribute simd // CHECK-NEXT: for (int i = 0; i < 10; ++i) -// CHECK-NEXT: [B1.168]; -// CHECK-NEXT: 170: #pragma omp target +// CHECK-NEXT: [B1.[[#TDS+3]]]; +// CHECK-NEXT: [[#TDS+5]]: #pragma omp target #pragma omp target #pragma omp teams distribute simd for (int i = 0; i < 10; ++i) argc = x; -// CHECK-NEXT:[B1.169] 171: x -// CHECK-NEXT: 172: [B1.171] (ImplicitCastExpr, LValueToRValue, int) -// CHECK-NEXT: 173: argc -// CHECK-NEXT: 174: [B1.173] = [B1.172] -// CHECK-NEXT: 175: #pragma omp teams -// CHECK-NEXT: [B1.174]; -// CHECK-NEXT: 176: #pragma omp target +// CHECK-NEXT: [B1.[[#TDS+4]]] [[#TEAMS:]]: x +// CHECK-NEXT: [[#TEAMS+1]]: [B1.[[#TEAMS]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-NEXT: 
[[#TEAMS+2]]: argc +// CHECK-NEXT: [[#TEAMS+3]]: [B1.[[#TEAMS+2]]] = [B1.[[#TEAMS+1]]] +// CHECK-NEXT: [[#TEAMS+4]]: #pragma omp teams +// CHECK-NEXT: [B1.[[#TEAMS+3]]]; +// CHECK-NEXT: [[#TEAMS+5]]: #pragma omp target #pragma omp target #pragma omp teams argc = x; -// CHECK-NEXT:[B1.175] Preds +// CHECK-NEXT: [B1.[[#TEAMS+4]]] Preds } diff --git a/clang/test/OpenMP/cancel_if_messages.cpp b/clang/test/OpenMP/cancel_if_messages.cpp index 3d629c927e907..222087ca9e61b 100644 --- a/clang/test/OpenMP/cancel_if_messages.cpp +++ b/clang/test/OpenMP/cancel_if_messages.cpp @@ -9,6 +9,16 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp parallel + { +#pragma omp cancel parallel if (cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; + } +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp index a06ff2377c043..e628a15c3ab44 100644 --- a/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp distribute parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp index 7769272026e6b..6cf18faf0a87f 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp +++ 
b/clang/test/OpenMP/distribute_parallel_for_simd_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp distribute parallel for simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/parallel_for_if_messages.cpp b/clang/test/OpenMP/parallel_for_if_messages.cpp index 32f9ef3a7defa..56bb06be0cc71 100644 --- a/clang/test/OpenMP/parallel_for_if_messages.cpp +++ b/clang/test/OpenMP/parallel_for_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/parallel_for_simd_if_messages.cpp index aa1e302d04242..bab9339d49174 100644 --- a/clang/test/OpenMP/parallel_for_simd_if_messages.cpp +++ b/clang/test/OpenMP/parallel_for_simd_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp parallel for simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/parallel_if_messages.cpp b/clang/test/OpenMP/parallel_if_messages.cpp index 7f802a9e4236a..f095e66bbfa5e 100644 --- 
a/clang/test/OpenMP/parallel_if_messages.cpp +++ b/clang/test/OpenMP/parallel_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp parallel if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/parallel_sections_if_messages.cpp b/clang/test/OpenMP/parallel_sections_if_messages.cpp index 8d36b6d5d3086..b7c92df4f30df 100644 --- a/clang/test/OpenMP/parallel_sections_if_messages.cpp +++ b/clang/test/OpenMP/parallel_sections_if_messages.cpp @@ -9,6 +9,14 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp parallel sections if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + { + ; + } +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_data_if_messages.cpp b/clang/test/OpenMP/target_data_if_messages.cpp index c6f9b4b34eeea..29f898c6d9fa7 100644 --- a/clang/test/OpenMP/target_data_if_messages.cpp +++ b/clang/test/OpenMP/target_data_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp target data map(argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} int main(int argc, char **argv) { diff --git a/clang/test/OpenMP/target_enter_data_if_messages.cpp b/clang/test/OpenMP/target_enter_data_if_messages.cpp index 5123d607dc6a1..21019e9ae7f8c 100644 --- 
a/clang/test/OpenMP/target_enter_data_if_messages.cpp +++ b/clang/test/OpenMP/target_enter_data_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp target enter data map(to:argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} int main(int argc, char **argv) { diff --git a/clang/test/OpenMP/target_exit_data_if_messages.cpp b/clang/test/OpenMP/target_exit_data_if_messages.cpp index c45b32ff3fe75..7b2385c16cd21 100644 --- a/clang/test/OpenMP/target_exit_data_if_messages.cpp +++ b/clang/test/OpenMP/target_exit_data_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp target exit data map(from: argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} int main(int argc, char **argv) { diff --git a/clang/test/OpenMP/target_if_messages.cpp b/clang/test/OpenMP/target_if_messages.cpp index e6b667f2cffbf..f381e9eb91ebd 100644 --- a/clang/test/OpenMP/target_if_messages.cpp +++ b/clang/test/OpenMP/target_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp target if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_parallel_for_if_messages.cpp b/clang/test/OpenMP/target_parallel_for_if_messages.cpp index 445dc1775b0f2..a5a181b9d273a 
100644 --- a/clang/test/OpenMP/target_parallel_for_if_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp target parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp index b0da8017019f0..ef9a2089d1087 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp target parallel for simd if(parallel: cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_parallel_if_messages.cpp b/clang/test/OpenMP/target_parallel_if_messages.cpp index 460e0c8655f09..ac498a7108b0d 100644 --- a/clang/test/OpenMP/target_parallel_if_messages.cpp +++ b/clang/test/OpenMP/target_parallel_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp target parallel if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git 
a/clang/test/OpenMP/target_simd_if_messages.cpp b/clang/test/OpenMP/target_simd_if_messages.cpp index 94d2ab308daa2..5f3e9e3910ac6 100644 --- a/clang/test/OpenMP/target_simd_if_messages.cpp +++ b/clang/test/OpenMP/target_simd_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp target simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_teams_distribute_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_if_messages.cpp index fd1ffb08cbe8c..499cd3ac58050 100644 --- a/clang/test/OpenMP/target_teams_distribute_if_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp target teams distribute if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp index e1114028b6877..6df23076472ec 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp target teams distribute parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when 
used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp index 59c75893a1714..e88c1f1dbbfff 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_messages.cpp @@ -9,6 +9,14 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp target teams distribute parallel for simd if (parallel \ + : cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp b/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp index 7134a8394cbb8..53af6e759d21e 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp target teams distribute simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_teams_if_messages.cpp b/clang/test/OpenMP/target_teams_if_messages.cpp index 8d3d690d631fa..4bc82a349398d 100644 --- a/clang/test/OpenMP/target_teams_if_messages.cpp +++ b/clang/test/OpenMP/target_teams_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int 
argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp target teams if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/target_update_if_messages.cpp b/clang/test/OpenMP/target_update_if_messages.cpp index 9ded332b04eb9..d967713e456fb 100644 --- a/clang/test/OpenMP/target_update_if_messages.cpp +++ b/clang/test/OpenMP/target_update_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp target update to(argc) if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/task_if_messages.cpp b/clang/test/OpenMP/task_if_messages.cpp index 305af22149d85..2d47b32b9a153 100644 --- a/clang/test/OpenMP/task_if_messages.cpp +++ b/clang/test/OpenMP/task_if_messages.cpp @@ -9,6 +9,13 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp task if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp index 6f724b050178a..b76599d41a46a 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_messages.cpp @@ -9,6 
+9,14 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp target +#pragma omp teams distribute parallel for if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp index c01e6e87e39a5..39a0b326383a2 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_messages.cpp @@ -9,6 +9,14 @@ bool foobool(int argc) { return argc; } +void xxx(int argc) { + int cond; // expected-note {{initialize the variable 'cond' to silence this warning}} +#pragma omp target +#pragma omp teams distribute parallel for simd if(cond) // expected-warning {{variable 'cond' is uninitialized when used here}} + for (int i = 0; i < 10; ++i) + ; +} + struct S1; // expected-note {{declared here}} template // expected-note {{declared here}} From 8ece3b6719948a08d1f654d97f53dbd08891199d Mon Sep 17 00:00:00 2001 From: Neil Hickey Date: Tue, 16 Jul 2019 14:57:32 +0000 Subject: [PATCH 239/451] [OpenCL] Fixing sampler initialisations for C++ mode. Allow conversions between integer and sampler type. 
Differential Revision: https://reviews.llvm.org/D64791 llvm-svn: 366212 --- clang/lib/Sema/SemaInit.cpp | 6 +++--- clang/lib/Sema/SemaOverload.cpp | 4 ++++ clang/test/CodeGenOpenCL/sampler.cl | 19 ++++++++++--------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index b3b34699eb64f..bc1069609336c 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -5640,6 +5640,9 @@ void InitializationSequence::InitializeFrom(Sema &S, bool allowObjCWritebackConversion = S.getLangOpts().ObjCAutoRefCount && Entity.isParameterKind(); + if (TryOCLSamplerInitialization(S, *this, DestType, Initializer)) + return; + // We're at the end of the line for C: it's either a write-back conversion // or it's a C assignment. There's no need to check anything else. if (!S.getLangOpts().CPlusPlus) { @@ -5649,9 +5652,6 @@ void InitializationSequence::InitializeFrom(Sema &S, return; } - if (TryOCLSamplerInitialization(S, *this, DestType, Initializer)) - return; - if (TryOCLZeroOpaqueTypeInitialization(S, *this, DestType, Initializer)) return; diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 77e6767c2b814..d8c4ea48ebce7 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -1851,6 +1851,10 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType, (From->EvaluateKnownConstInt(S.getASTContext()) == 0)) { SCS.Second = ICK_Zero_Queue_Conversion; FromType = ToType; + } else if (ToType->isSamplerT() && + From->isIntegerConstantExpr(S.getASTContext())) { + SCS.Second = ICK_Compatible_Conversion; + FromType = ToType; } else { // No second conversion required. 
SCS.Second = ICK_Identity; diff --git a/clang/test/CodeGenOpenCL/sampler.cl b/clang/test/CodeGenOpenCL/sampler.cl index 74b6d55d5d37e..1ef1f538b2562 100644 --- a/clang/test/CodeGenOpenCL/sampler.cl +++ b/clang/test/CodeGenOpenCL/sampler.cl @@ -1,5 +1,6 @@ // RUN: %clang_cc1 %s -emit-llvm -triple spir-unknown-unknown -o - -O0 | FileCheck %s // RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -triple spir-unknown-unknown -o - -O0 | FileCheck %s +// RUN: %clang_cc1 %s -cl-std=c++ -emit-llvm -triple spir-unknown-unknown -o - -O0 | FileCheck %s // // This test covers 5 cases of sampler initialzation: // 1. function argument passing @@ -29,7 +30,7 @@ const sampler_t glb_smp_const = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORD int get_sampler_initializer(void); void fnc4smp(sampler_t s) {} -// CHECK: define spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* % +// CHECK: define spir_func void [[FUNCNAME:@.*fnc4smp.*]](%opencl.sampler_t addrspace(2)* % kernel void foo(sampler_t smp_par) { // CHECK-LABEL: define spir_kernel void @foo(%opencl.sampler_t addrspace(2)* %smp_par) @@ -45,32 +46,32 @@ kernel void foo(sampler_t smp_par) { fnc4smp(smp); // CHECK-NOT: call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 19) // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_ptr]] - // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]]) + // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) // Case 1b fnc4smp(smp); // CHECK-NOT: call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 19) // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_ptr]] - // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]]) + // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) // Case 1a/2a fnc4smp(glb_smp); // CHECK: [[SAMP:%[0-9]+]] = call 
%opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) - // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]]) + // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) // Case 1a/2c fnc4smp(glb_smp_const); // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 35) - // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]]) + // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) // Case 1c fnc4smp(smp_par); // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[smp_par_ptr]] - // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]]) + // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) fnc4smp(5); // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* @__translate_sampler_initializer(i32 5) - // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]]) + // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) const sampler_t const_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR; fnc4smp(const_smp); @@ -78,12 +79,12 @@ kernel void foo(sampler_t smp_par) { // CHECK: store %opencl.sampler_t addrspace(2)* [[CONST_SAMP]], %opencl.sampler_t addrspace(2)** [[CONST_SMP_PTR:%[a-zA-Z0-9]+]] fnc4smp(const_smp); // CHECK: [[SAMP:%[0-9]+]] = load %opencl.sampler_t addrspace(2)*, %opencl.sampler_t addrspace(2)** [[CONST_SMP_PTR]] - // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]]) + // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) constant sampler_t constant_smp = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR; fnc4smp(constant_smp); // CHECK: [[SAMP:%[0-9]+]] = call %opencl.sampler_t addrspace(2)* 
@__translate_sampler_initializer(i32 35) - // CHECK: call spir_func void @fnc4smp(%opencl.sampler_t addrspace(2)* [[SAMP]]) + // CHECK: call spir_func void [[FUNCNAME]](%opencl.sampler_t addrspace(2)* [[SAMP]]) // TODO: enable sampler initialization with non-constant integer. //const sampler_t const_smp_func_init = get_sampler_initializer(); From d3941e663066eb120ea8b79983122897d503d1c2 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 16 Jul 2019 15:14:01 +0000 Subject: [PATCH 240/451] [SWIG] Deprecate SWIG 1.x The last swig 1.x release dates from 2009, now 10 years ago. Recently, I fixed an issue that prevented us from using swig 4 (r364974), which turned out to be not backward compatible with swig 1.x (r365718). This patch deprecates this (really old) version of swig and makes swig 2 the minimum supported version in LLDB . This should be fine for the build bots, which are all running swig 3 or later. Differential revision: https://reviews.llvm.org/D64782 llvm-svn: 366213 --- lldb/scripts/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lldb/scripts/CMakeLists.txt b/lldb/scripts/CMakeLists.txt index 439852793f18a..1a0ea96ff9f65 100644 --- a/lldb/scripts/CMakeLists.txt +++ b/lldb/scripts/CMakeLists.txt @@ -14,6 +14,11 @@ if(LLDB_BUILD_FRAMEWORK) endif() find_package(SWIG REQUIRED) +set(SWIG_MIN_VERSION "2.0.0") +if (${SWIG_VERSION} VERSION_LESS ${SWIG_MIN_VERSION}) + message(FATAL_ERROR "LLDB requires swig ${SWIG_MIN_VERSION}, your version is ${SWIG_VERSION}.") +endif() + add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/LLDBWrapPython.cpp OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/lldb.py From f34a69c2e2792f9702f14cc64723ff89294ba911 Mon Sep 17 00:00:00 2001 From: Amaury Sechet Date: Tue, 16 Jul 2019 15:17:00 +0000 Subject: [PATCH 241/451] [DAGCombiner] fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry. Summary: As per title. 
DAGCombiner only matches the special case where b = 0; this patch extends the pattern to match any value of b. Depends on D57302 Reviewers: hfinkel, RKSimon, craig.topper Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59208 llvm-svn: 366214 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 44 ++++++++++++------- llvm/test/CodeGen/X86/addcarry.ll | 5 +-- llvm/test/CodeGen/X86/subcarry.ll | 30 +++++-------- 3 files changed, 41 insertions(+), 38 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 93b87fbe026e9..49c922f560faf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2706,7 +2706,19 @@ static SDValue flipBoolean(SDValue V, const SDLoc &DL, return DAG.getNode(ISD::XOR, DL, VT, V, Cst); } -static SDValue extractBooleanFlip(SDValue V, const TargetLowering &TLI) { +/** + * Flips a boolean if it is cheaper to compute. If the Force parameters is set, + * then the flip also occurs if computing the inverse is the same cost. + * This function returns an empty SDValue in case it cannot flip the boolean + * without increasing the cost of the computation. If you want to flip a boolean + * no matter what, use flipBoolean. + */ +static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, + const TargetLowering &TLI, + bool Force) { + if (Force && isa(V)) + return flipBoolean(V, SDLoc(V), DAG, TLI); + if (V.getOpcode() != ISD::XOR) return SDValue(); @@ -2731,6 +2743,8 @@ static SDValue extractBooleanFlip(SDValue V, const TargetLowering &TLI) { if (IsFlip) return V.getOperand(0); + if (Force) + return flipBoolean(V, SDLoc(V), DAG, TLI); return SDValue(); } @@ -2843,11 +2857,10 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) { return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1); } - EVT CarryVT = CarryIn.getValueType(); - // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
if (isNullConstant(N0) && isNullConstant(N1)) { EVT VT = N0.getValueType(); + EVT CarryVT = CarryIn.getValueType(); SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT); AddToWorklist(CarryExt.getNode()); return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt, @@ -2855,17 +2868,6 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) { DAG.getConstant(0, DL, CarryVT)); } - // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry. - if (isBitwiseNot(N0) && isNullConstant(N1)) { - if (SDValue B = extractBooleanFlip(CarryIn, TLI)) { - SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), - DAG.getConstant(0, DL, N0.getValueType()), - N0.getOperand(0), B); - return CombineTo(N, Sub, - flipBoolean(Sub.getValue(1), DL, DAG, TLI)); - } - } - if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N)) return Combined; @@ -2964,6 +2966,16 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N) { + // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry. + if (isBitwiseNot(N0)) + if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) { + SDLoc DL(N); + SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1, + N0.getOperand(0), NotC); + return CombineTo(N, Sub, + flipBoolean(Sub.getValue(1), DL, DAG, TLI)); + } + // Iff the flag result is dead: // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry) // Don't do this if the Carry comes from the uaddo. 
It won't remove the uaddo @@ -8302,7 +8314,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } // select (not Cond), N1, N2 -> select Cond, N2, N1 - if (SDValue F = extractBooleanFlip(N0, TLI)) { + if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) { SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1); SelectOp->setFlags(Flags); return SelectOp; @@ -8797,7 +8809,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { return V; // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1 - if (SDValue F = extractBooleanFlip(N0, TLI)) + if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) return DAG.getSelect(DL, VT, F, N2, N1); // Canonicalize integer abs. diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll index a085108d0b209..6c70fee99090b 100644 --- a/llvm/test/CodeGen/X86/addcarry.ll +++ b/llvm/test/CodeGen/X86/addcarry.ll @@ -391,11 +391,10 @@ define i128 @addcarry_to_subcarry(i64 %a, i64 %b) { ; CHECK-LABEL: addcarry_to_subcarry: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: cmpq %rsi, %rdi ; CHECK-NEXT: notq %rsi -; CHECK-NEXT: movb $1, %cl +; CHECK-NEXT: setae %cl ; CHECK-NEXT: addb $-1, %cl -; CHECK-NEXT: movq %rdi, %rcx -; CHECK-NEXT: adcq %rsi, %rcx ; CHECK-NEXT: adcq $0, %rax ; CHECK-NEXT: setb %cl ; CHECK-NEXT: movzbl %cl, %edx diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll index 78ae3297ca3b8..449391616aa9d 100644 --- a/llvm/test/CodeGen/X86/subcarry.ll +++ b/llvm/test/CodeGen/X86/subcarry.ll @@ -90,37 +90,29 @@ entry: define %S @sub(%S* nocapture readonly %this, %S %arg.b) local_unnamed_addr { ; CHECK-LABEL: sub: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbx, -16 ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: movq (%rsi), %r10 ; CHECK-NEXT: movq 8(%rsi), %rdi -; CHECK-NEXT: movq %r10, %r11 -; CHECK-NEXT: subq %rdx, %r11 -; CHECK-NEXT: notq %rdx -; CHECK-NEXT: movb $1, %bl -; CHECK-NEXT: addb $-1, 
%bl -; CHECK-NEXT: adcq %r10, %rdx +; CHECK-NEXT: subq %rdx, %r10 +; CHECK-NEXT: setae %dl +; CHECK-NEXT: addb $-1, %dl ; CHECK-NEXT: adcq $0, %rdi ; CHECK-NEXT: setb %dl -; CHECK-NEXT: movzbl %dl, %edx +; CHECK-NEXT: movzbl %dl, %r11d ; CHECK-NEXT: notq %rcx ; CHECK-NEXT: addq %rdi, %rcx -; CHECK-NEXT: adcq 16(%rsi), %rdx -; CHECK-NEXT: setb %bl -; CHECK-NEXT: movzbl %bl, %edi +; CHECK-NEXT: adcq 16(%rsi), %r11 +; CHECK-NEXT: setb %dl +; CHECK-NEXT: movzbl %dl, %edx ; CHECK-NEXT: notq %r8 -; CHECK-NEXT: addq %rdx, %r8 -; CHECK-NEXT: adcq 24(%rsi), %rdi +; CHECK-NEXT: addq %r11, %r8 +; CHECK-NEXT: adcq 24(%rsi), %rdx ; CHECK-NEXT: notq %r9 -; CHECK-NEXT: addq %rdi, %r9 -; CHECK-NEXT: movq %r11, (%rax) +; CHECK-NEXT: addq %rdx, %r9 +; CHECK-NEXT: movq %r10, (%rax) ; CHECK-NEXT: movq %rcx, 8(%rax) ; CHECK-NEXT: movq %r8, 16(%rax) ; CHECK-NEXT: movq %r9, 24(%rax) -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq entry: %0 = extractvalue %S %arg.b, 0 From 228a7b4f2a3575da642017a3b55062488e710d46 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Tue, 16 Jul 2019 15:23:10 +0000 Subject: [PATCH 242/451] [ADCE] Fix non-deterministic behaviour due to iterating over a pointer set. 
Original patch by Yann Laigle-Chapuy Differential Revision: https://reviews.llvm.org/D64785 llvm-svn: 366215 --- llvm/lib/Transforms/Scalar/ADCE.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/ADCE.cpp b/llvm/lib/Transforms/Scalar/ADCE.cpp index 8dcf6393f4602..7f7460c5746a7 100644 --- a/llvm/lib/Transforms/Scalar/ADCE.cpp +++ b/llvm/lib/Transforms/Scalar/ADCE.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -135,7 +136,7 @@ class AggressiveDeadCodeElimination { SmallPtrSet AliveScopes; /// Set of blocks with not known to have live terminators. - SmallPtrSet BlocksWithDeadTerminators; + SmallSetVector BlocksWithDeadTerminators; /// The set of blocks which we have determined whose control /// dependence sources must be live and which have not had @@ -389,7 +390,7 @@ void AggressiveDeadCodeElimination::markLive(Instruction *I) { // Mark the containing block live auto &BBInfo = *Info.Block; if (BBInfo.Terminator == I) { - BlocksWithDeadTerminators.erase(BBInfo.BB); + BlocksWithDeadTerminators.remove(BBInfo.BB); // For live terminators, mark destination blocks // live to preserve this control flow edges. if (!BBInfo.UnconditionalBranch) @@ -478,10 +479,14 @@ void AggressiveDeadCodeElimination::markLiveBranchesFromControlDependences() { // which currently have dead terminators that are control // dependence sources of a block which is in NewLiveBlocks. 
+ const SmallPtrSet BWDT{ + BlocksWithDeadTerminators.begin(), + BlocksWithDeadTerminators.end() + }; SmallVector IDFBlocks; ReverseIDFCalculator IDFs(PDT); IDFs.setDefiningBlocks(NewLiveBlocks); - IDFs.setLiveInBlocks(BlocksWithDeadTerminators); + IDFs.setLiveInBlocks(BWDT); IDFs.calculate(IDFBlocks); NewLiveBlocks.clear(); From cc909812a39d26ba4bcc8aaa49096155802c4521 Mon Sep 17 00:00:00 2001 From: Francis Visoiu Mistrih Date: Tue, 16 Jul 2019 15:24:59 +0000 Subject: [PATCH 243/451] [Remarks][NFC] Combine ParserFormat and SerializerFormat It's useless to have both. llvm-svn: 366216 --- llvm/include/llvm/IR/RemarkStreamer.h | 4 --- llvm/include/llvm/Remarks/Remark.h | 1 - llvm/include/llvm/Remarks/RemarkFormat.h | 33 +++++++++++++++++++ llvm/include/llvm/Remarks/RemarkParser.h | 7 ++-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 1 + llvm/lib/IR/RemarkStreamer.cpp | 22 +++---------- llvm/lib/Remarks/CMakeLists.txt | 1 + llvm/lib/Remarks/RemarkFormat.cpp | 30 +++++++++++++++++ llvm/lib/Remarks/RemarkParser.cpp | 33 ++++++++++--------- llvm/lib/Remarks/RemarkParserImpl.h | 4 +-- llvm/lib/Remarks/YAMLRemarkParser.h | 4 +-- llvm/tools/llvm-opt-report/OptReport.cpp | 2 +- .../Remarks/YAMLRemarksParsingTest.cpp | 10 +++--- 13 files changed, 100 insertions(+), 52 deletions(-) create mode 100644 llvm/include/llvm/Remarks/RemarkFormat.h create mode 100644 llvm/lib/Remarks/RemarkFormat.cpp diff --git a/llvm/include/llvm/IR/RemarkStreamer.h b/llvm/include/llvm/IR/RemarkStreamer.h index 9b6d82ee30c0c..c84de9aea3519 100644 --- a/llvm/include/llvm/IR/RemarkStreamer.h +++ b/llvm/include/llvm/IR/RemarkStreamer.h @@ -90,10 +90,6 @@ struct RemarkSetupFormatError : RemarkSetupErrorInfo { using RemarkSetupErrorInfo::RemarkSetupErrorInfo; }; -enum class RemarksSerializerFormat { Unknown, YAML }; - -Expected parseSerializerFormat(StringRef Format); - /// Setup optimization remarks. 
Expected> setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename, diff --git a/llvm/include/llvm/Remarks/Remark.h b/llvm/include/llvm/Remarks/Remark.h index d916728e0b9d2..4241fb1fda3b3 100644 --- a/llvm/include/llvm/Remarks/Remark.h +++ b/llvm/include/llvm/Remarks/Remark.h @@ -24,7 +24,6 @@ namespace llvm { namespace remarks { constexpr uint64_t Version = 0; -constexpr StringRef Magic("REMARKS", 7); /// The debug location used to track a remark back to the source file. struct RemarkLocation { diff --git a/llvm/include/llvm/Remarks/RemarkFormat.h b/llvm/include/llvm/Remarks/RemarkFormat.h new file mode 100644 index 0000000000000..e167d99d25172 --- /dev/null +++ b/llvm/include/llvm/Remarks/RemarkFormat.h @@ -0,0 +1,33 @@ +//===-- llvm/Remarks/RemarkFormat.h - The format of remarks -----*- C++/-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines utilities to deal with the format of remarks. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_REMARKS_REMARK_FORMAT_H +#define LLVM_REMARKS_REMARK_FORMAT_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace remarks { + +constexpr StringRef Magic("REMARKS", 7); + +/// The format used for serializing/deserializing remarks. +enum class Format { Unknown, YAML }; + +/// Parse and validate a string for the remark format. 
+Expected parseFormat(StringRef FormatStr); + +} // end namespace remarks +} // end namespace llvm + +#endif /* LLVM_REMARKS_REMARK_FORMAT_H */ diff --git a/llvm/include/llvm/Remarks/RemarkParser.h b/llvm/include/llvm/Remarks/RemarkParser.h index 457b2fbaa5f5d..b956f0c40250c 100644 --- a/llvm/include/llvm/Remarks/RemarkParser.h +++ b/llvm/include/llvm/Remarks/RemarkParser.h @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Remarks/Remark.h" +#include "llvm/Remarks/RemarkFormat.h" #include "llvm/Support/Error.h" #include @@ -25,8 +26,6 @@ namespace remarks { struct ParserImpl; struct ParsedStringTable; -enum class ParserFormat { YAML }; - /// Parser used to parse a raw buffer to remarks::Remark objects. struct Parser { /// The hidden implementation of the parser. @@ -35,11 +34,11 @@ struct Parser { /// Create a parser parsing \p Buffer to Remark objects. /// This constructor should be only used for parsing remarks without a string /// table. - Parser(ParserFormat Format, StringRef Buffer); + Parser(Format ParserFormat, StringRef Buffer); /// Create a parser parsing \p Buffer to Remark objects, using \p StrTab as a /// string table. - Parser(ParserFormat Format, StringRef Buffer, + Parser(Format ParserFormat, StringRef Buffer, const ParsedStringTable &StrTab); // Needed because ParserImpl is an incomplete type. 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 174a9bcfd9b2c..54f6cc2d5571a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -100,6 +100,7 @@ #include "llvm/MC/SectionKind.h" #include "llvm/Pass.h" #include "llvm/Remarks/Remark.h" +#include "llvm/Remarks/RemarkFormat.h" #include "llvm/Remarks/RemarkStringTable.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/lib/IR/RemarkStreamer.cpp b/llvm/lib/IR/RemarkStreamer.cpp index 2c3bc8406e55e..32adef181f429 100644 --- a/llvm/lib/IR/RemarkStreamer.cpp +++ b/llvm/lib/IR/RemarkStreamer.cpp @@ -15,6 +15,7 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/Remarks/RemarkFormat.h" using namespace llvm; @@ -112,30 +113,16 @@ char RemarkSetupPatternError::ID = 0; char RemarkSetupFormatError::ID = 0; static std::unique_ptr -formatToSerializer(RemarksSerializerFormat RemarksFormat, raw_ostream &OS) { +formatToSerializer(remarks::Format RemarksFormat, raw_ostream &OS) { switch (RemarksFormat) { default: llvm_unreachable("Unknown remark serializer format."); return nullptr; - case RemarksSerializerFormat::YAML: + case remarks::Format::YAML: return llvm::make_unique(OS); }; } -Expected -llvm::parseSerializerFormat(StringRef StrFormat) { - auto Format = StringSwitch(StrFormat) - .Cases("", "yaml", RemarksSerializerFormat::YAML) - .Default(RemarksSerializerFormat::Unknown); - - if (Format == RemarksSerializerFormat::Unknown) - return createStringError(std::make_error_code(std::errc::invalid_argument), - "Unknown remark serializer format: '%s'", - StrFormat.data()); - - return Format; -} - Expected> llvm::setupOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses, StringRef RemarksFormat, @@ -158,8 +145,7 @@ llvm::setupOptimizationRemarks(LLVMContext &Context, StringRef 
RemarksFilename, if (EC) return make_error(errorCodeToError(EC)); - Expected Format = - parseSerializerFormat(RemarksFormat); + Expected Format = remarks::parseFormat(RemarksFormat); if (Error E = Format.takeError()) return make_error(std::move(E)); diff --git a/llvm/lib/Remarks/CMakeLists.txt b/llvm/lib/Remarks/CMakeLists.txt index 73383597accd5..06ddbab6de5a3 100644 --- a/llvm/lib/Remarks/CMakeLists.txt +++ b/llvm/lib/Remarks/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_library(LLVMRemarks Remark.cpp + RemarkFormat.cpp RemarkParser.cpp RemarkStringTable.cpp YAMLRemarkParser.cpp diff --git a/llvm/lib/Remarks/RemarkFormat.cpp b/llvm/lib/Remarks/RemarkFormat.cpp new file mode 100644 index 0000000000000..bcd0f753ff64f --- /dev/null +++ b/llvm/lib/Remarks/RemarkFormat.cpp @@ -0,0 +1,30 @@ +//===- RemarkFormat.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implementation of utilities to handle the different remark formats. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Remarks/RemarkFormat.h" +#include "llvm/ADT/StringSwitch.h" + +using namespace llvm; +using namespace llvm::remarks; + +Expected llvm::remarks::parseFormat(StringRef FormatStr) { + auto Result = StringSwitch(FormatStr) + .Cases("", "yaml", Format::YAML) + .Default(Format::Unknown); + + if (Result == Format::Unknown) + return createStringError(std::make_error_code(std::errc::invalid_argument), + "Unknown remark serializer format: '%s'", + FormatStr.data()); + + return Result; +} diff --git a/llvm/lib/Remarks/RemarkParser.cpp b/llvm/lib/Remarks/RemarkParser.cpp index bd83ba488d8a5..41ed64d022b74 100644 --- a/llvm/lib/Remarks/RemarkParser.cpp +++ b/llvm/lib/Remarks/RemarkParser.cpp @@ -20,31 +20,35 @@ using namespace llvm; using namespace llvm::remarks; -static std::unique_ptr formatToParserImpl(ParserFormat Format, +static std::unique_ptr formatToParserImpl(Format ParserFormat, StringRef Buf) { - switch (Format) { - case ParserFormat::YAML: + switch (ParserFormat) { + case Format::YAML: return llvm::make_unique(Buf); + case Format::Unknown: + llvm_unreachable("Unhandled llvm::remarks::ParserFormat enum"); + return nullptr; }; - llvm_unreachable("Unhandled llvm::remarks::ParserFormat enum"); } static std::unique_ptr -formatToParserImpl(ParserFormat Format, StringRef Buf, +formatToParserImpl(Format ParserFormat, StringRef Buf, const ParsedStringTable &StrTab) { - switch (Format) { - case ParserFormat::YAML: + switch (ParserFormat) { + case Format::YAML: return llvm::make_unique(Buf, &StrTab); + case Format::Unknown: + llvm_unreachable("Unhandled llvm::remarks::ParserFormat enum"); + return nullptr; }; - llvm_unreachable("Unhandled llvm::remarks::ParserFormat enum"); } -Parser::Parser(ParserFormat Format, StringRef Buf) - : Impl(formatToParserImpl(Format, Buf)) {} +Parser::Parser(Format ParserFormat, StringRef Buf) + : Impl(formatToParserImpl(ParserFormat, Buf)) {} 
-Parser::Parser(ParserFormat Format, StringRef Buf, +Parser::Parser(Format ParserFormat, StringRef Buf, const ParsedStringTable &StrTab) - : Impl(formatToParserImpl(Format, Buf, StrTab)) {} + : Impl(formatToParserImpl(ParserFormat, Buf, StrTab)) {} Parser::~Parser() = default; @@ -110,9 +114,8 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(remarks::Parser, LLVMRemarkParserRef) extern "C" LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf, uint64_t Size) { - return wrap( - new remarks::Parser(remarks::ParserFormat::YAML, - StringRef(static_cast(Buf), Size))); + return wrap(new remarks::Parser( + remarks::Format::YAML, StringRef(static_cast(Buf), Size))); } static void handleYAMLError(remarks::YAMLParserImpl &Impl, Error E) { diff --git a/llvm/lib/Remarks/RemarkParserImpl.h b/llvm/lib/Remarks/RemarkParserImpl.h index 6b9329b1815cd..5f8c21dcdd44c 100644 --- a/llvm/lib/Remarks/RemarkParserImpl.h +++ b/llvm/lib/Remarks/RemarkParserImpl.h @@ -19,13 +19,13 @@ namespace llvm { namespace remarks { /// This is used as a base for any parser implementation. struct ParserImpl { - explicit ParserImpl(ParserFormat Format) : Format(Format) {} + explicit ParserImpl(Format ParserFormat) : ParserFormat(ParserFormat) {} // Virtual destructor prevents mismatched deletes virtual ~ParserImpl() {} // The parser format. This is used as a tag to safely cast between // implementations. 
- ParserFormat Format; + Format ParserFormat; }; } // end namespace remarks } // end namespace llvm diff --git a/llvm/lib/Remarks/YAMLRemarkParser.h b/llvm/lib/Remarks/YAMLRemarkParser.h index 9ed18eebe7759..14698bbd3ca48 100644 --- a/llvm/lib/Remarks/YAMLRemarkParser.h +++ b/llvm/lib/Remarks/YAMLRemarkParser.h @@ -127,11 +127,11 @@ struct YAMLParserImpl : public ParserImpl { YAMLParserImpl(StringRef Buf, Optional StrTab = None) - : ParserImpl{ParserFormat::YAML}, YAMLParser(Buf, StrTab), + : ParserImpl{Format::YAML}, YAMLParser(Buf, StrTab), YAMLIt(YAMLParser.Stream.begin()), HasErrors(false) {} static bool classof(const ParserImpl *PI) { - return PI->Format == ParserFormat::YAML; + return PI->ParserFormat == Format::YAML; } }; } // end namespace remarks diff --git a/llvm/tools/llvm-opt-report/OptReport.cpp b/llvm/tools/llvm-opt-report/OptReport.cpp index b263d9a4fb61f..80d0b73664d01 100644 --- a/llvm/tools/llvm-opt-report/OptReport.cpp +++ b/llvm/tools/llvm-opt-report/OptReport.cpp @@ -150,7 +150,7 @@ static bool readLocationInfo(LocationInfoTy &LocationInfo) { return false; } - remarks::Parser Parser(remarks::ParserFormat::YAML, (*Buf)->getBuffer()); + remarks::Parser Parser(remarks::Format::YAML, (*Buf)->getBuffer()); while (true) { Expected RemarkOrErr = Parser.getNext(); diff --git a/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp b/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp index 6cca4c5ce8c21..e3c7cdf881e3c 100644 --- a/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp +++ b/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp @@ -14,7 +14,7 @@ using namespace llvm; template void parseGood(const char (&Buf)[N]) { - remarks::Parser Parser(remarks::ParserFormat::YAML, {Buf, N - 1}); + remarks::Parser Parser(remarks::Format::YAML, {Buf, N - 1}); Expected Remark = Parser.getNext(); EXPECT_FALSE(errorToBool(Remark.takeError())); // Check for parsing errors. EXPECT_TRUE(*Remark != nullptr); // At least one remark. 
@@ -25,7 +25,7 @@ template void parseGood(const char (&Buf)[N]) { template bool parseExpectError(const char (&Buf)[N], const char *Error) { - remarks::Parser Parser(remarks::ParserFormat::YAML, {Buf, N - 1}); + remarks::Parser Parser(remarks::Format::YAML, {Buf, N - 1}); Expected Remark = Parser.getNext(); EXPECT_FALSE(Remark); // Expect an error here. @@ -354,7 +354,7 @@ TEST(YAMLRemarks, Contents) { " - String: ' because its definition is unavailable'\n" "\n"; - remarks::Parser Parser(remarks::ParserFormat::YAML, Buf); + remarks::Parser Parser(remarks::Format::YAML, Buf); Expected RemarkOrErr = Parser.getNext(); EXPECT_FALSE(errorToBool(RemarkOrErr.takeError())); EXPECT_TRUE(*RemarkOrErr != nullptr); @@ -516,7 +516,7 @@ TEST(YAMLRemarks, ContentsStrTab) { 115); remarks::ParsedStringTable StrTab(StrTabBuf); - remarks::Parser Parser(remarks::ParserFormat::YAML, Buf, StrTab); + remarks::Parser Parser(remarks::Format::YAML, Buf, StrTab); Expected RemarkOrErr = Parser.getNext(); EXPECT_FALSE(errorToBool(RemarkOrErr.takeError())); EXPECT_TRUE(*RemarkOrErr != nullptr); @@ -584,7 +584,7 @@ TEST(YAMLRemarks, ParsingBadStringTableIndex) { StringRef StrTabBuf = StringRef("inline"); remarks::ParsedStringTable StrTab(StrTabBuf); - remarks::Parser Parser(remarks::ParserFormat::YAML, Buf, StrTab); + remarks::Parser Parser(remarks::Format::YAML, Buf, StrTab); Expected Remark = Parser.getNext(); EXPECT_FALSE(Remark); // Expect an error here. From 94bad22c2c66f2178e0364c5f502f0225c1ede8e Mon Sep 17 00:00:00 2001 From: Francis Visoiu Mistrih Date: Tue, 16 Jul 2019 15:25:05 +0000 Subject: [PATCH 244/451] [Remarks] Simplify and refactor the RemarkParser interface Before, everything was based on some kind of type-erased parser implementation which contained a lot of boilerplate code when multiple formats were to be supported.
This simplifies it by: * the remark now owns its arguments * *always* returning an error from the implementation side * working around the way the YAML parser reports errors: catch them through callbacks and re-insert them in a proper llvm::Error * add a CParser wrapper that is used when implementing the C API to avoid cluttering the C++ API with useless state * LLVMRemarkParserGetNext now returns an object that needs to be released to avoid leaking resources * add a new API to dispose of a remark entry: LLVMRemarkEntryDispose llvm-svn: 366217 --- llvm/docs/Remarks.rst | 1 + llvm/include/llvm-c/Remarks.h | 23 +- llvm/include/llvm/IR/RemarkStreamer.h | 12 +- llvm/include/llvm/Remarks/Remark.h | 15 +- llvm/include/llvm/Remarks/RemarkParser.h | 42 +- llvm/include/llvm/Support/SourceMgr.h | 2 + llvm/lib/IR/RemarkStreamer.cpp | 14 +- llvm/lib/Remarks/Remark.cpp | 4 + llvm/lib/Remarks/RemarkParser.cpp | 143 ++---- llvm/lib/Remarks/RemarkParserImpl.h | 33 -- llvm/lib/Remarks/YAMLRemarkParser.cpp | 429 ++++++++++-------- llvm/lib/Remarks/YAMLRemarkParser.h | 130 ++---- llvm/tools/llvm-opt-report/OptReport.cpp | 26 +- llvm/tools/remarks-shlib/Remarks.exports | 1 + .../Remarks/YAMLRemarksParsingTest.cpp | 103 +++-- 15 files changed, 485 insertions(+), 493 deletions(-) delete mode 100644 llvm/lib/Remarks/RemarkParserImpl.h diff --git a/llvm/docs/Remarks.rst b/llvm/docs/Remarks.rst index 8215efbeebcfb..e3d088d777d29 100644 --- a/llvm/docs/Remarks.rst +++ b/llvm/docs/Remarks.rst @@ -295,6 +295,7 @@ The typical usage through the C API is like the following: LLVMRemarkEntryRef Remark = NULL; while ((Remark = LLVMRemarkParserGetNext(Parser))) { // use Remark + LLVMRemarkEntryDispose(Remark); // Release memory. 
} bool HasError = LLVMRemarkParserHasError(Parser); LLVMRemarkParserDispose(Parser); diff --git a/llvm/include/llvm-c/Remarks.h b/llvm/include/llvm-c/Remarks.h index 7fb16656a9a58..88eb5120c57c6 100644 --- a/llvm/include/llvm-c/Remarks.h +++ b/llvm/include/llvm-c/Remarks.h @@ -136,6 +136,13 @@ extern LLVMRemarkDebugLocRef LLVMRemarkArgGetDebugLoc(LLVMRemarkArgRef Arg); */ typedef struct LLVMRemarkOpaqueEntry *LLVMRemarkEntryRef; +/** + * Free the resources used by the remark entry. + * + * \since REMARKS_API_VERSION=0 + */ +extern void LLVMRemarkEntryDispose(LLVMRemarkEntryRef Remark); + /** * The type of the remark. For example, it can allow users to only keep the * missed optimizations from the compiler. @@ -161,7 +168,7 @@ extern LLVMRemarkStringRef LLVMRemarkEntryGetRemarkName(LLVMRemarkEntryRef Remark); /** - * Get the name of the function being processsed when the remark was emitted. + * Get the name of the function being processed when the remark was emitted. * * \since REMARKS_API_VERSION=0 */ @@ -199,6 +206,8 @@ extern uint32_t LLVMRemarkEntryGetNumArgs(LLVMRemarkEntryRef Remark); * * If there are no arguments in \p Remark, the return value will be `NULL`. * + * The lifetime of the returned value is bound to the lifetime of \p Remark. + * * \since REMARKS_API_VERSION=0 */ extern LLVMRemarkArgRef LLVMRemarkEntryGetFirstArg(LLVMRemarkEntryRef Remark); @@ -208,6 +217,8 @@ extern LLVMRemarkArgRef LLVMRemarkEntryGetFirstArg(LLVMRemarkEntryRef Remark); * * Returns `NULL` if there are no more arguments available. * + * The lifetime of the returned value is bound to the lifetime of \p Remark. + * * \since REMARKS_API_VERSION=0 */ extern LLVMRemarkArgRef LLVMRemarkEntryGetNextArg(LLVMRemarkArgRef It, @@ -232,8 +243,11 @@ extern LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf, /** * Returns the next remark in the file. * - * The value pointed to by the return value is invalidated by the next call to - * LLVMRemarkParserGetNext(). 
+ * The value pointed to by the return value needs to be disposed using a call to + * LLVMRemarkEntryDispose(). + * + * All the entries in the returned value that are of LLVMRemarkStringRef type + * will become invalidated once a call to LLVMRemarkParserDispose is made. * * If the parser reaches the end of the buffer, the return value will be `NULL`. * @@ -258,8 +272,9 @@ extern LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf, * ``` * LLVMRemarkParserRef Parser = LLVMRemarkParserCreateYAML(Buf, Size); * LLVMRemarkEntryRef Remark = NULL; - * while ((Remark == LLVMRemarkParserGetNext(Parser))) { + * while ((Remark = LLVMRemarkParserGetNext(Parser))) { * // use Remark + * LLVMRemarkEntryDispose(Remark); // Release memory. * } * bool HasError = LLVMRemarkParserHasError(Parser); * LLVMRemarkParserDispose(Parser); diff --git a/llvm/include/llvm/IR/RemarkStreamer.h b/llvm/include/llvm/IR/RemarkStreamer.h index c84de9aea3519..f34cc660b2fb1 100644 --- a/llvm/include/llvm/IR/RemarkStreamer.h +++ b/llvm/include/llvm/IR/RemarkStreamer.h @@ -32,15 +32,9 @@ class RemarkStreamer { /// The object used to serialize the remarks to a specific format. std::unique_ptr Serializer; - /// Temporary buffer for converting diagnostics into remark objects. This is - /// used for the remark arguments that are converted from a vector of - /// diagnostic arguments to a vector of remark arguments. - SmallVector TmpArgs; - /// Convert diagnostics into remark objects. The result uses \p TmpArgs as a - /// temporary buffer for the remark arguments, and relies on all the strings - /// to be kept in memory until the next call to `toRemark`. - /// The lifetime of the members of the result is bound to the lifetime of both - /// the remark streamer and the LLVM diagnostics. + /// Convert diagnostics into remark objects. + /// The lifetime of the members of the result is bound to the lifetime of + /// the LLVM diagnostics. 
remarks::Remark toRemark(const DiagnosticInfoOptimizationBase &Diag); public: diff --git a/llvm/include/llvm/Remarks/Remark.h b/llvm/include/llvm/Remarks/Remark.h index 4241fb1fda3b3..05d0ea60accde 100644 --- a/llvm/include/llvm/Remarks/Remark.h +++ b/llvm/include/llvm/Remarks/Remark.h @@ -85,10 +85,23 @@ struct Remark { Optional Hotness; /// Arguments collected via the streaming interface. - ArrayRef Args; + SmallVector Args; + + Remark() = default; + Remark(Remark &&) = default; + Remark &operator=(Remark &&) = default; /// Return a message composed from the arguments as a string. std::string getArgsAsMsg() const; + + /// Clone this remark to explicitly ask for a copy. + Remark clone() const { return *this; } + +private: + /// In order to avoid unwanted copies, "delete" the copy constructor. + /// If a copy is needed, it should be done through `Remark::clone()`. + Remark(const Remark &) = default; + Remark& operator=(const Remark &) = default; }; // Create wrappers for C Binding types (see CBindingWrapping.h). diff --git a/llvm/include/llvm/Remarks/RemarkParser.h b/llvm/include/llvm/Remarks/RemarkParser.h index b956f0c40250c..671e1abe5ec77 100644 --- a/llvm/include/llvm/Remarks/RemarkParser.h +++ b/llvm/include/llvm/Remarks/RemarkParser.h @@ -26,27 +26,33 @@ namespace remarks { struct ParserImpl; struct ParsedStringTable; +class EndOfFileError : public ErrorInfo { +public: + static char ID; + + EndOfFileError() {} + + void log(raw_ostream &OS) const override { OS << "End of file reached."; } + std::error_code convertToErrorCode() const override { + return inconvertibleErrorCode(); + } +}; + /// Parser used to parse a raw buffer to remarks::Remark objects. struct Parser { - /// The hidden implementation of the parser. - std::unique_ptr Impl; - - /// Create a parser parsing \p Buffer to Remark objects. - /// This constructor should be only used for parsing remarks without a string - /// table. 
- Parser(Format ParserFormat, StringRef Buffer); + /// The format of the parser. + Format ParserFormat; - /// Create a parser parsing \p Buffer to Remark objects, using \p StrTab as a - /// string table. - Parser(Format ParserFormat, StringRef Buffer, - const ParsedStringTable &StrTab); + Parser(Format ParserFormat) : ParserFormat(ParserFormat) {} - // Needed because ParserImpl is an incomplete type. - ~Parser(); + /// If no error occurs, this returns a valid Remark object. + /// If an error of type EndOfFileError occurs, it is safe to recover from it + /// by stopping the parsing. + /// If any other error occurs, it should be propagated to the user. + /// The pointer should never be null. + virtual Expected> next() = 0; - /// Returns an empty Optional if it reached the end. - /// Returns a valid remark otherwise. - Expected getNext() const; + virtual ~Parser() = default; }; /// In-memory representation of the string table parsed from a buffer (e.g. the @@ -61,6 +67,10 @@ struct ParsedStringTable { ParsedStringTable(StringRef Buffer); }; +Expected> +createRemarkParser(Format ParserFormat, StringRef Buf, + Optional StrTab = None); + } // end namespace remarks } // end namespace llvm diff --git a/llvm/include/llvm/Support/SourceMgr.h b/llvm/include/llvm/Support/SourceMgr.h index 7b081d32f99e5..aa6026c23d07a 100644 --- a/llvm/include/llvm/Support/SourceMgr.h +++ b/llvm/include/llvm/Support/SourceMgr.h @@ -106,6 +106,8 @@ class SourceMgr { SourceMgr() = default; SourceMgr(const SourceMgr &) = delete; SourceMgr &operator=(const SourceMgr &) = delete; + SourceMgr(SourceMgr &&) = default; + SourceMgr &operator=(SourceMgr &&) = default; ~SourceMgr() = default; void setIncludeDirs(const std::vector &Dirs) { diff --git a/llvm/lib/IR/RemarkStreamer.cpp b/llvm/lib/IR/RemarkStreamer.cpp index 32adef181f429..5b4c7e72b479f 100644 --- a/llvm/lib/IR/RemarkStreamer.cpp +++ b/llvm/lib/IR/RemarkStreamer.cpp @@ -72,9 +72,6 @@ toRemarkLocation(const DiagnosticLocation &DL) { /// LLVM 
Diagnostic -> Remark remarks::Remark RemarkStreamer::toRemark(const DiagnosticInfoOptimizationBase &Diag) { - // Re-use the buffer. - TmpArgs.clear(); - remarks::Remark R; // The result. R.RemarkType = toRemarkType(static_cast(Diag.getKind())); R.PassName = Diag.getPassName(); @@ -84,15 +81,12 @@ RemarkStreamer::toRemark(const DiagnosticInfoOptimizationBase &Diag) { R.Loc = toRemarkLocation(Diag.getLocation()); R.Hotness = Diag.getHotness(); - // Use TmpArgs to build the list of arguments and re-use the memory allocated - // from previous remark conversions. for (const DiagnosticInfoOptimizationBase::Argument &Arg : Diag.getArgs()) { - TmpArgs.emplace_back(); - TmpArgs.back().Key = Arg.Key; - TmpArgs.back().Val = Arg.Val; - TmpArgs.back().Loc = toRemarkLocation(Arg.Loc); + R.Args.emplace_back(); + R.Args.back().Key = Arg.Key; + R.Args.back().Val = Arg.Val; + R.Args.back().Loc = toRemarkLocation(Arg.Loc); } - R.Args = TmpArgs; // This is valid until the next call to this function. return R; } diff --git a/llvm/lib/Remarks/Remark.cpp b/llvm/lib/Remarks/Remark.cpp index b4be19f47a5bc..401ac514b0117 100644 --- a/llvm/lib/Remarks/Remark.cpp +++ b/llvm/lib/Remarks/Remark.cpp @@ -66,6 +66,10 @@ LLVMRemarkArgGetDebugLoc(LLVMRemarkArgRef Arg) { return nullptr; } +extern "C" void LLVMRemarkEntryDispose(LLVMRemarkEntryRef Remark) { + delete unwrap(Remark); +} + extern "C" LLVMRemarkType LLVMRemarkEntryGetType(LLVMRemarkEntryRef Remark) { // Assume here that the enums can be converted both ways. 
return static_cast(unwrap(Remark)->RemarkType); diff --git a/llvm/lib/Remarks/RemarkParser.cpp b/llvm/lib/Remarks/RemarkParser.cpp index 41ed64d022b74..46130d28f72c5 100644 --- a/llvm/lib/Remarks/RemarkParser.cpp +++ b/llvm/lib/Remarks/RemarkParser.cpp @@ -20,69 +20,7 @@ using namespace llvm; using namespace llvm::remarks; -static std::unique_ptr formatToParserImpl(Format ParserFormat, - StringRef Buf) { - switch (ParserFormat) { - case Format::YAML: - return llvm::make_unique(Buf); - case Format::Unknown: - llvm_unreachable("Unhandled llvm::remarks::ParserFormat enum"); - return nullptr; - }; -} - -static std::unique_ptr -formatToParserImpl(Format ParserFormat, StringRef Buf, - const ParsedStringTable &StrTab) { - switch (ParserFormat) { - case Format::YAML: - return llvm::make_unique(Buf, &StrTab); - case Format::Unknown: - llvm_unreachable("Unhandled llvm::remarks::ParserFormat enum"); - return nullptr; - }; -} - -Parser::Parser(Format ParserFormat, StringRef Buf) - : Impl(formatToParserImpl(ParserFormat, Buf)) {} - -Parser::Parser(Format ParserFormat, StringRef Buf, - const ParsedStringTable &StrTab) - : Impl(formatToParserImpl(ParserFormat, Buf, StrTab)) {} - -Parser::~Parser() = default; - -static Expected getNextYAML(YAMLParserImpl &Impl) { - YAMLRemarkParser &YAMLParser = Impl.YAMLParser; - // Check for EOF. - if (Impl.YAMLIt == Impl.YAMLParser.Stream.end()) - return nullptr; - - auto CurrentIt = Impl.YAMLIt; - - // Try to parse an entry. - if (Error E = YAMLParser.parseYAMLElement(*CurrentIt)) { - // Set the iterator to the end, in case the user calls getNext again. - Impl.YAMLIt = Impl.YAMLParser.Stream.end(); - return std::move(E); - } - - // Move on. - ++Impl.YAMLIt; - - // Return the just-parsed remark. 
- if (const Optional &State = YAMLParser.State) - return &State->TheRemark; - else - return createStringError(std::make_error_code(std::errc::invalid_argument), - "unexpected error while parsing."); -} - -Expected Parser::getNext() const { - if (auto *Impl = dyn_cast(this->Impl.get())) - return getNextYAML(*Impl); - llvm_unreachable("Get next called with an unknown parsing implementation."); -} +char EndOfFileError::ID = 0; ParsedStringTable::ParsedStringTable(StringRef InBuffer) : Buffer(InBuffer) { while (!InBuffer.empty()) { @@ -109,59 +47,70 @@ Expected ParsedStringTable::operator[](size_t Index) const { return StringRef(Buffer.data() + Offset, NextOffset - Offset - 1); } +Expected> +llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf, + Optional StrTab) { + switch (ParserFormat) { + case Format::YAML: + return llvm::make_unique(Buf, StrTab); + case Format::Unknown: + return createStringError(std::make_error_code(std::errc::invalid_argument), + "Unknown remark parser format."); + } +} + +// Wrapper that holds the state needed to interact with the C API. +struct CParser { + std::unique_ptr TheParser; + Optional Err; + + CParser(Format ParserFormat, StringRef Buf, + Optional StrTab = None) + : TheParser(cantFail(createRemarkParser(ParserFormat, Buf, StrTab))) {} + + void handleError(Error E) { Err.emplace(toString(std::move(E))); } + bool hasError() const { return Err.hasValue(); } + const char *getMessage() const { return Err ? Err->c_str() : nullptr; }; +}; + // Create wrappers for C Binding types (see CBindingWrapping.h). 
-DEFINE_SIMPLE_CONVERSION_FUNCTIONS(remarks::Parser, LLVMRemarkParserRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(CParser, LLVMRemarkParserRef) extern "C" LLVMRemarkParserRef LLVMRemarkParserCreateYAML(const void *Buf, uint64_t Size) { - return wrap(new remarks::Parser( - remarks::Format::YAML, StringRef(static_cast(Buf), Size))); -} - -static void handleYAMLError(remarks::YAMLParserImpl &Impl, Error E) { - handleAllErrors( - std::move(E), - [&](const YAMLParseError &PE) { - Impl.YAMLParser.Stream.printError(&PE.getNode(), - Twine(PE.getMessage()) + Twine('\n')); - }, - [&](const ErrorInfoBase &EIB) { EIB.log(Impl.YAMLParser.ErrorStream); }); - Impl.HasErrors = true; + return wrap(new CParser(Format::YAML, + StringRef(static_cast(Buf), Size))); } extern "C" LLVMRemarkEntryRef LLVMRemarkParserGetNext(LLVMRemarkParserRef Parser) { - remarks::Parser &TheParser = *unwrap(Parser); - - Expected RemarkOrErr = TheParser.getNext(); - if (!RemarkOrErr) { - // Error during parsing. - if (auto *Impl = dyn_cast(TheParser.Impl.get())) - handleYAMLError(*Impl, RemarkOrErr.takeError()); - else - llvm_unreachable("unkown parser implementation."); + CParser &TheCParser = *unwrap(Parser); + remarks::Parser &TheParser = *TheCParser.TheParser; + + Expected> MaybeRemark = TheParser.next(); + if (Error E = MaybeRemark.takeError()) { + if (E.isA()) { + consumeError(std::move(E)); + return nullptr; + } + + // Handle the error. Allow it to be checked through HasError and + // GetErrorMessage. + TheCParser.handleError(std::move(E)); return nullptr; } - if (*RemarkOrErr == nullptr) - return nullptr; // Valid remark. 
- return wrap(*RemarkOrErr); + return wrap(MaybeRemark->release()); } extern "C" LLVMBool LLVMRemarkParserHasError(LLVMRemarkParserRef Parser) { - if (auto *Impl = - dyn_cast(unwrap(Parser)->Impl.get())) - return Impl->HasErrors; - llvm_unreachable("unkown parser implementation."); + return unwrap(Parser)->hasError(); } extern "C" const char * LLVMRemarkParserGetErrorMessage(LLVMRemarkParserRef Parser) { - if (auto *Impl = - dyn_cast(unwrap(Parser)->Impl.get())) - return Impl->YAMLParser.ErrorStream.str().c_str(); - llvm_unreachable("unkown parser implementation."); + return unwrap(Parser)->getMessage(); } extern "C" void LLVMRemarkParserDispose(LLVMRemarkParserRef Parser) { diff --git a/llvm/lib/Remarks/RemarkParserImpl.h b/llvm/lib/Remarks/RemarkParserImpl.h deleted file mode 100644 index 5f8c21dcdd44c..0000000000000 --- a/llvm/lib/Remarks/RemarkParserImpl.h +++ /dev/null @@ -1,33 +0,0 @@ -//===-- RemarkParserImpl.h - Implementation details -------------*- C++/-*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file provides implementation details for the remark parser. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_REMARKS_REMARK_PARSER_IMPL_H -#define LLVM_REMARKS_REMARK_PARSER_IMPL_H - -#include "llvm/Remarks/RemarkParser.h" - -namespace llvm { -namespace remarks { -/// This is used as a base for any parser implementation. -struct ParserImpl { - explicit ParserImpl(Format ParserFormat) : ParserFormat(ParserFormat) {} - // Virtual destructor prevents mismatched deletes - virtual ~ParserImpl() {} - - // The parser format. This is used as a tag to safely cast between - // implementations. 
- Format ParserFormat; -}; -} // end namespace remarks -} // end namespace llvm - -#endif /* LLVM_REMARKS_REMARK_PARSER_IMPL_H */ diff --git a/llvm/lib/Remarks/YAMLRemarkParser.cpp b/llvm/lib/Remarks/YAMLRemarkParser.cpp index c70eef556ffd5..ed78b7ba5d951 100644 --- a/llvm/lib/Remarks/YAMLRemarkParser.cpp +++ b/llvm/lib/Remarks/YAMLRemarkParser.cpp @@ -20,255 +20,308 @@ using namespace llvm::remarks; char YAMLParseError::ID = 0; -Error YAMLRemarkParser::parseKey(StringRef &Result, yaml::KeyValueNode &Node) { - if (auto *Key = dyn_cast(Node.getKey())) { - Result = Key->getRawValue(); +static void handleDiagnostic(const SMDiagnostic &Diag, void *Ctx) { + assert(Ctx && "Expected non-null Ctx in diagnostic handler."); + std::string &Message = *static_cast(Ctx); + assert(Message.empty() && "Expected an empty string."); + raw_string_ostream OS(Message); + Diag.print(/*ProgName=*/nullptr, OS, /*ShowColors*/ false, + /*ShowKindLabels*/ true); + OS << '\n'; + OS.flush(); +} + +YAMLParseError::YAMLParseError(StringRef Msg, SourceMgr &SM, + yaml::Stream &Stream, yaml::Node &Node) { + // 1) Set up a diagnostic handler to avoid errors being printed out to + // stderr. + // 2) Use the stream to print the error with the associated node. + // 3) The stream will use the source manager to print the error, which will + // call the diagnostic handler. + // 4) The diagnostic handler will stream the error directly into this object's + // Message member, which is used when logging is asked for. + auto OldDiagHandler = SM.getDiagHandler(); + auto OldDiagCtx = SM.getDiagContext(); + SM.setDiagHandler(handleDiagnostic, &Message); + Stream.printError(&Node, Twine(Msg) + Twine('\n')); + // Restore the old handlers. 
+ SM.setDiagHandler(OldDiagHandler, OldDiagCtx); +} + +static SourceMgr setupSM(std::string &LastErrorMessage) { + SourceMgr SM; + SM.setDiagHandler(handleDiagnostic, &LastErrorMessage); + return SM; +} + +YAMLRemarkParser::YAMLRemarkParser(StringRef Buf, + Optional StrTab) + : Parser{Format::YAML}, StrTab(StrTab), LastErrorMessage(), + SM(setupSM(LastErrorMessage)), Stream(Buf, SM), YAMLIt(Stream.begin()) {} + +Error YAMLRemarkParser::error(StringRef Message, yaml::Node &Node) { + return make_error(Message, SM, Stream, Node); +} + +Error YAMLRemarkParser::error() { + if (LastErrorMessage.empty()) return Error::success(); + Error E = make_error(LastErrorMessage); + LastErrorMessage.clear(); + return E; +} + +Expected> +YAMLRemarkParser::parseRemark(yaml::Document &RemarkEntry) { + if (Error E = error()) + return std::move(E); + + yaml::Node *YAMLRoot = RemarkEntry.getRoot(); + if (!YAMLRoot) { + return createStringError(std::make_error_code(std::errc::invalid_argument), + "not a valid YAML file."); } - return make_error("key is not a string.", Node); + auto *Root = dyn_cast(YAMLRoot); + if (!Root) + return error("document root is not of mapping type.", *YAMLRoot); + + std::unique_ptr Result = llvm::make_unique(); + Remark &TheRemark = *Result; + + // First, the type. It needs special handling since is not part of the + // key-value stream. + Expected T = parseType(*Root); + if (!T) + return T.takeError(); + else + TheRemark.RemarkType = *T; + + // Then, parse the fields, one by one. 
+ for (yaml::KeyValueNode &RemarkField : *Root) { + Expected MaybeKey = parseKey(RemarkField); + if (!MaybeKey) + return MaybeKey.takeError(); + StringRef KeyName = *MaybeKey; + + if (KeyName == "Pass") { + if (Expected MaybeStr = parseStr(RemarkField)) + TheRemark.PassName = *MaybeStr; + else + return MaybeStr.takeError(); + } else if (KeyName == "Name") { + if (Expected MaybeStr = parseStr(RemarkField)) + TheRemark.RemarkName = *MaybeStr; + else + return MaybeStr.takeError(); + } else if (KeyName == "Function") { + if (Expected MaybeStr = parseStr(RemarkField)) + TheRemark.FunctionName = *MaybeStr; + else + return MaybeStr.takeError(); + } else if (KeyName == "Hotness") { + if (Expected MaybeU = parseUnsigned(RemarkField)) + TheRemark.Hotness = *MaybeU; + else + return MaybeU.takeError(); + } else if (KeyName == "DebugLoc") { + if (Expected MaybeLoc = parseDebugLoc(RemarkField)) + TheRemark.Loc = *MaybeLoc; + else + return MaybeLoc.takeError(); + } else if (KeyName == "Args") { + auto *Args = dyn_cast(RemarkField.getValue()); + if (!Args) + return error("wrong value type for key.", RemarkField); + + for (yaml::Node &Arg : *Args) { + if (Expected MaybeArg = parseArg(Arg)) + TheRemark.Args.push_back(*MaybeArg); + else + return MaybeArg.takeError(); + } + } else { + return error("unknown key.", RemarkField); + } + } + + // Check if any of the mandatory fields are missing. 
+ if (TheRemark.RemarkType == Type::Unknown || TheRemark.PassName.empty() || + TheRemark.RemarkName.empty() || TheRemark.FunctionName.empty()) + return error("Type, Pass, Name or Function missing.", + *RemarkEntry.getRoot()); + + return std::move(Result); } -template -Error YAMLRemarkParser::parseStr(T &Result, yaml::KeyValueNode &Node) { +Expected YAMLRemarkParser::parseType(yaml::MappingNode &Node) { + auto Type = StringSwitch(Node.getRawTag()) + .Case("!Passed", remarks::Type::Passed) + .Case("!Missed", remarks::Type::Missed) + .Case("!Analysis", remarks::Type::Analysis) + .Case("!AnalysisFPCommute", remarks::Type::AnalysisFPCommute) + .Case("!AnalysisAliasing", remarks::Type::AnalysisAliasing) + .Case("!Failure", remarks::Type::Failure) + .Default(remarks::Type::Unknown); + if (Type == remarks::Type::Unknown) + return error("expected a remark tag.", Node); + return Type; +} + +Expected YAMLRemarkParser::parseKey(yaml::KeyValueNode &Node) { + if (auto *Key = dyn_cast(Node.getKey())) + return Key->getRawValue(); + + return error("key is not a string.", Node); +} + +Expected YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) { auto *Value = dyn_cast(Node.getValue()); if (!Value) - return make_error("expected a value of scalar type.", Node); - StringRef Tmp; + return error("expected a value of scalar type.", Node); + StringRef Result; if (!StrTab) { - Tmp = Value->getRawValue(); + Result = Value->getRawValue(); } else { // If we have a string table, parse it as an unsigned. 
unsigned StrID = 0; - if (Error E = parseUnsigned(StrID, Node)) - return E; + if (Expected MaybeStrID = parseUnsigned(Node)) + StrID = *MaybeStrID; + else + return MaybeStrID.takeError(); + if (Expected Str = (**StrTab)[StrID]) - Tmp = *Str; + Result = *Str; else return Str.takeError(); } - if (Tmp.front() == '\'') - Tmp = Tmp.drop_front(); - - if (Tmp.back() == '\'') - Tmp = Tmp.drop_back(); + if (Result.front() == '\'') + Result = Result.drop_front(); - Result = Tmp; + if (Result.back() == '\'') + Result = Result.drop_back(); - return Error::success(); + return Result; } -template -Error YAMLRemarkParser::parseUnsigned(T &Result, yaml::KeyValueNode &Node) { +Expected YAMLRemarkParser::parseUnsigned(yaml::KeyValueNode &Node) { SmallVector Tmp; auto *Value = dyn_cast(Node.getValue()); if (!Value) - return make_error("expected a value of scalar type.", Node); + return error("expected a value of scalar type.", Node); unsigned UnsignedValue = 0; if (Value->getValue(Tmp).getAsInteger(10, UnsignedValue)) - return make_error("expected a value of integer type.", - *Value); - Result = UnsignedValue; - return Error::success(); -} - -Error YAMLRemarkParser::parseType(Type &Result, yaml::MappingNode &Node) { - auto Type = StringSwitch(Node.getRawTag()) - .Case("!Passed", remarks::Type::Passed) - .Case("!Missed", remarks::Type::Missed) - .Case("!Analysis", remarks::Type::Analysis) - .Case("!AnalysisFPCommute", remarks::Type::AnalysisFPCommute) - .Case("!AnalysisAliasing", remarks::Type::AnalysisAliasing) - .Case("!Failure", remarks::Type::Failure) - .Default(remarks::Type::Unknown); - if (Type == remarks::Type::Unknown) - return make_error("expected a remark tag.", Node); - Result = Type; - return Error::success(); + return error("expected a value of integer type.", *Value); + return UnsignedValue; } -Error YAMLRemarkParser::parseDebugLoc(Optional &Result, - yaml::KeyValueNode &Node) { +Expected +YAMLRemarkParser::parseDebugLoc(yaml::KeyValueNode &Node) { auto *DebugLoc = 
dyn_cast(Node.getValue()); if (!DebugLoc) - return make_error("expected a value of mapping type.", - Node); + return error("expected a value of mapping type.", Node); Optional File; Optional Line; Optional Column; for (yaml::KeyValueNode &DLNode : *DebugLoc) { - StringRef KeyName; - if (Error E = parseKey(KeyName, DLNode)) - return E; + Expected MaybeKey = parseKey(DLNode); + if (!MaybeKey) + return MaybeKey.takeError(); + StringRef KeyName = *MaybeKey; + if (KeyName == "File") { - if (Error E = parseStr(File, DLNode)) - return E; + if (Expected MaybeStr = parseStr(DLNode)) + File = *MaybeStr; + else + return MaybeStr.takeError(); } else if (KeyName == "Column") { - if (Error E = parseUnsigned(Column, DLNode)) - return E; + if (Expected MaybeU = parseUnsigned(DLNode)) + Column = *MaybeU; + else + return MaybeU.takeError(); } else if (KeyName == "Line") { - if (Error E = parseUnsigned(Line, DLNode)) - return E; + if (Expected MaybeU = parseUnsigned(DLNode)) + Line = *MaybeU; + else + return MaybeU.takeError(); } else { - return make_error("unknown entry in DebugLoc map.", - DLNode); + return error("unknown entry in DebugLoc map.", DLNode); } } // If any of the debug loc fields is missing, return an error. 
if (!File || !Line || !Column) - return make_error("DebugLoc node incomplete.", Node); - - Result = RemarkLocation{*File, *Line, *Column}; - - return Error::success(); -} - -Error YAMLRemarkParser::parseRemarkField(yaml::KeyValueNode &RemarkField) { - - StringRef KeyName; - if (Error E = parseKey(KeyName, RemarkField)) - return E; - - if (KeyName == "Pass") { - if (Error E = parseStr(State->TheRemark.PassName, RemarkField)) - return E; - } else if (KeyName == "Name") { - if (Error E = parseStr(State->TheRemark.RemarkName, RemarkField)) - return E; - } else if (KeyName == "Function") { - if (Error E = parseStr(State->TheRemark.FunctionName, RemarkField)) - return E; - } else if (KeyName == "Hotness") { - State->TheRemark.Hotness = 0; - if (Error E = parseUnsigned(*State->TheRemark.Hotness, RemarkField)) - return E; - } else if (KeyName == "DebugLoc") { - if (Error E = parseDebugLoc(State->TheRemark.Loc, RemarkField)) - return E; - } else if (KeyName == "Args") { - auto *Args = dyn_cast(RemarkField.getValue()); - if (!Args) - return make_error("wrong value type for key.", - RemarkField); - - for (yaml::Node &Arg : *Args) - if (Error E = parseArg(State->Args, Arg)) - return E; - - State->TheRemark.Args = State->Args; - } else { - return make_error("unknown key.", RemarkField); - } + return error("DebugLoc node incomplete.", Node); - return Error::success(); + return RemarkLocation{*File, *Line, *Column}; } -Error YAMLRemarkParser::parseArg(SmallVectorImpl &Args, - yaml::Node &Node) { +Expected YAMLRemarkParser::parseArg(yaml::Node &Node) { auto *ArgMap = dyn_cast(&Node); if (!ArgMap) - return make_error("expected a value of mapping type.", - Node); + return error("expected a value of mapping type.", Node); - StringRef KeyStr; - StringRef ValueStr; + Optional KeyStr; + Optional ValueStr; Optional Loc; - for (yaml::KeyValueNode &ArgEntry : *ArgMap) - if (Error E = parseArgEntry(ArgEntry, KeyStr, ValueStr, Loc)) - return E; - - if (KeyStr.empty()) - return 
make_error("argument key is missing.", *ArgMap); - if (ValueStr.empty()) - return make_error("argument value is missing.", *ArgMap); + for (yaml::KeyValueNode &ArgEntry : *ArgMap) { + Expected MaybeKey = parseKey(ArgEntry); + if (!MaybeKey) + return MaybeKey.takeError(); + StringRef KeyName = *MaybeKey; + + // Try to parse debug locs. + if (KeyName == "DebugLoc") { + // Can't have multiple DebugLoc entries per argument. + if (Loc) + return error("only one DebugLoc entry is allowed per argument.", + ArgEntry); + + if (Expected MaybeLoc = parseDebugLoc(ArgEntry)) { + Loc = *MaybeLoc; + continue; + } else + return MaybeLoc.takeError(); + } - Args.push_back(Argument{KeyStr, ValueStr, Loc}); + // If we already have a string, error out. + if (ValueStr) + return error("only one string entry is allowed per argument.", ArgEntry); - return Error::success(); -} + // Try to parse the value. + if (Expected MaybeStr = parseStr(ArgEntry)) + ValueStr = *MaybeStr; + else + return MaybeStr.takeError(); -Error YAMLRemarkParser::parseArgEntry(yaml::KeyValueNode &ArgEntry, - StringRef &KeyStr, StringRef &ValueStr, - Optional &Loc) { - StringRef KeyName; - if (Error E = parseKey(KeyName, ArgEntry)) - return E; - - // Try to parse debug locs. - if (KeyName == "DebugLoc") { - // Can't have multiple DebugLoc entries per argument. - if (Loc) - return make_error( - "only one DebugLoc entry is allowed per argument.", ArgEntry); - - if (Error E = parseDebugLoc(Loc, ArgEntry)) - return E; - return Error::success(); + // Keep the key from the string. + KeyStr = KeyName; } - // If we already have a string, error out. - if (!ValueStr.empty()) - return make_error( - "only one string entry is allowed per argument.", ArgEntry); + if (!KeyStr) + return error("argument key is missing.", *ArgMap); + if (!ValueStr) + return error("argument value is missing.", *ArgMap); - // Try to parse a string. - if (Error E = parseStr(ValueStr, ArgEntry)) - return E; - - // Keep the key from the string. 
- KeyStr = KeyName; - return Error::success(); + return Argument{*KeyStr, *ValueStr, Loc}; } -Error YAMLRemarkParser::parseYAMLElement(yaml::Document &Remark) { - // Parsing a new remark, clear the previous one by re-constructing the state - // in-place in the Optional. - State.emplace(TmpArgs); +Expected> YAMLRemarkParser::next() { + if (YAMLIt == Stream.end()) + return make_error(); - yaml::Node *YAMLRoot = Remark.getRoot(); - if (!YAMLRoot) - return createStringError(std::make_error_code(std::errc::invalid_argument), - "not a valid YAML file."); - - auto *Root = dyn_cast(YAMLRoot); - if (!Root) - return make_error("document root is not of mapping type.", - *YAMLRoot); - - if (Error E = parseType(State->TheRemark.RemarkType, *Root)) - return E; - - for (yaml::KeyValueNode &RemarkField : *Root) - if (Error E = parseRemarkField(RemarkField)) - return E; + Expected> MaybeResult = parseRemark(*YAMLIt); + if (!MaybeResult) { + // Avoid garbage input, set the iterator to the end. + YAMLIt = Stream.end(); + return MaybeResult.takeError(); + } - // If the YAML parsing failed, don't even continue parsing. We might - // encounter malformed YAML. - if (Stream.failed()) - return make_error("YAML parsing failed.", - *Remark.getRoot()); + ++YAMLIt; - // Check if any of the mandatory fields are missing. - if (State->TheRemark.RemarkType == Type::Unknown || - State->TheRemark.PassName.empty() || - State->TheRemark.RemarkName.empty() || - State->TheRemark.FunctionName.empty()) - return make_error("Type, Pass, Name or Function missing.", - *Remark.getRoot()); - - return Error::success(); -} - -/// Handle a diagnostic from the YAML stream. Records the error in the -/// YAMLRemarkParser class. 
-void YAMLRemarkParser::HandleDiagnostic(const SMDiagnostic &Diag, void *Ctx) { - assert(Ctx && "Expected non-null Ctx in diagnostic handler."); - auto *Parser = static_cast(Ctx); - Diag.print(/*ProgName=*/nullptr, Parser->ErrorStream, /*ShowColors*/ false, - /*ShowKindLabels*/ true); + return std::move(*MaybeResult); } diff --git a/llvm/lib/Remarks/YAMLRemarkParser.h b/llvm/lib/Remarks/YAMLRemarkParser.h index 14698bbd3ca48..cea76e63e75c1 100644 --- a/llvm/lib/Remarks/YAMLRemarkParser.h +++ b/llvm/lib/Remarks/YAMLRemarkParser.h @@ -13,7 +13,6 @@ #ifndef LLVM_REMARKS_YAML_REMARK_PARSER_H #define LLVM_REMARKS_YAML_REMARK_PARSER_H -#include "RemarkParserImpl.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Remarks/Remark.h" @@ -27,112 +26,69 @@ namespace llvm { namespace remarks { -/// Parses and holds the state of the latest parsed remark. -struct YAMLRemarkParser { - /// Source manager for better error messages. - SourceMgr SM; - /// Stream for yaml parsing. - yaml::Stream Stream; - /// Storage for the error stream. - std::string ErrorString; - /// The error stream. - raw_string_ostream ErrorStream; - /// Temporary parsing buffer for the arguments. - SmallVector TmpArgs; - /// The string table used for parsing strings. - Optional StrTab; - /// The state used by the parser to parse a remark entry. Invalidated with - /// every call to `parseYAMLElement`. - struct ParseState { - /// Temporary parsing buffer for the arguments. - /// The parser itself is owning this buffer in order to reduce the number of - /// allocations. - SmallVectorImpl &Args; - Remark TheRemark; - - ParseState(SmallVectorImpl &Args) : Args(Args) {} - /// Use Args only as a **temporary** buffer. - ~ParseState() { Args.clear(); } - }; - - /// The current state of the parser. If the parsing didn't start yet, it will - /// not be containing any value. 
- Optional State; - - YAMLRemarkParser(StringRef Buf, - Optional StrTab = None) - : SM(), Stream(Buf, SM), ErrorString(), ErrorStream(ErrorString), - TmpArgs(), StrTab(StrTab) { - SM.setDiagHandler(YAMLRemarkParser::HandleDiagnostic, this); - } - - /// Parse a YAML element. - Error parseYAMLElement(yaml::Document &Remark); - -private: - /// Parse one key to a string. - /// otherwise. - Error parseKey(StringRef &Result, yaml::KeyValueNode &Node); - /// Parse one value to a string. - template Error parseStr(T &Result, yaml::KeyValueNode &Node); - /// Parse one value to an unsigned. - template - Error parseUnsigned(T &Result, yaml::KeyValueNode &Node); - /// Parse the type of a remark to an enum type. - Error parseType(Type &Result, yaml::MappingNode &Node); - /// Parse a debug location. - Error parseDebugLoc(Optional &Result, - yaml::KeyValueNode &Node); - /// Parse a remark field and update the parsing state. - Error parseRemarkField(yaml::KeyValueNode &RemarkField); - /// Parse an argument. - Error parseArg(SmallVectorImpl &TmpArgs, yaml::Node &Node); - /// Parse an entry from the contents of an argument. - Error parseArgEntry(yaml::KeyValueNode &ArgEntry, StringRef &KeyStr, - StringRef &ValueStr, Optional &Loc); - - /// Handle a diagnostic from the YAML stream. Records the error in the - /// YAMLRemarkParser class. 
- static void HandleDiagnostic(const SMDiagnostic &Diag, void *Ctx); -}; class YAMLParseError : public ErrorInfo { public: static char ID; - YAMLParseError(StringRef Message, yaml::Node &Node) - : Message(Message), Node(Node) {} + YAMLParseError(StringRef Message, SourceMgr &SM, yaml::Stream &Stream, + yaml::Node &Node); + + YAMLParseError(StringRef Message) : Message(Message) {} void log(raw_ostream &OS) const override { OS << Message; } std::error_code convertToErrorCode() const override { return inconvertibleErrorCode(); } - StringRef getMessage() const { return Message; } - yaml::Node &getNode() const { return Node; } - private: - StringRef Message; // No need to hold a full copy of the buffer. - yaml::Node &Node; + std::string Message; }; /// Regular YAML to Remark parser. -struct YAMLParserImpl : public ParserImpl { - /// The object parsing the YAML. - YAMLRemarkParser YAMLParser; +struct YAMLRemarkParser : public Parser { + /// The string table used for parsing strings. + Optional StrTab; + /// Last error message that can come from the YAML parser diagnostics. + /// We need this for catching errors in the constructor. + std::string LastErrorMessage; + /// Source manager for better error messages. + SourceMgr SM; + /// Stream for yaml parsing. + yaml::Stream Stream; /// Iterator in the YAML stream. yaml::document_iterator YAMLIt; - /// Set to `true` if we had any errors during parsing. - bool HasErrors = false; - YAMLParserImpl(StringRef Buf, - Optional StrTab = None) - : ParserImpl{Format::YAML}, YAMLParser(Buf, StrTab), - YAMLIt(YAMLParser.Stream.begin()), HasErrors(false) {} + YAMLRemarkParser(StringRef Buf, + Optional StrTab = None); + + Expected> next() override; - static bool classof(const ParserImpl *PI) { - return PI->ParserFormat == Format::YAML; + static bool classof(const Parser *P) { + return P->ParserFormat == Format::YAML; } + +private: + /// Create a YAMLParseError error from an existing error generated by the YAML + /// parser. 
+ /// If there is no error, this returns Success. + Error error(); + /// Create a YAMLParseError error referencing a specific node. + Error error(StringRef Message, yaml::Node &Node); + /// Parse a YAML remark to a remarks::Remark object. + Expected> parseRemark(yaml::Document &Remark); + /// Parse the type of a remark to an enum type. + Expected parseType(yaml::MappingNode &Node); + /// Parse one key to a string. + Expected parseKey(yaml::KeyValueNode &Node); + /// Parse one value to a string. + Expected parseStr(yaml::KeyValueNode &Node); + /// Parse one value to an unsigned. + Expected parseUnsigned(yaml::KeyValueNode &Node); + /// Parse a debug location. + Expected parseDebugLoc(yaml::KeyValueNode &Node); + /// Parse an argument. + Expected parseArg(yaml::Node &Node); }; } // end namespace remarks } // end namespace llvm diff --git a/llvm/tools/llvm-opt-report/OptReport.cpp b/llvm/tools/llvm-opt-report/OptReport.cpp index 80d0b73664d01..5662c9fbd7b5e 100644 --- a/llvm/tools/llvm-opt-report/OptReport.cpp +++ b/llvm/tools/llvm-opt-report/OptReport.cpp @@ -150,20 +150,32 @@ static bool readLocationInfo(LocationInfoTy &LocationInfo) { return false; } - remarks::Parser Parser(remarks::Format::YAML, (*Buf)->getBuffer()); + Expected> MaybeParser = + remarks::createRemarkParser(remarks::Format::YAML, (*Buf)->getBuffer()); + if (!MaybeParser) { + handleAllErrors(MaybeParser.takeError(), [&](const ErrorInfoBase &PE) { + PE.log(WithColor::error()); + }); + return false; + } + remarks::Parser &Parser = **MaybeParser; while (true) { - Expected RemarkOrErr = Parser.getNext(); - if (!RemarkOrErr) { - handleAllErrors(RemarkOrErr.takeError(), [&](const ErrorInfoBase &PE) { + Expected> MaybeRemark = Parser.next(); + if (!MaybeRemark) { + Error E = MaybeRemark.takeError(); + if (E.isA()) { + // EOF. 
+ consumeError(std::move(E)); + break; + } + handleAllErrors(MaybeRemark.takeError(), [&](const ErrorInfoBase &PE) { PE.log(WithColor::error()); }); return false; } - if (!*RemarkOrErr) // End of file. - break; - const remarks::Remark &Remark = **RemarkOrErr; + const remarks::Remark &Remark = **MaybeRemark; bool Transformed = Remark.RemarkType == remarks::Type::Passed; diff --git a/llvm/tools/remarks-shlib/Remarks.exports b/llvm/tools/remarks-shlib/Remarks.exports index 7260f9a543d5d..9ec1e73a471e0 100644 --- a/llvm/tools/remarks-shlib/Remarks.exports +++ b/llvm/tools/remarks-shlib/Remarks.exports @@ -6,6 +6,7 @@ LLVMRemarkDebugLocGetSourceColumn LLVMRemarkArgGetKey LLVMRemarkArgGetValue LLVMRemarkArgGetDebugLoc +LLVMRemarkEntryDispose LLVMRemarkEntryGetType LLVMRemarkEntryGetPassName LLVMRemarkEntryGetRemarkName diff --git a/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp b/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp index e3c7cdf881e3c..8b79dfd814f05 100644 --- a/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp +++ b/llvm/unittests/Remarks/YAMLRemarksParsingTest.cpp @@ -14,20 +14,31 @@ using namespace llvm; template void parseGood(const char (&Buf)[N]) { - remarks::Parser Parser(remarks::Format::YAML, {Buf, N - 1}); - Expected Remark = Parser.getNext(); + Expected> MaybeParser = + remarks::createRemarkParser(remarks::Format::YAML, {Buf, N - 1}); + EXPECT_FALSE(errorToBool(MaybeParser.takeError())); + EXPECT_TRUE(*MaybeParser != nullptr); + + remarks::Parser &Parser = **MaybeParser; + Expected> Remark = Parser.next(); EXPECT_FALSE(errorToBool(Remark.takeError())); // Check for parsing errors. EXPECT_TRUE(*Remark != nullptr); // At least one remark. - Remark = Parser.getNext(); - EXPECT_FALSE(errorToBool(Remark.takeError())); // Check for parsing errors. - EXPECT_TRUE(*Remark == nullptr); // Check that there are no more remarks. 
+ Remark = Parser.next(); + Error E = Remark.takeError(); + EXPECT_TRUE(E.isA()); + EXPECT_TRUE(errorToBool(std::move(E))); // Check for parsing errors. } template bool parseExpectError(const char (&Buf)[N], const char *Error) { - remarks::Parser Parser(remarks::Format::YAML, {Buf, N - 1}); - Expected Remark = Parser.getNext(); - EXPECT_FALSE(Remark); // Expect an error here. + Expected> MaybeParser = + remarks::createRemarkParser(remarks::Format::YAML, {Buf, N - 1}); + EXPECT_FALSE(errorToBool(MaybeParser.takeError())); + EXPECT_TRUE(*MaybeParser != nullptr); + + remarks::Parser &Parser = **MaybeParser; + Expected> Remark = Parser.next(); + EXPECT_FALSE(Remark); // Check for parsing errors. std::string ErrorStr; raw_string_ostream Stream(ErrorStr); @@ -42,7 +53,7 @@ TEST(YAMLRemarks, ParsingEmpty) { TEST(YAMLRemarks, ParsingNotYAML) { EXPECT_TRUE( - parseExpectError("\x01\x02\x03\x04\x05\x06", "not a valid YAML file.")); + parseExpectError("\x01\x02\x03\x04\x05\x06", "Got empty plain scalar")); } TEST(YAMLRemarks, ParsingGood) { @@ -309,17 +320,6 @@ TEST(YAMLRemarks, ParsingWrongArgs) { "", "only one string entry is allowed per argument.")); // No arg value. - EXPECT_TRUE(parseExpectError("\n" - "--- !Missed\n" - "Pass: inline\n" - "Name: NoDefinition\n" - "Function: foo\n" - "Args:\n" - " - Callee: ''\n" - " - DebugLoc: { File: a, Line: 1, Column: 2 }\n" - "", - "argument value is missing.")); - // No arg value. 
EXPECT_TRUE(parseExpectError("\n" "--- !Missed\n" "Pass: inline\n" @@ -354,12 +354,18 @@ TEST(YAMLRemarks, Contents) { " - String: ' because its definition is unavailable'\n" "\n"; - remarks::Parser Parser(remarks::Format::YAML, Buf); - Expected RemarkOrErr = Parser.getNext(); - EXPECT_FALSE(errorToBool(RemarkOrErr.takeError())); - EXPECT_TRUE(*RemarkOrErr != nullptr); + Expected> MaybeParser = + remarks::createRemarkParser(remarks::Format::YAML, Buf); + EXPECT_FALSE(errorToBool(MaybeParser.takeError())); + EXPECT_TRUE(*MaybeParser != nullptr); + + remarks::Parser &Parser = **MaybeParser; + Expected> MaybeRemark = Parser.next(); + EXPECT_FALSE( + errorToBool(MaybeRemark.takeError())); // Check for parsing errors. + EXPECT_TRUE(*MaybeRemark != nullptr); // At least one remark. - const remarks::Remark &Remark = **RemarkOrErr; + const remarks::Remark &Remark = **MaybeRemark; EXPECT_EQ(Remark.RemarkType, remarks::Type::Missed); EXPECT_EQ(checkStr(Remark.PassName, 6), "inline"); EXPECT_EQ(checkStr(Remark.RemarkName, 12), "NoDefinition"); @@ -408,9 +414,10 @@ TEST(YAMLRemarks, Contents) { ++ArgID; } - RemarkOrErr = Parser.getNext(); - EXPECT_FALSE(errorToBool(RemarkOrErr.takeError())); - EXPECT_EQ(*RemarkOrErr, nullptr); + MaybeRemark = Parser.next(); + Error E = MaybeRemark.takeError(); + EXPECT_TRUE(E.isA()); + EXPECT_TRUE(errorToBool(std::move(E))); // Check for parsing errors. 
} static inline StringRef checkStr(LLVMRemarkStringRef Str, @@ -487,6 +494,8 @@ TEST(YAMLRemarks, ContentsCAPI) { ++ArgID; } while ((Arg = LLVMRemarkEntryGetNextArg(Arg, Remark))); + LLVMRemarkEntryDispose(Remark); + EXPECT_EQ(LLVMRemarkParserGetNext(Parser), nullptr); EXPECT_FALSE(LLVMRemarkParserHasError(Parser)); @@ -516,12 +525,18 @@ TEST(YAMLRemarks, ContentsStrTab) { 115); remarks::ParsedStringTable StrTab(StrTabBuf); - remarks::Parser Parser(remarks::Format::YAML, Buf, StrTab); - Expected RemarkOrErr = Parser.getNext(); - EXPECT_FALSE(errorToBool(RemarkOrErr.takeError())); - EXPECT_TRUE(*RemarkOrErr != nullptr); - - const remarks::Remark &Remark = **RemarkOrErr; + Expected> MaybeParser = + remarks::createRemarkParser(remarks::Format::YAML, Buf, &StrTab); + EXPECT_FALSE(errorToBool(MaybeParser.takeError())); + EXPECT_TRUE(*MaybeParser != nullptr); + + remarks::Parser &Parser = **MaybeParser; + Expected> MaybeRemark = Parser.next(); + EXPECT_FALSE( + errorToBool(MaybeRemark.takeError())); // Check for parsing errors. + EXPECT_TRUE(*MaybeRemark != nullptr); // At least one remark. + + const remarks::Remark &Remark = **MaybeRemark; EXPECT_EQ(Remark.RemarkType, remarks::Type::Missed); EXPECT_EQ(checkStr(Remark.PassName, 6), "inline"); EXPECT_EQ(checkStr(Remark.RemarkName, 12), "NoDefinition"); @@ -570,9 +585,10 @@ TEST(YAMLRemarks, ContentsStrTab) { ++ArgID; } - RemarkOrErr = Parser.getNext(); - EXPECT_FALSE(errorToBool(RemarkOrErr.takeError())); - EXPECT_EQ(*RemarkOrErr, nullptr); + MaybeRemark = Parser.next(); + Error E = MaybeRemark.takeError(); + EXPECT_TRUE(E.isA()); + EXPECT_TRUE(errorToBool(std::move(E))); // Check for parsing errors. 
} TEST(YAMLRemarks, ParsingBadStringTableIndex) { @@ -584,13 +600,18 @@ TEST(YAMLRemarks, ParsingBadStringTableIndex) { StringRef StrTabBuf = StringRef("inline"); remarks::ParsedStringTable StrTab(StrTabBuf); - remarks::Parser Parser(remarks::Format::YAML, Buf, StrTab); - Expected Remark = Parser.getNext(); - EXPECT_FALSE(Remark); // Expect an error here. + Expected> MaybeParser = + remarks::createRemarkParser(remarks::Format::YAML, Buf, &StrTab); + EXPECT_FALSE(errorToBool(MaybeParser.takeError())); + EXPECT_TRUE(*MaybeParser != nullptr); + + remarks::Parser &Parser = **MaybeParser; + Expected> MaybeRemark = Parser.next(); + EXPECT_FALSE(MaybeRemark); // Expect an error here. std::string ErrorStr; raw_string_ostream Stream(ErrorStr); - handleAllErrors(Remark.takeError(), + handleAllErrors(MaybeRemark.takeError(), [&](const ErrorInfoBase &EIB) { EIB.log(Stream); }); EXPECT_TRUE( StringRef(Stream.str()) From 88ed076e44756f5418be17596cec943eea864f30 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Tue, 16 Jul 2019 15:28:29 +0000 Subject: [PATCH 245/451] Add missing test for r366215 llvm-svn: 366218 --- .../blocks-with-dead-term-nondeterministic.ll | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 llvm/test/Transforms/ADCE/blocks-with-dead-term-nondeterministic.ll diff --git a/llvm/test/Transforms/ADCE/blocks-with-dead-term-nondeterministic.ll b/llvm/test/Transforms/ADCE/blocks-with-dead-term-nondeterministic.ll new file mode 100644 index 0000000000000..dd8a70e4993c0 --- /dev/null +++ b/llvm/test/Transforms/ADCE/blocks-with-dead-term-nondeterministic.ll @@ -0,0 +1,71 @@ +; RUN: opt < %s -adce --preserve-ll-uselistorder -S | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +; CHECK: uselistorder label %bb16, { 1, 0 } +; Function Attrs: noinline nounwind ssp uwtable +define void @ham() local_unnamed_addr #0 { +bb: + br i1 false, label %bb1, label %bb22 + +bb1: ; preds 
= %bb + br i1 undef, label %bb2, label %bb20 + +bb2: ; preds = %bb1 + br label %bb5 + +bb5: ; preds = %bb16, %bb2 + br i1 undef, label %bb6, label %bb17 + +bb6: ; preds = %bb5 + br i1 undef, label %bb7, label %bb16 + +bb7: ; preds = %bb6 + br i1 undef, label %bb9, label %bb8 + +bb8: ; preds = %bb7 + br i1 undef, label %bb9, label %bb10 + +bb9: ; preds = %bb8, %bb7 + br label %bb13 + +bb10: ; preds = %bb8 + br label %bb12 + +bb12: ; preds = %bb10 + br label %bb13 + +bb13: ; preds = %bb12, %bb9 + br label %bb14 + +bb14: ; preds = %bb13 + br label %bb15 + +bb15: ; preds = %bb14 + br label %bb16 + +bb16: ; preds = %bb15, %bb6 + br label %bb5 + +bb17: ; preds = %bb5 + br label %bb19 + +bb19: ; preds = %bb17 + br label %bb21 + +bb20: ; preds = %bb1 + br label %bb21 + +bb21: ; preds = %bb20, %bb19 + br label %bb22 + +bb22: ; preds = %bb21, %bb + ret void +} + +attributes #0 = { noinline nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} + +!0 = !{i32 7, !"PIC Level", i32 2} From 2eacf698802b5c8bc3c9e0031ac11cd0d79f9eae Mon Sep 17 00:00:00 2001 From: Alex Brachet Date: Tue, 16 Jul 2019 15:33:43 +0000 Subject: [PATCH 246/451] Revert [tools] [llvm-nm] Default to reading from stdin not a.out This reverts r365889 (git commit 60c81354b1d3fced1bd284d334f118d2d792ab4b) llvm-svn: 366219 --- llvm/docs/CommandGuide/llvm-nm.rst | 5 +++-- llvm/test/tools/llvm-nm/stdin.test | 33 ------------------------------ llvm/tools/llvm-nm/llvm-nm.cpp | 9 +------- 3 files changed, 4 insertions(+), 43 deletions(-) delete mode 
100644 llvm/test/tools/llvm-nm/stdin.test diff --git a/llvm/docs/CommandGuide/llvm-nm.rst b/llvm/docs/CommandGuide/llvm-nm.rst index aa58a3f3884ca..f071e1be1a5c9 100644 --- a/llvm/docs/CommandGuide/llvm-nm.rst +++ b/llvm/docs/CommandGuide/llvm-nm.rst @@ -13,8 +13,9 @@ DESCRIPTION The :program:`llvm-nm` utility lists the names of symbols from LLVM bitcode files, object files, and archives. Each symbol is listed along with some simple -information about its provenance. If no filename is specified, or *-* is used as -a filename, :program:`llvm-nm` will read a file from its standard input stream. +information about its provenance. If no filename is specified, *a.out* is used +as the input. If *-* is used as a filename, :program:`llvm-nm` will read a file +from its standard input stream. :program:`llvm-nm`'s default output format is the traditional BSD :program:`nm` output format. Each such output record consists of an (optional) 8-digit diff --git a/llvm/test/tools/llvm-nm/stdin.test b/llvm/test/tools/llvm-nm/stdin.test deleted file mode 100644 index 352ee23a30ab9..0000000000000 --- a/llvm/test/tools/llvm-nm/stdin.test +++ /dev/null @@ -1,33 +0,0 @@ -## Test llvm-nm when using stdin both explicitly (using '-' as a filename) -## and implicitly (not specifying any filename). - -# RUN: yaml2obj %s -o %t.o - -## Pass an explicit filename to produce a baseline output. llvm-nm should -## have the same behavior when opening a file itself and when reading that -## file from its standard input stream. -# RUN: llvm-nm %t.o > %t.base 2> %t.err - -## Make sure there is no warning message about no file redirected to stdin. 
-# RUN: FileCheck %s --input-file=%t.err --allow-empty --implicit-check-not={{.}} - -# RUN: llvm-nm - < %t.o > %t.explicit 2> %t.err -# RUN: FileCheck %s --input-file=%t.err --allow-empty --implicit-check-not={{.}} -# RUN: cmp %t.base %t.explicit - -# RUN: llvm-nm < %t.o > %t.implicit 2> %t.err -# RUN: FileCheck %s --input-file=%t.err --allow-empty --implicit-check-not={{.}} -# RUN: cmp %t.base %t.implicit - -!ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbol_a - Section: .text diff --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp index c45c8716f18dc..aa62e6f0209b4 100644 --- a/llvm/tools/llvm-nm/llvm-nm.cpp +++ b/llvm/tools/llvm-nm/llvm-nm.cpp @@ -34,7 +34,6 @@ #include "llvm/Support/Format.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Process.h" #include "llvm/Support/Program.h" #include "llvm/Support/Signals.h" #include "llvm/Support/TargetSelect.h" @@ -1752,12 +1751,6 @@ static bool checkMachOAndArchFlags(SymbolicFile *O, std::string &Filename) { } static void dumpSymbolNamesFromFile(std::string &Filename) { - if (Filename == "-" && sys::Process::StandardInIsUserInput()) { - WithColor::warning(errs(), ToolName) << "can't read from terminal\n"; - cl::PrintHelpMessage(); - HadError = true; - return; - } ErrorOr> BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename); if (error(BufferOrErr.getError(), Filename)) @@ -2089,7 +2082,7 @@ int main(int argc, char **argv) { if (OutputFormat == sysv || SizeSort) PrintSize = true; if (InputFilenames.empty()) - InputFilenames.push_back("-"); + InputFilenames.push_back("a.out"); if (InputFilenames.size() > 1) MultipleFiles = true; From 42816107f78e6ce345e68960ff2183d1cabfe815 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 16 Jul 2019 15:51:32 +0000 Subject: [PATCH 247/451] [OPENMP]Fix threadid in __kmpc_omp_taskwait call for 
dependent target calls. Summary: We used to call __kmpc_omp_taskwait function with global threadid set to 0. It may crash the application at the runtime if the thread executing target region is not a master thread. Reviewers: grokos, kkwli0 Subscribers: guansong, jdoerfert, caomhin, openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D64571 llvm-svn: 366220 --- openmp/libomptarget/src/interface.cpp | 10 +-- .../test/offloading/target_depend_nowait.cpp | 62 +++++++++++++++++++ 2 files changed, 67 insertions(+), 5 deletions(-) create mode 100644 openmp/libomptarget/test/offloading/target_depend_nowait.cpp diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index 52850ee39782d..32afe3fcb4b70 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -128,7 +128,7 @@ EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) - __kmpc_omp_taskwait(NULL, 0); + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); __tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes, arg_types); @@ -181,7 +181,7 @@ EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) - __kmpc_omp_taskwait(NULL, 0); + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); __tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes, arg_types); @@ -214,7 +214,7 @@ EXTERN void __tgt_target_data_update_nowait( int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) - __kmpc_omp_taskwait(NULL, 0); + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); __tgt_target_data_update(device_id, arg_num, args_base, args, 
arg_sizes, arg_types); @@ -255,7 +255,7 @@ EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr, int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) - __kmpc_omp_taskwait(NULL, 0); + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); return __tgt_target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, arg_types); @@ -298,7 +298,7 @@ EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr, int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) - __kmpc_omp_taskwait(NULL, 0); + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); return __tgt_target_teams(device_id, host_ptr, arg_num, args_base, args, arg_sizes, arg_types, team_num, thread_limit); diff --git a/openmp/libomptarget/test/offloading/target_depend_nowait.cpp b/openmp/libomptarget/test/offloading/target_depend_nowait.cpp new file mode 100644 index 0000000000000..2c1c7e7191882 --- /dev/null +++ b/openmp/libomptarget/test/offloading/target_depend_nowait.cpp @@ -0,0 +1,62 @@ +// RUN: %libomptarget-compilexx-run-and-check-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compilexx-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compilexx-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compilexx-run-and-check-x86_64-pc-linux-gnu + +#include +#include + +#define N 1024 + +int A[N]; +int B[N]; +int C[N]; +int main() { + for (int i = 0; i < N; i++) + A[i] = B[i] = i; + +#pragma omp parallel num_threads(2) + { + if (omp_get_thread_num() == 1) { +// map data A & B and move to +#pragma omp target enter data map(to : A, B) depend(out : A[0]) nowait + +// no data move since already mapped +#pragma omp target map(A, B) depend(out : A[0]) nowait + { + for (int i = 0; i < N; i++) + ++A[i]; + for (int i = 0; i < N; i++) + ++B[i]; + } + +// no data move since 
already mapped +#pragma omp target teams num_teams(1) map(A, B) depend(out : A[0]) nowait + { + for (int i = 0; i < N; i++) + ++A[i]; + for (int i = 0; i < N; i++) + ++B[i]; + } + +// A updated via update +#pragma omp target update from(A) depend(out : A[0]) nowait + +// B updated via exit, A just released +#pragma omp target exit data map(release \ + : A) map(from \ + : B) depend(out \ + : A[0]) nowait + } // if + } // parallel + + int Sum = 0; + for (int i = 0; i < N; i++) + Sum += A[i] + B[i]; + // Sum is 2 * N * (2 + N - 1 + 2) / 2 + // CHECK: Sum = 1051648. + printf("Sum = %d.\n", Sum); + + return Sum != 2 * N * (2 + N - 1 + 2) / 2; +} + From 63a0c2bce8e5731f90fc99b75d59f2e85283cb3b Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 16 Jul 2019 15:54:33 +0000 Subject: [PATCH 248/451] Revert "[swig] Add workaround for old swig" With the deprecation of swig 1.x (r366213), this workaround should no longer be necessary. llvm-svn: 366221 --- lldb/scripts/interface/SBTypeCategory.i | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lldb/scripts/interface/SBTypeCategory.i b/lldb/scripts/interface/SBTypeCategory.i index a881c1590fd8a..c183862702df9 100644 --- a/lldb/scripts/interface/SBTypeCategory.i +++ b/lldb/scripts/interface/SBTypeCategory.i @@ -213,11 +213,6 @@ namespace lldb { name = property(GetName, None) enabled = property(GetEnabled, SetEnabled) %} -#if SWIG_VERSION < 0x030009 - %pythoncode %{ - __swig_setmethods__["enabled"] = SetEnabled - %} -#endif }; From 450c62e33ea5310481b06d3fd59df911f5451ff2 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 16 Jul 2019 15:55:45 +0000 Subject: [PATCH 249/451] [Strict FP] Allow more relaxed scheduling Reimplement scheduling constraints for strict FP instructions in ScheduleDAGInstrs::buildSchedGraph to allow for more relaxed scheduling. Specifially, allow one strict FP instruction to be scheduled across another, as long as it is not moved across any global barrier. 
Differential Revision: https://reviews.llvm.org/D64412 Reviewed By: cameron.mcinally llvm-svn: 366222 --- llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 31 ++- llvm/test/CodeGen/SystemZ/fp-strict-alias.ll | 222 ++++++++++++------ .../vector-constrained-fp-intrinsics.ll | 30 +-- 3 files changed, 186 insertions(+), 97 deletions(-) diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index a538010889607..d5ad7e92299de 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -712,7 +712,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, AAForDep = UseAA ? AA : nullptr; BarrierChain = nullptr; - SUnit *FPBarrierChain = nullptr; this->TrackLaneMasks = TrackLaneMasks; MISUnitMap.clear(); @@ -744,6 +743,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // done. Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/); + // Track all instructions that may raise floating-point exceptions. + // These do not depend on one other (or normal loads or stores), but + // must not be rescheduled across global barriers. Note that we don't + // really need a "map" here since we don't track those MIs by value; + // using the same Value2SUsMap data type here is simply a matter of + // convenience. + Value2SUsMap FPExceptions; + // Remove any stale debug info; sometimes BuildSchedGraph is called again // without emitting the info from the previous call. DbgValues.clear(); @@ -871,20 +878,24 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, addBarrierChain(Loads); addBarrierChain(NonAliasStores); addBarrierChain(NonAliasLoads); - - // Add dependency against previous FP barrier and reset FP barrier. - if (FPBarrierChain) - FPBarrierChain->addPredBarrier(BarrierChain); - FPBarrierChain = BarrierChain; + addBarrierChain(FPExceptions); continue; } - // Instructions that may raise FP exceptions depend on each other. 
+ // Instructions that may raise FP exceptions may not be moved + // across any global barriers. if (MI.mayRaiseFPException()) { - if (FPBarrierChain) - FPBarrierChain->addPredBarrier(SU); - FPBarrierChain = SU; + if (BarrierChain) + BarrierChain->addPredBarrier(SU); + + FPExceptions.insert(SU, UnknownValue); + + if (FPExceptions.size() >= HugeRegion) { + LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n";); + Value2SUsMap empty; + reduceHugeMemNodeMaps(FPExceptions, empty, getReductionSize()); + } } // If it's not a store or a variant load, we're done. diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-alias.ll b/llvm/test/CodeGen/SystemZ/fp-strict-alias.ll index d8ee018b9e5b6..fe27b61c20bab 100644 --- a/llvm/test/CodeGen/SystemZ/fp-strict-alias.ll +++ b/llvm/test/CodeGen/SystemZ/fp-strict-alias.ll @@ -2,138 +2,216 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) -declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) declare float @llvm.sqrt.f32(float) declare void @llvm.s390.sfpc(i32) -; For non-strict operations, we expect the post-RA scheduler to -; separate the two square root instructions on z13. -define void @f1(float %f1, float %f2, float %f3, float %f4, float *%ptr0) { +; The basic assumption of all following tests is that on z13, we never +; want to see two square root instructions directly in a row, so the +; post-RA scheduler will always schedule something else in between +; whenever possible. + +; We can move any FP operation across a (normal) store. 
+ +define void @f1(float %f1, float %f2, float *%ptr1, float *%ptr2) { ; CHECK-LABEL: f1: ; CHECK: sqebr -; CHECK: {{aebr|sebr}} +; CHECK: ste ; CHECK: sqebr +; CHECK: ste ; CHECK: br %r14 - %add = fadd float %f1, %f2 - %sub = fsub float %f3, %f4 - %sqrt1 = call float @llvm.sqrt.f32(float %f2) - %sqrt2 = call float @llvm.sqrt.f32(float %f4) - - %ptr1 = getelementptr float, float *%ptr0, i64 1 - %ptr2 = getelementptr float, float *%ptr0, i64 2 - %ptr3 = getelementptr float, float *%ptr0, i64 3 + %sqrt1 = call float @llvm.sqrt.f32(float %f1) + %sqrt2 = call float @llvm.sqrt.f32(float %f2) - store float %add, float *%ptr0 - store float %sub, float *%ptr1 - store float %sqrt1, float *%ptr2 - store float %sqrt2, float *%ptr3 + store float %sqrt1, float *%ptr1 + store float %sqrt2, float *%ptr2 ret void } -; But for strict operations, this must not happen. -define void @f2(float %f1, float %f2, float %f3, float %f4, float *%ptr0) { +define void @f2(float %f1, float %f2, float *%ptr1, float *%ptr2) { ; CHECK-LABEL: f2: -; CHECK: {{aebr|sebr}} -; CHECK: {{aebr|sebr}} ; CHECK: sqebr +; CHECK: ste ; CHECK: sqebr +; CHECK: ste ; CHECK: br %r14 - %add = call float @llvm.experimental.constrained.fadd.f32( - float %f1, float %f2, + %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32( + float %f1, metadata !"round.dynamic", - metadata !"fpexcept.strict") - %sub = call float @llvm.experimental.constrained.fsub.f32( - float %f3, float %f4, + metadata !"fpexcept.ignore") + %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32( + float %f2, metadata !"round.dynamic", - metadata !"fpexcept.strict") + metadata !"fpexcept.ignore") + + store float %sqrt1, float *%ptr1 + store float %sqrt2, float *%ptr2 + + ret void +} + +define void @f3(float %f1, float %f2, float *%ptr1, float *%ptr2) { +; CHECK-LABEL: f3: +; CHECK: sqebr +; CHECK: ste +; CHECK: sqebr +; CHECK: ste +; CHECK: br %r14 + %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32( - float %f2, + float %f1, 
metadata !"round.dynamic", metadata !"fpexcept.strict") %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32( - float %f4, + float %f2, metadata !"round.dynamic", metadata !"fpexcept.strict") - %ptr1 = getelementptr float, float *%ptr0, i64 1 - %ptr2 = getelementptr float, float *%ptr0, i64 2 - %ptr3 = getelementptr float, float *%ptr0, i64 3 + store float %sqrt1, float *%ptr1 + store float %sqrt2, float *%ptr2 - store float %add, float *%ptr0 - store float %sub, float *%ptr1 - store float %sqrt1, float *%ptr2 - store float %sqrt2, float *%ptr3 + ret void +} + + +; We can move a non-strict FP operation or a fpexcept.ignore +; operation even across a volatile store, but not a fpexcept.strict +; operation. + +define void @f4(float %f1, float %f2, float *%ptr1, float *%ptr2) { +; CHECK-LABEL: f4: +; CHECK: sqebr +; CHECK: ste +; CHECK: sqebr +; CHECK: ste +; CHECK: br %r14 + + %sqrt1 = call float @llvm.sqrt.f32(float %f1) + %sqrt2 = call float @llvm.sqrt.f32(float %f2) + + store volatile float %sqrt1, float *%ptr1 + store volatile float %sqrt2, float *%ptr2 ret void } -; On the other hand, strict operations that use the fpexcept.ignore -; exception behaviour should be scheduled freely. 
-define void @f3(float %f1, float %f2, float %f3, float %f4, float *%ptr0) { -; CHECK-LABEL: f3: +define void @f5(float %f1, float %f2, float *%ptr1, float *%ptr2) { +; CHECK-LABEL: f5: ; CHECK: sqebr -; CHECK: {{aebr|sebr}} +; CHECK: ste ; CHECK: sqebr +; CHECK: ste ; CHECK: br %r14 - %add = call float @llvm.experimental.constrained.fadd.f32( - float %f1, float %f2, + %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32( + float %f1, metadata !"round.dynamic", metadata !"fpexcept.ignore") - %sub = call float @llvm.experimental.constrained.fsub.f32( - float %f3, float %f4, + %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32( + float %f2, metadata !"round.dynamic", metadata !"fpexcept.ignore") + + store volatile float %sqrt1, float *%ptr1 + store volatile float %sqrt2, float *%ptr2 + + ret void +} + +define void @f6(float %f1, float %f2, float *%ptr1, float *%ptr2) { +; CHECK-LABEL: f6: +; CHECK: sqebr +; CHECK: sqebr +; CHECK: ste +; CHECK: ste +; CHECK: br %r14 + %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32( + float %f1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32( float %f2, metadata !"round.dynamic", + metadata !"fpexcept.strict") + + store volatile float %sqrt1, float *%ptr1 + store volatile float %sqrt2, float *%ptr2 + + ret void +} + + +; No variant of FP operations can be scheduled across a SPFC. 
+ +define void @f7(float %f1, float %f2, float *%ptr1, float *%ptr2) { +; CHECK-LABEL: f7: +; CHECK: sqebr +; CHECK: sqebr +; CHECK: ste +; CHECK: ste +; CHECK: br %r14 + + %sqrt1 = call float @llvm.sqrt.f32(float %f1) + %sqrt2 = call float @llvm.sqrt.f32(float %f2) + + call void @llvm.s390.sfpc(i32 0) + + store float %sqrt1, float *%ptr1 + store float %sqrt2, float *%ptr2 + + ret void +} + +define void @f8(float %f1, float %f2, float *%ptr1, float *%ptr2) { +; CHECK-LABEL: f8: +; CHECK: sqebr +; CHECK: sqebr +; CHECK: ste +; CHECK: ste +; CHECK: br %r14 + + %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32( + float %f1, + metadata !"round.dynamic", metadata !"fpexcept.ignore") %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32( - float %f4, + float %f2, metadata !"round.dynamic", metadata !"fpexcept.ignore") - %ptr1 = getelementptr float, float *%ptr0, i64 1 - %ptr2 = getelementptr float, float *%ptr0, i64 2 - %ptr3 = getelementptr float, float *%ptr0, i64 3 + call void @llvm.s390.sfpc(i32 0) - store float %add, float *%ptr0 - store float %sub, float *%ptr1 - store float %sqrt1, float *%ptr2 - store float %sqrt2, float *%ptr3 + store float %sqrt1, float *%ptr1 + store float %sqrt2, float *%ptr2 ret void } -; However, even non-strict operations must not be scheduled across an SFPC. 
-define void @f4(float %f1, float %f2, float %f3, float %f4, float *%ptr0) { -; CHECK-LABEL: f4: -; CHECK: {{aebr|sebr}} -; CHECK: {{aebr|sebr}} -; CHECK: sfpc +define void @f9(float %f1, float %f2, float *%ptr1, float *%ptr2) { +; CHECK-LABEL: f9: ; CHECK: sqebr ; CHECK: sqebr +; CHECK: ste +; CHECK: ste ; CHECK: br %r14 - %add = fadd float %f1, %f2 - %sub = fsub float %f3, %f4 - call void @llvm.s390.sfpc(i32 0) - %sqrt1 = call float @llvm.sqrt.f32(float %f2) - %sqrt2 = call float @llvm.sqrt.f32(float %f4) + %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32( + float %f1, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32( + float %f2, + metadata !"round.dynamic", + metadata !"fpexcept.strict") - %ptr1 = getelementptr float, float *%ptr0, i64 1 - %ptr2 = getelementptr float, float *%ptr0, i64 2 - %ptr3 = getelementptr float, float *%ptr0, i64 3 + call void @llvm.s390.sfpc(i32 0) - store float %add, float *%ptr0 - store float %sub, float *%ptr1 - store float %sqrt1, float *%ptr2 - store float %sqrt2, float *%ptr3 + store float %sqrt1, float *%ptr1 + store float %sqrt2, float *%ptr2 ret void } diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll index ec1e6746a6168..8ab4c6db255ba 100644 --- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll @@ -108,8 +108,8 @@ define void @constrained_vector_fdiv_v3f64(<3 x double>* %a) { ; S390X-NEXT: ldeb %f3, 0(%r1) ; S390X-NEXT: larl %r1, .LCPI3_2 ; S390X-NEXT: ldeb %f4, 0(%r1) -; S390X-NEXT: ddb %f2, 0(%r2) ; S390X-NEXT: ddbr %f3, %f1 +; S390X-NEXT: ddb %f2, 0(%r2) ; S390X-NEXT: ddbr %f4, %f0 ; S390X-NEXT: std %f4, 16(%r2) ; S390X-NEXT: std %f3, 8(%r2) @@ -659,16 +659,16 @@ entry: define void @constrained_vector_fmul_v3f64(<3 x double>* %a) { ; S390X-LABEL: 
constrained_vector_fmul_v3f64: ; S390X: # %bb.0: # %entry +; S390X-NEXT: ld %f0, 8(%r2) ; S390X-NEXT: larl %r1, .LCPI13_0 -; S390X-NEXT: ld %f0, 0(%r1) -; S390X-NEXT: ld %f1, 8(%r2) +; S390X-NEXT: ld %f1, 0(%r1) ; S390X-NEXT: ld %f2, 16(%r2) -; S390X-NEXT: ldr %f3, %f0 +; S390X-NEXT: mdbr %f0, %f1 +; S390X-NEXT: ldr %f3, %f1 ; S390X-NEXT: mdb %f3, 0(%r2) -; S390X-NEXT: mdbr %f1, %f0 -; S390X-NEXT: mdbr %f2, %f0 +; S390X-NEXT: mdbr %f2, %f1 ; S390X-NEXT: std %f2, 16(%r2) -; S390X-NEXT: std %f1, 8(%r2) +; S390X-NEXT: std %f0, 8(%r2) ; S390X-NEXT: std %f3, 0(%r2) ; S390X-NEXT: br %r14 ; @@ -832,16 +832,16 @@ entry: define void @constrained_vector_fadd_v3f64(<3 x double>* %a) { ; S390X-LABEL: constrained_vector_fadd_v3f64: ; S390X: # %bb.0: # %entry +; S390X-NEXT: ld %f0, 8(%r2) ; S390X-NEXT: larl %r1, .LCPI18_0 -; S390X-NEXT: ld %f0, 0(%r1) -; S390X-NEXT: ld %f1, 8(%r2) +; S390X-NEXT: ld %f1, 0(%r1) ; S390X-NEXT: ld %f2, 16(%r2) -; S390X-NEXT: ldr %f3, %f0 +; S390X-NEXT: adbr %f0, %f1 +; S390X-NEXT: ldr %f3, %f1 ; S390X-NEXT: adb %f3, 0(%r2) -; S390X-NEXT: adbr %f1, %f0 -; S390X-NEXT: adbr %f2, %f0 +; S390X-NEXT: adbr %f2, %f1 ; S390X-NEXT: std %f2, 16(%r2) -; S390X-NEXT: std %f1, 8(%r2) +; S390X-NEXT: std %f0, 8(%r2) ; S390X-NEXT: std %f3, 0(%r2) ; S390X-NEXT: br %r14 ; @@ -969,14 +969,14 @@ define <3 x float> @constrained_vector_fsub_v3f32() { ; S390X: # %bb.0: # %entry ; S390X-NEXT: larl %r1, .LCPI22_0 ; S390X-NEXT: le %f0, 0(%r1) -; S390X-NEXT: lzer %f1 ; S390X-NEXT: ler %f4, %f0 -; S390X-NEXT: sebr %f4, %f1 ; S390X-NEXT: larl %r1, .LCPI22_1 ; S390X-NEXT: ler %f2, %f0 ; S390X-NEXT: seb %f2, 0(%r1) ; S390X-NEXT: larl %r1, .LCPI22_2 ; S390X-NEXT: seb %f0, 0(%r1) +; S390X-NEXT: lzer %f1 +; S390X-NEXT: sebr %f4, %f1 ; S390X-NEXT: br %r14 ; ; SZ13-LABEL: constrained_vector_fsub_v3f32: From b3f967d411358dcf54a8085df1d9097f5ab8b6d2 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Tue, 16 Jul 2019 15:57:12 +0000 Subject: [PATCH 250/451] [AMDGPU] Add the adjusted FP as a 
livein register. Reviewers: arsenm, rampitec Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64145 llvm-svn: 366223 --- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 52 ++++++++++--------- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 21 +++++--- llvm/lib/Target/AMDGPU/SIFrameLowering.h | 2 +- .../AMDGPU/frame-lowering-fp-adjusted.mir | 50 ++++++++++++++++++ 4 files changed, 91 insertions(+), 34 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 672e49184a501..0ea8db04c2985 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1067,15 +1067,15 @@ bool GCNTargetMachine::parseMachineFunctionInfo( auto parseAndCheckArgument = [&](const Optional &A, const TargetRegisterClass &RC, - ArgDescriptor &Arg) { + ArgDescriptor &Arg, unsigned UserSGPRs, + unsigned SystemSGPRs) { // Skip parsing if it's not present. 
if (!A) return false; if (A->IsRegister) { unsigned Reg; - if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, - Error)) { + if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) { SourceRange = A->RegisterName.SourceRange; return true; } @@ -1088,60 +1088,62 @@ bool GCNTargetMachine::parseMachineFunctionInfo( if (A->Mask) Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue()); + MFI->NumUserSGPRs += UserSGPRs; + MFI->NumSystemSGPRs += SystemSGPRs; return false; }; if (YamlMFI.ArgInfo && (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer, AMDGPU::SReg_128RegClass, - MFI->ArgInfo.PrivateSegmentBuffer) || + MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr, - AMDGPU::SReg_64RegClass, - MFI->ArgInfo.DispatchPtr) || + AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr, + 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass, - MFI->ArgInfo.QueuePtr) || + MFI->ArgInfo.QueuePtr, 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr, AMDGPU::SReg_64RegClass, - MFI->ArgInfo.KernargSegmentPtr) || + MFI->ArgInfo.KernargSegmentPtr, 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID, - AMDGPU::SReg_64RegClass, - MFI->ArgInfo.DispatchID) || + AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID, + 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit, AMDGPU::SReg_64RegClass, - MFI->ArgInfo.FlatScratchInit) || + MFI->ArgInfo.FlatScratchInit, 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize, AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.PrivateSegmentSize) || + MFI->ArgInfo.PrivateSegmentSize, 0, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX, - AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.WorkGroupIDX) || + AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX, + 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY, - AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.WorkGroupIDY) || + AMDGPU::SGPR_32RegClass, 
MFI->ArgInfo.WorkGroupIDY, + 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ, - AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.WorkGroupIDZ) || + AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ, + 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo, AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.WorkGroupInfo) || + MFI->ArgInfo.WorkGroupInfo, 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset, AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.PrivateSegmentWaveByteOffset) || + MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr, AMDGPU::SReg_64RegClass, - MFI->ArgInfo.ImplicitArgPtr) || + MFI->ArgInfo.ImplicitArgPtr, 0, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr, AMDGPU::SReg_64RegClass, - MFI->ArgInfo.ImplicitBufferPtr) || + MFI->ArgInfo.ImplicitBufferPtr, 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX, AMDGPU::VGPR_32RegClass, - MFI->ArgInfo.WorkItemIDX) || + MFI->ArgInfo.WorkItemIDX, 0, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY, AMDGPU::VGPR_32RegClass, - MFI->ArgInfo.WorkItemIDY) || + MFI->ArgInfo.WorkItemIDY, 0, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ, AMDGPU::VGPR_32RegClass, - MFI->ArgInfo.WorkItemIDZ))) + MFI->ArgInfo.WorkItemIDZ, 0, 0))) return true; MFI->Mode.IEEE = YamlMFI.Mode.IEEE; diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 44647d8ba871f..feab6bed2603f 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -311,7 +311,8 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg( } // Shift down registers reserved for the scratch wave offset. 
-unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( +std::pair +SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, MachineFunction &MF) const { MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -322,17 +323,17 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( // No replacement necessary. if (ScratchWaveOffsetReg == AMDGPU::NoRegister || (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) { - return AMDGPU::NoRegister; + return std::make_pair(AMDGPU::NoRegister, false); } if (ST.hasSGPRInitBug()) - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, false); unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); ArrayRef AllSGPRs = getAllSGPRs(ST, MF); if (NumPreloaded > AllSGPRs.size()) - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, false); AllSGPRs = AllSGPRs.slice(NumPreloaded); @@ -353,10 +354,11 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( unsigned ReservedRegCount = 13; if (AllSGPRs.size() < ReservedRegCount) - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, false); bool HandledScratchWaveOffsetReg = ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF); + bool FPAdjusted = false; for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) { // Pick the first unallocated SGPR. 
Be careful not to pick an alias of the @@ -374,12 +376,13 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( MFI->setScratchWaveOffsetReg(Reg); MFI->setFrameOffsetReg(Reg); ScratchWaveOffsetReg = Reg; + FPAdjusted = true; break; } } } - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, FPAdjusted); } void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, @@ -415,7 +418,9 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, unsigned ScratchRsrcReg = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF); - unsigned ScratchWaveOffsetReg = + unsigned ScratchWaveOffsetReg; + bool FPAdjusted; + std::tie(ScratchWaveOffsetReg, FPAdjusted) = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF); // We need to insert initialization of the scratch resource descriptor. @@ -453,7 +458,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, if (&OtherBB == &MBB) continue; - if (OffsetRegUsed) + if (OffsetRegUsed || FPAdjusted) OtherBB.addLiveIn(ScratchWaveOffsetReg); if (ResourceRegUsed) diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index 1954328714899..c644f4726e2ce 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -66,7 +66,7 @@ class SIFrameLowering final : public AMDGPUFrameLowering { SIMachineFunctionInfo *MFI, MachineFunction &MF) const; - unsigned getReservedPrivateSegmentWaveByteOffsetReg( + std::pair getReservedPrivateSegmentWaveByteOffsetReg( const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, MachineFunction &MF) const; diff --git a/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir b/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir new file mode 100644 index 0000000000000..348559fbd0daf --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/frame-lowering-fp-adjusted.mir @@ -0,0 +1,50 @@ +# RUN: llc 
-mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck %s + + +# CHECK-LABEL: name: foo +# CHECK: BUFFER_STORE_DWORD_OFFSET +--- | + + define amdgpu_kernel void @foo() #0 { + ret void + } + + attributes #0 = { "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" } +... +--- +name: foo +tracksRegLiveness: true +liveins: + - { reg: '$vgpr0' } + - { reg: '$sgpr4_sgpr5' } + - { reg: '$sgpr6_sgpr7' } + - { reg: '$sgpr8' } +frameInfo: + maxAlignment: 4 +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4 } +machineFunctionInfo: + explicitKernArgSize: 660 + maxKernArgAlign: 4 + isEntryFunction: true + waveLimiter: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + scratchWaveOffsetReg: '$sgpr101' + frameOffsetReg: '$sgpr101' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + kernargSegmentPtr: { reg: '$sgpr6_sgpr7' } + workGroupIDX: { reg: '$sgpr8' } + privateSegmentWaveByteOffset: { reg: '$sgpr9' } +body: | + bb.0: + successors: %bb.1 + liveins: $sgpr8, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7 + + bb.1: + liveins: $sgpr4, $sgpr5, $sgpr9, $sgpr22, $vgpr0, $sgpr6_sgpr7 + + renamable $vgpr2 = IMPLICIT_DEF + SI_SPILL_V32_SAVE killed $vgpr2, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) From 85b9651edd53d7bfb8d3076a79992450a787ec6d Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 16 Jul 2019 16:07:10 +0000 Subject: [PATCH 251/451] [OPENMP][NVPTX]Fixed checks for cuda versions. Summary: We used CUDART_VERSION macro to check for the installed cuda version but this macro is defined in cuda_runtime_api.h, which is not used by project. Better to use CUDA_VERSION macro, which is defined in cuda.h. Also, added the check if this macro is defined. If macro is undefined, there is something wrong with the cuda configuration and we should not continue the compilation. 
This also fixes problems with runtime building in cuda 10+. Reviewers: grokos Subscribers: guansong, jdoerfert, caomhin, kkwli0, openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D64648 llvm-svn: 366224 --- openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h index 646417d03cf87..f28284ded6b66 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -48,7 +48,9 @@ // Macros for Cuda intrinsics // In Cuda 9.0, the *_sync() version takes an extra argument 'mask'. // Also, __ballot(1) in Cuda 8.0 is replaced with __activemask(). -#if defined(CUDART_VERSION) && CUDART_VERSION >= 9000 +#ifndef CUDA_VERSION +#error CUDA_VERSION macro is undefined, something wrong with cuda. +#elif CUDA_VERSION >= 9000 #define __SHFL_SYNC(mask, var, srcLane) __shfl_sync((mask), (var), (srcLane)) #define __SHFL_DOWN_SYNC(mask, var, delta, width) \ __shfl_down_sync((mask), (var), (delta), (width)) @@ -58,7 +60,7 @@ #define __SHFL_DOWN_SYNC(mask, var, delta, width) \ __shfl_down((var), (delta), (width)) #define __ACTIVEMASK() __ballot(1) -#endif +#endif // CUDA_VERSION #define __SYNCTHREADS_N(n) asm volatile("bar.sync %0;" : : "r"(n) : "memory"); // Use original __syncthreads if compiled by nvcc or clang >= 9.0. From e6e33cf9f5682570e6d8d362001214d12b35b599 Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Tue, 16 Jul 2019 16:22:04 +0000 Subject: [PATCH 252/451] [TSan] Improve handling of stack pointer mangling in {set,long}jmp, pt.10 Remove now-unused assembly code for determining xor key on Linux/AArch64. This is the final commit of this refactoring. 
llvm-svn: 366225 --- .../lib/tsan/rtl/tsan_platform_linux.cc | 5 -- compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S | 59 ------------------- 2 files changed, 64 deletions(-) diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc index c387416c20dd7..ec8606f65d5c2 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc +++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc @@ -67,11 +67,6 @@ extern "C" void *__libc_stack_end; void *__libc_stack_end = 0; #endif -#if SANITIZER_LINUX && defined(__aarch64__) -__tsan::uptr InitializeGuardPtr() __attribute__((visibility("hidden"))); -extern "C" __tsan::uptr _tsan_pointer_chk_guard; -#endif - #if SANITIZER_LINUX && defined(__aarch64__) && !SANITIZER_GO # define INIT_LONGJMP_XOR_KEY 1 #else diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S b/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S index 55487bd770538..c35897d3c3624 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S @@ -3,14 +3,6 @@ #include "sanitizer_common/sanitizer_asm.h" -#if !defined(__APPLE__) -.section .bss -.type __tsan_pointer_chk_guard, %object -ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(__tsan_pointer_chk_guard)) -__tsan_pointer_chk_guard: -.zero 8 -#endif - #if defined(__APPLE__) .align 2 @@ -40,57 +32,6 @@ _sigsetjmp$non_lazy_ptr: .align 3 #endif -#if !defined(__APPLE__) -// GLIBC mangles the function pointers in jmp_buf (used in {set,long}*jmp -// functions) by XORing them with a random guard pointer. For AArch64 it is a -// global variable rather than a TCB one (as for x86_64/powerpc) and althought -// its value is exported by the loader, it lies within a private GLIBC -// namespace (meaning it should be only used by GLIBC itself and the ABI is -// not stable). So InitializeGuardPtr obtains the pointer guard value by -// issuing a setjmp and checking the resulting pointers values against the -// original ones. 
-ASM_HIDDEN(_Z18InitializeGuardPtrv) -.global _Z18InitializeGuardPtrv -ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(_Z18InitializeGuardPtrv)) -_Z18InitializeGuardPtrv: - CFI_STARTPROC - // Allocates a jmp_buf for the setjmp call. - stp x29, x30, [sp, -336]! - CFI_DEF_CFA_OFFSET (336) - CFI_OFFSET (29, -336) - CFI_OFFSET (30, -328) - add x29, sp, 0 - CFI_DEF_CFA_REGISTER (29) - add x0, x29, 24 - - // Call libc setjmp that mangle the stack pointer value - adrp x1, :got:_ZN14__interception12real__setjmpE - ldr x1, [x1, #:got_lo12:_ZN14__interception12real__setjmpE] - ldr x1, [x1] - blr x1 - - // glibc setjmp mangles both the frame pointer (FP, pc+4 on blr) and the - // stack pointer (SP). FP will be placed on ((uintptr*)jmp_buf)[11] and - // SP at ((uintptr*)jmp_buf)[13]. - // The mangle operation is just 'value' xor 'pointer guard value' and - // if we know the original value (SP) and the expected one, we can derive - // the guard pointer value. - mov x0, sp - - // Loads the mangled SP pointer. 
- ldr x1, [x29, 128] - eor x0, x0, x1 - adrp x2, __tsan_pointer_chk_guard - str x0, [x2, #:lo12:__tsan_pointer_chk_guard] - ldp x29, x30, [sp], 336 - CFI_RESTORE (30) - CFI_RESTORE (19) - CFI_DEF_CFA (31, 0) - ret - CFI_ENDPROC -ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(_Z18InitializeGuardPtrv)) -#endif - ASM_HIDDEN(__tsan_setjmp) .comm _ZN14__interception11real_setjmpE,8,8 .globl ASM_SYMBOL_INTERCEPTOR(setjmp) From e8ced86debe6cbf5d998796b0b969a782c9d5cba Mon Sep 17 00:00:00 2001 From: Stefan Granitz Date: Tue, 16 Jul 2019 16:57:45 +0000 Subject: [PATCH 253/451] [CMake] Add Apple-lldb-Xcode.cmake cache that avoids install options llvm-svn: 366226 --- lldb/cmake/caches/Apple-lldb-Xcode.cmake | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 lldb/cmake/caches/Apple-lldb-Xcode.cmake diff --git a/lldb/cmake/caches/Apple-lldb-Xcode.cmake b/lldb/cmake/caches/Apple-lldb-Xcode.cmake new file mode 100644 index 0000000000000..6602b07f7868b --- /dev/null +++ b/lldb/cmake/caches/Apple-lldb-Xcode.cmake @@ -0,0 +1,4 @@ +include(${CMAKE_CURRENT_LIST_DIR}/Apple-lldb-base.cmake) + +set(LLDB_BUILD_FRAMEWORK ON CACHE BOOL "") +set(CMAKE_OSX_DEPLOYMENT_TARGET 10.11 CACHE STRING "") From 97b4d7a8e14f37319676f750fa57d68eb09c0e16 Mon Sep 17 00:00:00 2001 From: Mitch Phillips Date: Tue, 16 Jul 2019 17:13:02 +0000 Subject: [PATCH 254/451] Removed -mno-omit-leaf-frame-pointer from flags. Removes -mno-omit-leaf-frame-pointer from Scudo and GWP-ASan's CFlags. Attempt to fix the sanitizer buildbots. 
llvm-svn: 366228 --- compiler-rt/lib/gwp_asan/CMakeLists.txt | 2 -- compiler-rt/lib/scudo/CMakeLists.txt | 4 ---- 2 files changed, 6 deletions(-) diff --git a/compiler-rt/lib/gwp_asan/CMakeLists.txt b/compiler-rt/lib/gwp_asan/CMakeLists.txt index 952acb1830411..94c5336ce1b3b 100644 --- a/compiler-rt/lib/gwp_asan/CMakeLists.txt +++ b/compiler-rt/lib/gwp_asan/CMakeLists.txt @@ -25,8 +25,6 @@ set(GWP_ASAN_CFLAGS -fno-rtti -fno-exceptions -nostdinc++ -pthread) append_list_if(COMPILER_RT_HAS_FPIC_FLAG -fPIC GWP_ASAN_CFLAGS) append_list_if(COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG -fno-omit-frame-pointer GWP_ASAN_CFLAGS) -append_list_if(COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG - -mno-omit-leaf-frame-pointer GWP_ASAN_CFLAGS) # Remove -stdlib= which is unused when passing -nostdinc++. string(REGEX REPLACE "-stdlib=[a-zA-Z+]*" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) diff --git a/compiler-rt/lib/scudo/CMakeLists.txt b/compiler-rt/lib/scudo/CMakeLists.txt index 9ee615c787de7..2a560b8fcb7f0 100644 --- a/compiler-rt/lib/scudo/CMakeLists.txt +++ b/compiler-rt/lib/scudo/CMakeLists.txt @@ -14,10 +14,6 @@ append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread SCUDO_MINIMAL_DYNAMIC_LIBS) append_list_if(COMPILER_RT_HAS_LIBLOG log SCUDO_MINIMAL_DYNAMIC_LIBS) append_list_if(COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG -fno-omit-frame-pointer SCUDO_CFLAGS) -if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - append_list_if(COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG - -mno-omit-leaf-frame-pointer SCUDO_CFLAGS) -endif() set(SCUDO_DYNAMIC_LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS}) # Use gc-sections by default to avoid unused code being pulled in. 
From 1ff553578551766801cbd25dd2c659dc8594b96b Mon Sep 17 00:00:00 2001 From: Jonas Hahnfeld Date: Tue, 16 Jul 2019 17:16:43 +0000 Subject: [PATCH 255/451] [OpenMP] Move header inclusion out of 'extern "C"' This leads to problems when compiling C++ code with libc++ for Nvidia GPUs because Clang now uses wrappers for math functions that might include C++ templates not allowed in 'extern "C"'. Differential Revision: https://reviews.llvm.org/D64625 llvm-svn: 366229 --- openmp/runtime/src/include/omp.h.var | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var index 54ba1f545b472..2246e7012bee2 100644 --- a/openmp/runtime/src/include/omp.h.var +++ b/openmp/runtime/src/include/omp.h.var @@ -15,6 +15,9 @@ #ifndef __OMP_H # define __OMP_H +# include +# include + # define KMP_VERSION_MAJOR @LIBOMP_VERSION_MAJOR@ # define KMP_VERSION_MINOR @LIBOMP_VERSION_MINOR@ # define KMP_VERSION_BUILD @LIBOMP_VERSION_BUILD@ @@ -134,8 +137,6 @@ extern int __KAI_KMPC_CONVENTION omp_get_team_num (void); extern int __KAI_KMPC_CONVENTION omp_get_cancellation (void); -# include -# include /* OpenMP 4.5 */ extern int __KAI_KMPC_CONVENTION omp_get_initial_device (void); extern void* __KAI_KMPC_CONVENTION omp_target_alloc(size_t, int); From c65a9db43e17f0acdd39b76498d1c23e4a70f9a1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 17:22:21 +0000 Subject: [PATCH 256/451] AMDGPU: Fix missing immarg for mfma intrinsics llvm-svn: 366230 --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 60 ++++++++++++------- llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll | 23 +++++++ 2 files changed, 63 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 1cde3afd69e1d..bad4216173d0a 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1675,83 +1675,103 @@ def
int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicNoRtn; // llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp def int_amdgcn_mfma_f32_32x32x1f32 : Intrinsic<[llvm_v32f32_ty], [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_16x16x1f32 : Intrinsic<[llvm_v16f32_ty], [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_4x4x1f32 : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_32x32x2f32 : Intrinsic<[llvm_v16f32_ty], [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_16x16x4f32 : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_32x32x4f16 : Intrinsic<[llvm_v32f32_ty], [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_16x16x4f16 : Intrinsic<[llvm_v16f32_ty], [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty, - llvm_i32_ty, 
llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_4x4x4f16 : Intrinsic<[llvm_v4f32_ty], [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_32x32x8f16 : Intrinsic<[llvm_v16f32_ty], [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_16x16x16f16 : Intrinsic<[llvm_v4f32_ty], [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_i32_32x32x4i8 : Intrinsic<[llvm_v32i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_i32_16x16x4i8 : Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_i32_4x4x4i8 : Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_i32_32x32x8i8 : Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty, - 
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_i32_16x16x16i8 : Intrinsic<[llvm_v4i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_32x32x2bf16 : Intrinsic<[llvm_v32f32_ty], [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_16x16x2bf16 : Intrinsic<[llvm_v16f32_ty], [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_4x4x2bf16 : Intrinsic<[llvm_v4f32_ty], [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_32x32x4bf16 : Intrinsic<[llvm_v16f32_ty], [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; def int_amdgcn_mfma_f32_16x16x8bf16 : Intrinsic<[llvm_v4f32_ty], [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrConvergent, IntrNoMem, ImmArg<3>, ImmArg<4>, ImmArg<5>]>; //===----------------------------------------------------------------------===// // 
Special Intrinsics for backend internal use only. No frontend diff --git a/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll index a72d1da68a212..76098385b6a23 100644 --- a/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll +++ b/llvm/test/Verifier/AMDGPU/intrinsic-immarg.ll @@ -674,3 +674,26 @@ define void @test_interp_p2_f16(float %arg0, float %arg1, i32 %arg2, i32 %arg3, ret void } + +declare <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x i32>, i32, i32, i32) +define void @test_mfma_f32_32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 %arg3, i32 %arg4, i32 %arg5) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg3 + ; CHECK-NEXT: %val0 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 %arg3, i32 2, i32 3) + %val0 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 %arg3, i32 2, i32 3) + store volatile <32 x i32> %val0, <32 x i32> addrspace(1)* undef + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg4 + ; CHECK-NEXT: %val1 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 1, i32 %arg4, i32 3) + %val1 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 1, i32 %arg4, i32 3) + store volatile <32 x i32> %val1, <32 x i32> addrspace(1)* undef + + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg5 + ; CHECK-NEXT: %val2 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 1, i32 2, i32 %arg5) + %val2 = call <32 x i32> @llvm.amdgcn.mfma.f32.32x32x1f32(float %arg0, float %arg1, <32 x i32> %arg2, i32 1, i32 2, i32 %arg5) + store volatile <32 x i32> %val2, <32 x i32> addrspace(1)* undef + + ret void +} From 4754814c5a68cf609f83b491feb38ad53c5acd4e Mon Sep 17 00:00:00 2001 From: Yonghong Song 
Date: Tue, 16 Jul 2019 17:24:33 +0000 Subject: [PATCH 257/451] fix unnamed bitfield issue and add tests for __builtin_preserve_access_index intrinsic The original commit is r366076. It is temporarily reverted (r366155) due to test failure. This resubmit makes test more robust by accepting regex instead of hardcoded names/references in several places. This is a followup patch for https://reviews.llvm.org/D61809. Handle unnamed bitfield properly and add more test cases. Fixed the unnamed bitfield issue. The unnamed bitfield is ignored by debug info, so we need to ignore such a struct/union member when we try to get the member index in the debug info. D61809 contains two test cases, but they are not enough: they do not check the generated IR at a fine-grained level, and there are no semantics checking tests. This patch added unit tests for both code gen and semantics checking for the new intrinsic. Signed-off-by: Yonghong Song llvm-svn: 366231 --- clang/lib/CodeGen/CGExpr.cpp | 21 ++- clang/lib/CodeGen/CodeGenFunction.h | 3 + .../CodeGen/builtin-preserve-access-index.c | 177 ++++++++++++++++++ .../test/Sema/builtin-preserve-access-index.c | 13 ++ 4 files changed, 212 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/builtin-preserve-access-index.c create mode 100644 clang/test/Sema/builtin-preserve-access-index.c diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 4d19a12e5cb05..5a4b1188b7114 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3892,6 +3892,23 @@ LValue CodeGenFunction::EmitLValueForLambdaField(const FieldDecl *Field) { return EmitLValueForField(LambdaLV, Field); } +/// Get the field index in the debug info. The debug info structure/union +/// will ignore the unnamed bitfields.
+unsigned CodeGenFunction::getDebugInfoFIndex(const RecordDecl *Rec, + unsigned FieldIndex) { + unsigned I = 0, Skipped = 0; + + for (auto F : Rec->getDefinition()->fields()) { + if (I == FieldIndex) + break; + if (F->isUnnamedBitfield()) + Skipped++; + I++; + } + + return FieldIndex - Skipped; +} + /// Get the address of a zero-sized field within a record. The resulting /// address doesn't necessarily have the right type. static Address emitAddrOfZeroSizeField(CodeGenFunction &CGF, Address Base, @@ -3931,7 +3948,7 @@ static Address emitPreserveStructAccess(CodeGenFunction &CGF, Address base, CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field); return CGF.Builder.CreatePreserveStructAccessIndex( - base, idx, field->getFieldIndex(), DbgInfo); + base, idx, CGF.getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo); } static bool hasAnyVptr(const QualType Type, const ASTContext &Context) { @@ -4048,7 +4065,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, getContext().getRecordType(rec), rec->getLocation()); addr = Address( Builder.CreatePreserveUnionAccessIndex( - addr.getPointer(), field->getFieldIndex(), DbgInfo), + addr.getPointer(), getDebugInfoFIndex(rec, field->getFieldIndex()), DbgInfo), addr.getAlignment()); } } else { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index bd9e14206a09e..06ef2dff7e9f5 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2652,6 +2652,9 @@ class CodeGenFunction : public CodeGenTypeCache { /// Converts Location to a DebugLoc, if debug information is enabled. llvm::DebugLoc SourceLocToDebugLoc(SourceLocation Location); + /// Get the record field index as represented in debug info. 
+ unsigned getDebugInfoFIndex(const RecordDecl *Rec, unsigned FieldIndex); + //===--------------------------------------------------------------------===// // Declaration Emission diff --git a/clang/test/CodeGen/builtin-preserve-access-index.c b/clang/test/CodeGen/builtin-preserve-access-index.c new file mode 100644 index 0000000000000..954a3b827d25a --- /dev/null +++ b/clang/test/CodeGen/builtin-preserve-access-index.c @@ -0,0 +1,177 @@ +// RUN: %clang -target x86_64 -emit-llvm -S -g %s -o - | FileCheck %s + +#define _(x) (__builtin_preserve_access_index(x)) + +const void *unit1(const void *arg) { + return _(arg); +} +// CHECK: define dso_local i8* @unit1 +// CHECK-NOT: llvm.preserve.array.access.index +// CHECK-NOT: llvm.preserve.struct.access.index +// CHECK-NOT: llvm.preserve.union.access.index + +const void *unit2(void) { + return _((const void *)0xffffffffFFFF0000ULL); +} +// CHECK: define dso_local i8* @unit2 +// CHECK-NOT: llvm.preserve.array.access.index +// CHECK-NOT: llvm.preserve.struct.access.index +// CHECK-NOT: llvm.preserve.union.access.index + +const void *unit3(const int *arg) { + return _(arg + 1); +} +// CHECK: define dso_local i8* @unit3 +// CHECK-NOT: llvm.preserve.array.access.index +// CHECK-NOT: llvm.preserve.struct.access.index +// CHECK-NOT: llvm.preserve.union.access.index + +const void *unit4(const int *arg) { + return _(&arg[1]); +} +// CHECK: define dso_local i8* @unit4 +// CHECK-NOT: getelementptr +// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0i32(i32* %{{[0-9a-z]+}}, i32 0, i32 1) + +const void *unit5(const int *arg[5]) { + return _(&arg[1][2]); +} +// CHECK: define dso_local i8* @unit5 +// CHECK-NOT: getelementptr +// CHECK: call i32** @llvm.preserve.array.access.index.p0p0i32.p0p0i32(i32** %{{[0-9a-z]+}}, i32 0, i32 1) +// CHECK-NOT: getelementptr +// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0i32(i32* %{{[0-9a-z]+}}, i32 0, i32 2) + +struct s1 { + char a; + int b; +}; + +struct s2 { + char a1:1; + 
char a2:1; + int b; +}; + +struct s3 { + char a1:1; + char a2:1; + char :6; + int b; +}; + +const void *unit6(struct s1 *arg) { + return _(&arg->a); +} +// CHECK: define dso_local i8* @unit6 +// CHECK-NOT: getelementptr +// CHECK: call i8* @llvm.preserve.struct.access.index.p0i8.p0s_struct.s1s(%struct.s1* %{{[0-9a-z]+}}, i32 0, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S1:[0-9]+]] + +const void *unit7(struct s1 *arg) { + return _(&arg->b); +} +// CHECK: define dso_local i8* @unit7 +// CHECK-NOT: getelementptr +// CHECK: call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s1s(%struct.s1* %{{[0-9a-z]+}}, i32 1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S1]] + +const void *unit8(struct s2 *arg) { + return _(&arg->b); +} +// CHECK: define dso_local i8* @unit8 +// CHECK-NOT: getelementptr +// CHECK: call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s2s(%struct.s2* %{{[0-9a-z]+}}, i32 1, i32 2), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S2:[0-9]+]] + +const void *unit9(struct s3 *arg) { + return _(&arg->b); +} +// CHECK: define dso_local i8* @unit9 +// CHECK-NOT: getelementptr +// CHECK: call i32* @llvm.preserve.struct.access.index.p0i32.p0s_struct.s3s(%struct.s3* %{{[0-9a-z]+}}, i32 1, i32 2), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S3:[0-9]+]] + +union u1 { + char a; + int b; +}; + +union u2 { + char a; + int :32; + int b; +}; + +const void *unit10(union u1 *arg) { + return _(&arg->a); +} +// CHECK: define dso_local i8* @unit10 +// CHECK-NOT: getelementptr +// CHECK: call %union.u1* @llvm.preserve.union.access.index.p0s_union.u1s.p0s_union.u1s(%union.u1* %{{[0-9a-z]+}}, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U1:[0-9]+]] + +const void *unit11(union u1 *arg) { + return _(&arg->b); +} +// CHECK: define dso_local i8* @unit11 +// CHECK-NOT: getelementptr +// CHECK: call %union.u1* @llvm.preserve.union.access.index.p0s_union.u1s.p0s_union.u1s(%union.u1* 
%{{[0-9a-z]+}}, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U1]] + +const void *unit12(union u2 *arg) { + return _(&arg->b); +} +// CHECK: define dso_local i8* @unit12 +// CHECK-NOT: getelementptr +// CHECK: call %union.u2* @llvm.preserve.union.access.index.p0s_union.u2s.p0s_union.u2s(%union.u2* %{{[0-9a-z]+}}, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U2:[0-9]+]] + +struct s4 { + char d; + union u { + int b[4]; + char a; + } c; +}; + +union u3 { + struct s { + int b[4]; + } c; + char a; +}; + +const void *unit13(struct s4 *arg) { + return _(&arg->c.b[2]); +} +// CHECK: define dso_local i8* @unit13 +// CHECK: call %union.u* @llvm.preserve.struct.access.index.p0s_union.us.p0s_struct.s4s(%struct.s4* %{{[0-9a-z]+}}, i32 1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S4:[0-9]+]] +// CHECK: call %union.u* @llvm.preserve.union.access.index.p0s_union.us.p0s_union.us(%union.u* %{{[0-9a-z]+}}, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_I_U:[0-9]+]] +// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0a4i32([4 x i32]* %{{[0-9a-z]+}}, i32 1, i32 2) + +const void *unit14(union u3 *arg) { + return _(&arg->c.b[2]); +} +// CHECK: define dso_local i8* @unit14 +// CHECK: call %union.u3* @llvm.preserve.union.access.index.p0s_union.u3s.p0s_union.u3s(%union.u3* %{{[0-9a-z]+}}, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U3:[0-9]+]] +// CHECK: call [4 x i32]* @llvm.preserve.struct.access.index.p0a4i32.p0s_struct.ss(%struct.s* %{{[0-9a-z]+}}, i32 0, i32 0), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_I_S:[0-9]+]] +// CHECK: call i32* @llvm.preserve.array.access.index.p0i32.p0a4i32([4 x i32]* %{{[0-9a-z]+}}, i32 1, i32 2) + +const void *unit15(struct s4 *arg) { + return _(&arg[2].c.a); +} +// CHECK: define dso_local i8* @unit15 +// CHECK: call %struct.s4* @llvm.preserve.array.access.index.p0s_struct.s4s.p0s_struct.s4s(%struct.s4* %{{[0-9a-z]+}}, i32 0, i32 2) +// 
CHECK: call %union.u* @llvm.preserve.struct.access.index.p0s_union.us.p0s_struct.s4s(%struct.s4* %{{[0-9a-z]+}}, i32 1, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[STRUCT_S4]] +// CHECK: call %union.u* @llvm.preserve.union.access.index.p0s_union.us.p0s_union.us(%union.u* %{{[0-9a-z]+}}, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_I_U]] + +const void *unit16(union u3 *arg) { + return _(&arg[2].a); +} +// CHECK: define dso_local i8* @unit16 +// CHECK: call %union.u3* @llvm.preserve.array.access.index.p0s_union.u3s.p0s_union.u3s(%union.u3* %{{[0-9a-z]+}}, i32 0, i32 2) +// CHECK: call %union.u3* @llvm.preserve.union.access.index.p0s_union.u3s.p0s_union.u3s(%union.u3* %{{[0-9a-z]+}}, i32 1), !dbg !{{[0-9]+}}, !llvm.preserve.access.index ![[UNION_U3]] + +// CHECK: ![[STRUCT_S1]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s1" +// CHECK: ![[STRUCT_S2]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s2" +// CHECK: ![[STRUCT_S3]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s3" +// CHECK: ![[UNION_U1]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u1" +// CHECK: ![[UNION_U2]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u2" +// CHECK: ![[STRUCT_S4]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s4" +// CHECK: ![[UNION_I_U]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u" +// CHECK: ![[UNION_U3]] = distinct !DICompositeType(tag: DW_TAG_union_type, name: "u3" +// CHECK: ![[STRUCT_I_S]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "s" diff --git a/clang/test/Sema/builtin-preserve-access-index.c b/clang/test/Sema/builtin-preserve-access-index.c new file mode 100644 index 0000000000000..c10ceb5145b8c --- /dev/null +++ b/clang/test/Sema/builtin-preserve-access-index.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -x c -triple x86_64-pc-linux-gnu -dwarf-version=4 -fsyntax-only -verify %s + +const void *invalid1(const int *arg) { + 
return __builtin_preserve_access_index(&arg[1], 1); // expected-error {{too many arguments to function call, expected 1, have 2}} +} + +void *invalid2(const int *arg) { + return __builtin_preserve_access_index(&arg[1]); // expected-warning {{returning 'const void *' from a function with result type 'void *' discards qualifiers}} +} + +const void *invalid3(const int *arg) { + return __builtin_preserve_access_index(1); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const void *'}} +} From c26e27d802c146d08446eab0d47e1dd2be233506 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 16 Jul 2019 17:29:03 +0000 Subject: [PATCH 258/451] [NFC] Fix -Wreorder warning in TBB backend llvm-svn: 366232 --- pstl/include/pstl/internal/parallel_backend_tbb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pstl/include/pstl/internal/parallel_backend_tbb.h b/pstl/include/pstl/internal/parallel_backend_tbb.h index 8ac385e3992db..6e7cb66077e6e 100644 --- a/pstl/include/pstl/internal/parallel_backend_tbb.h +++ b/pstl/include/pstl/internal/parallel_backend_tbb.h @@ -862,8 +862,8 @@ class __stable_sort_task : public tbb::task __stable_sort_task(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __zs, bool __root, _Compare __comp, _LeafSort __leaf_sort, _SizeType __nsort, _RandomAccessIterator1 __x_beg, _RandomAccessIterator2 __z_beg) - : _M_xs(__xs), _M_xe(__xe), _M_x_beg(__x_beg), _M_zs(__zs), _M_z_beg(__z_beg), _M_root(__root), _M_comp(__comp), - _M_leaf_sort(__leaf_sort), _M_nsort(__nsort) + : _M_xs(__xs), _M_xe(__xe), _M_x_beg(__x_beg), _M_zs(__zs), _M_z_beg(__z_beg), _M_comp(__comp), + _M_leaf_sort(__leaf_sort), _M_root(__root), _M_nsort(__nsort) { } }; From 3559fcd1149b339757b10365a4b24161b6ef71a5 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 16 Jul 2019 17:29:09 +0000 Subject: [PATCH 259/451] [pstl] Use std::transform_reduce instead of hand-rolled implementation 
llvm-svn: 366233 --- pstl/include/pstl/internal/numeric_impl.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pstl/include/pstl/internal/numeric_impl.h b/pstl/include/pstl/internal/numeric_impl.h index 625bb9441c8a6..fe1f222d437e4 100644 --- a/pstl/include/pstl/internal/numeric_impl.h +++ b/pstl/include/pstl/internal/numeric_impl.h @@ -93,11 +93,7 @@ _Tp __brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op, /*is_vector=*/std::false_type) noexcept { - for (; __first != __last; ++__first) - { - __init = __binary_op(__init, __unary_op(*__first)); - } - return __init; + return std::transform_reduce(__first, __last, __init, __binary_op, __unary_op); } template From c6fd5abecc42fb56525cb56b62f95603863ae6a0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 17:38:50 +0000 Subject: [PATCH 260/451] AMDGPU: Redefine load PatFrags Rewrite PatFrags using the new PatFrag address space matching in tablegen. These will now work with both SelectionDAG and GlobalISel. 
llvm-svn: 366234 --- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 167 +++++++++++-------- llvm/lib/Target/AMDGPU/BUFInstructions.td | 4 +- llvm/lib/Target/AMDGPU/FLATInstructions.td | 8 +- llvm/lib/Target/AMDGPU/R600Instructions.td | 2 + 4 files changed, 105 insertions(+), 76 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 9e9510e0fa4a0..d470b3cd51486 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -11,6 +11,18 @@ // //===----------------------------------------------------------------------===// +class AddressSpacesImpl { + int Flat = 0; + int Global = 1; + int Region = 2; + int Local = 3; + int Constant = 4; + int Private = 5; +} + +def AddrSpaces : AddressSpacesImpl; + + class AMDGPUInst pattern = []> : Instruction { field bit isRegisterLoad = 0; @@ -323,6 +335,10 @@ def TEX_SHADOW_ARRAY : PatLeaf< // Load/Store Pattern Fragments //===----------------------------------------------------------------------===// +class AddressSpaceList AS> { + list AddrSpaces = AS; +} + class Aligned8Bytes : PatFrag (N)->getAlignment() % 8 == 0; }]>; @@ -341,25 +357,25 @@ class StoreHi16 : PatFrag < (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr) >; -class PrivateAddress : CodePatPred<[{ - return cast(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS; -}]>; +def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant ]>; +def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global, AddrSpaces.Constant ]>; +def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>; -class ConstantAddress : CodePatPred<[{ - return cast(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; -}]>; +def LoadAddress_flat : AddressSpaceList<[ AddrSpaces.Flat, + AddrSpaces.Global, + AddrSpaces.Constant ]>; +def StoreAddress_flat : AddressSpaceList<[ AddrSpaces.Flat, AddrSpaces.Global ]>; -class LocalAddress : CodePatPred<[{ - 
return cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; -}]>; +def LoadAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>; +def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>; + +def LoadAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>; +def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>; + +def LoadAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>; +def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>; -class RegionAddress : CodePatPred<[{ - return cast(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS; -}]>; -class GlobalAddress : CodePatPred<[{ - return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; -}]>; class GlobalLoadAddress : CodePatPred<[{ auto AS = cast(N)->getAddressSpace(); @@ -373,37 +389,86 @@ class FlatLoadAddress : CodePatPred<[{ AS == AMDGPUAS::CONSTANT_ADDRESS; }]>; +class GlobalAddress : CodePatPred<[{ + return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; +}]>; + +class PrivateAddress : CodePatPred<[{ + return cast(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS; +}]>; + +class LocalAddress : CodePatPred<[{ + return cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +}]>; + +class RegionAddress : CodePatPred<[{ + return cast(N)->getAddressSpace() == AMDGPUAS::REGION_ADDRESS; +}]>; + class FlatStoreAddress : CodePatPred<[{ const auto AS = cast(N)->getAddressSpace(); return AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS; }]>; -class PrivateLoad : LoadFrag , PrivateAddress; +// TODO: Remove these when stores to new PatFrag format. 
class PrivateStore : StoreFrag , PrivateAddress; - -class LocalLoad : LoadFrag , LocalAddress; class LocalStore : StoreFrag , LocalAddress; - -class RegionLoad : LoadFrag , RegionAddress; class RegionStore : StoreFrag , RegionAddress; - -class GlobalLoad : LoadFrag, GlobalLoadAddress; class GlobalStore : StoreFrag, GlobalAddress; - -class FlatLoad : LoadFrag , FlatLoadAddress; class FlatStore : StoreFrag , FlatStoreAddress; -class ConstantLoad : LoadFrag , ConstantAddress; +foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in { +let AddressSpaces = !cast("LoadAddress_"#as).AddrSpaces in { + +def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> { + let IsLoad = 1; + let IsNonExtLoad = 1; +} + +def extloadi8_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i8; +} + +def extloadi16_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i16; +} + +def sextloadi8_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i8; +} + +def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i16; +} + +def zextloadi8_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i8; +} + +def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i16; +} + +def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> { + let IsAtomic = 1; + let MemoryVT = i32; +} + +def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> { + let IsAtomic = 1; + let MemoryVT = i64; +} -def load_private : PrivateLoad ; -def extloadi8_private : PrivateLoad ; -def zextloadi8_private : PrivateLoad ; -def sextloadi8_private : PrivateLoad ; -def extloadi16_private : PrivateLoad ; -def zextloadi16_private : PrivateLoad ; -def sextloadi16_private : PrivateLoad ; +} // End let AddressSpaces = ... 
+} // End foreach AddrSpace def store_private : PrivateStore ; def truncstorei8_private : PrivateStore; @@ -411,16 +476,6 @@ def truncstorei16_private : PrivateStore ; def store_hi16_private : StoreHi16 , PrivateAddress; def truncstorei8_hi16_private : StoreHi16, PrivateAddress; - -def load_global : GlobalLoad ; -def sextloadi8_global : GlobalLoad ; -def extloadi8_global : GlobalLoad ; -def zextloadi8_global : GlobalLoad ; -def sextloadi16_global : GlobalLoad ; -def extloadi16_global : GlobalLoad ; -def zextloadi16_global : GlobalLoad ; -def atomic_load_global : GlobalLoad; - def store_global : GlobalStore ; def truncstorei8_global : GlobalStore ; def truncstorei16_global : GlobalStore ; @@ -428,16 +483,6 @@ def store_atomic_global : GlobalStore; def truncstorei8_hi16_global : StoreHi16 , GlobalAddress; def truncstorei16_hi16_global : StoreHi16 , GlobalAddress; -def load_local : LocalLoad ; -def extloadi8_local : LocalLoad ; -def zextloadi8_local : LocalLoad ; -def sextloadi8_local : LocalLoad ; -def extloadi16_local : LocalLoad ; -def zextloadi16_local : LocalLoad ; -def sextloadi16_local : LocalLoad ; -def atomic_load_32_local : LocalLoad; -def atomic_load_64_local : LocalLoad; - def store_local : LocalStore ; def truncstorei8_local : LocalStore ; def truncstorei16_local : LocalStore ; @@ -461,15 +506,6 @@ def store_align16_local : Aligned16Bytes < (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr) >; -def load_flat : FlatLoad ; -def extloadi8_flat : FlatLoad ; -def zextloadi8_flat : FlatLoad ; -def sextloadi8_flat : FlatLoad ; -def extloadi16_flat : FlatLoad ; -def zextloadi16_flat : FlatLoad ; -def sextloadi16_flat : FlatLoad ; -def atomic_load_flat : FlatLoad; - def store_flat : FlatStore ; def truncstorei8_flat : FlatStore ; def truncstorei16_flat : FlatStore ; @@ -478,15 +514,6 @@ def truncstorei8_hi16_flat : StoreHi16, FlatStoreAddress; def truncstorei16_hi16_flat : StoreHi16, FlatStoreAddress; -def constant_load : ConstantLoad; -def 
sextloadi8_constant : ConstantLoad ; -def extloadi8_constant : ConstantLoad ; -def zextloadi8_constant : ConstantLoad ; -def sextloadi16_constant : ConstantLoad ; -def extloadi16_constant : ConstantLoad ; -def zextloadi16_constant : ConstantLoad ; - - class local_binary_atomic_op : PatFrag<(ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), [{ diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 4ff9aeb2e314e..5b6c8a7ed96fc 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1445,8 +1445,8 @@ def : MUBUFLoad_PatternADDR64 ; def : MUBUFLoad_PatternADDR64 ; -defm : MUBUFLoad_Atomic_Pattern ; -defm : MUBUFLoad_Atomic_Pattern ; +defm : MUBUFLoad_Atomic_Pattern ; +defm : MUBUFLoad_Atomic_Pattern ; } // End SubtargetPredicate = isGFX6GFX7 multiclass MUBUFLoad_Pattern ; def : FlatLoadPat ; def : FlatLoadPat ; -def : FlatLoadAtomicPat ; -def : FlatLoadAtomicPat ; +def : FlatLoadAtomicPat ; +def : FlatLoadAtomicPat ; def : FlatStorePat ; def : FlatStorePat ; @@ -868,8 +868,8 @@ def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; -def : FlatLoadAtomicPat ; -def : FlatLoadAtomicPat ; +def : FlatLoadAtomicPat ; +def : FlatLoadAtomicPat ; def : FlatStoreSignedPat ; def : FlatStoreSignedPat ; diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index d3ce7ffd673c5..f40eece859ee7 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -296,6 +296,8 @@ class VTX_READ pattern> } // FIXME: Deprecated. 
+class LocalLoad : LoadFrag , LocalAddress; + class AZExtLoadBase : PatFrag<(ops node:$ptr), (ld_node node:$ptr), [{ LoadSDNode *L = cast(N); From 17060f0a54b681b8c7cec2f9ab465f6a1e51d968 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 16 Jul 2019 17:44:54 +0000 Subject: [PATCH 261/451] [AMDGPU] Optimize atomic max/min Summary: Extend the atomic optimizer to handle signed and unsigned max and min operations, as well as add and subtract. Reviewers: arsenm, sheredom, critson, rampitec Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, jfb, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64328 llvm-svn: 366235 --- .../Target/AMDGPU/AMDGPUAtomicOptimizer.cpp | 177 ++++++++++++++---- .../atomic_optimizations_local_pointer.ll | 108 +++++++++++ 2 files changed, 249 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp index 810861503be55..c65a49b7c5bc7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp @@ -40,7 +40,7 @@ enum DPP_CTRL { struct ReplacementInfo { Instruction *I; - Instruction::BinaryOps Op; + AtomicRMWInst::BinOp Op; unsigned ValIdx; bool ValDivergent; }; @@ -55,8 +55,8 @@ class AMDGPUAtomicOptimizer : public FunctionPass, bool HasDPP; bool IsPixelShader; - void optimizeAtomic(Instruction &I, Instruction::BinaryOps Op, - unsigned ValIdx, bool ValDivergent) const; + void optimizeAtomic(Instruction &I, AtomicRMWInst::BinOp Op, unsigned ValIdx, + bool ValDivergent) const; public: static char ID; @@ -120,16 +120,17 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) { break; } - Instruction::BinaryOps Op; + AtomicRMWInst::BinOp Op = I.getOperation(); - switch (I.getOperation()) { + switch (Op) { default: return; case AtomicRMWInst::Add: - Op = Instruction::Add; - break; case AtomicRMWInst::Sub: - Op = Instruction::Sub; + case 
AtomicRMWInst::Max: + case AtomicRMWInst::Min: + case AtomicRMWInst::UMax: + case AtomicRMWInst::UMin: break; } @@ -161,7 +162,7 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) { } void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) { - Instruction::BinaryOps Op; + AtomicRMWInst::BinOp Op; switch (I.getIntrinsicID()) { default: @@ -169,12 +170,32 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) { case Intrinsic::amdgcn_buffer_atomic_add: case Intrinsic::amdgcn_struct_buffer_atomic_add: case Intrinsic::amdgcn_raw_buffer_atomic_add: - Op = Instruction::Add; + Op = AtomicRMWInst::Add; break; case Intrinsic::amdgcn_buffer_atomic_sub: case Intrinsic::amdgcn_struct_buffer_atomic_sub: case Intrinsic::amdgcn_raw_buffer_atomic_sub: - Op = Instruction::Sub; + Op = AtomicRMWInst::Sub; + break; + case Intrinsic::amdgcn_buffer_atomic_smin: + case Intrinsic::amdgcn_struct_buffer_atomic_smin: + case Intrinsic::amdgcn_raw_buffer_atomic_smin: + Op = AtomicRMWInst::Min; + break; + case Intrinsic::amdgcn_buffer_atomic_umin: + case Intrinsic::amdgcn_struct_buffer_atomic_umin: + case Intrinsic::amdgcn_raw_buffer_atomic_umin: + Op = AtomicRMWInst::UMin; + break; + case Intrinsic::amdgcn_buffer_atomic_smax: + case Intrinsic::amdgcn_struct_buffer_atomic_smax: + case Intrinsic::amdgcn_raw_buffer_atomic_smax: + Op = AtomicRMWInst::Max; + break; + case Intrinsic::amdgcn_buffer_atomic_umax: + case Intrinsic::amdgcn_struct_buffer_atomic_umax: + case Intrinsic::amdgcn_raw_buffer_atomic_umax: + Op = AtomicRMWInst::UMax; break; } @@ -206,8 +227,57 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) { ToReplace.push_back(Info); } +// Use the builder to create the non-atomic counterpart of the specified +// atomicrmw binary op. 
+static Value *buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op, + Value *LHS, Value *RHS) { + CmpInst::Predicate Pred; + + switch (Op) { + default: + llvm_unreachable("Unhandled atomic op"); + case AtomicRMWInst::Add: + return B.CreateBinOp(Instruction::Add, LHS, RHS); + case AtomicRMWInst::Sub: + return B.CreateBinOp(Instruction::Sub, LHS, RHS); + + case AtomicRMWInst::Max: + Pred = CmpInst::ICMP_SGT; + break; + case AtomicRMWInst::Min: + Pred = CmpInst::ICMP_SLT; + break; + case AtomicRMWInst::UMax: + Pred = CmpInst::ICMP_UGT; + break; + case AtomicRMWInst::UMin: + Pred = CmpInst::ICMP_ULT; + break; + } + Value *Cond = B.CreateICmp(Pred, LHS, RHS); + return B.CreateSelect(Cond, LHS, RHS); +} + +static APInt getIdentityValueForAtomicOp(AtomicRMWInst::BinOp Op, + unsigned BitWidth) { + switch (Op) { + default: + llvm_unreachable("Unhandled atomic op"); + case AtomicRMWInst::Add: + case AtomicRMWInst::Sub: + case AtomicRMWInst::UMax: + return APInt::getMinValue(BitWidth); + case AtomicRMWInst::UMin: + return APInt::getMaxValue(BitWidth); + case AtomicRMWInst::Max: + return APInt::getSignedMinValue(BitWidth); + case AtomicRMWInst::Min: + return APInt::getSignedMaxValue(BitWidth); + } +} + void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, - Instruction::BinaryOps Op, + AtomicRMWInst::BinOp Op, unsigned ValIdx, bool ValDivergent) const { // Start building just before the instruction. @@ -266,16 +336,16 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, Value *const MbcntCast = B.CreateIntCast(Mbcnt, Ty, false); - Value *LaneOffset = nullptr; + Value *const Identity = B.getInt(getIdentityValueForAtomicOp(Op, TyBitWidth)); + + Value *ExclScan = nullptr; Value *NewV = nullptr; // If we have a divergent value in each lane, we need to combine the value // using DPP. 
if (ValDivergent) { - Value *const Identity = B.getIntN(TyBitWidth, 0); - - // First we need to set all inactive invocations to 0, so that they can - // correctly contribute to the final result. + // First we need to set all inactive invocations to the identity value, so + // that they can correctly contribute to the final result. CallInst *const SetInactive = B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity}); @@ -283,7 +353,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, Ty, {Identity, SetInactive, B.getInt32(DPP_WF_SR1), B.getInt32(0xf), B.getInt32(0xf), B.getFalse()}); - NewV = FirstDPP; + ExclScan = FirstDPP; const unsigned Iters = 7; const unsigned DPPCtrl[Iters] = { @@ -295,21 +365,20 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, // This loop performs an exclusive scan across the wavefront, with all lanes // active (by using the WWM intrinsic). for (unsigned Idx = 0; Idx < Iters; Idx++) { - Value *const UpdateValue = Idx < 3 ? FirstDPP : NewV; + Value *const UpdateValue = Idx < 3 ? FirstDPP : ExclScan; CallInst *const DPP = B.CreateIntrinsic( Intrinsic::amdgcn_update_dpp, Ty, {Identity, UpdateValue, B.getInt32(DPPCtrl[Idx]), B.getInt32(RowMask[Idx]), B.getInt32(BankMask[Idx]), B.getFalse()}); - NewV = B.CreateBinOp(Op, NewV, DPP); + ExclScan = buildNonAtomicBinOp(B, Op, ExclScan, DPP); } - LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, NewV); - NewV = B.CreateBinOp(Op, SetInactive, NewV); + NewV = buildNonAtomicBinOp(B, Op, SetInactive, ExclScan); // Read the value from the last lane, which has accumlated the values of - // each active lane in the wavefront. This will be our new value with which - // we will provide to the atomic operation. + // each active lane in the wavefront. This will be our new value which we + // will provide to the atomic operation. 
if (TyBitWidth == 64) { Value *const ExtractLo = B.CreateTrunc(NewV, B.getInt32Ty()); Value *const ExtractHi = @@ -324,9 +393,8 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, B.CreateInsertElement(PartialInsert, ReadLaneHi, B.getInt32(1)); NewV = B.CreateBitCast(Insert, Ty); } else if (TyBitWidth == 32) { - CallInst *const ReadLane = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, - {}, {NewV, B.getInt32(63)}); - NewV = ReadLane; + NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {}, + {NewV, B.getInt32(63)}); } else { llvm_unreachable("Unhandled atomic bit width"); } @@ -334,14 +402,32 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, // Finally mark the readlanes in the WWM section. NewV = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, NewV); } else { - // Get the total number of active lanes we have by using popcount. - Instruction *const Ctpop = B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot); - Value *const CtpopCast = B.CreateIntCast(Ctpop, Ty, false); - - // Calculate the new value we will be contributing to the atomic operation - // for the entire wavefront. - NewV = B.CreateMul(V, CtpopCast); - LaneOffset = B.CreateMul(V, MbcntCast); + switch (Op) { + default: + llvm_unreachable("Unhandled atomic op"); + + case AtomicRMWInst::Add: + case AtomicRMWInst::Sub: { + // Get the total number of active lanes we have by using popcount. + Instruction *const Ctpop = + B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot); + Value *const CtpopCast = B.CreateIntCast(Ctpop, Ty, false); + + // Calculate the new value we will be contributing to the atomic operation + // for the entire wavefront. + NewV = B.CreateMul(V, CtpopCast); + break; + } + + case AtomicRMWInst::Max: + case AtomicRMWInst::Min: + case AtomicRMWInst::UMax: + case AtomicRMWInst::UMin: + // Max/min with a uniform value is idempotent: doing the atomic operation + // multiple times has the same effect as doing it once. 
+ NewV = V; + break; + } } // We only want a single lane to enter our new control flow, and we do this @@ -407,7 +493,26 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, // get our individual lane's slice into the result. We use the lane offset we // previously calculated combined with the atomic result value we got from the // first lane, to get our lane's index into the atomic result. - Value *const Result = B.CreateBinOp(Op, BroadcastI, LaneOffset); + Value *LaneOffset = nullptr; + if (ValDivergent) { + LaneOffset = B.CreateIntrinsic(Intrinsic::amdgcn_wwm, Ty, ExclScan); + } else { + switch (Op) { + default: + llvm_unreachable("Unhandled atomic op"); + case AtomicRMWInst::Add: + case AtomicRMWInst::Sub: + LaneOffset = B.CreateMul(V, MbcntCast); + break; + case AtomicRMWInst::Max: + case AtomicRMWInst::Min: + case AtomicRMWInst::UMax: + case AtomicRMWInst::UMin: + LaneOffset = B.CreateSelect(Cond, Identity, V); + break; + } + } + Value *const Result = buildNonAtomicBinOp(B, Op, BroadcastI, LaneOffset); if (IsPixelShader) { // Need a final PHI to reconverge to above the helper lane branch mask. 
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll index f3d50c9c490f9..5f7649c1c0ea5 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -194,3 +194,111 @@ entry: store i64 %old, i64 addrspace(1)* %out ret void } + +; GCN-LABEL: max_i32_varying: +; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63 +; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] +; GFX8MORE: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]] +define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) { +entry: + %lane = call i32 @llvm.amdgcn.workitem.id.x() + %old = atomicrmw max i32 addrspace(3)* @local_var32, i32 %lane acq_rel + store i32 %old, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: max_i64_constant: +; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0 +; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0 +; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]] +; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]] +; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5 +; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0 +; GCN: ds_max_rtn_i64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}} +define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) { +entry: + %old = atomicrmw max i64 addrspace(3)* @local_var64, i64 5 acq_rel + store i64 %old, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: min_i32_varying: +; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63 +; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] +; GFX8MORE: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]] +define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) { +entry: + %lane 
= call i32 @llvm.amdgcn.workitem.id.x() + %old = atomicrmw min i32 addrspace(3)* @local_var32, i32 %lane acq_rel + store i32 %old, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: min_i64_constant: +; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0 +; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0 +; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]] +; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]] +; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5 +; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0 +; GCN: ds_min_rtn_i64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}} +define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) { +entry: + %old = atomicrmw min i64 addrspace(3)* @local_var64, i64 5 acq_rel + store i64 %old, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: umax_i32_varying: +; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63 +; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] +; GFX8MORE: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]] +define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) { +entry: + %lane = call i32 @llvm.amdgcn.workitem.id.x() + %old = atomicrmw umax i32 addrspace(3)* @local_var32, i32 %lane acq_rel + store i32 %old, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: umax_i64_constant: +; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0 +; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0 +; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]] +; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]] +; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5 +; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0 +; GCN: ds_max_rtn_u64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, 
v{{\[}}[[value_lo]]:[[value_hi]]{{\]}} +define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) { +entry: + %old = atomicrmw umax i64 addrspace(3)* @local_var64, i64 5 acq_rel + store i64 %old, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: umin_i32_varying: +; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63 +; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] +; GFX8MORE: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]] +define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) { +entry: + %lane = call i32 @llvm.amdgcn.workitem.id.x() + %old = atomicrmw umin i32 addrspace(3)* @local_var32, i32 %lane acq_rel + store i32 %old, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: umin_i64_constant: +; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0 +; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0 +; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]] +; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]] +; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5 +; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0 +; GCN: ds_min_rtn_u64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}} +define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) { +entry: + %old = atomicrmw umin i64 addrspace(3)* @local_var64, i64 5 acq_rel + store i64 %old, i64 addrspace(1)* %out + ret void +} From d100b5dd0197df615ac0ffc1619aec796cbdc0be Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 16 Jul 2019 18:04:26 +0000 Subject: [PATCH 262/451] Teach `llvm-pdbutil pretty -native` about `-injected-sources` `pretty -native -injected-sources -injected-source-content` works with this patch, and produces identical output to the dia version. 
Differential Revision: https://reviews.llvm.org/D64428 llvm-svn: 366236 --- .../llvm/DebugInfo/PDB/Native/HashTable.h | 12 +- .../PDB/Native/InjectedSourceStream.h | 44 +++++++ .../PDB/Native/NativeEnumInjectedSources.h | 43 +++++++ .../llvm/DebugInfo/PDB/Native/PDBFile.h | 6 + llvm/lib/DebugInfo/PDB/CMakeLists.txt | 2 + .../PDB/Native/InjectedSourceStream.cpp | 65 ++++++++++ .../PDB/Native/NativeEnumInjectedSources.cpp | 121 ++++++++++++++++++ .../DebugInfo/PDB/Native/NativeSession.cpp | 13 +- llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp | 57 +++++++-- .../llvm-pdbutil/injected-sources-native.test | 30 +++++ llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp | 9 +- .../secondary/llvm/lib/DebugInfo/PDB/BUILD.gn | 2 + 12 files changed, 383 insertions(+), 21 deletions(-) create mode 100644 llvm/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h create mode 100644 llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h create mode 100644 llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp create mode 100644 llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp create mode 100644 llvm/test/tools/llvm-pdbutil/injected-sources-native.test diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h index e045cc28f71ae..aa38417bcf4c1 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h @@ -72,6 +72,12 @@ class HashTableIterator assert(Map->Present.test(Index)); return Map->Buckets[Index]; } + + // Implement postfix op++ in terms of prefix op++ by using the superclass + // implementation. 
+ using iterator_facade_base, + std::forward_iterator_tag, + const std::pair>::operator++; HashTableIterator &operator++() { while (Index < Map->Buckets.size()) { ++Index; @@ -94,9 +100,6 @@ class HashTableIterator template class HashTable { - using const_iterator = HashTableIterator; - friend const_iterator; - struct Header { support::ulittle32_t Size; support::ulittle32_t Capacity; @@ -105,6 +108,9 @@ class HashTable { using BucketList = std::vector>; public: + using const_iterator = HashTableIterator; + friend const_iterator; + HashTable() { Buckets.resize(8); } explicit HashTable(uint32_t Capacity) { Buckets.resize(Capacity); diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h new file mode 100644 index 0000000000000..d0cac3749bcab --- /dev/null +++ b/llvm/include/llvm/DebugInfo/PDB/Native/InjectedSourceStream.h @@ -0,0 +1,44 @@ +//===- InjectedSourceStream.h - PDB Headerblock Stream Access ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBINJECTEDSOURCESTREAM_H +#define LLVM_DEBUGINFO_PDB_RAW_PDBINJECTEDSOURCESTREAM_H + +#include "llvm/DebugInfo/PDB/Native/HashTable.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace msf { +class MappedBlockStream; +} +namespace pdb { +class PDBFile; +class PDBStringTable; + +class InjectedSourceStream { +public: + InjectedSourceStream(std::unique_ptr Stream); + Error reload(const PDBStringTable &Strings); + + using const_iterator = HashTable::const_iterator; + const_iterator begin() const { return InjectedSourceTable.begin(); } + const_iterator end() const { return InjectedSourceTable.end(); } + + uint32_t size() const { return InjectedSourceTable.size(); } + +private: + std::unique_ptr Stream; + + const SrcHeaderBlockHeader* Header; + HashTable InjectedSourceTable; +}; +} +} + +#endif diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h new file mode 100644 index 0000000000000..ca1e22bd82a2b --- /dev/null +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h @@ -0,0 +1,43 @@ +//==- NativeEnumInjectedSources.cpp - Native Injected Source Enumerator --*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMINJECTEDSOURCES_H +#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMINJECTEDSOURCES_H + +#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/IPDBInjectedSource.h" +#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h" + +namespace llvm { +namespace pdb { + +class InjectedSourceStream; +class PDBStringTable; + +class NativeEnumInjectedSources : public IPDBEnumChildren { +public: + NativeEnumInjectedSources(PDBFile &File, const InjectedSourceStream &IJS, + const PDBStringTable &Strings); + + uint32_t getChildCount() const override; + std::unique_ptr + getChildAtIndex(uint32_t Index) const override; + std::unique_ptr getNext() override; + void reset() override; + +private: + PDBFile &File; + const InjectedSourceStream &Stream; + const PDBStringTable &Strings; + InjectedSourceStream::const_iterator Cur; +}; + +} // namespace pdb +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h index 92c1e0fe2fe60..56de4030167de 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFile.h @@ -32,6 +32,7 @@ namespace pdb { class DbiStream; class GlobalsStream; class InfoStream; +class InjectedSourceStream; class PDBStringTable; class PDBFileBuilder; class PublicsStream; @@ -87,6 +88,8 @@ class PDBFile : public msf::IMSFFile { createIndexedStream(uint16_t SN) const; Expected> safelyCreateIndexedStream(uint32_t StreamIndex) const; + Expected> + safelyCreateNamedStream(StringRef Name); msf::MSFStreamLayout getStreamLayout(uint32_t StreamIdx) const; msf::MSFStreamLayout getFpmStreamLayout() const; @@ -102,6 +105,7 @@ class PDBFile : public msf::IMSFFile { Expected getPDBPublicsStream(); Expected getPDBSymbolStream(); Expected 
getStringTable(); + Expected getInjectedSourceStream(); BumpPtrAllocator &getAllocator() { return Allocator; } @@ -113,6 +117,7 @@ class PDBFile : public msf::IMSFFile { bool hasPDBSymbolStream(); bool hasPDBTpiStream() const; bool hasPDBStringTable(); + bool hasPDBInjectedSourceStream(); uint32_t getPointerSize(); @@ -133,6 +138,7 @@ class PDBFile : public msf::IMSFFile { std::unique_ptr Symbols; std::unique_ptr DirectoryStream; std::unique_ptr StringTableStream; + std::unique_ptr InjectedSources; std::unique_ptr Strings; }; } diff --git a/llvm/lib/DebugInfo/PDB/CMakeLists.txt b/llvm/lib/DebugInfo/PDB/CMakeLists.txt index d9d379f6d0912..0e842af9f18f2 100644 --- a/llvm/lib/DebugInfo/PDB/CMakeLists.txt +++ b/llvm/lib/DebugInfo/PDB/CMakeLists.txt @@ -47,9 +47,11 @@ add_pdb_impl_folder(Native Native/HashTable.cpp Native/InfoStream.cpp Native/InfoStreamBuilder.cpp + Native/InjectedSourceStream.cpp Native/ModuleDebugStream.cpp Native/NativeCompilandSymbol.cpp Native/NativeEnumGlobals.cpp + Native/NativeEnumInjectedSources.cpp Native/NativeEnumModules.cpp Native/NativeEnumTypes.cpp Native/NativeExeSymbol.cpp diff --git a/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp b/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp new file mode 100644 index 0000000000000..3f4101db7b93e --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/InjectedSourceStream.cpp @@ -0,0 +1,65 @@ +//===- InjectedSourceStream.cpp - PDB Headerblock Stream Access -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h" + +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" +#include "llvm/DebugInfo/PDB/Native/RawConstants.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::msf; +using namespace llvm::support; +using namespace llvm::pdb; + +InjectedSourceStream::InjectedSourceStream( + std::unique_ptr Stream) + : Stream(std::move(Stream)) {} + +Error InjectedSourceStream::reload(const PDBStringTable &Strings) { + BinaryStreamReader Reader(*Stream); + + if (auto EC = Reader.readObject(Header)) + return EC; + + if (Header->Version != + static_cast(PdbRaw_SrcHeaderBlockVer::SrcVerOne)) + return make_error(raw_error_code::corrupt_file, + "Invalid headerblock header version"); + + if (auto EC = InjectedSourceTable.load(Reader)) + return EC; + + for (const auto& Entry : *this) { + if (Entry.second.Size != sizeof(SrcHeaderBlockEntry)) + return make_error(raw_error_code::corrupt_file, + "Invalid headerbock entry size"); + if (Entry.second.Version != + static_cast(PdbRaw_SrcHeaderBlockVer::SrcVerOne)) + return make_error(raw_error_code::corrupt_file, + "Invalid headerbock entry version"); + + // Check that all name references are valid. 
+ auto Name = Strings.getStringForID(Entry.second.FileNI); + if (!Name) + return Name.takeError(); + auto ObjName = Strings.getStringForID(Entry.second.ObjNI); + if (!ObjName) + return ObjName.takeError(); + auto VName = Strings.getStringForID(Entry.second.VFileNI); + if (!VName) + return VName.takeError(); + } + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp new file mode 100644 index 0000000000000..7c7901b708cc8 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp @@ -0,0 +1,121 @@ +//==- NativeEnumInjectedSources.cpp - Native Injected Source Enumerator --*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h" + +#include "llvm/DebugInfo/PDB/Native/InfoStream.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" + +namespace llvm { +namespace pdb { + +namespace { + +Expected readStreamData(BinaryStream &Stream) { + uint32_t Offset = 0, DataLength = Stream.getLength(); + std::string Result; + Result.reserve(DataLength); + while (Offset < DataLength) { + ArrayRef Data; + if (auto E = Stream.readLongestContiguousChunk(Offset, Data)) + return std::move(E); + Offset += Data.size(); + Result += toStringRef(Data); + } + return Result; +} + +class NativeInjectedSource final : public IPDBInjectedSource { + const SrcHeaderBlockEntry &Entry; + const PDBStringTable &Strings; + PDBFile &File; + +public: + NativeInjectedSource(const SrcHeaderBlockEntry &Entry, + PDBFile &File, const PDBStringTable &Strings) + : Entry(Entry), 
Strings(Strings), File(File) {} + + uint32_t getCrc32() const override { return Entry.CRC; } + uint64_t getCodeByteSize() const override { return Entry.FileSize; } + + std::string getFileName() const override { + auto Name = Strings.getStringForID(Entry.FileNI); + assert(Name && "InjectedSourceStream should have rejected this"); + return *Name; + } + + std::string getObjectFileName() const override { + auto ObjName = Strings.getStringForID(Entry.ObjNI); + assert(ObjName && "InjectedSourceStream should have rejected this"); + return *ObjName; + } + + std::string getVirtualFileName() const override { + auto VName = Strings.getStringForID(Entry.VFileNI); + assert(VName && "InjectedSourceStream should have rejected this"); + return *VName; + } + + PDB_SourceCompression getCompression() const override { + return static_cast(Entry.Compression); + } + + std::string getCode() const override { + // Get name of stream storing the data. + auto VName = Strings.getStringForID(Entry.VFileNI); + assert(VName && "InjectedSourceStream should have rejected this"); + std::string StreamName = ("/src/files/" + *VName).str(); + + // Find stream with that name and read its data. + // FIXME: Consider validating (or even loading) all this in + // InjectedSourceStream so that no error can happen here. 
+ auto ExpectedFileStream = File.safelyCreateNamedStream(StreamName); + if (!ExpectedFileStream) { + consumeError(ExpectedFileStream.takeError()); + return "(failed to open data stream)"; + } + + auto Data = readStreamData(**ExpectedFileStream); + if (!Data) { + consumeError(Data.takeError()); + return "(failed to read data)"; + } + return *Data; + } +}; + +} // namespace + +NativeEnumInjectedSources::NativeEnumInjectedSources( + PDBFile &File, const InjectedSourceStream &IJS, + const PDBStringTable &Strings) + : File(File), Stream(IJS), Strings(Strings), Cur(Stream.begin()) {} + +uint32_t NativeEnumInjectedSources::getChildCount() const { + return static_cast(Stream.size()); +} + +std::unique_ptr +NativeEnumInjectedSources::getChildAtIndex(uint32_t N) const { + if (N >= getChildCount()) + return nullptr; + return make_unique(std::next(Stream.begin(), N)->second, + File, Strings); +} + +std::unique_ptr NativeEnumInjectedSources::getNext() { + if (Cur == Stream.end()) + return nullptr; + return make_unique((Cur++)->second, File, Strings); +} + +void NativeEnumInjectedSources::reset() { Cur = Stream.begin(); } + +} +} diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp index 5fb2ea3fec5db..8a49cb1c5963e 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp @@ -13,6 +13,7 @@ #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/IPDBSourceFile.h" #include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h" +#include "llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h" #include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h" #include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h" @@ -191,7 +192,17 @@ std::unique_ptr NativeSession::getEnumTables() const { std::unique_ptr NativeSession::getInjectedSources() const { - return nullptr; + auto ISS = 
Pdb->getInjectedSourceStream(); + if (!ISS) { + consumeError(ISS.takeError()); + return nullptr; + } + auto Strings = Pdb->getStringTable(); + if (!Strings) { + consumeError(Strings.takeError()); + return nullptr; + } + return make_unique(*Pdb, *ISS, *Strings); } std::unique_ptr diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp index f1255d5d67718..983031dfcb78a 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp @@ -14,6 +14,7 @@ #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" +#include "llvm/DebugInfo/PDB/Native/InjectedSourceStream.h" #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" #include "llvm/DebugInfo/PDB/Native/PublicsStream.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" @@ -365,16 +366,7 @@ Expected PDBFile::getPDBSymbolStream() { Expected PDBFile::getStringTable() { if (!Strings) { - auto IS = getPDBInfoStream(); - if (!IS) - return IS.takeError(); - - Expected ExpectedNSI = IS->getNamedStreamIndex("/names"); - if (!ExpectedNSI) - return ExpectedNSI.takeError(); - uint32_t NameStreamIndex = *ExpectedNSI; - - auto NS = safelyCreateIndexedStream(NameStreamIndex); + auto NS = safelyCreateNamedStream("/names"); if (!NS) return NS.takeError(); @@ -389,6 +381,24 @@ Expected PDBFile::getStringTable() { return *Strings; } +Expected PDBFile::getInjectedSourceStream() { + if (!InjectedSources) { + auto IJS = safelyCreateNamedStream("/src/headerblock"); + if (!IJS) + return IJS.takeError(); + + auto Strings = getStringTable(); + if (!Strings) + return Strings.takeError(); + + auto IJ = llvm::make_unique(std::move(*IJS)); + if (auto EC = IJ->reload(*Strings)) + return std::move(EC); + InjectedSources = std::move(IJ); + } + return *InjectedSources; +} + uint32_t PDBFile::getPointerSize() { auto DbiS = getPDBDbiStream(); if (!DbiS) @@ -457,6 +467,19 @@ bool 
PDBFile::hasPDBStringTable() { return true; } +bool PDBFile::hasPDBInjectedSourceStream() { + auto IS = getPDBInfoStream(); + if (!IS) + return false; + Expected ExpectedNSI = IS->getNamedStreamIndex("/src/headerblock"); + if (!ExpectedNSI) { + consumeError(ExpectedNSI.takeError()); + return false; + } + assert(*ExpectedNSI < getNumStreams()); + return true; +} + /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a /// stream with that index actually exists. If it does not, the return value /// will have an MSFError with code msf_error_code::no_stream. Else, the return @@ -468,3 +491,17 @@ PDBFile::safelyCreateIndexedStream(uint32_t StreamIndex) const { return make_error(raw_error_code::no_stream); return createIndexedStream(StreamIndex); } + +Expected> +PDBFile::safelyCreateNamedStream(StringRef Name) { + auto IS = getPDBInfoStream(); + if (!IS) + return IS.takeError(); + + Expected ExpectedNSI = IS->getNamedStreamIndex(Name); + if (!ExpectedNSI) + return ExpectedNSI.takeError(); + uint32_t NameStreamIndex = *ExpectedNSI; + + return safelyCreateIndexedStream(NameStreamIndex); +} diff --git a/llvm/test/tools/llvm-pdbutil/injected-sources-native.test b/llvm/test/tools/llvm-pdbutil/injected-sources-native.test new file mode 100644 index 0000000000000..374f14fc32102 --- /dev/null +++ b/llvm/test/tools/llvm-pdbutil/injected-sources-native.test @@ -0,0 +1,30 @@ +; This is identical to injected-sources.test, except that it uses the -native +; mode of pretty (and hence doesn't require diasdk and runs on all platforms). 
+ +; RUN: llvm-pdbutil pretty -native -injected-sources -injected-source-content \ +; RUN: %p/Inputs/InjectedSource.pdb | FileCheck %s +; RUN: llvm-pdbutil pretty -native -injected-sources -injected-source-content \ +; RUN: %p/Inputs/ClassLayoutTest.pdb | FileCheck --check-prefix=NEGATIVE %s + +; CHECK: ---INJECTED SOURCES--- +; CHECK: c.natvis (140 bytes): obj=, vname=c.natvis, crc=334478030, compression=None +; CHECK-NEXT: +; CHECK-NEXT: +; CHECK-NEXT: +; CHECK: a.natvis (140 bytes): obj=, vname=a.natvis, crc=334478030, compression=None +; CHECK-NEXT: +; CHECK-NEXT: +; CHECK-NEXT: +; CHECK: b.natvis (294 bytes): obj=, vname=b.natvis, crc=2059731902, compression=None +; CHECK-NEXT: +; CHECK-NEXT: +; CHECK-NEXT: +; CHECK-NEXT: Third test +; CHECK-NEXT: +; CHECK-NEXT: +; CHECK-NEXT: Fourth test +; CHECK-NEXT: +; CHECK-NEXT: + +; NEGATIVE: ---INJECTED SOURCES--- +; NEGATIVE-NEXT: There are no injected sources. diff --git a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp index a19257af38d65..e6e89d4bf2201 100644 --- a/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp +++ b/llvm/tools/llvm-pdbutil/llvm-pdbutil.cpp @@ -934,7 +934,7 @@ static std::string stringOr(std::string Str, std::string IfEmpty) { static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) { auto Sources = Session.getInjectedSources(); - if (0 == Sources->getChildCount()) { + if (!Sources || !Sources->getChildCount()) { Printer.printLine("There are no injected sources."); return; } @@ -1279,12 +1279,7 @@ static void dumpPretty(StringRef Path) { WithColor(Printer, PDB_ColorItem::SectionHeader).get() << "---INJECTED SOURCES---"; AutoIndent Indent1(Printer); - - if (ReaderType == PDB_ReaderType::Native) - Printer.printLine( - "Injected sources are not supported with the native reader."); - else - dumpInjectedSources(Printer, *Session); + dumpInjectedSources(Printer, *Session); } Printer.NewLine(); diff --git 
a/llvm/utils/gn/secondary/llvm/lib/DebugInfo/PDB/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/DebugInfo/PDB/BUILD.gn index 7b8adb3b49a2a..d38b2bb214cc0 100644 --- a/llvm/utils/gn/secondary/llvm/lib/DebugInfo/PDB/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/DebugInfo/PDB/BUILD.gn @@ -24,10 +24,12 @@ static_library("PDB") { "Native/HashTable.cpp", "Native/InfoStream.cpp", "Native/InfoStreamBuilder.cpp", + "Native/InjectedSourceStream.cpp", "Native/ModuleDebugStream.cpp", "Native/NamedStreamMap.cpp", "Native/NativeCompilandSymbol.cpp", "Native/NativeEnumGlobals.cpp", + "Native/NativeEnumInjectedSources.cpp", "Native/NativeEnumModules.cpp", "Native/NativeEnumTypes.cpp", "Native/NativeExeSymbol.cpp", From 35c96598b1246ea038677d7c4580f3c758ff1d93 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 18:05:29 +0000 Subject: [PATCH 263/451] AMDGPU/GlobalISel: Select flat loads Now that the patterns use the new PatFrag address space support, the only blocker to importing most load patterns is the addressing mode complex patterns. 
llvm-svn: 366237 --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 8 + llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 17 +- .../AMDGPU/AMDGPUInstructionSelector.cpp | 96 +- .../Target/AMDGPU/AMDGPUInstructionSelector.h | 9 + .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 3 + llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 19 + llvm/lib/Target/AMDGPU/SIInstrInfo.h | 6 + .../GlobalISel/inst-select-load-flat.mir | 1709 ++++++++++++++++- .../GlobalISel/inst-select-load-global.mir | 1657 ++++++++++++++++ 9 files changed, 3459 insertions(+), 65 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 6f725d609072b..1ccb90b2587ed 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -50,6 +50,14 @@ def gi_smrd_sgpr : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_flat_offset : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_flat_offset_signed : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + + class GISelSop2Pat < SDPatternOperator node, Instruction inst, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 14ae62968c65b..39016ed371935 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2937,18 +2937,11 @@ bool AMDGPUTargetLowering::SelectFlatOffset(bool IsSigned, SDValue N1 = Addr.getOperand(1); int64_t COffsetVal = cast(N1)->getSExtValue(); - if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { - if ((IsSigned && isInt<12>(COffsetVal)) || - (!IsSigned && isUInt<11>(COffsetVal))) { - Addr = N0; - OffsetVal = COffsetVal; - } - } else { - if ((IsSigned && isInt<13>(COffsetVal)) || - (!IsSigned && isUInt<12>(COffsetVal))) { - Addr = N0; - OffsetVal = COffsetVal; - } + const SIInstrInfo *TII = ST.getInstrInfo(); + if (TII->isLegalFLATOffset(COffsetVal, 
findMemSDNode(N)->getAddressSpace(), + IsSigned)) { + Addr = N0; + OffsetVal = COffsetVal; } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index aa634e881d870..f8f89593d0805 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1239,47 +1239,9 @@ bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef AddrInfo) const { } bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const { - MachineBasicBlock *BB = I.getParent(); - MachineFunction *MF = BB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - const DebugLoc &DL = I.getDebugLoc(); - Register DstReg = I.getOperand(0).getReg(); - Register PtrReg = I.getOperand(1).getReg(); - unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI); - unsigned Opcode; - - if (MRI.getType(I.getOperand(1).getReg()).getSizeInBits() == 32) { - LLVM_DEBUG(dbgs() << "Unhandled address space\n"); - return false; - } - - SmallVector AddrInfo; - - getAddrModeInfo(I, MRI, AddrInfo); - - switch (LoadSize) { - case 32: - Opcode = AMDGPU::FLAT_LOAD_DWORD; - break; - case 64: - Opcode = AMDGPU::FLAT_LOAD_DWORDX2; - break; - default: - LLVM_DEBUG(dbgs() << "Unhandled load size\n"); - return false; - } - - MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode)) - .add(I.getOperand(0)) - .addReg(PtrReg) - .addImm(0) // offset - .addImm(0) // glc - .addImm(0) // slc - .addImm(0); // dlc - - bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI); - I.eraseFromParent(); - return Ret; + // TODO: Can/should we insert m0 initialization here for DS instructions and + // call the normal selector? 
+ return false; } bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { @@ -1397,9 +1359,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I, return true; return selectImpl(I, CoverageInfo); case TargetOpcode::G_LOAD: - if (selectImpl(I, CoverageInfo)) - return true; - return selectG_LOAD(I); + return selectImpl(I, CoverageInfo); case TargetOpcode::G_SELECT: return selectG_SELECT(I); case TargetOpcode::G_STORE: @@ -1584,3 +1544,51 @@ AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const { [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); } }}; } + + template +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const { + MachineInstr *MI = Root.getParent(); + MachineBasicBlock *MBB = MI->getParent(); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + InstructionSelector::ComplexRendererFns Default = {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Root.getReg()); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // offset + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc + }}; + + if (!STI.hasFlatInstOffsets()) + return Default; + + const MachineInstr *OpDef = MRI.getVRegDef(Root.getReg()); + if (!OpDef || OpDef->getOpcode() != AMDGPU::G_GEP) + return Default; + + Optional Offset = + getConstantVRegVal(OpDef->getOperand(2).getReg(), MRI); + if (!Offset.hasValue()) + return Default; + + unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace(); + if (!TII.isLegalFLATOffset(Offset.getValue(), AddrSpace, Signed)) + return Default; + + Register BasePtr = OpDef->getOperand(1).getReg(); + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(BasePtr); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset.getValue()); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // slc + }}; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const { + return 
selectFlatOffsetImpl(Root); +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const { + return selectFlatOffsetImpl(Root); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 1027a0b5683d3..e30d745f5cb64 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -119,6 +119,15 @@ class AMDGPUInstructionSelector : public InstructionSelector { InstructionSelector::ComplexRendererFns selectSmrdSgpr(MachineOperand &Root) const; + template + InstructionSelector::ComplexRendererFns + selectFlatOffsetImpl(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectFlatOffset(MachineOperand &Root) const; + + InstructionSelector::ComplexRendererFns + selectFlatOffsetSigned(MachineOperand &Root) const; + const SIInstrInfo &TII; const SIRegisterInfo &TRI; const AMDGPURegisterBankInfo &RBI; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 3cf4fbc752493..670f6225fbf78 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -497,6 +497,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .custom(); } + // TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we + // handle some operations by just promoting the register during + // selection. There are also d16 loads on GFX9+ which preserve the high bits. 
getActionDefinitionsBuilder({G_LOAD, G_STORE}) .narrowScalarIf([](const LegalityQuery &Query) { unsigned Size = Query.Types[0].getSizeInBits(); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 34741850f82fb..ba8ed6993a560 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6118,6 +6118,25 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const { return RCID == AMDGPU::SReg_128RegClassID; } +bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, + bool Signed) const { + // TODO: Should 0 be special cased? + if (!ST.hasFlatInstOffsets()) + return false; + + if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS) + return false; + + if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) { + return (Signed && isInt<12>(Offset)) || + (!Signed && isUInt<11>(Offset)); + } + + return (Signed && isInt<13>(Offset)) || + (!Signed && isUInt<12>(Offset)); +} + + // This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td enum SIEncodingFamily { SI = 0, diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 1f3c659f9d9ca..3ff35da0b9630 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -970,6 +970,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { return isUInt<12>(Imm); } + /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT + /// encoded instruction. If \p Signed, this is for an instruction that + /// interprets the offset as signed. + bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, + bool Signed) const; + /// \brief Return a target-specific opcode if Opcode is a pseudo instruction. /// Return -1 if the target-specific opcode for the pseudo instruction does /// not exist. If Opcode is not a pseudo instruction, this is identity. 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir index 8069dff2634f3..f579c3ce28767 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir @@ -1,26 +1,1717 @@ -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s + + +--- + +name: load_flat_s32_from_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_4 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX8-LABEL: name: load_flat_s32_from_4 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr 
:: (load 4) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-LABEL: name: load_flat_s32_from_4 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX10-LABEL: name: load_flat_s32_from_4 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 0) + $vgpr0 = COPY %1 + +... + +--- + +name: load_flat_s32_from_2 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_2 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX8-LABEL: name: load_flat_s32_from_2 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX9-LABEL: name: load_flat_s32_from_2 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX10-LABEL: name: load_flat_s32_from_2 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: 
$vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 0) + $vgpr0 = COPY %1 + +... + +--- + +name: load_flat_s32_from_1 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_flat_v2s32 +legalized: true +regBankSelected: true + + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v2s32 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-LABEL: name: load_flat_v2s32 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX9-LABEL: name: load_flat_v2s32 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX10-LABEL: name: load_flat_v2s32 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8) + ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_flat_v3s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v3s32 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX8-LABEL: name: load_flat_v3s32 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX9-LABEL: name: load_flat_v3s32 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX10-LABEL: name: load_flat_v3s32 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4) + ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 0) + $vgpr0_vgpr1_vgpr2 = COPY %1 + +... 
+ +--- + +name: load_flat_v4s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v4s32 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX8-LABEL: name: load_flat_v4s32 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX9-LABEL: name: load_flat_v4s32 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX10-LABEL: name: load_flat_v4s32 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +--- + +name: load_flat_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s64 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX8-LABEL: name: load_flat_s64 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-LABEL: name: load_flat_s64 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX10-LABEL: name: load_flat_s64 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_flat_v2s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v2s64 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX8-LABEL: name: load_flat_v2s64 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-LABEL: name: load_flat_v2s64 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX10-LABEL: name: load_flat_v2s64 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +--- + +name: load_flat_v2p1 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v2p1 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX8-LABEL: name: load_flat_v2p1 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX9-LABEL: name: load_flat_v2p1 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX10-LABEL: name: load_flat_v2p1 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +--- + +name: load_flat_s96 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s96 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX8-LABEL: name: load_flat_s96 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX9-LABEL: name: load_flat_s96 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX10-LABEL: name: load_flat_s96 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 0) + $vgpr0_vgpr1_vgpr2 = COPY %1 + +... 
+ +--- + +name: load_flat_s128 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s128 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX8-LABEL: name: load_flat_s128 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-LABEL: name: load_flat_s128 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX10-LABEL: name: load_flat_s128 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +--- + +name: load_flat_p3_from_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_p3_from_4 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4) + ; GFX7: $vgpr0 = COPY [[LOAD]](p3) + ; GFX8-LABEL: name: load_flat_p3_from_4 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4) + ; GFX8: $vgpr0 = COPY [[LOAD]](p3) + ; GFX9-LABEL: name: load_flat_p3_from_4 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4) + ; GFX9: $vgpr0 = COPY [[LOAD]](p3) + ; GFX10-LABEL: name: load_flat_p3_from_4 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4) + ; GFX10: $vgpr0 = COPY [[LOAD]](p3) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 0) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_flat_p1_from_8 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_p1_from_8 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX8-LABEL: name: load_flat_p1_from_8 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX9-LABEL: name: load_flat_p1_from_8 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX10-LABEL: name: load_flat_p1_from_8 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_flat_p999_from_8 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_p999_from_8 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX8-LABEL: name: load_flat_p999_from_8 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX9-LABEL: name: load_flat_p999_from_8 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX10-LABEL: name: load_flat_p999_from_8 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_flat_v2p3 +legalized: true +regBankSelected: true + + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v2p3 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX8-LABEL: name: load_flat_v2p3 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX9-LABEL: name: load_flat_v2p3 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX10-LABEL: name: load_flat_v2p3 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_flat_v2s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v2s16 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4) + ; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX8-LABEL: name: load_flat_v2s16 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4) + ; GFX8: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-LABEL: name: load_flat_v2s16 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4) + ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX10-LABEL: name: load_flat_v2s16 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4) + ; GFX10: $vgpr0 = COPY [[LOAD]](<2 x s16>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 0) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_flat_v4s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v4s16 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX8-LABEL: name: load_flat_v4s16 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-LABEL: name: load_flat_v4s16 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX10-LABEL: name: load_flat_v4s16 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_flat_v6s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v6s16 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX8-LABEL: name: load_flat_v6s16 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX9-LABEL: name: load_flat_v6s16 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX10-LABEL: name: load_flat_v6s16 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4) + ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 0) + $vgpr0_vgpr1_vgpr2 = COPY %1 + +... 
+ +--- + +name: load_flat_v8s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_v8s16 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX8-LABEL: name: load_flat_v8s16 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX9-LABEL: name: load_flat_v8s16 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX10-LABEL: name: load_flat_v8s16 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 0) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +################################################################################ +### Stress addressing modes +################################################################################ + +--- + +name: load_flat_s32_from_1_gep_2047 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_2047 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2047 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, 
[[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2047 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_2047 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 
[[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 2047 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... + +--- + +name: load_flat_s32_from_1_gep_2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_2048 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit 
$flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_2048 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_2048 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_2048 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 2048 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_flat_s32_from_1_gep_m2047 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m2047 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2047 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 
= COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2047 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2047 + ; GFX10: liveins: $vgpr0_vgpr1 + ; 
GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -2047 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_flat_s32_from_1_gep_m2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m2048 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2048 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 
= COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2048 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2048 + ; GFX10: liveins: $vgpr0_vgpr1 + ; 
GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -2048 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_flat_s32_from_1_gep_4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_4095 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4095 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY 
[[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4095 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_4095 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 4095 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... + +--- + +name: load_flat_s32_from_1_gep_4096 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_4096 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_4096 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY 
$vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_4096 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 
[[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_4096 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 4096 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_flat_s32_from_1_gep_m4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m4095 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4095 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 
= COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4095 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4095 + ; GFX10: liveins: $vgpr0_vgpr1 + ; 
GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -4095 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_flat_s32_from_1_gep_m4096 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m4096 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4096 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 
= COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4096 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4096 + ; GFX10: liveins: $vgpr0_vgpr1 + ; 
GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -4096 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_flat_s32_from_1_gep_8191 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_8191 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8191 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY 
[[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8191 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8191 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = 
IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 8191 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 ---- | - define amdgpu_kernel void @global_addrspace(i32 addrspace(1)* %global0) { ret void } ... 
+ --- -name: global_addrspace +name: load_flat_s32_from_1_gep_8192 legalized: true regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_8192 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_8192 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_8192 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_8192 + ; GFX10: liveins: 
$vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 8192 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 -# GCN: global_addrspace -# GCN: [[PTR:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 -# GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0, 0, 0 +... 
+ +--- + +name: load_flat_s32_from_1_gep_m8191 +legalized: true +regBankSelected: true +tracksRegLiveness: true body: | bb.0: liveins: $vgpr0_vgpr1 + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m8191 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8191 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY 
[[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8191 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8191 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: 
$vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] %0:vgpr(p1) = COPY $vgpr0_vgpr1 - %1:vgpr(s32) = G_LOAD %0 :: (load 4 from %ir.global0) + %1:vgpr(s64) = G_CONSTANT i64 -8191 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_flat_s32_from_1_gep_m8192 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_flat_s32_from_1_gep_m8192 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8192 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 
= COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8192 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8192 + ; GFX10: liveins: $vgpr0_vgpr1 + ; 
GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1) + ; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -8192 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 0) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_atomic_flat_s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_flat_s32 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4) + ; GFX7: $vgpr0 = COPY [[LOAD]](s32) + ; GFX8-LABEL: name: load_atomic_flat_s32 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4) + ; GFX8: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-LABEL: name: load_atomic_flat_s32 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4) + ; GFX9: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-LABEL: name: load_atomic_flat_s32 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4) + ; GFX10: $vgpr0 = COPY [[LOAD]](s32) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load monotonic 4, align 4, addrspace 0) $vgpr0 = COPY %1 ... 
+ --- + +name: load_atomic_flat_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true + + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_flat_s64 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX8-LABEL: name: load_atomic_flat_s64 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-LABEL: name: load_atomic_flat_s64 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX10-LABEL: name: load_atomic_flat_s64 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_LOAD %0 :: (load monotonic 8, align 8, addrspace 0) + $vgpr0_vgpr1 = COPY %1 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir new file mode 100644 index 0000000000000..df86d18c3b335 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir @@ -0,0 +1,1657 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s + +# FIXME: global with MUBUF + +--- + +name: load_global_s32_from_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_4 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX8-LABEL: name: load_global_s32_from_4 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]] + ; GFX9-LABEL: name: load_global_s32_from_4 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = 
GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-LABEL: name: load_global_s32_from_4 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + $vgpr0 = COPY %1 + +... + +--- + +name: load_global_s32_from_2 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_2 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX8-LABEL: name: load_global_s32_from_2 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_USHORT]] + ; GFX9-LABEL: name: load_global_s32_from_2 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] + ; GFX10-LABEL: name: load_global_s32_from_2 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 2, 
addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 1) + $vgpr0 = COPY %1 + +... + +--- + +name: load_global_s32_from_1 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_global_v2s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v2s32 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX8-LABEL: name: load_global_v2s32 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]] + ; GFX9-LABEL: name: load_global_v2s32 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX10-LABEL: name: load_global_v2s32 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1) + ; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_global_v3s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v3s32 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX8-LABEL: name: load_global_v3s32 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]] + ; GFX9-LABEL: name: load_global_v3s32 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]] + ; GFX10-LABEL: name: load_global_v3s32 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1) + ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1 + +... 
+ +--- + +name: load_global_v4s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v4s32 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX8-LABEL: name: load_global_v4s32 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]] + ; GFX9-LABEL: name: load_global_v4s32 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + ; GFX10-LABEL: name: load_global_v4s32 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +--- + +name: load_global_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s64 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX8-LABEL: name: load_global_s64 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-LABEL: name: load_global_s64 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX10-LABEL: name: load_global_s64 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_global_v2s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v2s64 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX8-LABEL: name: load_global_v2s64 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-LABEL: name: load_global_v2s64 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX10-LABEL: name: load_global_v2s64 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +--- + +name: load_global_v2p1 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v2p1 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX8-LABEL: name: load_global_v2p1 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX9-LABEL: name: load_global_v2p1 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX10-LABEL: name: load_global_v2p1 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +--- + +name: load_global_s96 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s96 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX8-LABEL: name: load_global_s96 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX9-LABEL: name: load_global_s96 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX10-LABEL: name: load_global_s96 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1 + +... 
+ +--- + +name: load_global_s128 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s128 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX8-LABEL: name: load_global_s128 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-LABEL: name: load_global_s128 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX10-LABEL: name: load_global_s128 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +--- + +name: load_global_p3_from_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_p3_from_4 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX7: $vgpr0 = COPY [[LOAD]](p3) + ; GFX8-LABEL: name: load_global_p3_from_4 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX8: $vgpr0 = COPY [[LOAD]](p3) + ; GFX9-LABEL: name: load_global_p3_from_4 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[LOAD]](p3) + ; GFX10-LABEL: name: load_global_p3_from_4 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX10: $vgpr0 = COPY [[LOAD]](p3) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_global_p1_from_8 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_p1_from_8 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX8-LABEL: name: load_global_p1_from_8 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX9-LABEL: name: load_global_p1_from_8 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX10-LABEL: name: load_global_p1_from_8 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_global_p999_from_8 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_p999_from_8 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX8-LABEL: name: load_global_p999_from_8 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX9-LABEL: name: load_global_p999_from_8 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX10-LABEL: name: load_global_p999_from_8 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_global_v2p3 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v2p3 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX8-LABEL: name: load_global_v2p3 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX9-LABEL: name: load_global_v2p3 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX10-LABEL: name: load_global_v2p3 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_global_v2s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v2s16 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX8-LABEL: name: load_global_v2s16 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX8: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-LABEL: name: load_global_v2s16 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX10-LABEL: name: load_global_v2s16 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; GFX10: $vgpr0 = COPY [[LOAD]](<2 x s16>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 1) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_global_v4s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v4s16 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX8-LABEL: name: load_global_v4s16 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-LABEL: name: load_global_v4s16 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX10-LABEL: name: load_global_v4s16 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_global_v6s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v6s16 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX8-LABEL: name: load_global_v6s16 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX9-LABEL: name: load_global_v6s16 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + ; GFX10-LABEL: name: load_global_v6s16 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_96(<6 x s16>) = G_LOAD [[COPY]](p1) :: (load 12, align 4, addrspace 1) + ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<6 x s16>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2 = COPY %1 + +... 
+ +--- + +name: load_global_v8s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_v8s16 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX8-LABEL: name: load_global_v8s16 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX9-LABEL: name: load_global_v8s16 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX10-LABEL: name: load_global_v8s16 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1) + ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 1) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +################################################################################ +### Stress addressing modes +################################################################################ + +--- + +name: load_global_s32_from_1_gep_2047 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_2047 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_2047 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], 
%subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_2047 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_2047 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 2047 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_global_s32_from_1_gep_2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_2048 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_2048 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_2048 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_2048 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 
0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 2048 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... + +--- + +name: load_global_s32_from_1_gep_m2047 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2047 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2047 + ; GFX8: liveins: 
$vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2047 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2047 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -2047 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD 
%2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... + +--- + +name: load_global_s32_from_1_gep_m2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_m2048 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_m2048 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: 
[[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_m2048 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_m2048 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -2048 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_global_s32_from_1_gep_4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_4095 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_4095 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_4095 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_4095 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 
0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 4095 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... + +--- + +name: load_global_s32_from_1_gep_4096 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_4096 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_4096 + ; GFX8: liveins: 
$vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_4096 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec 
+ ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_4096 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 4096 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_global_s32_from_1_gep_m4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4095 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_m4095 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4095 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4095 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed 
[[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -4095 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... + +--- + +name: load_global_s32_from_1_gep_m4096 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_m4096 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: 
load_global_s32_from_1_gep_m4096 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_m4096 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_m4096 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], 
%subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -4096 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_global_s32_from_1_gep_8191 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_8191 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_8191 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_8191 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_8191 + ; 
GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 8191 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_global_s32_from_1_gep_8192 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_8192 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_8192 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_8192 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: load_global_s32_from_1_gep_8192 + ; 
GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 8192 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_global_s32_from_1_gep_m8191 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8191 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8191 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8191 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: 
load_global_s32_from_1_gep_m8191 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -8191 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_global_s32_from_1_gep_m8192 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_global_s32_from_1_gep_m8192 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX8-LABEL: name: load_global_s32_from_1_gep_m8192 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1) + ; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]] + ; GFX9-LABEL: name: load_global_s32_from_1_gep_m8192 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + ; GFX10-LABEL: name: 
load_global_s32_from_1_gep_m8192 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]] + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_CONSTANT i64 -8192 + %2:vgpr(p1) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 1) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_atomic_global_s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_global_s32 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1) + ; GFX7: $vgpr0 = COPY [[LOAD]](s32) + ; GFX8-LABEL: name: load_atomic_global_s32 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1) + ; GFX8: $vgpr0 = COPY [[LOAD]](s32) + ; GFX9-LABEL: name: load_atomic_global_s32 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[LOAD]](s32) + ; GFX10-LABEL: name: load_atomic_global_s32 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vgpr_32(s32) = G_LOAD [[COPY]](p1) :: (load monotonic 4, addrspace 1) + ; GFX10: $vgpr0 = COPY [[LOAD]](s32) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = G_LOAD %0 :: (load monotonic 4, align 4, addrspace 1) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_atomic_global_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true + + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX7-LABEL: name: load_atomic_global_s64 + ; GFX7: liveins: $vgpr0_vgpr1 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX8-LABEL: name: load_atomic_global_s64 + ; GFX8: liveins: $vgpr0_vgpr1 + ; GFX8: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX8: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1) + ; GFX8: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-LABEL: name: load_atomic_global_s64 + ; GFX9: liveins: $vgpr0_vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX10-LABEL: name: load_atomic_global_s64 + ; GFX10: liveins: $vgpr0_vgpr1 + ; GFX10: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1 + ; GFX10: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p1) :: (load monotonic 8, addrspace 1) + ; GFX10: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = G_LOAD %0 :: (load monotonic 8, align 8, addrspace 1) + $vgpr0_vgpr1 = COPY %1 + +... From fe44a531e0e2aba07213442b1930369316a112b0 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Tue, 16 Jul 2019 18:17:33 +0000 Subject: [PATCH 264/451] [COFF] Implement /safeseh:no and check @feat.00 flags by default Summary: Fixes PR41828. Before this, LLD always emitted SafeSEH chunks and defined __safe_se_handler_table & size. Now, /safeseh:no leaves those undefined. Additionally, we were checking for the safeseh @feat.00 flag in two places: once to emit errors, and once during safeseh table construction. The error was set up to be off by default, but safeseh is supposed to be on by default. 
I combined the two checks, so now LLD emits an error if an input object lacks @feat.00 and safeseh is enabled. This caused the majority of 32-bit LLD tests to fail, since many test input object files lack @feat.00 symbols. I explicitly added -safeseh:no to those tests to preserve behavior. Finally, LLD no longer sets IMAGE_DLL_CHARACTERISTICS_NO_SEH if any input file wasn't compiled for safeseh. Reviewers: mstorsjo, ruiu, thakis Reviewed By: ruiu, thakis Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D63570 llvm-svn: 366238 --- lld/COFF/Config.h | 1 + lld/COFF/Driver.cpp | 14 ++---- lld/COFF/Writer.cpp | 16 ++---- lld/test/COFF/allow-unknown-debug-info.test | 2 +- lld/test/COFF/constant.test | 4 +- lld/test/COFF/def-export-stdcall.s | 4 +- lld/test/COFF/delayimports32.test | 2 +- lld/test/COFF/dllexport.s | 2 +- lld/test/COFF/entry-drectve.test | 2 +- lld/test/COFF/entry-inference332.test | 4 +- lld/test/COFF/exclude-all.s | 2 +- lld/test/COFF/export-all.s | 6 +-- lld/test/COFF/export-stdcall.s | 2 +- lld/test/COFF/export32.test | 18 +++---- lld/test/COFF/fixed.test | 8 +-- lld/test/COFF/gfids-relocations32.s | 2 +- lld/test/COFF/hello32.test | 5 +- lld/test/COFF/largeaddressaware.test | 2 +- lld/test/COFF/loadcfg32.test | 2 +- lld/test/COFF/locally-imported32.test | 2 +- lld/test/COFF/machine.test | 8 +-- lld/test/COFF/no-ipi-stream.test | 2 +- lld/test/COFF/order-i386.test | 4 +- lld/test/COFF/pdb-debug-f.s | 2 +- lld/test/COFF/pdb-lib.s | 2 +- lld/test/COFF/pdb-safeseh.yaml | 2 +- lld/test/COFF/pdb-unknown-subsection.s | 2 +- lld/test/COFF/reloc-x86.test | 2 +- lld/test/COFF/safeseh-no.s | 56 +++++++++++++++++++++ lld/test/COFF/subsystem-drectve.test | 2 +- lld/test/COFF/subsystem-inference32.test | 8 +-- lld/test/COFF/tls32.test | 2 +- 32 files changed, 118 insertions(+), 74 deletions(-) create mode 100644 lld/test/COFF/safeseh-no.s diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index e378b6fc72484..1b0e240427103 
100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -132,6 +132,7 @@ struct Configuration { GuardCFLevel guardCF = GuardCFLevel::Off; // Used for SafeSEH. + bool safeSEH = false; Symbol *sehTable = nullptr; Symbol *sehCount = nullptr; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 6cfd83ab96b6c..d7af50b9318fc 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -1556,6 +1556,11 @@ void LinkerDriver::link(ArrayRef argsArr) { } config->wordsize = config->is64() ? 8 : 4; + // Handle /safeseh, x86 only, on by default, except for mingw. + if (config->machine == I386 && + args.hasFlag(OPT_safeseh, OPT_safeseh_no, !config->mingw)) + config->safeSEH = true; + // Handle /functionpadmin for (auto *arg : args.filtered(OPT_functionpadmin, OPT_functionpadmin_opt)) parseFunctionPadMin(arg, config->machine); @@ -1795,15 +1800,6 @@ void LinkerDriver::link(ArrayRef argsArr) { if (errorCount()) return; - // Handle /safeseh. - if (args.hasFlag(OPT_safeseh, OPT_safeseh_no, false)) { - for (ObjFile *file : ObjFile::instances) - if (!file->hasSafeSEH()) - error("/safeseh: " + file->getName() + " is not compatible with SEH"); - if (errorCount()) - return; - } - if (config->mingw) { // In MinGW, all symbols are automatically exported if no symbols // are chosen to be exported. diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index e4b35a5f8beb9..36ef87de4263e 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -917,7 +917,7 @@ void Writer::createMiscChunks() { } // Create SEH table. x86-only. - if (config->machine == I386) + if (config->safeSEH) createSEHTable(); // Create /guard:cf tables if requested. @@ -1428,23 +1428,15 @@ void Writer::openFile(StringRef path) { } void Writer::createSEHTable() { - // Set the no SEH characteristic on x86 binaries unless we find exception - // handlers. 
- setNoSEHCharacteristic = true; - SymbolRVASet handlers; for (ObjFile *file : ObjFile::instances) { - // FIXME: We should error here instead of earlier unless /safeseh:no was - // passed. if (!file->hasSafeSEH()) - return; - + error("/safeseh: " + file->getName() + " is not compatible with SEH"); markSymbolsForRVATable(file, file->getSXDataChunks(), handlers); } - // Remove the "no SEH" characteristic if all object files were built with - // safeseh, we found some exception handlers, and there is a load config in - // the object. + // Set the "no SEH" characteristic if there really were no handlers, or if + // there is no load config object to point to the table of handlers. setNoSEHCharacteristic = handlers.empty() || !symtab->findUnderscore("_load_config_used"); diff --git a/lld/test/COFF/allow-unknown-debug-info.test b/lld/test/COFF/allow-unknown-debug-info.test index c45b98e2ac292..1cc9e9e0b9e27 100644 --- a/lld/test/COFF/allow-unknown-debug-info.test +++ b/lld/test/COFF/allow-unknown-debug-info.test @@ -1,5 +1,5 @@ # RUN: yaml2obj %s > %t.obj -# RUN: lld-link /dll /noentry /debug %t.obj 2>&1 | FileCheck %s +# RUN: lld-link -safeseh:no /dll /noentry /debug %t.obj 2>&1 | FileCheck %s # CHECK: ignoring section .debug$S with unrecognized magic 0x1 diff --git a/lld/test/COFF/constant.test b/lld/test/COFF/constant.test index 02d6b3e2ccae7..dc97f1cb9a38f 100644 --- a/lld/test/COFF/constant.test +++ b/lld/test/COFF/constant.test @@ -2,5 +2,5 @@ REQUIRES: x86 RUN: mkdir -p %t RUN: llvm-mc -triple i686-unknown-windows-msvc -filetype obj -o %t/import.o %S/Inputs/constant-import.s RUN: llc -mtriple i686-unknown-windows-msvc -filetype obj -o %t/export.o %S/Inputs/constant-export.ll -RUN: lld-link -machine:x86 -dll -out:%t/export.dll %t/export.o -entry:__CFConstantStringClassReference -RUN: lld-link -machine:x86 -dll -out:%t/import.dll %t/import.o %t/export.lib +RUN: lld-link -safeseh:no -machine:x86 -dll -out:%t/export.dll %t/export.o 
-entry:__CFConstantStringClassReference +RUN: lld-link -safeseh:no -machine:x86 -dll -out:%t/import.dll %t/import.o %t/export.lib diff --git a/lld/test/COFF/def-export-stdcall.s b/lld/test/COFF/def-export-stdcall.s index 55709f95843c4..f015e205c74a3 100644 --- a/lld/test/COFF/def-export-stdcall.s +++ b/lld/test/COFF/def-export-stdcall.s @@ -1,7 +1,7 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=i686-windows-msvc %s -o %t.obj # RUN: echo -e "LIBRARY foo\nEXPORTS\n stdcall\n fastcall\n vectorcall\n _underscored" > %t.def -# RUN: lld-link -entry:dllmain -dll -def:%t.def %t.obj -out:%t.dll -implib:%t.lib +# RUN: lld-link -safeseh:no -entry:dllmain -dll -def:%t.def %t.obj -out:%t.dll -implib:%t.lib # RUN: llvm-readobj %t.lib | FileCheck -check-prefix UNDECORATED-IMPLIB %s # RUN: llvm-readobj --coff-exports %t.dll | FileCheck -check-prefix UNDECORATED-EXPORTS %s @@ -25,7 +25,7 @@ # RUN: echo -e "LIBRARY foo\nEXPORTS\n _stdcall@8\n @fastcall@8\n vectorcall@@8" > %t.def -# RUN: lld-link -entry:dllmain -dll -def:%t.def %t.obj -out:%t.dll -implib:%t.lib +# RUN: lld-link -safeseh:no -entry:dllmain -dll -def:%t.def %t.obj -out:%t.dll -implib:%t.lib # RUN: llvm-readobj %t.lib | FileCheck -check-prefix DECORATED-IMPLIB %s # RUN: llvm-readobj --coff-exports %t.dll | FileCheck -check-prefix DECORATED-EXPORTS %s diff --git a/lld/test/COFF/delayimports32.test b/lld/test/COFF/delayimports32.test index b684d4105e97b..0fc90200c1bfa 100644 --- a/lld/test/COFF/delayimports32.test +++ b/lld/test/COFF/delayimports32.test @@ -1,6 +1,6 @@ # REQUIRES: x86 # RUN: yaml2obj < %p/Inputs/hello32.yaml > %t.obj -# RUN: lld-link %t.obj %p/Inputs/std32.lib /subsystem:console \ +# RUN: lld-link -safeseh:no %t.obj %p/Inputs/std32.lib /subsystem:console \ # RUN: /entry:main@0 /alternatename:___delayLoadHelper2@8=_main@0 \ # RUN: /delayload:std32.dll /out:%t.exe # RUN: llvm-readobj --coff-imports %t.exe | FileCheck -check-prefix=IMPORT %s diff --git a/lld/test/COFF/dllexport.s 
b/lld/test/COFF/dllexport.s index b5b7080d16d23..a238b70ce1b4f 100644 --- a/lld/test/COFF/dllexport.s +++ b/lld/test/COFF/dllexport.s @@ -1,7 +1,7 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=i686-windows-msvc %s -o %t.obj -# RUN: lld-link -entry:dllmain -dll %t.obj -out:%t.dll -implib:%t.lib +# RUN: lld-link -safeseh:no -entry:dllmain -dll %t.obj -out:%t.dll -implib:%t.lib # RUN: llvm-readobj %t.lib | FileCheck -check-prefix DECORATED-IMPLIB %s # RUN: llvm-readobj --coff-exports %t.dll | FileCheck -check-prefix DECORATED-EXPORTS %s diff --git a/lld/test/COFF/entry-drectve.test b/lld/test/COFF/entry-drectve.test index e51e7cb201f3f..0848b0a04aeee 100644 --- a/lld/test/COFF/entry-drectve.test +++ b/lld/test/COFF/entry-drectve.test @@ -1,5 +1,5 @@ # RUN: yaml2obj < %s > %t.obj -# RUN: lld-link /subsystem:console /out:%t.exe %t.obj +# RUN: lld-link -safeseh:no /subsystem:console /out:%t.exe %t.obj --- !COFF header: diff --git a/lld/test/COFF/entry-inference332.test b/lld/test/COFF/entry-inference332.test index 75c557af47e86..ddeaf280a9ec2 100644 --- a/lld/test/COFF/entry-inference332.test +++ b/lld/test/COFF/entry-inference332.test @@ -1,9 +1,9 @@ # RUN: sed -e s/ENTRYNAME/_mainCRTStartup/ %s | yaml2obj > %t.obj -# RUN: lld-link /subsystem:console /out:%t.exe %t.obj /verbose /nodefaultlib > %t.log 2>&1 +# RUN: lld-link -safeseh:no /subsystem:console /out:%t.exe %t.obj /verbose /nodefaultlib > %t.log 2>&1 # RUN: FileCheck %s < %t.log # RUN: sed -e s/ENTRYNAME/?mainCRTStartup@@YAHXZ/ %s | yaml2obj > %t.obj -# RUN: lld-link /subsystem:console /out:%t.exe %t.obj /verbose /nodefaultlib > %t.log 2>&1 +# RUN: lld-link -safeseh:no /subsystem:console /out:%t.exe %t.obj /verbose /nodefaultlib > %t.log 2>&1 # RUN: FileCheck %s < %t.log # CHECK: Entry name inferred: _mainCRTStartup diff --git a/lld/test/COFF/exclude-all.s b/lld/test/COFF/exclude-all.s index e2c23368dfe63..41caece2dd94a 100644 --- a/lld/test/COFF/exclude-all.s +++ b/lld/test/COFF/exclude-all.s @@ -25,7 
+25,7 @@ _dataSym: # RUN: yaml2obj < %p/Inputs/export.yaml > %t.obj # -# RUN: lld-link -out:%t.dll -dll %t.obj -lldmingw -exclude-all-symbols -output-def:%t.def +# RUN: lld-link -safeseh:no -out:%t.dll -dll %t.obj -lldmingw -exclude-all-symbols -output-def:%t.def # RUN: llvm-readobj --coff-exports %t.dll | FileCheck -check-prefix=DLLEXPORT %s # DLLEXPORT: Name: exportfn3 diff --git a/lld/test/COFF/export-all.s b/lld/test/COFF/export-all.s index 77893193623e1..6292ed33e3583 100644 --- a/lld/test/COFF/export-all.s +++ b/lld/test/COFF/export-all.s @@ -42,7 +42,7 @@ __imp__unexported: # RUN: yaml2obj < %p/Inputs/export.yaml > %t.obj # -# RUN: lld-link -out:%t.dll -dll %t.obj -lldmingw -export-all-symbols -output-def:%t.def +# RUN: lld-link -safeseh:no -out:%t.dll -dll %t.obj -lldmingw -export-all-symbols -output-def:%t.def # RUN: llvm-readobj --coff-exports %t.dll | FileCheck -check-prefix=CHECK2 %s # RUN: cat %t.def | FileCheck -check-prefix=CHECK2-DEF %s @@ -69,7 +69,7 @@ __imp__unexported: # RUN: llvm-ar rcs %T/libs/libmingwex.a %T/libs/mingwfunc.o # RUN: echo -e ".global crtfunc\n.text\ncrtfunc:\nret\n" > %T/libs/crtfunc.s # RUN: llvm-mc -triple=x86_64-windows-gnu %T/libs/crtfunc.s -filetype=obj -o %T/libs/crt2.o -# RUN: lld-link -out:%t.dll -dll -entry:DllMainCRTStartup %t.main.obj -lldmingw %T/libs/crt2.o %T/libs/libmingwex.a -output-def:%t.def +# RUN: lld-link -safeseh:no -out:%t.dll -dll -entry:DllMainCRTStartup %t.main.obj -lldmingw %T/libs/crt2.o %T/libs/libmingwex.a -output-def:%t.def # RUN: echo "EOF" >> %t.def # RUN: cat %t.def | FileCheck -check-prefix=CHECK-EXCLUDE %s @@ -80,7 +80,7 @@ __imp__unexported: # Test that libraries included with -wholearchive: are autoexported, even if # they are in a library that otherwise normally would be excluded. 
-# RUN: lld-link -out:%t.dll -dll -entry:DllMainCRTStartup %t.main.obj -lldmingw %T/libs/crt2.o -wholearchive:%T/libs/libmingwex.a -output-def:%t.def +# RUN: lld-link -safeseh:no -out:%t.dll -dll -entry:DllMainCRTStartup %t.main.obj -lldmingw %T/libs/crt2.o -wholearchive:%T/libs/libmingwex.a -output-def:%t.def # RUN: echo "EOF" >> %t.def # RUN: cat %t.def | FileCheck -check-prefix=CHECK-WHOLEARCHIVE %s diff --git a/lld/test/COFF/export-stdcall.s b/lld/test/COFF/export-stdcall.s index 6ed3e88032435..aa39eaecf6b93 100644 --- a/lld/test/COFF/export-stdcall.s +++ b/lld/test/COFF/export-stdcall.s @@ -1,6 +1,6 @@ # REQUIRES: x86 # RUN: llvm-mc -triple i686-windows-msvc %s -o %t.obj -filetype=obj -# RUN: lld-link %t.obj -out:%t.dll -dll -nodefaultlib -noentry -export:foo_std=bar_std -export:foo_fast=bar_fast +# RUN: lld-link -safeseh:no %t.obj -out:%t.dll -dll -nodefaultlib -noentry -export:foo_std=bar_std -export:foo_fast=bar_fast # RUN: llvm-nm %t.lib | FileCheck %s # MSVC fudges the lookup of 'bar' to allow it to find the stdcall function diff --git a/lld/test/COFF/export32.test b/lld/test/COFF/export32.test index 250c305d4d2e6..1251d43aacbc7 100644 --- a/lld/test/COFF/export32.test +++ b/lld/test/COFF/export32.test @@ -1,9 +1,9 @@ # RUN: yaml2obj < %s > %t.obj # -# RUN: lld-link /out:%t.dll /dll %t.obj /export:exportfn1 /export:exportfn2 +# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /export:exportfn1 /export:exportfn2 # RUN: llvm-objdump -p %t.dll | FileCheck -check-prefix=CHECK1 %s # -# RUN: lld-link /out:%t.dll /dll %t.obj /export:exportfn1 /export:exportfn2 /merge:.edata=.rdata +# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /export:exportfn1 /export:exportfn2 /merge:.edata=.rdata # RUN: llvm-objdump -p %t.dll | FileCheck -check-prefix=CHECK1 %s # RUN: llvm-readobj --file-headers --sections %t.dll | FileCheck -check-prefix=HEADER-MERGE %s @@ -20,7 +20,7 @@ # HEADER-MERGE-NEXT: VirtualSize: 0x7E # HEADER-MERGE-NEXT: VirtualAddress: 0x2000 -# RUN: 
lld-link /out:%t.dll /dll %t.obj /export:exportfn1,@5 \ +# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /export:exportfn1,@5 \ # RUN: /export:exportfn2 /export:mangled # RUN: llvm-objdump -p %t.dll | FileCheck -check-prefix=CHECK2 %s @@ -37,7 +37,7 @@ # CHECK2-NEXT: 7 0x1010 exportfn3 # CHECK2-NEXT: 8 0x1010 mangled -# RUN: lld-link /out:%t.dll /dll %t.obj /export:exportfn1,@5,noname /export:exportfn2 +# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /export:exportfn1,@5,noname /export:exportfn2 # RUN: llvm-objdump -p %t.dll | FileCheck -check-prefix=CHECK3 %s # CHECK3: Export Table: @@ -51,7 +51,7 @@ # CHECK3-NEXT: 5 0x1008 # CHECK3-NEXT: 6 0x1010 exportfn2 -# RUN: lld-link /out:%t.dll /dll %t.obj /export:f1=exportfn1 /export:f2=exportfn2 +# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /export:f1=exportfn1 /export:f2=exportfn2 # RUN: llvm-objdump -p %t.dll | FileCheck -check-prefix=CHECK4 %s # CHECK4: Export Table: @@ -64,12 +64,12 @@ # RUN: echo "EXPORTS exportfn1 @3" > %t.def # RUN: echo "fn2=exportfn2 @2" >> %t.def -# RUN: lld-link /out:%t.dll /dll %t.obj /def:%t.def +# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /def:%t.def # RUN: llvm-objdump -p %t.dll | FileCheck -check-prefix=CHECK5 %s # RUN: echo "EXPORTS exportfn1 @ 3" > %t.def # RUN: echo "fn2=exportfn2 @ 2" >> %t.def -# RUN: lld-link /out:%t.dll /dll %t.obj /def:%t.def +# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /def:%t.def # RUN: llvm-objdump -p %t.dll | FileCheck -check-prefix=CHECK5 %s # CHECK5: Export Table: @@ -81,14 +81,14 @@ # CHECK5-NEXT: 3 0x1008 exportfn1 # CHECK5-NEXT: 4 0x1010 exportfn3 -# RUN: lld-link /out:%t.dll /dll %t.obj /export:exportfn1 /export:exportfn2 \ +# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /export:exportfn1 /export:exportfn2 \ # RUN: /export:exportfn1 /export:exportfn2,@5 >& %t.log # RUN: FileCheck -check-prefix=CHECK6 %s < %t.log # CHECK6: duplicate /export option: _exportfn2 # CHECK6-NOT: duplicate /export option: _exportfn1 -# RUN: 
lld-link /out:%t.dll /dll %t.obj /export:foo=mangled +# RUN: lld-link -safeseh:no /out:%t.dll /dll %t.obj /export:foo=mangled # RUN: llvm-objdump -p %t.dll | FileCheck -check-prefix=CHECK7 %s # CHECK7: Export Table: diff --git a/lld/test/COFF/fixed.test b/lld/test/COFF/fixed.test index 7a5d9e6ea04db..e162570dfc765 100644 --- a/lld/test/COFF/fixed.test +++ b/lld/test/COFF/fixed.test @@ -1,21 +1,21 @@ # REQUIRES: x86 # RUN: yaml2obj < %p/Inputs/hello32.yaml > %t.obj # -# RUN: lld-link %t.obj /fixed %p/Inputs/std32.lib /subsystem:console \ +# RUN: lld-link -safeseh:no %t.obj /fixed %p/Inputs/std32.lib /subsystem:console \ # RUN: /entry:main@0 /debug /out:%t.fixed.exe # RUN: llvm-readobj --file-headers %t.fixed.exe | \ # RUN: FileCheck -check-prefix=EXEFIXED %s # -# RUN: lld-link %t.obj %p/Inputs/std32.lib /subsystem:console \ +# RUN: lld-link -safeseh:no %t.obj %p/Inputs/std32.lib /subsystem:console \ # RUN: /entry:main@0 /debug /out:%t.exe # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=EXEREL %s # # RUN: yaml2obj < %p/Inputs/export.yaml > %t.obj # -# RUN: lld-link %t.obj /dll /fixed /debug /out:%t.fixed.dll +# RUN: lld-link -safeseh:no %t.obj /dll /fixed /debug /out:%t.fixed.dll # RUN: llvm-readobj --file-headers %t.fixed.dll | FileCheck -check-prefix=DLLFIXED %s # -# RUN: lld-link %t.obj /dll /debug /out:%t.dll +# RUN: lld-link -safeseh:no %t.obj /dll /debug /out:%t.dll # RUN: llvm-readobj --file-headers %t.dll | FileCheck -check-prefix=DLLREL %s EXEFIXED-NOT: IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE diff --git a/lld/test/COFF/gfids-relocations32.s b/lld/test/COFF/gfids-relocations32.s index 2b0e425a1c540..6c51f7aca8207 100644 --- a/lld/test/COFF/gfids-relocations32.s +++ b/lld/test/COFF/gfids-relocations32.s @@ -1,6 +1,6 @@ # REQUIRES: x86 # RUN: llvm-mc -triple i686-pc-win32 %s -filetype=obj -o %t.obj -# RUN: lld-link %t.obj -guard:cf -out:%t.exe -entry:main +# RUN: lld-link -safeseh:no %t.obj -guard:cf -out:%t.exe -entry:main # RUN: 
llvm-readobj --coff-load-config %t.exe | FileCheck %s --check-prefix=CHECK # Only f and _main should go in the table. diff --git a/lld/test/COFF/hello32.test b/lld/test/COFF/hello32.test index b53264ac6af81..61418d3d24bb5 100644 --- a/lld/test/COFF/hello32.test +++ b/lld/test/COFF/hello32.test @@ -1,5 +1,5 @@ # RUN: yaml2obj < %p/Inputs/hello32.yaml > %t.obj -# RUN: lld-link %t.obj %p/Inputs/std32.lib /subsystem:console \ +# RUN: lld-link -safeseh:no %t.obj %p/Inputs/std32.lib /subsystem:console \ # RUN: /entry:main@0 /out:%t.exe /appcontainer # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=HEADER %s # RUN: llvm-readobj --coff-imports %t.exe | FileCheck -check-prefix=IMPORTS %s @@ -42,10 +42,9 @@ HEADER-NEXT: MinorSubsystemVersion: 0 HEADER-NEXT: SizeOfImage: 20480 HEADER-NEXT: SizeOfHeaders: 1024 HEADER-NEXT: Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI (0x3) -HEADER-NEXT: Characteristics [ (0x9540) +HEADER-NEXT: Characteristics [ (0x9140) HEADER-NEXT: IMAGE_DLL_CHARACTERISTICS_APPCONTAINER (0x1000) HEADER-NEXT: IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE (0x40) -HEADER-NEXT: IMAGE_DLL_CHARACTERISTICS_NO_SEH (0x400) HEADER-NEXT: IMAGE_DLL_CHARACTERISTICS_NX_COMPAT (0x100) HEADER-NEXT: IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE (0x8000) HEADER-NEXT: ] diff --git a/lld/test/COFF/largeaddressaware.test b/lld/test/COFF/largeaddressaware.test index 4c2ae56cc5abc..ddd37131a65db 100644 --- a/lld/test/COFF/largeaddressaware.test +++ b/lld/test/COFF/largeaddressaware.test @@ -1,5 +1,5 @@ # RUN: yaml2obj < %p/Inputs/hello32.yaml > %t.obj -# RUN: lld-link %t.obj %p/Inputs/std32.lib /subsystem:console \ +# RUN: lld-link -safeseh:no %t.obj %p/Inputs/std32.lib /subsystem:console \ # RUN: /entry:main@0 /out:%t.exe /largeaddressaware # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=HEADER %s diff --git a/lld/test/COFF/loadcfg32.test b/lld/test/COFF/loadcfg32.test index e211c95e7db09..dffdab8b146e6 100644 --- a/lld/test/COFF/loadcfg32.test +++ 
b/lld/test/COFF/loadcfg32.test @@ -1,5 +1,5 @@ # RUN: yaml2obj < %s > %t.obj -# RUN: lld-link /out:%t.exe %t.obj /entry:main /subsystem:console +# RUN: lld-link -safeseh:no /out:%t.exe %t.obj /entry:main /subsystem:console # RUN: llvm-readobj --file-headers %t.exe | FileCheck %s # CHECK: LoadConfigTableRVA: 0x2000 diff --git a/lld/test/COFF/locally-imported32.test b/lld/test/COFF/locally-imported32.test index 789c8c8f8094d..993099d423a5c 100644 --- a/lld/test/COFF/locally-imported32.test +++ b/lld/test/COFF/locally-imported32.test @@ -1,5 +1,5 @@ # RUN: yaml2obj < %s > %t.obj -# RUN: lld-link /out:%t.exe /entry:main %t.obj +# RUN: lld-link -safeseh:no /out:%t.exe /entry:main %t.obj # RUN: llvm-objdump -s %t.exe | FileCheck %s # CHECK: Contents of section .text: diff --git a/lld/test/COFF/machine.test b/lld/test/COFF/machine.test index 2ac276f1ba7cd..921b7e3931493 100644 --- a/lld/test/COFF/machine.test +++ b/lld/test/COFF/machine.test @@ -1,16 +1,16 @@ # RUN: yaml2obj %p/Inputs/machine-x64.yaml > %t.obj -# RUN: lld-link /entry:main /subsystem:console /out:%t.exe %t.obj +# RUN: lld-link -safeseh:no /entry:main /subsystem:console /out:%t.exe %t.obj # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=AMD64 %s -# RUN: lld-link /entry:main /subsystem:console /machine:x64 \ +# RUN: lld-link -safeseh:no /entry:main /subsystem:console /machine:x64 \ # RUN: /out:%t.exe %t.obj # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=AMD64 %s AMD64: Machine: IMAGE_FILE_MACHINE_AMD64 # RUN: yaml2obj %p/Inputs/machine-x86.yaml > %t.obj -# RUN: lld-link /entry:main /subsystem:console /out:%t.exe %t.obj +# RUN: lld-link -safeseh:no /entry:main /subsystem:console /out:%t.exe %t.obj # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=I386 %s -# RUN: lld-link /entry:main /subsystem:console /machine:x86 \ +# RUN: lld-link -safeseh:no /entry:main /subsystem:console /machine:x86 \ # RUN: /out:%t.exe %t.obj /fixed # RUN: llvm-readobj 
--file-headers %t.exe | FileCheck -check-prefix=I386 %s diff --git a/lld/test/COFF/no-ipi-stream.test b/lld/test/COFF/no-ipi-stream.test index 246c35907058f..e9b2a31bc0a7c 100644 --- a/lld/test/COFF/no-ipi-stream.test +++ b/lld/test/COFF/no-ipi-stream.test @@ -1,4 +1,4 @@ # RUN: rm -rf %t && mkdir %t # RUN: yaml2obj < %p/Inputs/no-ipi-stream-obj.obj.yaml > %t/no-ipi-stream-obj.obj # RUN: llvm-pdbutil yaml2pdb %p/Inputs/no-ipi-stream-pdb.pdb.yaml -pdb=%t/no-ipi-stream-pdb.pdb -# RUN: lld-link /dll /noentry /debug %t/no-ipi-stream-obj.obj +# RUN: lld-link -safeseh:no /dll /noentry /debug %t/no-ipi-stream-obj.obj diff --git a/lld/test/COFF/order-i386.test b/lld/test/COFF/order-i386.test index 4cde5fa813f74..acd9ad2aa2c95 100644 --- a/lld/test/COFF/order-i386.test +++ b/lld/test/COFF/order-i386.test @@ -3,12 +3,12 @@ # RUN: echo fn1 > %t.order # RUN: echo fn2 >> %t.order -# RUN: lld-link -entry:fn1 -subsystem:console -opt:noref %t.obj \ +# RUN: lld-link -safeseh:no -entry:fn1 -subsystem:console -opt:noref %t.obj \ # RUN: -lldmap:- -out:%t.exe -order:@%t.order | FileCheck %s # CHECK: fn1 # CHECK: fn2 -# RUN: lld-link -entry:fn1 -subsystem:console -opt:noref %t.obj \ +# RUN: lld-link -safeseh:no -entry:fn1 -subsystem:console -opt:noref %t.obj \ # RUN: -lldmap:- -out:%t.exe | FileCheck -check-prefix=DEFAULT %s # DEFAULT: fn2 # DEFAULT: fn1 diff --git a/lld/test/COFF/pdb-debug-f.s b/lld/test/COFF/pdb-debug-f.s index 624c1192914aa..ccc34558c832b 100644 --- a/lld/test/COFF/pdb-debug-f.s +++ b/lld/test/COFF/pdb-debug-f.s @@ -1,6 +1,6 @@ # REQUIRES: x86 # RUN: llvm-mc -triple=i386-pc-win32 -filetype=obj -o %t.obj %s -# RUN: lld-link /subsystem:console /debug /nodefaultlib /entry:foo /out:%t.exe /pdb:%t.pdb %t.obj +# RUN: lld-link -safeseh:no /subsystem:console /debug /nodefaultlib /entry:foo /out:%t.exe /pdb:%t.pdb %t.obj # RUN: llvm-pdbutil dump -fpo %t.pdb | FileCheck %s # CHECK: Old FPO Data diff --git a/lld/test/COFF/pdb-lib.s b/lld/test/COFF/pdb-lib.s index 
dacf5f27a319b..09f1892069a45 100644 --- a/lld/test/COFF/pdb-lib.s +++ b/lld/test/COFF/pdb-lib.s @@ -3,7 +3,7 @@ # RUN: llvm-mc -filetype=obj -triple=i686-windows-msvc %s -o foo.obj # RUN: llc %S/Inputs/bar.ll -filetype=obj -mtriple=i686-windows-msvc -o bar.obj # RUN: llvm-lib bar.obj -out:bar.lib -# RUN: lld-link -debug -pdb:foo.pdb foo.obj bar.lib -out:foo.exe -entry:main +# RUN: lld-link -safeseh:no -debug -pdb:foo.pdb foo.obj bar.lib -out:foo.exe -entry:main # RUN: llvm-pdbutil dump -modules %t/foo.pdb | FileCheck %s # Make sure that the PDB has module descriptors. foo.obj and bar.lib should be diff --git a/lld/test/COFF/pdb-safeseh.yaml b/lld/test/COFF/pdb-safeseh.yaml index 27948e38d3d3a..cc7ddb19a49c6 100644 --- a/lld/test/COFF/pdb-safeseh.yaml +++ b/lld/test/COFF/pdb-safeseh.yaml @@ -1,5 +1,5 @@ # RUN: yaml2obj %s -o %t.obj -# RUN: lld-link -debug -entry:main -out:%t.exe -pdb:%t.pdb %t.obj +# RUN: lld-link -safeseh:no -debug -entry:main -out:%t.exe -pdb:%t.pdb %t.obj # RUN: llvm-pdbutil dump -globals %t.pdb | FileCheck %s # There is an S_GDATA32 symbol record with .secrel32 and .secidx relocations in diff --git a/lld/test/COFF/pdb-unknown-subsection.s b/lld/test/COFF/pdb-unknown-subsection.s index b64ed0373c633..10ffa46ded3f6 100644 --- a/lld/test/COFF/pdb-unknown-subsection.s +++ b/lld/test/COFF/pdb-unknown-subsection.s @@ -3,7 +3,7 @@ # REQUIRES: x86 # RUN: llvm-mc -triple=i386-pc-win32 -filetype=obj -o %t.obj %s -# RUN: lld-link -subsystem:console -debug -nodefaultlib -entry:foo -out:%t.exe -pdb:%t.pdb %t.obj 2>&1 | FileCheck %s --check-prefix=WARNING +# RUN: lld-link -safeseh:no -subsystem:console -debug -nodefaultlib -entry:foo -out:%t.exe -pdb:%t.pdb %t.obj 2>&1 | FileCheck %s --check-prefix=WARNING # RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s # WARNING-NOT: ignoring unknown diff --git a/lld/test/COFF/reloc-x86.test b/lld/test/COFF/reloc-x86.test index bd500be164a71..99547d1cd1452 100644 --- a/lld/test/COFF/reloc-x86.test +++ 
b/lld/test/COFF/reloc-x86.test @@ -1,6 +1,6 @@ # REQUIRES: x86 # RUN: yaml2obj < %s > %t.obj -# RUN: lld-link /out:%t.exe /entry:main /base:0x400000 %t.obj +# RUN: lld-link -safeseh:no /out:%t.exe /entry:main /base:0x400000 %t.obj # RUN: llvm-objdump -d %t.exe | FileCheck %s # CHECK: .text: diff --git a/lld/test/COFF/safeseh-no.s b/lld/test/COFF/safeseh-no.s new file mode 100644 index 0000000000000..2a301a3ba9b83 --- /dev/null +++ b/lld/test/COFF/safeseh-no.s @@ -0,0 +1,56 @@ +# RUN: llvm-mc -triple i686-windows-msvc %s -filetype=obj -o %t.obj +# RUN: not lld-link %t.obj -safeseh -out:%t.exe -entry:main 2>&1 | FileCheck %s --check-prefix=ERROR +# safe seh should be on by default. +# RUN: not lld-link %t.obj -out:%t.exe -entry:main 2>&1 | FileCheck %s --check-prefix=ERROR +# RUN: lld-link %t.obj -safeseh:no -out:%t.exe -entry:main +# RUN: llvm-readobj --file-headers --coff-load-config %t.exe | FileCheck %s +# -lldmingw should also turn off safeseh by default. +# RUN: lld-link %t.obj -lldmingw -out:%t.exe -entry:main +# RUN: llvm-readobj --file-headers --coff-load-config %t.exe | FileCheck %s + +# ERROR: /safeseh: {{.*}}safeseh-no.s.tmp.obj is not compatible with SEH + +# CHECK: Characteristics [ +# CHECK-NOT: IMAGE_DLL_CHARACTERISTICS_NO_SEH +# CHECK: ] +# CHECK: LoadConfig [ +# CHECK: Size: 0x48 +# CHECK: SEHandlerTable: 0x0 +# CHECK: SEHandlerCount: 0 +# CHECK: ] +# CHECK-NOT: SEHTable + + +# Explicitly mark the object as not having safeseh. LLD should error unless +# -safeseh:no is passed. + .def @feat.00; .scl 3; .type 0; .endef + .globl @feat.00 +@feat.00 = 0 + + .def _main; + .scl 2; + .type 32; + .endef + .section .text,"xr",one_only,_main + .globl _main +_main: + movl $42, %eax + ret + +# Add a handler to create an .sxdata section, which -safeseh:no should ignore. 
+ .def _my_handler; .scl 3; .type 32; + .endef + .section .text,"xr",one_only,_my_handler +_my_handler: + ret +.safeseh _my_handler + + + .section .rdata,"dr" +.globl __load_config_used +__load_config_used: + .long 72 + .fill 60, 1, 0 + .long ___safe_se_handler_table + .long ___safe_se_handler_count + diff --git a/lld/test/COFF/subsystem-drectve.test b/lld/test/COFF/subsystem-drectve.test index 45d48518a99db..68630eb2d82fb 100644 --- a/lld/test/COFF/subsystem-drectve.test +++ b/lld/test/COFF/subsystem-drectve.test @@ -1,5 +1,5 @@ # RUN: yaml2obj < %s > %t.obj -# RUN: lld-link /dll /noentry /out:%t.dll %t.obj +# RUN: lld-link -safeseh:no /dll /noentry /out:%t.dll %t.obj # RUN: llvm-readobj --file-headers %t.dll | FileCheck %s # CHECK: MajorOperatingSystemVersion: 42 diff --git a/lld/test/COFF/subsystem-inference32.test b/lld/test/COFF/subsystem-inference32.test index 23bcf0da6e650..d213550786bd9 100644 --- a/lld/test/COFF/subsystem-inference32.test +++ b/lld/test/COFF/subsystem-inference32.test @@ -1,17 +1,17 @@ # RUN: sed -e s/ENTRYNAME/_main/ %s | yaml2obj > %t.obj -# RUN: lld-link /out:%t.exe %t.obj +# RUN: lld-link -safeseh:no /out:%t.exe %t.obj # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=MAIN %s # RUN: sed s/ENTRYNAME/_wmain/ %s | yaml2obj > %t.obj -# RUN: lld-link /out:%t.exe %t.obj +# RUN: lld-link -safeseh:no /out:%t.exe %t.obj # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=WMAIN %s # RUN: sed s/ENTRYNAME/_WinMain@16/ %s | yaml2obj > %t.obj -# RUN: lld-link /out:%t.exe %t.obj +# RUN: lld-link -safeseh:no /out:%t.exe %t.obj # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=WINMAIN %s # RUN: sed s/ENTRYNAME/_wWinMain@16/ %s | yaml2obj > %t.obj -# RUN: lld-link /out:%t.exe %t.obj +# RUN: lld-link -safeseh:no /out:%t.exe %t.obj # RUN: llvm-readobj --file-headers %t.exe | FileCheck -check-prefix=WWINMAIN %s # MAIN: Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI diff --git a/lld/test/COFF/tls32.test 
b/lld/test/COFF/tls32.test index f3db2615ea47b..462cec06b2097 100644 --- a/lld/test/COFF/tls32.test +++ b/lld/test/COFF/tls32.test @@ -1,5 +1,5 @@ # RUN: yaml2obj < %s > %t.obj -# RUN: lld-link /out:%t.exe /entry:main %t.obj +# RUN: lld-link -safeseh:no /out:%t.exe /entry:main %t.obj # RUN: llvm-readobj --file-headers %t.exe | FileCheck %s # CHECK: TLSTableRVA: 0x1000 From 8f8d07e93bf891bf67329efb8f8d8609bf77f1c0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 18:21:25 +0000 Subject: [PATCH 265/451] AMDGPU: Replace store PatFrags Convert the easy cases to formats understood for GlobalISel. llvm-svn: 366240 --- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 44 ++++++++++++++------ llvm/lib/Target/AMDGPU/FLATInstructions.td | 4 +- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index d470b3cd51486..61bc415c839da 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -467,25 +467,48 @@ def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> { let MemoryVT = i64; } +def store_#as : PatFrag<(ops node:$val, node:$ptr), + (unindexedstore node:$val, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 0; +} + +// truncstore fragments. +def truncstore_#as : PatFrag<(ops node:$val, node:$ptr), + (unindexedstore node:$val, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 1; +} + +// TODO: We don't really need the truncstore here. We can use +// unindexedstore with MemoryVT directly, which will save an +// unnecessary check that the memory size is less than the value type +// in the generated matcher table. 
+def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i8; +} + +def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i16; +} + +defm atomic_store_#as : binary_atomic_op; + } // End let AddressSpaces = ... } // End foreach AddrSpace -def store_private : PrivateStore ; -def truncstorei8_private : PrivateStore; -def truncstorei16_private : PrivateStore ; + def store_hi16_private : StoreHi16 , PrivateAddress; def truncstorei8_hi16_private : StoreHi16, PrivateAddress; -def store_global : GlobalStore ; -def truncstorei8_global : GlobalStore ; -def truncstorei16_global : GlobalStore ; def store_atomic_global : GlobalStore; def truncstorei8_hi16_global : StoreHi16 , GlobalAddress; def truncstorei16_hi16_global : StoreHi16 , GlobalAddress; -def store_local : LocalStore ; -def truncstorei8_local : LocalStore ; -def truncstorei16_local : LocalStore ; def store_local_hi16 : StoreHi16 , LocalAddress; def truncstorei8_local_hi16 : StoreHi16, LocalAddress; def atomic_store_local : LocalStore ; @@ -506,9 +529,6 @@ def store_align16_local : Aligned16Bytes < (ops node:$val, node:$ptr), (store_local node:$val, node:$ptr) >; -def store_flat : FlatStore ; -def truncstorei8_flat : FlatStore ; -def truncstorei16_flat : FlatStore ; def atomic_store_flat : FlatStore ; def truncstorei8_hi16_flat : StoreHi16, FlatStoreAddress; def truncstorei16_hi16_flat : StoreHi16, FlatStoreAddress; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 9d541560613cc..8ddf4e2aa2b26 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -792,8 +792,8 @@ def : FlatStorePat ; def : FlatStorePat ; def : FlatStorePat ; -def : FlatStoreAtomicPat ; -def : FlatStoreAtomicPat ; +def : FlatStoreAtomicPat ; +def : FlatStoreAtomicPat ; def : FlatAtomicPat 
; def : FlatAtomicPat ; From 6e1c3bb181b754f92501ee85f157345e25769317 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 16 Jul 2019 18:23:49 +0000 Subject: [PATCH 266/451] [IndVars] Speculative fix for an assertion failure seen in bots I don't have an IR sample which is actually failing, but the issue described in the comment is theoretically possible, and should be guarded against even if there's a different root cause for the bot failures. llvm-svn: 366241 --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 70508bf752580..f9fc698a4a9bc 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -2810,7 +2810,12 @@ bool IndVarSimplify::run(Loop *L) { if (isa(ExitCount)) continue; - assert(!ExitCount->isZero() && "Should have been folded above"); + // This was handled above, but as we form SCEVs, we can sometimes refine + // existing ones; this allows exit counts to be folded to zero which + // weren't when optimizeLoopExits saw them. Arguably, we should iterate + // until stable to handle cases like this better. + if (ExitCount->isZero()) + continue; PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); if (!IndVar) From 7eb1902cd54d5715a3e3c096d9624bda749d26a5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 18:26:42 +0000 Subject: [PATCH 267/451] AMDGPU: Add register classes to flat store patterns For some reason GlobalISelEmitter needs register classes to import these, although it works for the load patterns. 
llvm-svn: 366242 --- llvm/lib/Target/AMDGPU/FLATInstructions.td | 50 +++++++++++----------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 8ddf4e2aa2b26..889f60dae9204 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -705,47 +705,47 @@ class FlatLoadPat : GCN >; class FlatLoadPat_D16 : GCNPat < - (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc), vt:$in), + (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in), (inst $vaddr, $offset, 0, 0, $slc, $in) >; class FlatSignedLoadPat_D16 : GCNPat < - (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc), vt:$in), + (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in), (inst $vaddr, $offset, 0, 0, $slc, $in) >; class FlatLoadAtomicPat : GCNPat < - (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))), + (vt (node (FLATAtomic (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))), (inst $vaddr, $offset, 0, 0, $slc) >; class FlatLoadSignedPat : GCNPat < - (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))), + (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))), (inst $vaddr, $offset, 0, 0, $slc) >; -class FlatStorePat : GCNPat < +class FlatStorePat : GCNPat < (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)), - (inst $vaddr, $data, $offset, 0, 0, $slc) + (inst $vaddr, rc:$data, $offset, 0, 0, $slc) >; -class FlatStoreSignedPat : GCNPat < +class FlatStoreSignedPat : GCNPat < (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)), - (inst $vaddr, $data, $offset, 0, 0, $slc) + (inst $vaddr, rc:$data, $offset, 0, 0, $slc) >; -class FlatStoreAtomicPat : GCNPat < +class FlatStoreAtomicPat : GCNPat < // atomic store follows atomic binop convention so the address comes // first. 
(node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data), - (inst $vaddr, $data, $offset, 0, 0, $slc) + (inst $vaddr, rc:$data, $offset, 0, 0, $slc) >; -class FlatStoreSignedAtomicPat : GCNPat < +class FlatStoreSignedAtomicPat : GCNPat < // atomic store follows atomic binop convention so the address comes // first. (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data), - (inst $vaddr, $data, $offset, 0, 0, $slc) + (inst $vaddr, rc:$data, $offset, 0, 0, $slc) >; class FlatAtomicPat ; def : FlatStorePat ; def : FlatStorePat ; def : FlatStorePat ; -def : FlatStorePat ; -def : FlatStorePat ; -def : FlatStorePat ; +def : FlatStorePat ; +def : FlatStorePat ; +def : FlatStorePat ; def : FlatStoreAtomicPat ; -def : FlatStoreAtomicPat ; +def : FlatStoreAtomicPat ; def : FlatAtomicPat ; def : FlatAtomicPat ; @@ -871,14 +871,14 @@ def : FlatLoadSignedPat ; def : FlatLoadAtomicPat ; def : FlatLoadAtomicPat ; -def : FlatStoreSignedPat ; -def : FlatStoreSignedPat ; -def : FlatStoreSignedPat ; -def : FlatStoreSignedPat ; -def : FlatStoreSignedPat ; -def : FlatStoreSignedPat ; -def : FlatStoreSignedPat ; -def : FlatStoreSignedPat ; +def : FlatStoreSignedPat ; +def : FlatStoreSignedPat ; +def : FlatStoreSignedPat ; +def : FlatStoreSignedPat ; +def : FlatStoreSignedPat ; +def : FlatStoreSignedPat ; +def : FlatStoreSignedPat ; +def : FlatStoreSignedPat ; let OtherPredicates = [D16PreservesUnusedBits] in { def : FlatStoreSignedPat ; @@ -900,7 +900,7 @@ def : FlatSignedLoadPat_D16 ; } def : FlatStoreSignedAtomicPat ; -def : FlatStoreSignedAtomicPat ; +def : FlatStoreSignedAtomicPat ; def : FlatSignedAtomicPat ; def : FlatSignedAtomicPat ; From 5826ab6b0c9cf7e0dbafc164c4cca1404c29ed09 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 16 Jul 2019 18:27:12 +0000 Subject: [PATCH 268/451] [CMake] Fail when Python interpreter doesn't match Python libraries version Because of how CMake finds the Python libraries and interpreter, it's possible to end up with a 
discrepancy between the two. For example, you'd end up using a Python 3 interpreter to run the test suite while LLDB was built and linked against Python 2. This patch adds a fatal error to CMake so we find out at configuration time, instead of finding out at test time. Differential revision: https://reviews.llvm.org/D64812 llvm-svn: 366243 --- lldb/cmake/modules/LLDBConfig.cmake | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index ef9356591c301..26a1c7a72cc08 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -185,7 +185,6 @@ function(find_python_libs_windows) endfunction(find_python_libs_windows) if (NOT LLDB_DISABLE_PYTHON) - if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") find_python_libs_windows() @@ -194,8 +193,12 @@ if (NOT LLDB_DISABLE_PYTHON) add_definitions( -DLLDB_PYTHON_HOME="${LLDB_PYTHON_HOME}" ) endif() else() - find_package(PythonInterp) - find_package(PythonLibs) + find_package(PythonInterp REQUIRED) + find_package(PythonLibs REQUIRED) + endif() + + if (NOT PYTHON_VERSION_STRING VERSION_EQUAL PYTHONLIBS_VERSION_STRING) + message(FATAL_ERROR "Found incompatible Python interpreter (${PYTHON_VERSION_STRING}) and Python libraries (${PYTHONLIBS_VERSION_STRING})") endif() if (PYTHON_INCLUDE_DIR) From 4b6f69fe906e173603f1613bd84450486f63b3ee Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Tue, 16 Jul 2019 18:33:13 +0000 Subject: [PATCH 269/451] Fix linkrepro.test after safeseh:no change Add the @feat.00 flag to the input. 
llvm-svn: 366244 --- lld/test/COFF/Inputs/hello32.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lld/test/COFF/Inputs/hello32.yaml b/lld/test/COFF/Inputs/hello32.yaml index 09e76f144532b..c01c4c6ba56bb 100644 --- a/lld/test/COFF/Inputs/hello32.yaml +++ b/lld/test/COFF/Inputs/hello32.yaml @@ -79,4 +79,10 @@ symbols: SimpleType: IMAGE_SYM_TYPE_NULL ComplexType: IMAGE_SYM_DTYPE_FUNCTION StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: '@feat.00' + Value: 1 + SectionNumber: -1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC ... From 11dc3d371124f329762c0f2d9a75a5a82bd00b1a Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Tue, 16 Jul 2019 18:34:46 +0000 Subject: [PATCH 270/451] Mark new test as requiring an x86 backend for LTO native object generation llvm-svn: 366245 --- lld/test/COFF/undefined-symbol-lto.test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lld/test/COFF/undefined-symbol-lto.test b/lld/test/COFF/undefined-symbol-lto.test index 700ec650857ca..41957168baf60 100644 --- a/lld/test/COFF/undefined-symbol-lto.test +++ b/lld/test/COFF/undefined-symbol-lto.test @@ -1,3 +1,5 @@ +REQUIRES: x86 + RUN: rm -rf %t && mkdir -p %t && cd %t RUN: llvm-as %S/Inputs/undefined-symbol-lto-a.ll -o t.obj RUN: llvm-as %S/Inputs/undefined-symbol-lto-b.ll -o b.obj From dad1f89210bff2a45e23fb2a6c31ffb247450b23 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 18:42:53 +0000 Subject: [PATCH 271/451] AMDGPU/GlobalISel: Select flat stores llvm-svn: 366246 --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 6 +- .../AMDGPU/GlobalISel/inst-select-copy.mir | 16 +- .../GlobalISel/inst-select-implicit-def.mir | 10 +- .../GlobalISel/inst-select-store-flat.mir | 837 +++++++++++++++++- .../GlobalISel/inst-select-store-global.mir | 817 +++++++++++++++++ .../GlobalISel/llvm.amdgcn.end.cf.i32.ll | 8 +- .../GlobalISel/llvm.amdgcn.end.cf.i64.ll | 6 +- .../GlobalISel/llvm.amdgcn.if.break.i32.ll | 4 +- 8 
files changed, 1650 insertions(+), 54 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index f8f89593d0805..25e72bbe75abf 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -856,7 +856,7 @@ bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const { unsigned StoreSize = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI); unsigned Opcode; - // FIXME: Select store instruction based on address space + // FIXME: Remove this when integers > s32 naturally selected. switch (StoreSize) { default: return false; @@ -1363,6 +1363,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I, case TargetOpcode::G_SELECT: return selectG_SELECT(I); case TargetOpcode::G_STORE: + if (selectImpl(I, CoverageInfo)) + return true; return selectG_STORE(I); case TargetOpcode::G_TRUNC: return selectG_TRUNC(I); @@ -1545,7 +1547,7 @@ AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const { }}; } - template +template InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const { MachineInstr *MI = Root.getParent(); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir index 2f2ad31cd0ad7..558f672c2089c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy.mir @@ -17,13 +17,13 @@ body: | ; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3 ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]] ; WAVE64: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; WAVE64: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; WAVE64: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, 
implicit $flat_scr :: (store 4, addrspace 1) ; WAVE32-LABEL: name: copy ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3 ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]] ; WAVE32: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; WAVE32: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; WAVE32: GLOBAL_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %0:sgpr(p1) = COPY $sgpr2_sgpr3 %1:vgpr(p1) = COPY %0 %2:vgpr(s32) = G_IMPLICIT_DEF @@ -46,7 +46,7 @@ body: | ; WAVE64: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; WAVE32-LABEL: name: copy_vcc_bank_scc_bank ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -55,7 +55,7 @@ body: | ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -83,7 +83,7 @@ body: | ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, 
[[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec ; WAVE64: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec - ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; WAVE32-LABEL: name: copy_vcc_bank_scc_bank_2_uses ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -94,7 +94,7 @@ body: | ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY4]], implicit $exec ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec ; WAVE32: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec - ; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -122,7 +122,7 @@ body: | ; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; WAVE64: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY $scc ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec - ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) ; WAVE32-LABEL: name: copy_vcc_bank_scc_physreg ; WAVE32: $vcc_hi = IMPLICIT_DEF ; 
WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 @@ -130,7 +130,7 @@ body: | ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec - ; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir index 43bd32644ff70..5e14f7e50083f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-implicit-def.mir @@ -104,7 +104,7 @@ body: | ; GCN-LABEL: name: implicit_def_p1_vgpr ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) %0:vgpr(p1) = G_IMPLICIT_DEF %1:vgpr(s32) = G_CONSTANT i32 4 G_STORE %1, %0 :: (store 4, addrspace 1) @@ -119,9 +119,9 @@ regBankSelected: true body: | bb.0: ; GCN-LABEL: name: implicit_def_p3_vgpr - ; GCN: [[DEF:%[0-9]+]]:vgpr(p3) = G_IMPLICIT_DEF - ; GCN: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 4 - ; GCN: G_STORE [[C]](s32), [[DEF]](p3) :: (store 4, addrspace 1) + ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec + ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) 
%0:vgpr(p3) = G_IMPLICIT_DEF %1:vgpr(s32) = G_CONSTANT i32 4 G_STORE %1, %0 :: (store 4, addrspace 1) @@ -138,7 +138,7 @@ body: | ; GCN-LABEL: name: implicit_def_p4_vgpr ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec - ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) %0:vgpr(p4) = G_IMPLICIT_DEF %1:vgpr(s32) = G_CONSTANT i32 4 G_STORE %1, %0 :: (store 4, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir index eb8e39cd08df0..f88d8ee615f4e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir @@ -1,42 +1,827 @@ -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s + +--- + +name: store_flat_s32_to_4 +legalized: true +tracksRegLiveness: true 
+regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX7-LABEL: name: store_flat_s32_to_4 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX8-LABEL: name: store_flat_s32_to_4 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX9-LABEL: name: store_flat_s32_to_4 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX10-LABEL: name: store_flat_s32_to_4 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + G_STORE %1, %0 :: (store 4, align 4, addrspace 0) ---- | - define amdgpu_kernel void @global_addrspace(i32 addrspace(1)* %global0, - i64 addrspace(1)* %global1, - i96 addrspace(1)* %global2, - i128 addrspace(1)* %global3) { ret void } ... 
+ --- +name: store_flat_s32_to_2 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX7-LABEL: name: store_flat_s32_to_2 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2) + ; GFX8-LABEL: name: store_flat_s32_to_2 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2) + ; GFX9-LABEL: name: store_flat_s32_to_2 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2) + ; GFX10-LABEL: name: store_flat_s32_to_2 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + G_STORE %1, %0 :: (store 2, align 2, addrspace 0) + +... 
-name: global_addrspace +--- +name: store_flat_s32_to_1 legalized: true +tracksRegLiveness: true regBankSelected: true -# GCN: global_addrspace -# GCN: [[PTR:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 -# GCN: [[VAL4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 -# GCN: [[VAL8:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 -# GCN: [[VAL12:%[0-9]+]]:vreg_96 = COPY $vgpr5_vgpr6_vgpr7 -# GCN: [[VAL16:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11 -# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL4]], 0, 0, 0 -# GCN: FLAT_STORE_DWORDX2 [[PTR]], [[VAL8]], 0, 0, 0 -# GCN: FLAT_STORE_DWORDX3 [[PTR]], [[VAL12]], 0, 0, 0 -# GCN: FLAT_STORE_DWORDX4 [[PTR]], [[VAL16]], 0, 0, 0 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX7-LABEL: name: store_flat_s32_to_1 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1) + ; GFX8-LABEL: name: store_flat_s32_to_1 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1) + ; GFX9-LABEL: name: store_flat_s32_to_1 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1) + ; GFX10-LABEL: name: store_flat_s32_to_1 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + G_STORE %1, %0 :: (store 1, 
align 1, addrspace 0) + +... + +--- + +name: store_flat_s64 +legalized: true +tracksRegLiveness: true +regBankSelected: true body: | bb.0: - liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3_vgpr4, $vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7-LABEL: name: store_flat_s64 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_flat_s64 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_flat_s64 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_flat_s64 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = COPY $vgpr2_vgpr3 + G_STORE %1, %0 :: (store 8, align 8, addrspace 0) + +... 
+--- + +name: store_flat_s96 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + + ; GFX7-LABEL: name: store_flat_s96 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_flat_s96 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_flat_s96 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_flat_s96 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1, %0 :: (store 12, align 16, addrspace 0) + +... 
+--- + +name: store_flat_s128 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + + ; GFX7-LABEL: name: store_flat_s128 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_flat_s128 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_flat_s128 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_flat_s128 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1, %0 :: (store 16, align 16, addrspace 0) + +... 
+ +--- + +name: store_flat_v2s32 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX7-LABEL: name: store_flat_v2s32 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX8-LABEL: name: store_flat_v2s32 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX9-LABEL: name: store_flat_v2s32 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + ; GFX10-LABEL: name: store_flat_v2s32 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 + G_STORE %1, %0 :: (store 8, align 8, addrspace 0) + +... 
+--- + +name: store_flat_v3s32 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + + ; GFX7-LABEL: name: store_flat_v3s32 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16) + ; GFX8-LABEL: name: store_flat_v3s32 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16) + ; GFX9-LABEL: name: store_flat_v3s32 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16) + ; GFX10-LABEL: name: store_flat_v3s32 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1, %0 :: (store 12, align 16, addrspace 0) + +... 
+--- + +name: store_flat_v4s32 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + + ; GFX7-LABEL: name: store_flat_v4s32 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16) + ; GFX8-LABEL: name: store_flat_v4s32 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16) + ; GFX9-LABEL: name: store_flat_v4s32 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16) + ; GFX10-LABEL: name: store_flat_v4s32 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1, %0 :: (store 16, align 16, addrspace 0) + +... 
+ +--- + +name: store_flat_v2s16 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX7-LABEL: name: store_flat_v2s16 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_flat_v2s16 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_flat_v2s16 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_flat_v2s16 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s16>) = COPY $vgpr2 + G_STORE %1, %0 :: (store 4, align 4, addrspace 0) + +... 
+ +--- + +name: store_flat_v4s16 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX7-LABEL: name: store_flat_v4s16 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_flat_v4s16 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_flat_v4s16 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_flat_v4s16 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 + G_STORE %1, %0 :: (store 8, align 8, addrspace 0) + +... 
+ +--- + +name: store_flat_v6s16 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + + ; GFX7-LABEL: name: store_flat_v6s16 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_flat_v6s16 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_flat_v6s16 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_flat_v6s16 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1, %0 :: (store 12, align 16, addrspace 0) + +... 
+--- + +name: store_flat_v8s16 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + + ; GFX7-LABEL: name: store_flat_v8s16 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_flat_v8s16 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_flat_v8s16 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_flat_v8s16 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1, %0 :: (store 16, align 16, addrspace 0) + +... 
+ +--- + +name: store_flat_v2s64 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + + ; GFX7-LABEL: name: store_flat_v2s64 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_flat_v2s64 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_flat_v2s64 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_flat_v2s64 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1, %0 :: (store 16, align 16, addrspace 0) + +... 
+ +--- + +name: store_flat_p1 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX7-LABEL: name: store_flat_p1 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_flat_p1 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_flat_p1 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_flat_p1 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p1) = COPY $vgpr2_vgpr3 + G_STORE %1, %0 :: (store 8, align 8, addrspace 0) + +... 
+ +--- + +name: store_flat_v2p1 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + + ; GFX7-LABEL: name: store_flat_v2p1 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_flat_v2p1 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_flat_v2p1 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_flat_v2p1 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1, %0 :: (store 16, align 16, addrspace 0) + +... 
+ +--- + +name: store_flat_p3 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX7-LABEL: name: store_flat_p3 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_flat_p3 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_flat_p3 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_flat_p3 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = COPY $vgpr2 + G_STORE %1, %0 :: (store 4, align 4, addrspace 0) + +... 
+ +--- + +name: store_flat_v2p3 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX7-LABEL: name: store_flat_v2p3 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_flat_v2p3 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_flat_v2p3 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_flat_v2p3 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 + G_STORE %1, %0 :: (store 8, align 8, addrspace 0) + +... 
+--- + +name: store_atomic_flat_s32 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX7-LABEL: name: store_atomic_flat_s32 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_atomic_flat_s32 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_atomic_flat_s32 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_atomic_flat_s32 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 - %2:vgpr(s64) = COPY $vgpr3_vgpr4 - %3:vgpr(s96) = COPY $vgpr5_vgpr6_vgpr7 - %4:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - G_STORE %1, %0 :: (store 4 into %ir.global0) - G_STORE %2, %0 :: (store 8 into %ir.global1) - G_STORE %3, %0 :: (store 12 into %ir.global2, align 16) - G_STORE %4, %0 :: (store 16 into %ir.global3) + G_STORE %1, %0 :: (store monotonic 4, align 4, addrspace 0) ... 
+ --- + +name: store_atomic_flat_s64 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX7-LABEL: name: store_atomic_flat_s64 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_atomic_flat_s64 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_atomic_flat_s64 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_atomic_flat_s64 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = COPY $vgpr2_vgpr3 + G_STORE %1, %0 :: (store monotonic 8, align 8, addrspace 0) + +... 
+ +--- + +name: store_flat_s32_gep_2047 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX7-LABEL: name: store_flat_s32_gep_2047 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX8-LABEL: name: store_flat_s32_gep_2047 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX8: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX9-LABEL: name: store_flat_s32_gep_2047 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + ; GFX10-LABEL: name: store_flat_s32_gep_2047 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX10: 
[[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX10: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(s64) = G_CONSTANT i64 2047 + %3:vgpr(p1) = G_GEP %0, %2 + G_STORE %1, %3 :: (store 4, align 4, addrspace 0) + +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir new file mode 100644 index 0000000000000..2154d1cfee8cf --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir @@ -0,0 +1,817 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s + +--- + +name: store_global_s32_to_4 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX7-LABEL: name: store_global_s32_to_4 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX8-LABEL: name: store_global_s32_to_4 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY 
$vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX9-LABEL: name: store_global_s32_to_4 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GFX10-LABEL: name: store_global_s32_to_4 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + G_STORE %1, %0 :: (store 4, align 4, addrspace 1) + +... + +--- +name: store_global_s32_to_2 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX7-LABEL: name: store_global_s32_to_2 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1) + ; GFX8-LABEL: name: store_global_s32_to_2 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1) + ; GFX9-LABEL: name: store_global_s32_to_2 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1) + ; 
GFX10-LABEL: name: store_global_s32_to_2 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + G_STORE %1, %0 :: (store 2, align 2, addrspace 1) + +... + +--- +name: store_global_s32_to_1 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX7-LABEL: name: store_global_s32_to_1 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1) + ; GFX8-LABEL: name: store_global_s32_to_1 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1) + ; GFX9-LABEL: name: store_global_s32_to_1 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1) + ; GFX10-LABEL: name: store_global_s32_to_1 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + G_STORE %1, %0 :: (store 1, align 1, addrspace 1) + +... 
+ +--- + +name: store_global_s64 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX7-LABEL: name: store_global_s64 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_global_s64 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_global_s64 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_global_s64 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = COPY $vgpr2_vgpr3 + G_STORE %1, %0 :: (store 8, align 8, addrspace 1) + +... 
+--- + +name: store_global_s96 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + + ; GFX7-LABEL: name: store_global_s96 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_global_s96 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_global_s96 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_global_s96 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1, %0 :: (store 12, align 16, addrspace 1) + +... 
+--- + +name: store_global_s128 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + + ; GFX7-LABEL: name: store_global_s128 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_global_s128 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_global_s128 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_global_s128 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1, %0 :: (store 16, align 16, addrspace 1) + +... 
+ +--- + +name: store_global_v2s32 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX7-LABEL: name: store_global_v2s32 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1) + ; GFX8-LABEL: name: store_global_v2s32 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1) + ; GFX9-LABEL: name: store_global_v2s32 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1) + ; GFX10-LABEL: name: store_global_v2s32 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3 + G_STORE %1, %0 :: (store 8, align 8, addrspace 1) + +... 
+--- + +name: store_global_v3s32 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + + ; GFX7-LABEL: name: store_global_v3s32 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1) + ; GFX8-LABEL: name: store_global_v3s32 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1) + ; GFX9-LABEL: name: store_global_v3s32 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1) + ; GFX10-LABEL: name: store_global_v3s32 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX10: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1, %0 :: (store 12, align 16, addrspace 1) + +... 
+--- + +name: store_global_v4s32 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + + ; GFX7-LABEL: name: store_global_v4s32 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1) + ; GFX8-LABEL: name: store_global_v4s32 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1) + ; GFX9-LABEL: name: store_global_v4s32 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + ; GFX10-LABEL: name: store_global_v4s32 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1, %0 :: (store 16, align 16, addrspace 1) + +... 
+ +--- + +name: store_global_v2s16 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX7-LABEL: name: store_global_v2s16 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_global_v2s16 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_global_v2s16 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_global_v2s16 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s16>) = COPY $vgpr2 + G_STORE %1, %0 :: (store 4, align 4, addrspace 1) + +... 
+ +--- + +name: store_global_v4s16 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX7-LABEL: name: store_global_v4s16 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_global_v4s16 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_global_v4s16 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_global_v4s16 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3 + G_STORE %1, %0 :: (store 8, align 8, addrspace 1) + +... 
+ +--- + +name: store_global_v6s16 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + + ; GFX7-LABEL: name: store_global_v6s16 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_global_v6s16 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_global_v6s16 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_global_v6s16 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4 + ; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4 + G_STORE %1, %0 :: (store 12, align 16, addrspace 1) + +... 
+--- + +name: store_global_v8s16 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + + ; GFX7-LABEL: name: store_global_v8s16 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_global_v8s16 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_global_v8s16 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_global_v8s16 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1, %0 :: (store 16, align 16, addrspace 1) + +... 
+ +--- + +name: store_global_v2s64 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + + ; GFX7-LABEL: name: store_global_v2s64 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_global_v2s64 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_global_v2s64 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_global_v2s64 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1, %0 :: (store 16, align 16, addrspace 1) + +... 
+ +--- + +name: store_global_p1 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX7-LABEL: name: store_global_p1 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_global_p1 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_global_p1 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_global_p1 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p1) = COPY $vgpr2_vgpr3 + G_STORE %1, %0 :: (store 8, align 8, addrspace 1) + +... 
+ +--- + +name: store_global_v2p1 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + + ; GFX7-LABEL: name: store_global_v2p1 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_global_v2p1 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_global_v2p1 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_global_v2p1 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + ; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x p1>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 + G_STORE %1, %0 :: (store 16, align 16, addrspace 1) + +... 
+ +--- + +name: store_global_p3 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX7-LABEL: name: store_global_p3 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_global_p3 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_global_p3 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_global_p3 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(p3) = COPY $vgpr2 + G_STORE %1, %0 :: (store 4, align 4, addrspace 1) + +... 
+ +--- + +name: store_global_v2p3 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX7-LABEL: name: store_global_v2p3 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_global_v2p3 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_global_v2p3 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_global_v2p3 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(<2 x p3>) = COPY $vgpr2_vgpr3 + G_STORE %1, %0 :: (store 8, align 8, addrspace 1) + +... 
+--- + +name: store_atomic_global_s32 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX7-LABEL: name: store_atomic_global_s32 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_atomic_global_s32 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_atomic_global_s32 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_atomic_global_s32 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + G_STORE %1, %0 :: (store monotonic 4, align 4, addrspace 1) + +... 
+ +--- + +name: store_atomic_global_s64 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX7-LABEL: name: store_atomic_global_s64 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX8-LABEL: name: store_atomic_global_s64 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: store_atomic_global_s64 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX10-LABEL: name: store_atomic_global_s64 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 + ; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = COPY $vgpr2_vgpr3 + G_STORE %1, %0 :: (store monotonic 8, align 8, addrspace 1) + +... 
+ +--- + +name: store_global_s32_gep_2047 +legalized: true +tracksRegLiveness: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + + ; GFX7-LABEL: name: store_global_s32_gep_2047 + ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec + ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX7: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX8-LABEL: name: store_global_s32_gep_2047 + ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec + ; GFX8: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE]].sub0 + ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1 + ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec + ; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec + ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %9, %subreg.sub1 + ; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1) + ; GFX9-LABEL: name: store_global_s32_gep_2047 + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; GFX10-LABEL: name: store_global_s32_gep_2047 + ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + %0:vgpr(p1) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(s64) = G_CONSTANT i64 2047 + %3:vgpr(p1) = G_GEP %0, %2 + G_STORE %1, %3 :: (store 4, align 4, addrspace 1) + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll index 8689b650b8f25..f35b0b43d3694 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll @@ -12,15 +12,11 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) { ; GCN-NEXT: s_cbranch_scc0 BB0_2 ; GCN-NEXT: ; %bb.1: ; %mid ; GCN-NEXT: v_mov_b32_e32 v0, 0 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-NEXT: flat_store_dword v[0:1], v0 +; GCN-NEXT: global_store_dword v[0:1], v0, off ; GCN-NEXT: BB0_2: ; %bb ; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-NEXT: flat_store_dword v[0:1], v0 +; GCN-NEXT: global_store_dword v[0:1], v0, off ; GCN-NEXT: s_endpgm entry: %cond = icmp eq i32 %arg0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll index 9e19eefab3b5e..6172c9ceeab98 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll @@ -11,13 +11,11 @@ define amdgpu_kernel void @test_wave64(i32 %arg0, i64 %saved) { ; GCN-NEXT: s_cbranch_scc0 BB0_2 ; GCN-NEXT: ; %bb.1: ; %mid ; GCN-NEXT: v_mov_b32_e32 v0, 0 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN-NEXT: flat_store_dword v[0:1], v0 +; GCN-NEXT: global_store_dword v[0:1], v0, off ; GCN-NEXT: BB0_2: ; %bb ; GCN-NEXT: s_or_b64 exec, exec, s[0:1] ; GCN-NEXT: v_mov_b32_e32 v0, 0 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN-NEXT: flat_store_dword v[0:1], v0 +; GCN-NEXT: global_store_dword v[0:1], v0, off ; GCN-NEXT: s_endpgm entry: %cond = icmp eq i32 %arg0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll index 282441a2a1d74..0f259fcb89500 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll @@ -13,9 +13,7 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) { ; GCN-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 ; GCN-NEXT: s_or_b32 s0, s0, s1 ; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-NEXT: flat_store_dword v[0:1], v0 +; GCN-NEXT: global_store_dword v[0:1], v0, off ; GCN-NEXT: s_endpgm entry: %cond = icmp eq i32 %arg0, 0 From 1d58c1d9d6b88b2b5894aa79dba11328c3791fb1 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Tue, 16 Jul 2019 18:46:51 +0000 Subject: [PATCH 272/451] Fix LLDB Windows build Python version logic after r366243 llvm-svn: 366247 --- lldb/cmake/modules/LLDBConfig.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index 26a1c7a72cc08..ad83153a18898 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -139,6 +139,7 @@ function(find_python_libs_windows) PYTHONLIBS_VERSION_STRING "${python_version_str}") message(STATUS "Found Python version ${PYTHONLIBS_VERSION_STRING}") string(REGEX REPLACE "([0-9]+)[.]([0-9]+)[.][0-9]+" "python\\1\\2" PYTHONLIBS_BASE_NAME "${PYTHONLIBS_VERSION_STRING}") + set(PYTHONLIBS_VERSION_STRING "${PYTHONLIBS_VERSION_STRING}" PARENT_SCOPE) unset(python_version_str) else() message(WARNING "Unable to find ${PYTHON_INCLUDE_DIR}/patchlevel.h, Python installation is corrupt.") From 7161fb0be59e56becefb8646583cde912bcbfa5c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 19:22:21 +0000 Subject: [PATCH 273/451] AMDGPU/GlobalISel: Select private loads llvm-svn: 366248 --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 7 + .../AMDGPU/AMDGPUInstructionSelector.cpp 
| 136 +- .../Target/AMDGPU/AMDGPUInstructionSelector.h | 5 + .../GlobalISel/inst-select-load-private.mir | 1158 +++++++++++++++++ 4 files changed, 1305 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 1ccb90b2587ed..13ca1ce4b28fe 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -57,6 +57,13 @@ def gi_flat_offset_signed : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_mubuf_scratch_offset : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_mubuf_scratch_offen : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + class GISelSop2Pat < SDPatternOperator node, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 25e72bbe75abf..901a2eaa88295 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -17,10 +17,11 @@ #include "AMDGPURegisterInfo.h" #include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" -#include "SIMachineFunctionInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -34,6 +35,7 @@ #define DEBUG_TYPE "amdgpu-isel" using namespace llvm; +using namespace MIPatternMatch; #define GET_GLOBALISEL_IMPL #define AMDGPUSubtarget GCNSubtarget @@ -1594,3 +1596,135 @@ InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const { return selectFlatOffsetImpl(Root); } + +// FIXME: Implement +static bool 
signBitIsZero(const MachineOperand &Op, + const MachineRegisterInfo &MRI) { + return false; +} + +static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) { + auto PSV = PtrInfo.V.dyn_cast(); + return PSV && PSV->isStack(); +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { + MachineInstr *MI = Root.getParent(); + MachineBasicBlock *MBB = MI->getParent(); + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const SIMachineFunctionInfo *Info = MF->getInfo(); + + int64_t Offset = 0; + if (mi_match(Root.getReg(), MRI, m_ICst(Offset))) { + Register HighBits = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + + // TODO: Should this be inside the render function? The iterator seems to + // move. + BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), + HighBits) + .addImm(Offset & ~4095); + + return {{[=](MachineInstrBuilder &MIB) { // rsrc + MIB.addReg(Info->getScratchRSrcReg()); + }, + [=](MachineInstrBuilder &MIB) { // vaddr + MIB.addReg(HighBits); + }, + [=](MachineInstrBuilder &MIB) { // soffset + const MachineMemOperand *MMO = *MI->memoperands_begin(); + const MachinePointerInfo &PtrInfo = MMO->getPointerInfo(); + + Register SOffsetReg = isStackPtrRelative(PtrInfo) + ? Info->getStackPtrOffsetReg() + : Info->getScratchWaveOffsetReg(); + MIB.addReg(SOffsetReg); + }, + [=](MachineInstrBuilder &MIB) { // offset + MIB.addImm(Offset & 4095); + }}}; + } + + assert(Offset == 0); + + // Try to fold a frame index directly into the MUBUF vaddr field, and any + // offsets. 
+ Optional FI; + Register VAddr = Root.getReg(); + if (const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg())) { + if (isBaseWithConstantOffset(Root, MRI)) { + const MachineOperand &LHS = RootDef->getOperand(1); + const MachineOperand &RHS = RootDef->getOperand(2); + const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); + const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); + if (LHSDef && RHSDef) { + int64_t PossibleOffset = + RHSDef->getOperand(1).getCImm()->getSExtValue(); + if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) && + (!STI.privateMemoryResourceIsRangeChecked() || + signBitIsZero(LHS, MRI))) { + if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX) + FI = LHSDef->getOperand(1).getIndex(); + else + VAddr = LHS.getReg(); + Offset = PossibleOffset; + } + } + } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) { + FI = RootDef->getOperand(1).getIndex(); + } + } + + // If we don't know this private access is a local stack object, it needs to + // be relative to the entry point's scratch wave offset register. + // TODO: Should split large offsets that don't fit like above. + // TODO: Don't use scratch wave offset just because the offset didn't fit. + Register SOffset = FI.hasValue() ? 
Info->getStackPtrOffsetReg() + : Info->getScratchWaveOffsetReg(); + + return {{[=](MachineInstrBuilder &MIB) { // rsrc + MIB.addReg(Info->getScratchRSrcReg()); + }, + [=](MachineInstrBuilder &MIB) { // vaddr + if (FI.hasValue()) + MIB.addFrameIndex(FI.getValue()); + else + MIB.addReg(VAddr); + }, + [=](MachineInstrBuilder &MIB) { // soffset + MIB.addReg(SOffset); + }, + [=](MachineInstrBuilder &MIB) { // offset + MIB.addImm(Offset); + }}}; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectMUBUFScratchOffset( + MachineOperand &Root) const { + MachineInstr *MI = Root.getParent(); + MachineBasicBlock *MBB = MI->getParent(); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + int64_t Offset = 0; + if (!mi_match(Root.getReg(), MRI, m_ICst(Offset)) || + !SIInstrInfo::isLegalMUBUFImmOffset(Offset)) + return {}; + + const MachineFunction *MF = MBB->getParent(); + const SIMachineFunctionInfo *Info = MF->getInfo(); + const MachineMemOperand *MMO = *MI->memoperands_begin(); + const MachinePointerInfo &PtrInfo = MMO->getPointerInfo(); + + Register SOffsetReg = isStackPtrRelative(PtrInfo) + ? 
Info->getStackPtrOffsetReg() + : Info->getScratchWaveOffsetReg(); + return {{ + [=](MachineInstrBuilder &MIB) { + MIB.addReg(Info->getScratchRSrcReg()); + }, // rsrc + [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffsetReg); }, // soffset + [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset + }}; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index e30d745f5cb64..4f489ddfb23db 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -128,6 +128,11 @@ class AMDGPUInstructionSelector : public InstructionSelector { InstructionSelector::ComplexRendererFns selectFlatOffsetSigned(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectMUBUFScratchOffen(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns + selectMUBUFScratchOffset(MachineOperand &Root) const; + const SIInstrInfo &TII; const SIRegisterInfo &TRI; const AMDGPURegisterBankInfo &RBI; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir new file mode 100644 index 0000000000000..e969f457fab0d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir @@ -0,0 +1,1158 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s + +--- + +name: load_private_s32_from_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: 
$vgpr0 + + ; GFX6-LABEL: name: load_private_s32_from_4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_4 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + $vgpr0 = COPY %1 + +... + +--- + +name: load_private_s32_from_2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_s32_from_2 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_2 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 5) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_private_s32_from_1 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_s32_from_1 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_1 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_private_v2s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX6-LABEL: name: load_private_v2s32 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 5) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]] + ; GFX9-LABEL: name: load_private_v2s32 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 5) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 5) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_private_v4s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_v4s32 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 5) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]] + ; GFX9-LABEL: name: load_private_v4s32 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 5) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +--- + +name: load_private_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_s64 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-LABEL: name: load_private_s64 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 5) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_private_v2s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_v2s64 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-LABEL: name: load_private_v2s64 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +--- + +name: load_private_v2p1 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_v2p1 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX9-LABEL: name: load_private_v2p1 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... + +--- + +name: load_private_s128 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_s128 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-LABEL: name: load_private_s128 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +--- + +name: load_private_p3_from_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_p3_from_4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX6: $vgpr0 = COPY [[LOAD]](p3) + ; GFX9-LABEL: name: load_private_p3_from_4 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: $vgpr0 = COPY [[LOAD]](p3) + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + $vgpr0 = COPY %1 + +... + +--- + +name: load_private_p5_from_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_p5_from_4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p5) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX6: $vgpr0 = COPY [[LOAD]](p5) + ; GFX9-LABEL: name: load_private_p5_from_4 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p5) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: $vgpr0 = COPY [[LOAD]](p5) + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(p5) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_private_p999_from_8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_p999_from_8 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX9-LABEL: name: load_private_p999_from_8 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 5) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_private_v2p3 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_v2p3 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX9-LABEL: name: load_private_v2p3 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 5) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_private_v2s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_v2s16 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX6: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-LABEL: name: load_private_v2s16 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p5) :: (load 4, addrspace 5) + ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + $vgpr0 = COPY %1 + +... + +--- + +name: load_private_v4s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_v4s16 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-LABEL: name: load_private_v4s16 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p5) :: (load 8, addrspace 5) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 5) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +# --- + +# name: load_private_v6s16 +# legalized: true +# regBankSelected: true +# tracksRegLiveness: true +# machineFunctionInfo: +# scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 +# scratchWaveOffsetReg: $sgpr4 +# stackPtrOffsetReg: $sgpr32 + +# body: | +# bb.0: +# liveins: $vgpr0 + +# %0:vgpr(p5) = COPY $vgpr0 +# %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 5) +# $vgpr0_vgpr1_vgpr2 = COPY %1 + +# ... + +--- + +name: load_private_v8s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_v8s16 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX9-LABEL: name: load_private_v8s16 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p5) :: (load 16, align 4, addrspace 5) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 5) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +################################################################################ +### Stress addressing modes +################################################################################ + +--- + +name: load_private_s32_from_1_gep_2047 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_s32_from_1_gep_2047 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_1_gep_2047 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 2047 + %2:vgpr(p5) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_private_s32_from_1_gep_2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_s32_from_1_gep_2048 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_1_gep_2048 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 2048, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 2048 + %2:vgpr(p5) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_private_s32_from_1_gep_m2047 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_s32_from_1_gep_m2047 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2047 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 -2047 + %2:vgpr(p5) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_private_s32_from_1_gep_m2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_s32_from_1_gep_m2048 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_1_gep_m2048 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 -2048 + %2:vgpr(p5) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_private_s32_from_1_gep_4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_s32_from_1_gep_4095 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_1_gep_4095 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 4095 + %2:vgpr(p5) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_private_s32_from_1_gep_4096 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_s32_from_1_gep_4096 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_1_gep_4096 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 4096 + %2:vgpr(p5) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_private_s32_from_1_gep_m4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_s32_from_1_gep_m4095 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4095 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 -4095 + %2:vgpr(p5) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_private_s32_from_1_gep_m4096 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_s32_from_1_gep_m4096 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_1_gep_m4096 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 -4096 + %2:vgpr(p5) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_private_s32_from_1_gep_8191 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_s32_from_1_gep_8191 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_1_gep_8191 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 8191 + %2:vgpr(p5) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_private_s32_from_1_gep_8192 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_s32_from_1_gep_8192 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_1_gep_8192 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 8192 + %2:vgpr(p5) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_private_s32_from_1_gep_m8191 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_s32_from_1_gep_m8191 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8191 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 -8191 + %2:vgpr(p5) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_private_s32_from_1_gep_m8192 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_private_s32_from_1_gep_m8192 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_1_gep_m8192 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 -8192 + %2:vgpr(p5) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_private_s32_from_4_constant_0 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + + ; GFX6-LABEL: name: load_private_s32_from_4_constant_0 + ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX9-LABEL: name: load_private_s32_from_4_constant_0 + ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + %0:vgpr(p5) = G_CONSTANT i32 0 + %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + $vgpr0 = COPY %1 + +... + +--- + +name: load_private_s32_from_4_constant_sgpr_16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + + ; GFX6-LABEL: name: load_private_s32_from_4_constant_sgpr_16 + ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX9-LABEL: name: load_private_s32_from_4_constant_sgpr_16 + ; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + %0:sgpr(p5) = G_CONSTANT i32 16 + %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_private_s32_from_1_constant_4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + + ; GFX6-LABEL: name: load_private_s32_from_1_constant_4095 + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]] + ; GFX9-LABEL: name: load_private_s32_from_1_constant_4095 + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]] + %0:vgpr(p5) = G_CONSTANT i32 4095 + %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %1 + +... + +--- + +name: load_private_s32_from_1_constant_4096 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + + ; GFX6-LABEL: name: load_private_s32_from_1_constant_4096 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_1_constant_4096 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + 
%0:vgpr(p5) = G_CONSTANT i32 4096 + %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %1 + +... + +--- + +name: load_private_s32_from_fi +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, size: 4, alignment: 4 } + +body: | + bb.0: + + ; GFX6-LABEL: name: load_private_s32_from_fi + ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_fi + ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + %0:vgpr(p5) = G_FRAME_INDEX %stack.0 + %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_private_s32_from_1_fi_offset_4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, size: 4096, alignment: 4 } + +body: | + bb.0: + + ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4095 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4095 + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + %0:vgpr(p5) = G_FRAME_INDEX %stack.0 + %1:vgpr(s32) = G_CONSTANT i32 4095 + %2:vgpr(p5) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_private_s32_from_1_fi_offset_4096 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, size: 8192, alignment: 4 } + +body: | + bb.0: + + ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4096 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4096 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + %0:vgpr(p5) = G_FRAME_INDEX %stack.0 + %1:vgpr(s32) = G_CONSTANT i32 4096 + %2:vgpr(p5) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %3 + +... 
From 2d10407719683dcfcab0f2b7f33d92cbedd9b876 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 19:27:44 +0000 Subject: [PATCH 274/451] AMDGPU/GlobalISel: Fix selection of private stores llvm-svn: 366249 --- llvm/lib/Target/AMDGPU/BUFInstructions.td | 13 +- .../GlobalISel/inst-select-store-private.mir | 280 ++++++++++++++++++ 2 files changed, 287 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 5b6c8a7ed96fc..62a19d848af2f 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1568,17 +1568,18 @@ defm : MUBUFStore_Pattern ; multiclass MUBUFScratchStorePat { + ValueType vt, PatFrag st, + RegisterClass rc = VGPR_32> { def : GCNPat < (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset)), - (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0) + (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0) >; def : GCNPat < (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)), - (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0, 0) + (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0) >; } @@ -1587,9 +1588,9 @@ defm : MUBUFScratchStorePat ; defm : MUBUFScratchStorePat ; defm : MUBUFScratchStorePat ; -defm : MUBUFScratchStorePat ; -defm : MUBUFScratchStorePat ; -defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; let OtherPredicates = [D16PreservesUnusedBits] in { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir new file mode 100644 index 0000000000000..822a1412d168c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir @@ -0,0 +1,280 
@@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s + +--- + +name: store_private_s32_to_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_s32_to_4 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_4 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + +... 
+ +--- + +name: store_private_s32_to_2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_s32_to_2 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_2 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 2, align 2, addrspace 5) + +... 
+ +--- + +name: store_private_s32_to_1 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_s32_to_1 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_1 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 1, align 1, addrspace 5) + +... + +--- + +name: store_private_v2s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_v2s16 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX6: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p5) :: (store 4, addrspace 5) + ; GFX9-LABEL: name: store_private_v2s16 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX9: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p5) :: (store 4, addrspace 5) + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + +... 
+ +--- + +name: store_private_p3 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_p3 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX6: G_STORE [[COPY]](p3), [[COPY1]](p5) :: (store 4, addrspace 5) + ; GFX9-LABEL: name: store_private_p3 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX9: G_STORE [[COPY]](p3), [[COPY1]](p5) :: (store 4, addrspace 5) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + +... + +--- + +name: store_private_p5 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_p5 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX6: G_STORE [[COPY]](p5), [[COPY1]](p5) :: (store 4, addrspace 5) + ; GFX9-LABEL: name: store_private_p5 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX9: G_STORE [[COPY]](p5), [[COPY1]](p5) :: (store 4, addrspace 5) + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + +... 
+ +--- + +name: store_private_s32_to_1_fi_offset_4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, size: 4096, alignment: 4 } + +body: | + bb.0: + + ; GFX6-LABEL: name: store_private_s32_to_1_fi_offset_4095 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec + ; GFX6: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_2]], %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_1_fi_offset_4095 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + %0:vgpr(p5) = G_FRAME_INDEX %stack.0 + %1:vgpr(s32) = G_CONSTANT i32 4095 + %2:vgpr(p5) = G_GEP %0, %1 + %3:vgpr(s32) = G_CONSTANT i32 0 + G_STORE %3, %2 :: (store 1, align 1, addrspace 5) + +... 
+ +--- + +name: store_private_s32_to_1_constant_4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, size: 4096, alignment: 4 } + +body: | + bb.0: + + ; GFX6-LABEL: name: store_private_s32_to_1_constant_4095 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_1_constant_4095 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + %0:vgpr(p5) = G_CONSTANT i32 4095 + %1:vgpr(s32) = G_CONSTANT i32 0 + G_STORE %1, %0 :: (store 1, align 1, addrspace 5) + +... 
+ +--- + +name: store_private_s32_to_1_constant_4096 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, size: 4096, alignment: 4 } + +body: | + bb.0: + + ; GFX6-LABEL: name: store_private_s32_to_1_constant_4096 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_1_constant_4096 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + %0:vgpr(p5) = G_CONSTANT i32 4096 + %1:vgpr(s32) = G_CONSTANT i32 0 + G_STORE %1, %0 :: (store 1, align 1, addrspace 5) + +... 
From afdf6b3c37e83b78833310be7376ee19f066c554 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 19:44:14 +0000 Subject: [PATCH 275/451] AMDGPU: Fix some missing GCCBuiltin declarations llvm-svn: 366250 --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 56 ++++++++++++++---------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index bad4216173d0a..4c67cdea4d580 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -296,29 +296,33 @@ def int_amdgcn_fract : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_cvt_pkrtz : Intrinsic< - [llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable] +def int_amdgcn_cvt_pkrtz : GCCBuiltin<"__builtin_amdgcn_cvt_pkrtz">, + Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_cvt_pknorm_i16 : Intrinsic< - [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable] +def int_amdgcn_cvt_pknorm_i16 : + GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_i16">, + Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_cvt_pknorm_u16 : Intrinsic< - [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable] +def int_amdgcn_cvt_pknorm_u16 : + GCCBuiltin<"__builtin_amdgcn_cvt_pknorm_u16">, + Intrinsic<[llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_cvt_pk_i16 : Intrinsic< +def int_amdgcn_cvt_pk_i16 : + GCCBuiltin<"__builtin_amdgcn_cvt_pk_i16">, + Intrinsic< [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_cvt_pk_u16 : Intrinsic< - [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable] +def int_amdgcn_cvt_pk_u16 : 
GCCBuiltin<"__builtin_amdgcn_cvt_pk_u16">, + Intrinsic<[llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_class : Intrinsic< @@ -1245,14 +1249,17 @@ def int_amdgcn_ds_swizzle : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent, ImmArg<1>]>; -def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty], - [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable] +def int_amdgcn_ubfe : + GCCBuiltin<"__builtin_amdgcn_ubfe">, + Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty], - [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable] +def int_amdgcn_sbfe : GCCBuiltin<"__builtin_amdgcn_sbfe">, + Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable] >; def int_amdgcn_lerp : @@ -1340,13 +1347,14 @@ def int_amdgcn_writelane : [IntrNoMem, IntrConvergent] >; -def int_amdgcn_alignbit : Intrinsic<[llvm_i32_ty], +def int_amdgcn_alignbit : + GCCBuiltin<"__builtin_amdgcn_alignbit">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_alignbyte : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], +def int_amdgcn_alignbyte : GCCBuiltin<"__builtin_amdgcn_alignbyte">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; @@ -1515,13 +1523,13 @@ def int_amdgcn_ds_bpermute : //===----------------------------------------------------------------------===// // llvm.amdgcn.permlane16 -def int_amdgcn_permlane16 : +def int_amdgcn_permlane16 : GCCBuiltin<"__builtin_amdgcn_permlane16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent, ImmArg<4>, ImmArg<5>]>; // llvm.amdgcn.permlanex16 -def int_amdgcn_permlanex16 : +def 
int_amdgcn_permlanex16 : GCCBuiltin<"__builtin_amdgcn_permlanex16">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent, ImmArg<4>, ImmArg<5>]>; From ccf22ef94c4a94f7598f51a70445fdec8f8a1bc8 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Tue, 16 Jul 2019 19:59:08 +0000 Subject: [PATCH 276/451] Fix -Wreturn-type warning. NFC. llvm-svn: 366251 --- llvm/lib/Remarks/RemarkParser.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Remarks/RemarkParser.cpp b/llvm/lib/Remarks/RemarkParser.cpp index 46130d28f72c5..f67464073bd16 100644 --- a/llvm/lib/Remarks/RemarkParser.cpp +++ b/llvm/lib/Remarks/RemarkParser.cpp @@ -57,6 +57,7 @@ llvm::remarks::createRemarkParser(Format ParserFormat, StringRef Buf, return createStringError(std::make_error_code(std::errc::invalid_argument), "Unknown remark parser format."); } + llvm_unreachable("unknown format"); } // Wrapper that holds the state needed to interact with the C API. From 6e0fa292c22cde726b4ddb53cf1fa8c649384030 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 16 Jul 2019 20:06:00 +0000 Subject: [PATCH 277/451] [AMDGPU] Change register type for v32 vectors When it is AReg_1024 this results in unnecessary copying into AGPRs of a 32 element vectors even though they are not intended for an mfma instruction. 
Differential Revision: https://reviews.llvm.org/D64815 llvm-svn: 366252 --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 4 ++-- llvm/test/CodeGen/AMDGPU/v1024.ll | 29 +++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/v1024.ll diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index a3226577cd02b..db0782e2bf3e5 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -152,8 +152,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, } if (Subtarget->hasMAIInsts()) { - addRegisterClass(MVT::v32i32, &AMDGPU::AReg_1024RegClass); - addRegisterClass(MVT::v32f32, &AMDGPU::AReg_1024RegClass); + addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass); + addRegisterClass(MVT::v32f32, &AMDGPU::VReg_1024RegClass); } computeRegisterProperties(Subtarget->getRegisterInfo()); diff --git a/llvm/test/CodeGen/AMDGPU/v1024.ll b/llvm/test/CodeGen/AMDGPU/v1024.ll new file mode 100644 index 0000000000000..a5e0454a36344 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/v1024.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; Check that we do not use AGPRs for v32i32 type + +; GCN-LABEL: {{^}}test_v1024: +; GCN-NOT: v_accvgpr +; GCN-COUNT-32: v_mov_b32_e32 +; GCN-NOT: v_accvgpr +define amdgpu_kernel void @test_v1024() { +entry: + %alloca = alloca <32 x i32>, align 16, addrspace(5) + %cast = bitcast <32 x i32> addrspace(5)* %alloca to i8 addrspace(5)* + br i1 undef, label %if.then.i.i, label %if.else.i + +if.then.i.i: ; preds = %entry + call void @llvm.memcpy.p5i8.p5i8.i64(i8 addrspace(5)* align 16 %cast, i8 addrspace(5)* align 4 undef, i64 128, i1 false) + br label %if.then.i62.i + +if.else.i: ; preds = %entry + br label %if.then.i62.i + +if.then.i62.i: ; preds = %if.else.i, %if.then.i.i + call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* align 4 
undef, i8 addrspace(5)* align 16 %cast, i64 128, i1 false) + ret void +} + +declare void @llvm.memcpy.p5i8.p5i8.i64(i8 addrspace(5)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i1 immarg) + +declare void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i1 immarg) From b157dcacb5b96fd64900906911832a8ac3bb189e Mon Sep 17 00:00:00 2001 From: Mitch Phillips Date: Tue, 16 Jul 2019 20:06:17 +0000 Subject: [PATCH 278/451] [GWP-ASan] Add thread ID to PRNG seed. Summary: Adds thread ID to PRNG seed for increased entropy. In particular, this allows multiple runs in quick succession that will have different PRNG seeds, allowing for better demos/testing. Reviewers: kcc Reviewed By: kcc Subscribers: kubamracek, #sanitizers, llvm-commits Tags: #sanitizers, #llvm Differential Revision: https://reviews.llvm.org/D64453 llvm-svn: 366253 --- compiler-rt/lib/gwp_asan/guarded_pool_allocator.h | 8 ++++---- compiler-rt/lib/gwp_asan/random.cpp | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/compiler-rt/lib/gwp_asan/guarded_pool_allocator.h b/compiler-rt/lib/gwp_asan/guarded_pool_allocator.h index 400d50c0b0b70..28a41110faede 100644 --- a/compiler-rt/lib/gwp_asan/guarded_pool_allocator.h +++ b/compiler-rt/lib/gwp_asan/guarded_pool_allocator.h @@ -132,6 +132,10 @@ class GuardedPoolAllocator { // occur. static void reportError(uintptr_t AccessPtr, Error E = Error::UNKNOWN); + // Get the current thread ID, or kInvalidThreadID if failure. Note: This + // implementation is platform-specific. + static uint64_t getThreadID(); + private: static constexpr size_t kInvalidSlotID = SIZE_MAX; @@ -146,10 +150,6 @@ class GuardedPoolAllocator { void markReadWrite(void *Ptr, size_t Size) const; void markInaccessible(void *Ptr, size_t Size) const; - // Get the current thread ID, or kInvalidThreadID if failure. Note: This - // implementation is platform-specific. 
- static uint64_t getThreadID(); - // Get the page size from the platform-specific implementation. Only needs to // be called once, and the result should be cached in PageSize in this class. static size_t getPlatformPageSize(); diff --git a/compiler-rt/lib/gwp_asan/random.cpp b/compiler-rt/lib/gwp_asan/random.cpp index 67f4a22ef9525..90493da7e0385 100644 --- a/compiler-rt/lib/gwp_asan/random.cpp +++ b/compiler-rt/lib/gwp_asan/random.cpp @@ -7,12 +7,14 @@ //===----------------------------------------------------------------------===// #include "gwp_asan/random.h" +#include "gwp_asan/guarded_pool_allocator.h" #include namespace gwp_asan { uint32_t getRandomUnsigned32() { - thread_local uint32_t RandomState = static_cast(time(nullptr)); + thread_local uint32_t RandomState = + time(nullptr) + GuardedPoolAllocator::getThreadID(); RandomState ^= RandomState << 13; RandomState ^= RandomState >> 17; RandomState ^= RandomState << 5; From 1b69fd275d589f48ce63bea73e311b7ef89c99ba Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 20:15:30 +0000 Subject: [PATCH 279/451] AMDGPU/GlobalISel: Select G_SHL I think this manages to not break the DAG handling with the divergent predicates because the standalone divergent patterns end up with a higher priority than the pattern on the instruction definition. The 16-bit versions don't work yet. 
llvm-svn: 366254 --- llvm/lib/Target/AMDGPU/SOPInstructions.td | 4 +- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 2 +- .../AMDGPU/GlobalISel/inst-select-shl.mir | 327 ++++++++++++++++++ .../AMDGPU/GlobalISel/inst-select-shl.s16.mir | 203 +++++++++++ .../GlobalISel/inst-select-shl.v2s16.mir | 168 +++++++++ 6 files changed, 702 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index f46bee126043d..b762b84d9ca7d 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -511,10 +511,10 @@ let AddedComplexity = 1 in { let Defs = [SCC] in { // TODO: b64 versions require VOP3 change since v_lshlrev_b64 is VOP3 def S_LSHL_B32 : SOP2_32 <"s_lshl_b32", - [(set i32:$sdst, (UniformBinFrag i32:$src0, i32:$src1))] + [(set SReg_32:$sdst, (shl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] >; def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64", - [(set i64:$sdst, (UniformBinFrag i64:$src0, i32:$src1))] + [(set SReg_64:$sdst, (shl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] >; def S_LSHR_B32 : SOP2_32 <"s_lshr_b32", [(set i32:$sdst, (UniformBinFrag i32:$src0, i32:$src1))] diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index fa9b913c2de2a..260e8a498fb7b 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -474,7 +474,7 @@ defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN, umin>; defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN, umax>; defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32">; defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", 
VOP_I32_I32_I32, null_frag, "v_ashr_i32">; -defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, null_frag, "v_lshl_b32">; +defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, lshl_rev, "v_lshl_b32">; defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN, and>; defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN, or>; defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN, xor>; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index f7699e61d59ef..6ebb9557c3c0f 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -393,7 +393,7 @@ def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile>; } // End SubtargetPredicate = isGFX6GFX7GFX10, Predicates = [isGFX6GFX7GFX10] let SubtargetPredicate = isGFX8Plus in { -def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile>; +def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile, lshl_rev>; def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile>; def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile>; } // End SubtargetPredicate = isGFX8Plus diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir new file mode 100644 index 0000000000000..34c6c781f64e5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.mir @@ -0,0 +1,327 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | 
FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s + +--- +name: shl_s32_ss +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; GFX6-LABEL: name: shl_s32_ss + ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX6: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: S_ENDPGM 0, implicit [[S_LSHL_B32_]] + ; GFX7-LABEL: name: shl_s32_ss + ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX7: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7: S_ENDPGM 0, implicit [[S_LSHL_B32_]] + ; GFX8-LABEL: name: shl_s32_ss + ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX8: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8: S_ENDPGM 0, implicit [[S_LSHL_B32_]] + ; GFX9-LABEL: name: shl_s32_ss + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B32_]] + ; GFX10-LABEL: name: shl_s32_ss + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX10: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10: S_ENDPGM 0, implicit [[S_LSHL_B32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s32) = G_SHL %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: shl_s32_sv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: shl_s32_sv + ; GFX6: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] + ; GFX7-LABEL: name: shl_s32_sv + ; GFX7: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] + ; GFX8-LABEL: name: shl_s32_sv + ; GFX8: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] + ; GFX9-LABEL: name: shl_s32_sv + ; GFX9: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] + ; GFX10-LABEL: name: shl_s32_sv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s32) = G_SHL %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: shl_s32_vs +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: shl_s32_vs + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX6: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] + ; GFX7-LABEL: name: shl_s32_vs + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX7: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] + ; GFX8-LABEL: name: shl_s32_vs + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] + ; GFX9-LABEL: name: shl_s32_vs + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] + ; GFX10-LABEL: name: shl_s32_vs + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s32) = G_SHL %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: shl_s32_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; GFX6-LABEL: name: shl_s32_vv + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] + ; GFX7-LABEL: name: shl_s32_vv + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] + ; GFX8-LABEL: name: shl_s32_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] + ; GFX9-LABEL: name: shl_s32_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] + ; GFX10-LABEL: name: shl_s32_vv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = G_SHL %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: shl_s64_ss +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + ; GFX6-LABEL: name: shl_s64_ss + ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: S_ENDPGM 0, implicit [[S_LSHL_B64_]] + ; GFX7-LABEL: name: shl_s64_ss + ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7: S_ENDPGM 0, implicit [[S_LSHL_B64_]] + ; GFX8-LABEL: name: shl_s64_ss + ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8: S_ENDPGM 0, implicit [[S_LSHL_B64_]] + ; GFX9-LABEL: name: shl_s64_ss + ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX9: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B64_]] + ; GFX10-LABEL: name: shl_s64_ss + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX10: [[S_LSHL_B64_:%[0-9]+]]:sreg_64 = S_LSHL_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10: S_ENDPGM 0, implicit [[S_LSHL_B64_]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s32) = COPY $sgpr2 + %2:sgpr(s64) = G_SHL %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: shl_s64_sv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX6-LABEL: name: shl_s64_sv + ; GFX6: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] + ; GFX7-LABEL: name: shl_s64_sv + ; GFX7: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] + ; GFX8-LABEL: name: shl_s64_sv + ; GFX8: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] + ; GFX9-LABEL: name: shl_s64_sv + ; GFX9: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] + ; GFX10-LABEL: name: shl_s64_sv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s64) = G_SHL %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: shl_s64_vs +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0_vgpr1 + ; GFX6-LABEL: name: shl_s64_vs + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX6: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] + ; GFX7-LABEL: name: shl_s64_vs + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX7: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] + ; GFX8-LABEL: name: shl_s64_vs + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] + ; GFX9-LABEL: name: shl_s64_vs + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] + ; GFX10-LABEL: name: shl_s64_vs + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s64) = G_SHL %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: shl_s64_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6-LABEL: name: shl_s64_vv + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] + ; GFX7-LABEL: name: shl_s64_vv + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] + ; GFX8-LABEL: name: shl_s64_vv + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] + ; GFX9-LABEL: name: shl_s64_vv + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] + ; GFX10-LABEL: name: shl_s64_vv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: [[V_LSHLREV_B64_:%[0-9]+]]:vreg_64 = V_LSHLREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHLREV_B64_]] + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(s64) = G_SHL %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir new file mode 100644 index 0000000000000..d41cdee39040f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir @@ -0,0 +1,203 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py + +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX8 %s +# RUN: FileCheck -check-prefixes=ERR-GFX8,ERR %s < %t + +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX9 %s +# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t + +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s +# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t + +# ERR-NOT: remark +# ERR-GFX8: remark: :0:0: cannot select: %3:sgpr(s16) = G_SHL %2:sgpr, %1:sgpr(s32) (in function: shl_s16_ss) +# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:sgpr, %1:vgpr(s32) (in function: shl_s16_sv) +# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:sgpr(s32) (in function: shl_s16_vs) +# ERR-GFX8-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:vgpr(s32) (in function: shl_s16_vv) + +# ERR-GFX910: remark: :0:0: cannot select: %3:sgpr(s16) = G_SHL %2:sgpr, %1:sgpr(s32) (in function: shl_s16_ss) +# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:sgpr, %1:vgpr(s32) (in function: shl_s16_sv) +# ERR-GFX910-NEXT: remark: :0:0: cannot select: %3:vgpr(s16) = G_SHL %2:vgpr, %1:sgpr(s32) (in function: shl_s16_vs) +# ERR-GFX910-NEXT: remark: :0:0: cannot select: 
%3:vgpr(s16) = G_SHL %2:vgpr, %1:vgpr(s32) (in function: shl_s16_vv) + +# ERR-NOT: remark + +--- +name: shl_s16_ss +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; GFX6-LABEL: name: shl_s16_ss + ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX6: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX7-LABEL: name: shl_s16_ss + ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX7: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX7: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX8-LABEL: name: shl_s16_ss + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX9-LABEL: name: shl_s16_ss + ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX10-LABEL: name: shl_s16_ss + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[SHL:%[0-9]+]]:sgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[SHL]](s16) + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s16) = G_TRUNC %0 + %3:sgpr(s16) = G_SHL %2, %1 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: shl_s16_sv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: shl_s16_sv + ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX6: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX7-LABEL: name: shl_s16_sv + ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX7: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX7: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX8-LABEL: name: shl_s16_sv + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX9-LABEL: name: shl_s16_sv + ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX10-LABEL: name: shl_s16_sv + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[SHL]](s16) + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_SHL %2, %1 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: shl_s16_vs +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: shl_s16_vs + ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX6: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX7-LABEL: name: shl_s16_vs + ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX7: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX7: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX8-LABEL: name: shl_s16_vs + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX9-LABEL: name: shl_s16_vs + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX10-LABEL: name: shl_s16_vs + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[SHL]](s16) + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_SHL %2, %1 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: shl_s16_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; GFX6-LABEL: name: shl_s16_vv + ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX6: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX7-LABEL: name: shl_s16_vv + ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX7: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX7: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX8-LABEL: name: shl_s16_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX9-LABEL: name: shl_s16_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[SHL]](s16) + ; GFX10-LABEL: name: shl_s16_vv + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[SHL:%[0-9]+]]:vgpr(s16) = G_SHL [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[SHL]](s16) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_SHL %2, %1 + S_ENDPGM 0, implicit %3 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir new file mode 100644 index 0000000000000..ad9b078bcd6fd --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.v2s16.mir @@ -0,0 +1,168 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX9 %s +# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t + +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s +# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t + +# ERR-NOT: remark +# ERR-GFX910: remark: <unknown>:0:0: cannot select: %2:sgpr(<2 x s16>) = G_SHL %0:sgpr, %1:sgpr(<2 x s16>) (in function: shl_v2s16_ss) +# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_SHL %0:sgpr, %1:vgpr(<2 x s16>) (in function: shl_v2s16_sv) +# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_SHL %0:vgpr, %1:sgpr(<2 x s16>) (in function: shl_v2s16_vs) +# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_SHL %0:vgpr, %1:vgpr(<2 x s16>) (in function: shl_v2s16_vv) +# ERR-NOT: remark + +--- +name: shl_v2s16_ss +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; GFX6-LABEL: name: shl_v2s16_ss + ; GFX6: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; GFX6: [[SHL:%[0-9]+]]:sgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX6: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + ; GFX7-LABEL: name: shl_v2s16_ss + ; GFX7: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; GFX7: [[SHL:%[0-9]+]]:sgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX7: S_ENDPGM 0, 
implicit [[SHL]](<2 x s16>) + ; GFX8-LABEL: name: shl_v2s16_ss + ; GFX8: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; GFX8: [[SHL:%[0-9]+]]:sgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX8: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + ; GFX9-LABEL: name: shl_v2s16_ss + ; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; GFX9: [[SHL:%[0-9]+]]:sgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX9: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + ; GFX10-LABEL: name: shl_v2s16_ss + ; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; GFX10: [[SHL:%[0-9]+]]:sgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX10: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_SHL %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: shl_v2s16_sv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: shl_v2s16_sv + ; GFX6: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX6: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + ; GFX7-LABEL: name: shl_v2s16_sv + ; GFX7: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX7: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX7: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + ; GFX8-LABEL: name: shl_v2s16_sv + ; GFX8: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX8: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + ; GFX9-LABEL: name: shl_v2s16_sv + ; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX9: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + ; GFX10-LABEL: name: shl_v2s16_sv + ; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX10: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX10: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr0 + %2:vgpr(<2 x s16>) = G_SHL %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: shl_v2s16_vs +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: shl_v2s16_vs + ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX6: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX6: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + ; GFX7-LABEL: name: shl_v2s16_vs + ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX7: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX7: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + ; GFX8-LABEL: name: shl_v2s16_vs + ; GFX8: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX8: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX8: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + ; GFX9-LABEL: name: shl_v2s16_vs + ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX9: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX9: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + ; GFX10-LABEL: name: shl_v2s16_vs + ; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX10: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX10: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr0 + %2:vgpr(<2 x s16>) = G_SHL %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: shl_v2s16_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; GFX6-LABEL: name: shl_v2s16_vv + ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; GFX6: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX6: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + ; GFX7-LABEL: name: shl_v2s16_vv + ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; GFX7: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX7: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + ; GFX8-LABEL: name: shl_v2s16_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX8: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + ; GFX9-LABEL: name: shl_v2s16_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX9: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + ; GFX10-LABEL: name: shl_v2s16_vv + ; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; GFX10: [[SHL:%[0-9]+]]:vgpr(<2 x s16>) = G_SHL [[COPY]], [[COPY1]](<2 x s16>) + ; GFX10: S_ENDPGM 0, implicit [[SHL]](<2 x s16>) + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_SHL %0, %1 + S_ENDPGM 0, implicit %2 +... From 65e34a3143c48aff8a4200964abc195461f473ac Mon Sep 17 00:00:00 2001 From: Jinsong Ji Date: Tue, 16 Jul 2019 20:24:33 +0000 Subject: [PATCH 280/451] [PowerPC][HTM] Fix impossible reg-to-reg copy assert with ttest builtin Summary: This is exposed by our internal testing. 
The reduced testcase will assert with "Impossible reg-to-reg copy" We can't use COPY to do 32-bit to 64-bit conversion. Reviewers: kbarton, hfinkel, nemanjai Reviewed By: hfinkel Subscribers: hiraditya, MaskRay, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64499 llvm-svn: 366255 --- llvm/lib/Target/PowerPC/PPCInstrHTM.td | 4 +++- llvm/test/CodeGen/PowerPC/htm-ttest.ll | 30 ++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/PowerPC/htm-ttest.ll diff --git a/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/llvm/lib/Target/PowerPC/PPCInstrHTM.td index 1af65fbb7d3bb..104b57a70a2e5 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrHTM.td +++ b/llvm/lib/Target/PowerPC/PPCInstrHTM.td @@ -164,6 +164,8 @@ def : Pat<(int_ppc_tsuspend), (TSR 0)>; def : Pat<(i64 (int_ppc_ttest)), - (RLDICL (i64 (COPY (TABORTWCI 0, (LI 0), 0))), 36, 28)>; + (RLDICL (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (TABORTWCI 0, (LI 0), 0), sub_32)), + 36, 28)>; } // [HasHTM] diff --git a/llvm/test/CodeGen/PowerPC/htm-ttest.ll b/llvm/test/CodeGen/PowerPC/htm-ttest.ll new file mode 100644 index 0000000000000..bd9db165f09bf --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/htm-ttest.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs \ +; RUN: -mcpu=pwr8 -mattr=+htm < %s | FileCheck %s + +define dso_local void @main() #0 { +; CHECK-LABEL: main: +; CHECK: # %bb.0: +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: tabortwci. 0, 3, 0 +; CHECK-NEXT: mfocrf 3, 128 +; CHECK-NEXT: rldicl 3, 3, 36, 28 +; CHECK-NEXT: rlwinm. 
3, 3, 31, 30, 31 +; CHECK-NEXT: beqlr+ 0 +; CHECK-NEXT: # %bb.1: + %1 = call i64 @llvm.ppc.ttest() #1 + %2 = lshr i64 %1, 1 + %3 = and i64 %2, 3 + %4 = icmp eq i64 %3, 0 + br i1 %4, label %5, label %6 + +5: ; preds = %0 + ret void + +6: ; preds = %0 + unreachable +} + +; Function Attrs: nounwind +declare i64 @llvm.ppc.ttest() #1 + From e5b28b98e997f9b19ace6bcb95f6298b15b82cd4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 20:25:43 +0000 Subject: [PATCH 281/451] AMDGPU/GlobalISel: Select G_LSHR llvm-svn: 366256 --- llvm/lib/Target/AMDGPU/SOPInstructions.td | 4 +- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 2 +- .../AMDGPU/GlobalISel/inst-select-lshr.mir | 327 ++++++++++++++++++ .../GlobalISel/inst-select-lshr.s16.mir | 203 +++++++++++ .../GlobalISel/inst-select-lshr.v2s16.mir | 169 +++++++++ 6 files changed, 703 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index b762b84d9ca7d..61a720bdaadf1 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -517,10 +517,10 @@ def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64", [(set SReg_64:$sdst, (shl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] >; def S_LSHR_B32 : SOP2_32 <"s_lshr_b32", - [(set i32:$sdst, (UniformBinFrag<srl> i32:$src0, i32:$src1))] + [(set SReg_32:$sdst, (srl (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] >; def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64", - [(set i64:$sdst, (UniformBinFrag<srl> i64:$src0, i32:$src1))] + [(set SReg_64:$sdst, (srl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] >; def S_ASHR_I32 : SOP2_32 <"s_ashr_i32", [(set i32:$sdst, (UniformBinFrag<sra> i32:$src0, i32:$src1))] diff 
--git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 260e8a498fb7b..9a5080f12684e 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -472,7 +472,7 @@ defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>; defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>; defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>; defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>; -defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32">; +defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, lshr_rev, "v_lshr_b32">; defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">; defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, lshl_rev, "v_lshl_b32">; defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 6ebb9557c3c0f..26beb347fc43f 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -394,7 +394,7 @@ def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>; let SubtargetPredicate = isGFX8Plus in { def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshl_rev>; -def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>>; +def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshr_rev>; def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>; } // End SubtargetPredicate = isGFX8Plus } // End SchedRW = [Write64Bit] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir new file mode 100644 index 0000000000000..9e80c266c49b3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.mir @@ -0,0 +1,327 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc 
-march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s + +--- +name: lshr_s32_ss +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; GFX6-LABEL: name: lshr_s32_ss + ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX6: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: S_ENDPGM 0, implicit [[S_LSHR_B32_]] + ; GFX7-LABEL: name: lshr_s32_ss + ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX7: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7: S_ENDPGM 0, implicit [[S_LSHR_B32_]] + ; GFX8-LABEL: name: lshr_s32_ss + ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX8: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8: S_ENDPGM 0, implicit [[S_LSHR_B32_]] + ; GFX9-LABEL: name: lshr_s32_ss + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9: S_ENDPGM 0, implicit [[S_LSHR_B32_]] + ; GFX10-LABEL: name: lshr_s32_ss + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX10: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10: S_ENDPGM 0, implicit [[S_LSHR_B32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s32) = G_LSHR %0, %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: lshr_s32_sv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: lshr_s32_sv + ; GFX6: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] + ; GFX7-LABEL: name: lshr_s32_sv + ; GFX7: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] + ; GFX8-LABEL: name: lshr_s32_sv + ; GFX8: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] + ; GFX9-LABEL: name: lshr_s32_sv + ; GFX9: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] + ; GFX10-LABEL: name: lshr_s32_sv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + 
%2:vgpr(s32) = G_LSHR %0, %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: lshr_s32_vs +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: lshr_s32_vs + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX6: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] + ; GFX7-LABEL: name: lshr_s32_vs + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX7: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] + ; GFX8-LABEL: name: lshr_s32_vs + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] + ; GFX9-LABEL: name: lshr_s32_vs + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] + ; GFX10-LABEL: name: lshr_s32_vs + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s32) = G_LSHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: lshr_s32_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; GFX6-LABEL: name: lshr_s32_vv + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] + ; GFX7-LABEL: name: lshr_s32_vv + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] + ; GFX8-LABEL: name: lshr_s32_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] + ; GFX9-LABEL: name: lshr_s32_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] + ; GFX10-LABEL: name: lshr_s32_vv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = G_LSHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: lshr_s64_ss +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + ; GFX6-LABEL: name: lshr_s64_ss + ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: S_ENDPGM 0, implicit [[S_LSHR_B64_]] + ; GFX7-LABEL: name: lshr_s64_ss + ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7: S_ENDPGM 0, implicit [[S_LSHR_B64_]] + ; GFX8-LABEL: name: lshr_s64_ss + ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8: S_ENDPGM 0, implicit [[S_LSHR_B64_]] + ; GFX9-LABEL: name: lshr_s64_ss + ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX9: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9: S_ENDPGM 0, implicit [[S_LSHR_B64_]] + ; GFX10-LABEL: name: lshr_s64_ss + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX10: [[S_LSHR_B64_:%[0-9]+]]:sreg_64 = S_LSHR_B64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10: S_ENDPGM 0, implicit [[S_LSHR_B64_]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s32) = COPY $sgpr2 + %2:sgpr(s64) = G_LSHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: lshr_s64_sv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX6-LABEL: name: lshr_s64_sv + ; GFX6: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] + ; GFX7-LABEL: name: lshr_s64_sv + ; GFX7: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] + ; GFX8-LABEL: name: lshr_s64_sv + ; GFX8: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] + ; GFX9-LABEL: name: lshr_s64_sv + ; GFX9: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] + ; GFX10-LABEL: name: lshr_s64_sv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s64) = G_LSHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: lshr_s64_vs +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0_vgpr1 + ; GFX6-LABEL: name: lshr_s64_vs + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX6: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] + ; GFX7-LABEL: name: lshr_s64_vs + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX7: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] + ; GFX8-LABEL: name: lshr_s64_vs + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] + ; GFX9-LABEL: name: lshr_s64_vs + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] + ; GFX10-LABEL: name: lshr_s64_vs + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s64) = G_LSHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: lshr_s64_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6-LABEL: name: lshr_s64_vv + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] + ; GFX7-LABEL: name: lshr_s64_vv + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] + ; GFX8-LABEL: name: lshr_s64_vv + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] + ; GFX9-LABEL: name: lshr_s64_vv + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] + ; GFX10-LABEL: name: lshr_s64_vv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: [[V_LSHRREV_B64_:%[0-9]+]]:vreg_64 = V_LSHRREV_B64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_LSHRREV_B64_]] + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(s64) = G_LSHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir new file mode 100644 index 0000000000000..2a2f600c5b7c6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir @@ -0,0 +1,203 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py + +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX8 %s +# RUN: FileCheck -check-prefixes=ERR-GFX8,ERR %s < %t + +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX9 %s +# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t + +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s +# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t + +# ERR-NOT: remark +# ERR-GFX8: remark: <unknown>:0:0: cannot select: %3:sgpr(s16) = G_LSHR %2:sgpr, %1:sgpr(s32) (in function: lshr_s16_ss) +# ERR-GFX8-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:sgpr, %1:vgpr(s32) (in function: lshr_s16_sv) +# ERR-GFX8-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:sgpr(s32) (in function: lshr_s16_vs) +# ERR-GFX8-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:vgpr(s32) (in function: lshr_s16_vv) + +# ERR-GFX910: remark: <unknown>:0:0: cannot select: %3:sgpr(s16) = G_LSHR %2:sgpr, %1:sgpr(s32) (in function: lshr_s16_ss) +# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:sgpr, %1:vgpr(s32) (in function: lshr_s16_sv) +# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:sgpr(s32) (in function: lshr_s16_vs) +# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot 
select: %3:vgpr(s16) = G_LSHR %2:vgpr, %1:vgpr(s32) (in function: lshr_s16_vv) + +# ERR-NOT: remark + +--- +name: lshr_s16_ss +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; GFX6-LABEL: name: lshr_s16_ss + ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX6: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX7-LABEL: name: lshr_s16_ss + ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX7: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX7: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX8-LABEL: name: lshr_s16_ss + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX9-LABEL: name: lshr_s16_ss + ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX10-LABEL: name: lshr_s16_ss + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[LSHR:%[0-9]+]]:sgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[LSHR]](s16) + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s16) = G_TRUNC %0 + %3:sgpr(s16) = G_LSHR %2, %1 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: lshr_s16_sv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: lshr_s16_sv + ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX6: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX7-LABEL: name: lshr_s16_sv + ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX7: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX7: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX8-LABEL: name: lshr_s16_sv + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX9-LABEL: name: lshr_s16_sv + ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX10-LABEL: name: lshr_s16_sv + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[LSHR]](s16) + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_LSHR %2, %1 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: lshr_s16_vs +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: lshr_s16_vs + ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX6: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX7-LABEL: name: lshr_s16_vs + ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX7: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX7: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX8-LABEL: name: lshr_s16_vs + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX9-LABEL: name: lshr_s16_vs + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX10-LABEL: name: lshr_s16_vs + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[LSHR]](s16) + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_LSHR %2, %1 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: lshr_s16_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; GFX6-LABEL: name: lshr_s16_vv + ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX6: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX7-LABEL: name: lshr_s16_vv + ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX7: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX7: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX8-LABEL: name: lshr_s16_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX9-LABEL: name: lshr_s16_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[LSHR]](s16) + ; GFX10-LABEL: name: lshr_s16_vv + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[LSHR:%[0-9]+]]:vgpr(s16) = G_LSHR [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[LSHR]](s16) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_LSHR %2, %1 + S_ENDPGM 0, implicit %3 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir new file mode 100644 index 0000000000000..35724e0b4d8ec --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.v2s16.mir @@ -0,0 +1,169 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX9 %s +# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t + +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s +# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t + +# ERR-NOT: remark +# ERR-GFX910: remark: :0:0: cannot select: %2:sgpr(<2 x s16>) = G_LSHR %0:sgpr, %1:sgpr(<2 x s16>) (in function: lshr_v2s16_ss) +# ERR-GFX910-NEXT: remark: :0:0: cannot select: %2:vgpr(<2 x s16>) = G_LSHR %0:sgpr, %1:vgpr(<2 x s16>) (in function: lshr_v2s16_sv) +# ERR-GFX910-NEXT: remark: :0:0: cannot select: %2:vgpr(<2 x s16>) = G_LSHR %0:vgpr, %1:sgpr(<2 x s16>) (in function: lshr_v2s16_vs) +# ERR-GFX910-NEXT: remark: :0:0: cannot select: %2:vgpr(<2 x s16>) = G_LSHR %0:vgpr, %1:vgpr(<2 x s16>) (in function: lshr_v2s16_vv) +# ERR-NOT: remark + +--- +name: lshr_v2s16_ss +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; GFX6-LABEL: name: lshr_v2s16_ss + ; GFX6: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; GFX6: [[LSHR:%[0-9]+]]:sgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX6: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + ; GFX7-LABEL: name: lshr_v2s16_ss + ; GFX7: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; 
GFX7: [[LSHR:%[0-9]+]]:sgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX7: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + ; GFX8-LABEL: name: lshr_v2s16_ss + ; GFX8: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; GFX8: [[LSHR:%[0-9]+]]:sgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX8: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + ; GFX9-LABEL: name: lshr_v2s16_ss + ; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; GFX9: [[LSHR:%[0-9]+]]:sgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX9: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + ; GFX10-LABEL: name: lshr_v2s16_ss + ; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; GFX10: [[LSHR:%[0-9]+]]:sgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX10: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_LSHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: lshr_v2s16_sv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: lshr_v2s16_sv + ; GFX6: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX6: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + ; GFX7-LABEL: name: lshr_v2s16_sv + ; GFX7: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX7: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX7: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + ; GFX8-LABEL: name: lshr_v2s16_sv + ; GFX8: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX8: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + ; GFX9-LABEL: name: lshr_v2s16_sv + ; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX9: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + ; GFX10-LABEL: name: lshr_v2s16_sv + ; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX10: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX10: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr0 + %2:vgpr(<2 x s16>) = G_LSHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: lshr_v2s16_vs +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: lshr_v2s16_vs + ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX6: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX6: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + ; GFX7-LABEL: name: lshr_v2s16_vs + ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX7: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX7: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + ; GFX8-LABEL: name: lshr_v2s16_vs + ; GFX8: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX8: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX8: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + ; GFX9-LABEL: name: lshr_v2s16_vs + ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX9: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX9: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + ; GFX10-LABEL: name: lshr_v2s16_vs + ; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX10: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX10: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr0 + %2:vgpr(<2 x s16>) = G_LSHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: lshr_v2s16_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; GFX6-LABEL: name: lshr_v2s16_vv + ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; GFX6: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX6: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + ; GFX7-LABEL: name: lshr_v2s16_vv + ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; GFX7: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX7: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + ; GFX8-LABEL: name: lshr_v2s16_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX8: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + ; GFX9-LABEL: name: lshr_v2s16_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX9: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + ; GFX10-LABEL: name: lshr_v2s16_vv + ; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; GFX10: [[LSHR:%[0-9]+]]:vgpr(<2 x s16>) = G_LSHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX10: S_ENDPGM 0, implicit [[LSHR]](<2 x s16>) + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_LSHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
From f8c8284455b11ae99aa90322b40ab5ae18d2a1b5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 20:31:25 +0000 Subject: [PATCH 282/451] AMDGPU/GlobalISel: Select G_ASHR llvm-svn: 366257 --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 9 - llvm/lib/Target/AMDGPU/SOPInstructions.td | 4 +- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 2 +- .../AMDGPU/GlobalISel/inst-select-ashr.mir | 363 +++++++++++++++--- .../GlobalISel/inst-select-ashr.s16.mir | 203 ++++++++++ .../GlobalISel/inst-select-ashr.v2s16.mir | 169 ++++++++ 7 files changed, 680 insertions(+), 72 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 13ca1ce4b28fe..cad4c2ef404c7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -143,15 +143,6 @@ multiclass GISelVop2IntrPat < def : GISelSop2Pat ; def : GISelVop2Pat ; -def : GISelSop2Pat ; -let AddedComplexity = 100 in { -let SubtargetPredicate = isGFX6GFX7 in { -def : GISelVop2Pat ; -} -def : GISelVop2CommutePat ; -} -def : GISelVop3Pat2CommutePat ; - // FIXME: We can't re-use SelectionDAG patterns here because they match // against a custom SDNode and we would need to create a generic machine // instruction that is equivalent to the custom SDNode. 
This would also require diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 61a720bdaadf1..dfafdccc05a3f 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -523,10 +523,10 @@ def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64", [(set SReg_64:$sdst, (srl (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] >; def S_ASHR_I32 : SOP2_32 <"s_ashr_i32", - [(set i32:$sdst, (UniformBinFrag i32:$src0, i32:$src1))] + [(set SReg_32:$sdst, (sra (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))] >; def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64", - [(set i64:$sdst, (UniformBinFrag i64:$src0, i32:$src1))] + [(set SReg_64:$sdst, (sra (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))] >; } // End Defs = [SCC] diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 9a5080f12684e..1b30cd2ed5165 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -473,7 +473,7 @@ defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN, smax>; defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN, umin>; defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN, umax>; defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, lshr_rev, "v_lshr_b32">; -defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">; +defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, ashr_rev, "v_ashr_i32">; defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, lshl_rev, "v_lshl_b32">; defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN, and>; defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN, or>; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 26beb347fc43f..21dbef9240e10 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -395,7 +395,7 @@ def V_MULLIT_F32 : VOP3Inst 
<"v_mullit_f32", VOP3_Profile>; let SubtargetPredicate = isGFX8Plus in { def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile, lshl_rev>; def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile, lshr_rev>; -def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile>; +def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile, ashr_rev>; } // End SubtargetPredicate = isGFX8Plus } // End SchedRW = [Write64Bit] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir index 3209f4fb808f1..f6176692cefc6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.mir @@ -1,82 +1,327 @@ -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,SI -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,VI +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s --- - -name: ashr -legalized: true +name: ashr_s32_ss +legalized: true regBankSelected: true -# GCN-LABEL: name: ashr body: | bb.0: - liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4 - ; GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: 
[[SGPR1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN: [[VGPR0:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + liveins: $sgpr0, $sgpr1 + ; GFX6-LABEL: name: ashr_s32_ss + ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX6: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: S_ENDPGM 0, implicit [[S_ASHR_I32_]] + ; GFX7-LABEL: name: ashr_s32_ss + ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX7: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7: S_ENDPGM 0, implicit [[S_ASHR_I32_]] + ; GFX8-LABEL: name: ashr_s32_ss + ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX8: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8: S_ENDPGM 0, implicit [[S_ASHR_I32_]] + ; GFX9-LABEL: name: ashr_s32_ss + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9: S_ENDPGM 0, implicit [[S_ASHR_I32_]] + ; GFX10-LABEL: name: ashr_s32_ss + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX10: [[S_ASHR_I32_:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10: S_ENDPGM 0, implicit [[S_ASHR_I32_]] %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 - %2:vgpr(s32) = COPY $vgpr0 - %3:vgpr(p1) = COPY $vgpr3_vgpr4 - - ; GCN: [[C1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1 - ; GCN: [[C4096:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096 - %4:sgpr(s32) = G_CONSTANT i32 1 - %5:sgpr(s32) = G_CONSTANT i32 4096 - - ; ashr ss - ; GCN: [[SS:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[SGPR0]], [[SGPR1]] - %6:sgpr(s32) = G_ASHR %0, %1 + %2:sgpr(s32) = G_ASHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
- ; ashr si - ; GCN: [[SI:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[SS]], [[C1]] - %7:sgpr(s32) = G_ASHR %6, %4 +--- +name: ashr_s32_sv +legalized: true +regBankSelected: true - ; ashr is - ; GCN: [[IS:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[C1]], [[SI]] - %8:sgpr(s32) = G_ASHR %4, %7 +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: ashr_s32_sv + ; GFX6: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] + ; GFX7-LABEL: name: ashr_s32_sv + ; GFX7: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] + ; GFX8-LABEL: name: ashr_s32_sv + ; GFX8: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] + ; GFX9-LABEL: name: ashr_s32_sv + ; GFX9: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] + ; GFX10-LABEL: name: ashr_s32_sv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s32) = G_ASHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
- ; ashr sc - ; GCN: [[SC:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[IS]], [[C4096]] - %9:sgpr(s32) = G_ASHR %8, %5 +--- +name: ashr_s32_vs +legalized: true +regBankSelected: true - ; ashr cs - ; GCN: [[CS:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[C4096]], [[SC]] - %10:sgpr(s32) = G_ASHR %5, %9 +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: ashr_s32_vs + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX6: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] + ; GFX7-LABEL: name: ashr_s32_vs + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX7: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] + ; GFX8-LABEL: name: ashr_s32_vs + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] + ; GFX9-LABEL: name: ashr_s32_vs + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] + ; GFX10-LABEL: name: ashr_s32_vs + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s32) = G_ASHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
- ; ashr vs - ; GCN: [[VS:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[CS]], [[VGPR0]] - %11:vgpr(s32) = G_ASHR %2, %10 +--- +name: ashr_s32_vv +legalized: true +regBankSelected: true - ; ashr sv - ; SI: [[SV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[CS]], [[VS]] - ; VI: [[SV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[VS]], [[CS]] - %12:vgpr(s32) = G_ASHR %10, %11 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; GFX6-LABEL: name: ashr_s32_vv + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] + ; GFX7-LABEL: name: ashr_s32_vv + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] + ; GFX8-LABEL: name: ashr_s32_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] + ; GFX9-LABEL: name: ashr_s32_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] + ; GFX10-LABEL: name: ashr_s32_vv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX10: [[V_ASHRREV_I32_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = G_ASHR %0, %1 + S_ENDPGM 0, 
implicit %2 +... - ; ashr vv - ; SI: [[VV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[SV]], [[VGPR0]] - ; VI: [[VV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[VGPR0]], [[SV]] - %13:vgpr(s32) = G_ASHR %12, %2 +--- +name: ashr_s64_ss +legalized: true +regBankSelected: true - ; ashr iv - ; SI: [[IV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[C1]], [[VV]] - ; VI: [[IV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[VV]], [[C1]] - %14:vgpr(s32) = G_ASHR %4, %13 +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + ; GFX6-LABEL: name: ashr_s64_ss + ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: S_ENDPGM 0, implicit [[S_ASHR_I64_]] + ; GFX7-LABEL: name: ashr_s64_ss + ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX7: S_ENDPGM 0, implicit [[S_ASHR_I64_]] + ; GFX8-LABEL: name: ashr_s64_ss + ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX8: S_ENDPGM 0, implicit [[S_ASHR_I64_]] + ; GFX9-LABEL: name: ashr_s64_ss + ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX9: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9: S_ENDPGM 0, implicit [[S_ASHR_I64_]] + ; GFX10-LABEL: name: ashr_s64_ss + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX10: [[S_ASHR_I64_:%[0-9]+]]:sreg_64 = S_ASHR_I64 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX10: S_ENDPGM 0, implicit [[S_ASHR_I64_]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s32) = COPY $sgpr2 + 
%2:sgpr(s64) = G_ASHR %0, %1 + S_ENDPGM 0, implicit %2 +... - ; ashr vi - ; GCN: [[VI:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[C1]], [[IV]] - %15:vgpr(s32) = G_ASHR %14, %4 +--- +name: ashr_s64_sv +legalized: true +regBankSelected: true - ; ashr cv - ; SI: [[CV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[C4096]], [[VI]] - ; VI: [[CV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[VI]], [[C4096]] - %16:vgpr(s32) = G_ASHR %5, %15 +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX6-LABEL: name: ashr_s64_sv + ; GFX6: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] + ; GFX7-LABEL: name: ashr_s64_sv + ; GFX7: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] + ; GFX8-LABEL: name: ashr_s64_sv + ; GFX8: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] + ; GFX9-LABEL: name: ashr_s64_sv + ; GFX9: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] + ; GFX10-LABEL: name: ashr_s64_sv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 
+ %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s64) = G_ASHR %0, %1 + S_ENDPGM 0, implicit %2 +... - ; ashr vc - ; GCN: [[VC:%[-1-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[C4096]], [[CV]] - %17:vgpr(s32) = G_ASHR %16, %5 +--- +name: ashr_s64_vs +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $sgpr0, $vgpr0_vgpr1 + ; GFX6-LABEL: name: ashr_s64_vs + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX6: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] + ; GFX7-LABEL: name: ashr_s64_vs + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX7: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] + ; GFX8-LABEL: name: ashr_s64_vs + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX8: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] + ; GFX9-LABEL: name: ashr_s64_vs + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX9: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] + ; GFX10-LABEL: name: ashr_s64_vs + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GFX10: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s64) = G_ASHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
- S_ENDPGM 0, implicit %17 +--- +name: ashr_s64_vv +legalized: true +regBankSelected: true +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; GFX6-LABEL: name: ashr_s64_vv + ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX6: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] + ; GFX7-LABEL: name: ashr_s64_vv + ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX7: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] + ; GFX8-LABEL: name: ashr_s64_vv + ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX8: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] + ; GFX9-LABEL: name: ashr_s64_vv + ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX9: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] + ; GFX10-LABEL: name: ashr_s64_vv + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX10: [[V_ASHRREV_I64_:%[0-9]+]]:vreg_64 = V_ASHRREV_I64 [[COPY1]], [[COPY]], implicit $exec + ; GFX10: S_ENDPGM 0, implicit [[V_ASHRREV_I64_]] + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s32) = COPY $vgpr2 + %2:vgpr(s64) = G_ASHR %0, %1 + S_ENDPGM 0, implicit %2 ... 
+ diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir new file mode 100644 index 0000000000000..1a90e609f7bd3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir @@ -0,0 +1,203 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py + +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX8 %s +# RUN: FileCheck -check-prefixes=ERR-GFX8,ERR %s < %t + +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX9 %s +# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t + +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s +# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t + +# ERR-NOT: remark +# ERR-GFX8: remark: <unknown>:0:0: cannot select: %3:sgpr(s16) = G_ASHR %2:sgpr, %1:sgpr(s32) (in function: ashr_s16_ss) +# ERR-GFX8-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:sgpr, %1:vgpr(s32) (in function: ashr_s16_sv) +# ERR-GFX8-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:sgpr(s32) (in function: ashr_s16_vs) +# ERR-GFX8-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:vgpr(s32) (in function: ashr_s16_vv) + +# ERR-GFX910: remark: <unknown>:0:0: cannot select: %3:sgpr(s16) = G_ASHR %2:sgpr, %1:sgpr(s32) (in function: ashr_s16_ss) +# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:sgpr, %1:vgpr(s32) (in function: ashr_s16_sv) +# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:sgpr(s32) (in function: ashr_s16_vs) +# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot
select: %3:vgpr(s16) = G_ASHR %2:vgpr, %1:vgpr(s32) (in function: ashr_s16_vv) + +# ERR-NOT: remark + +--- +name: ashr_s16_ss +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; GFX6-LABEL: name: ashr_s16_ss + ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX6: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX7-LABEL: name: ashr_s16_ss + ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX7: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX7: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX8-LABEL: name: ashr_s16_ss + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX9-LABEL: name: ashr_s16_ss + ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX10-LABEL: name: ashr_s16_ss + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[ASHR:%[0-9]+]]:sgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[ASHR]](s16) + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(s16) = G_TRUNC %0 + %3:sgpr(s16) = G_ASHR %2, %1 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: ashr_s16_sv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: ashr_s16_sv + ; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX6: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX6: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX7-LABEL: name: ashr_s16_sv + ; GFX7: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX7: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX7: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX8-LABEL: name: ashr_s16_sv + ; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX9-LABEL: name: ashr_s16_sv + ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX10-LABEL: name: ashr_s16_sv + ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[ASHR]](s16) + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:sgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_ASHR %2, %1 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: ashr_s16_vs +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: ashr_s16_vs + ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX6: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX7-LABEL: name: ashr_s16_vs + ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX7: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX7: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX8-LABEL: name: ashr_s16_vs + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX9-LABEL: name: ashr_s16_vs + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX10-LABEL: name: ashr_s16_vs + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[ASHR]](s16) + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_ASHR %2, %1 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: ashr_s16_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; GFX6-LABEL: name: ashr_s16_vv + ; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX6: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX7-LABEL: name: ashr_s16_vv + ; GFX7: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX7: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX7: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX7: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX8-LABEL: name: ashr_s16_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX8: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX9-LABEL: name: ashr_s16_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX9: S_ENDPGM 0, implicit [[ASHR]](s16) + ; GFX10-LABEL: name: ashr_s16_vv + ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GFX10: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; GFX10: [[ASHR:%[0-9]+]]:vgpr(s16) = G_ASHR [[TRUNC]], [[COPY1]](s32) + ; GFX10: S_ENDPGM 0, implicit [[ASHR]](s16) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s16) = G_TRUNC %0 + %3:vgpr(s16) = G_ASHR %2, %1 + S_ENDPGM 0, implicit %3 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir new file mode 100644 index 0000000000000..20602f748254f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.v2s16.mir @@ -0,0 +1,169 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX9 %s +# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t + +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GFX10 %s +# RUN: FileCheck -check-prefixes=ERR-GFX910,ERR %s < %t + +# ERR-NOT: remark +# ERR-GFX910: remark: <unknown>:0:0: cannot select: %2:sgpr(<2 x s16>) = G_ASHR %0:sgpr, %1:sgpr(<2 x s16>) (in function: ashr_v2s16_ss) +# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_ASHR %0:sgpr, %1:vgpr(<2 x s16>) (in function: ashr_v2s16_sv) +# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_ASHR %0:vgpr, %1:sgpr(<2 x s16>) (in function: ashr_v2s16_vs) +# ERR-GFX910-NEXT: remark: <unknown>:0:0: cannot select: %2:vgpr(<2 x s16>) = G_ASHR %0:vgpr, %1:vgpr(<2 x s16>) (in function: ashr_v2s16_vv) +# ERR-NOT: remark + +--- +name: ashr_v2s16_ss +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; GFX6-LABEL: name: ashr_v2s16_ss + ; GFX6: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; GFX6: [[ASHR:%[0-9]+]]:sgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX6: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + ; GFX7-LABEL: name: ashr_v2s16_ss + ; GFX7: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ;
GFX7: [[ASHR:%[0-9]+]]:sgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX7: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + ; GFX8-LABEL: name: ashr_v2s16_ss + ; GFX8: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; GFX8: [[ASHR:%[0-9]+]]:sgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX8: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + ; GFX9-LABEL: name: ashr_v2s16_ss + ; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; GFX9: [[ASHR:%[0-9]+]]:sgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX9: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + ; GFX10-LABEL: name: ashr_v2s16_ss + ; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; GFX10: [[ASHR:%[0-9]+]]:sgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX10: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr1 + %2:sgpr(<2 x s16>) = G_ASHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: ashr_v2s16_sv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: ashr_v2s16_sv + ; GFX6: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX6: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + ; GFX7-LABEL: name: ashr_v2s16_sv + ; GFX7: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX7: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX7: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + ; GFX8-LABEL: name: ashr_v2s16_sv + ; GFX8: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX8: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + ; GFX9-LABEL: name: ashr_v2s16_sv + ; GFX9: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX9: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + ; GFX10-LABEL: name: ashr_v2s16_sv + ; GFX10: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX10: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX10: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + %0:sgpr(<2 x s16>) = COPY $sgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr0 + %2:vgpr(<2 x s16>) = G_ASHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: ashr_v2s16_vs +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; GFX6-LABEL: name: ashr_v2s16_vs + ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX6: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX6: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + ; GFX7-LABEL: name: ashr_v2s16_vs + ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX7: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX7: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + ; GFX8-LABEL: name: ashr_v2s16_vs + ; GFX8: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX8: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX8: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + ; GFX9-LABEL: name: ashr_v2s16_vs + ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX9: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX9: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + ; GFX10-LABEL: name: ashr_v2s16_vs + ; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; GFX10: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX10: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:sgpr(<2 x s16>) = COPY $sgpr0 + %2:vgpr(<2 x s16>) = G_ASHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: ashr_v2s16_vv +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; GFX6-LABEL: name: ashr_v2s16_vv + ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; GFX6: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX6: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + ; GFX7-LABEL: name: ashr_v2s16_vv + ; GFX7: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX7: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; GFX7: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX7: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + ; GFX8-LABEL: name: ashr_v2s16_vv + ; GFX8: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX8: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + ; GFX9-LABEL: name: ashr_v2s16_vv + ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX9: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + ; GFX10-LABEL: name: ashr_v2s16_vv + ; GFX10: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; GFX10: [[ASHR:%[0-9]+]]:vgpr(<2 x s16>) = G_ASHR [[COPY]], [[COPY1]](<2 x s16>) + ; GFX10: S_ENDPGM 0, implicit [[ASHR]](<2 x s16>) + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = COPY $vgpr1 + %2:vgpr(<2 x s16>) = G_ASHR %0, %1 + S_ENDPGM 0, implicit %2 +... 
From d069a1921065bda2c6379bb7b6dfcff1572a285e Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Tue, 16 Jul 2019 20:41:04 +0000 Subject: [PATCH 283/451] [TSan] Improve handling of stack pointer mangling in {set,long}jmp, pt.6 Cleanup ARM64 assembly after removing unnecessary computation of mangled stack pointer in previous commit. llvm-svn: 366258 --- compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S | 89 ++++++++++----------- 1 file changed, 44 insertions(+), 45 deletions(-) diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S b/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S index c35897d3c3624..e0b4c71dfed9a 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_aarch64.S @@ -39,7 +39,7 @@ ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(setjmp)) ASM_SYMBOL_INTERCEPTOR(setjmp): CFI_STARTPROC - // save env parameters for function call + // Save frame/link register stp x29, x30, [sp, -32]! CFI_DEF_CFA_OFFSET (32) CFI_OFFSET (29, -32) @@ -49,10 +49,9 @@ ASM_SYMBOL_INTERCEPTOR(setjmp): add x29, sp, 0 CFI_DEF_CFA_REGISTER (29) - // Save jmp_buf - str x19, [sp, 16] - CFI_OFFSET (19, -16) - mov x19, x0 + // Save env parameter + str x0, [sp, 16] + CFI_OFFSET (0, -16) // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` add x0, x29, 32 @@ -60,12 +59,14 @@ ASM_SYMBOL_INTERCEPTOR(setjmp): // call tsan interceptor bl ASM_SYMBOL(__tsan_setjmp) - // restore env parameter - mov x0, x19 - ldr x19, [sp, 16] + // Restore env parameter + ldr x0, [sp, 16] + CFI_RESTORE (0) + + // Restore frame/link register ldp x29, x30, [sp], 32 + CFI_RESTORE (29) CFI_RESTORE (30) - CFI_RESTORE (19) CFI_DEF_CFA (31, 0) // tail jump to libc setjmp @@ -89,7 +90,7 @@ ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(_setjmp)) ASM_SYMBOL_INTERCEPTOR(_setjmp): CFI_STARTPROC - // save env parameters for function call + // Save frame/link register stp x29, x30, [sp, -32]! 
CFI_DEF_CFA_OFFSET (32) CFI_OFFSET (29, -32) @@ -99,10 +100,9 @@ ASM_SYMBOL_INTERCEPTOR(_setjmp): add x29, sp, 0 CFI_DEF_CFA_REGISTER (29) - // Save jmp_buf - str x19, [sp, 16] - CFI_OFFSET (19, -16) - mov x19, x0 + // Save env parameter + str x0, [sp, 16] + CFI_OFFSET (0, -16) // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` add x0, x29, 32 @@ -110,12 +110,14 @@ ASM_SYMBOL_INTERCEPTOR(_setjmp): // call tsan interceptor bl ASM_SYMBOL(__tsan_setjmp) - // Restore jmp_buf parameter - mov x0, x19 - ldr x19, [sp, 16] + // Restore env parameter + ldr x0, [sp, 16] + CFI_RESTORE (0) + + // Restore frame/link register ldp x29, x30, [sp], 32 + CFI_RESTORE (29) CFI_RESTORE (30) - CFI_RESTORE (19) CFI_DEF_CFA (31, 0) // tail jump to libc setjmp @@ -139,7 +141,7 @@ ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(sigsetjmp)) ASM_SYMBOL_INTERCEPTOR(sigsetjmp): CFI_STARTPROC - // save env parameters for function call + // Save frame/link register stp x29, x30, [sp, -32]! CFI_DEF_CFA_OFFSET (32) CFI_OFFSET (29, -32) @@ -149,12 +151,10 @@ ASM_SYMBOL_INTERCEPTOR(sigsetjmp): add x29, sp, 0 CFI_DEF_CFA_REGISTER (29) - // Save jmp_buf and savesigs - stp x19, x20, [sp, 16] - CFI_OFFSET (19, -16) - CFI_OFFSET (20, -8) - mov w20, w1 - mov x19, x0 + // Save env and savesigs parameter + stp x0, x1, [sp, 16] + CFI_OFFSET (0, -16) + CFI_OFFSET (1, -8) // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` add x0, x29, 32 @@ -162,15 +162,15 @@ ASM_SYMBOL_INTERCEPTOR(sigsetjmp): // call tsan interceptor bl ASM_SYMBOL(__tsan_setjmp) - // restore env parameter - mov w1, w20 - mov x0, x19 - ldp x19, x20, [sp, 16] + // Restore env and savesigs parameter + ldp x0, x1, [sp, 16] + CFI_RESTORE (0) + CFI_RESTORE (1) + + // Restore frame/link register ldp x29, x30, [sp], 32 - CFI_RESTORE (30) CFI_RESTORE (29) - CFI_RESTORE (19) - CFI_RESTORE (20) + CFI_RESTORE (30) CFI_DEF_CFA (31, 0) // tail jump to libc sigsetjmp @@ -194,7 +194,7 @@ ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp)) 
ASM_SYMBOL_INTERCEPTOR(__sigsetjmp): CFI_STARTPROC - // save env parameters for function call + // Save frame/link register stp x29, x30, [sp, -32]! CFI_DEF_CFA_OFFSET (32) CFI_OFFSET (29, -32) @@ -204,12 +204,10 @@ ASM_SYMBOL_INTERCEPTOR(__sigsetjmp): add x29, sp, 0 CFI_DEF_CFA_REGISTER (29) - // Save jmp_buf and savesigs - stp x19, x20, [sp, 16] - CFI_OFFSET (19, -16) - CFI_OFFSET (20, -8) - mov w20, w1 - mov x19, x0 + // Save env and savesigs parameter + stp x0, x1, [sp, 16] + CFI_OFFSET (0, -16) + CFI_OFFSET (1, -8) // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` add x0, x29, 32 @@ -217,14 +215,15 @@ ASM_SYMBOL_INTERCEPTOR(__sigsetjmp): // call tsan interceptor bl ASM_SYMBOL(__tsan_setjmp) - mov w1, w20 - mov x0, x19 - ldp x19, x20, [sp, 16] + // Restore env and savesigs parameter + ldp x0, x1, [sp, 16] + CFI_RESTORE (0) + CFI_RESTORE (1) + + // Restore frame/link register ldp x29, x30, [sp], 32 - CFI_RESTORE (30) CFI_RESTORE (29) - CFI_RESTORE (19) - CFI_RESTORE (20) + CFI_RESTORE (30) CFI_DEF_CFA (31, 0) // tail jump to libc __sigsetjmp From 12154ee8f1ee170bac97ef238dece50d504de08a Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 16 Jul 2019 20:41:33 +0000 Subject: [PATCH 284/451] [libc++] Add missing UNSUPPORTED for CTAD tests The tests for unordered_set and unordered_multiset were missing UNSUPPORTED markup for Apple Clang 9.1, which is still being used on some CI bots. 
llvm-svn: 366259 --- .../unord/unord.multiset/unord.multiset.cnstr/deduct.pass.cpp | 1 + .../containers/unord/unord.set/unord.set.cnstr/deduct.pass.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/deduct.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/deduct.pass.cpp index 7becaf96009a4..683d201976414 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/deduct.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/deduct.pass.cpp @@ -9,6 +9,7 @@ // // UNSUPPORTED: c++98, c++03, c++11, c++14 // UNSUPPORTED: libcpp-no-deduction-guides +// UNSUPPORTED: apple-clang-9.1 // template>, diff --git a/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/deduct.pass.cpp b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/deduct.pass.cpp index 20799823692a5..95bc08293f537 100644 --- a/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/deduct.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/deduct.pass.cpp @@ -9,6 +9,7 @@ // // UNSUPPORTED: c++98, c++03, c++11, c++14 // UNSUPPORTED: libcpp-no-deduction-guides +// UNSUPPORTED: apple-clang-9.1 // template>, From ddbb83732afc2e392eca5873910b5424a069e656 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Tue, 16 Jul 2019 20:51:04 +0000 Subject: [PATCH 285/451] [NFC][ScopBuilder] Move addRecordedAssumption to ScopBuilder Scope of changes: 1) Moved addRecordedAssumptions to ScopBuilder. 2) Moved Assumption struct outside Scop class. 3) Refactored addRecordedAssumptions function. Replaced while loop by for range loop. 4) Added function to clear processed Assumptions. 
Differential Revision: https://reviews.llvm.org/D63572 llvm-svn: 366260 --- polly/include/polly/ScopBuilder.h | 3 ++ polly/include/polly/ScopInfo.h | 51 +++++++++++++++++------------- polly/lib/Analysis/ScopBuilder.cpp | 36 ++++++++++++++++++++- polly/lib/Analysis/ScopInfo.cpp | 33 ------------------- 4 files changed, 67 insertions(+), 56 deletions(-) diff --git a/polly/include/polly/ScopBuilder.h b/polly/include/polly/ScopBuilder.h index bc0007009c6e0..9cfdf7b3cbf44 100644 --- a/polly/include/polly/ScopBuilder.h +++ b/polly/include/polly/ScopBuilder.h @@ -327,6 +327,9 @@ class ScopBuilder { BasicBlock *IncomingBlock, Value *IncomingValue, bool IsExitBlock); + /// Add all recorded assumptions to the assumed context. + void addRecordedAssumptions(); + /// Create a MemoryAccess for reading the value of a phi. /// /// The modeling assumes that all incoming blocks write their incoming value diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index aeed4a7e3d4f3..30e8d0346ad58 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -1624,6 +1624,24 @@ class ScopStmt { /// Print ScopStmt S to raw_ostream OS. raw_ostream &operator<<(raw_ostream &OS, const ScopStmt &S); +/// Helper struct to remember assumptions. +struct Assumption { + /// The kind of the assumption (e.g., WRAPPING). + AssumptionKind Kind; + + /// Flag to distinguish assumptions and restrictions. + AssumptionSign Sign; + + /// The valid/invalid context if this is an assumption/restriction. + isl::set Set; + + /// The location that caused this assumption. + DebugLoc Loc; + + /// An optional block whose domain can simplify the assumption. + BasicBlock *BB; +}; + /// Static Control Part /// /// A Scop is the polyhedral representation of a control flow region detected @@ -1782,24 +1800,7 @@ class Scop { /// need to be "false". Otherwise they behave the same. isl::set InvalidContext; - /// Helper struct to remember assumptions. 
- struct Assumption { - /// The kind of the assumption (e.g., WRAPPING). - AssumptionKind Kind; - - /// Flag to distinguish assumptions and restrictions. - AssumptionSign Sign; - - /// The valid/invalid context if this is an assumption/restriction. - isl::set Set; - - /// The location that caused this assumption. - DebugLoc Loc; - - /// An optional block whose domain can simplify the assumption. - BasicBlock *BB; - }; - + using RecordedAssumptionsTy = SmallVector<Assumption, 8>; /// Collection to hold taken assumptions. /// /// There are two reasons why we want to record assumptions first before we @@ -1810,7 +1811,7 @@ class Scop { /// construction (basically after we know all parameters), thus the user /// might see overly complicated assumptions to be taken while they will /// only be simplified later on. - SmallVector<Assumption, 8> RecordedAssumptions; + RecordedAssumptionsTy RecordedAssumptions; /// The schedule of the SCoP /// @@ -2338,6 +2339,12 @@ class Scop { InvariantEquivClasses.end()); } + /// Return an iterator range containing hold assumptions. + iterator_range<RecordedAssumptionsTy::const_iterator> + recorded_assumptions() const { + return make_range(RecordedAssumptions.begin(), RecordedAssumptions.end()); + } + /// Return whether this scop is empty, i.e. contains no statements that /// could be executed. bool isEmpty() const { return Stmts.empty(); } @@ -2494,6 +2501,9 @@ class Scop { /// @returns True if the optimized SCoP can be executed. bool hasFeasibleRuntimeContext() const; + /// Clear assumptions which have been already processed. + void clearRecordedAssumptions() { return RecordedAssumptions.clear(); } + /// Check if the assumption in @p Set is trivial or not. /// /// @param Set The relations between parameters that are assumed to hold. @@ -2559,9 +2569,6 @@ class Scop { void recordAssumption(AssumptionKind Kind, isl::set Set, DebugLoc Loc, AssumptionSign Sign, BasicBlock *BB = nullptr); - /// Add all recorded assumptions to the assumed context.
- void addRecordedAssumptions(); - /// Mark the scop as invalid. /// /// This method adds an assumption to the scop that is always invalid. As a diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp index 8ae29e223a7eb..617bc81908273 100644 --- a/polly/lib/Analysis/ScopBuilder.cpp +++ b/polly/lib/Analysis/ScopBuilder.cpp @@ -385,6 +385,40 @@ Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) { return Descriptor; } +void ScopBuilder::addRecordedAssumptions() { + for (auto &AS : llvm::reverse(scop->recorded_assumptions())) { + + if (!AS.BB) { + scop->addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign, + nullptr /* BasicBlock */); + continue; + } + + // If the domain was deleted the assumptions are void. + isl_set *Dom = scop->getDomainConditions(AS.BB).release(); + if (!Dom) + continue; + + // If a basic block was given use its domain to simplify the assumption. + // In case of restrictions we know they only have to hold on the domain, + // thus we can intersect them with the domain of the block. However, for + // assumptions the domain has to imply them, thus: + // _ _____ + // Dom => S <==> A v B <==> A - B + // + // To avoid the complement we will register A - B as a restriction not an + // assumption. + isl_set *S = AS.Set.copy(); + if (AS.Sign == AS_RESTRICTION) + S = isl_set_params(isl_set_intersect(S, Dom)); + else /* (AS.Sign == AS_ASSUMPTION) */ + S = isl_set_params(isl_set_subtract(Dom, S)); + + scop->addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB); + } + scop->clearRecordedAssumptions(); +} + bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) { Value *Val = Inst.getValueOperand(); Type *ElementType = Val->getType(); @@ -1972,7 +2006,7 @@ void ScopBuilder::buildScop(Region &R, AssumptionCache &AC, // After the context was fully constructed, thus all our knowledge about // the parameters is in there, we add all recorded assumptions to the // assumed/invalid context. 
- scop->addRecordedAssumptions(); + addRecordedAssumptions(); scop->simplifyContexts(); if (!scop->buildAliasChecks(AA)) { diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 0e5c3c39435cf..2b0d8052aa03e 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -3779,39 +3779,6 @@ void Scop::recordAssumption(AssumptionKind Kind, isl::set Set, DebugLoc Loc, RecordedAssumptions.push_back({Kind, Sign, Set, Loc, BB}); } -void Scop::addRecordedAssumptions() { - while (!RecordedAssumptions.empty()) { - Assumption AS = RecordedAssumptions.pop_back_val(); - - if (!AS.BB) { - addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign, nullptr /* BasicBlock */); - continue; - } - - // If the domain was deleted the assumptions are void. - isl_set *Dom = getDomainConditions(AS.BB).release(); - if (!Dom) - continue; - - // If a basic block was given use its domain to simplify the assumption. - // In case of restrictions we know they only have to hold on the domain, - // thus we can intersect them with the domain of the block. However, for - // assumptions the domain has to imply them, thus: - // _ _____ - // Dom => S <==> A v B <==> A - B - // - // To avoid the complement we will register A - B as a restriction not an - // assumption. 
- isl_set *S = AS.Set.copy(); - if (AS.Sign == AS_RESTRICTION) - S = isl_set_params(isl_set_intersect(S, Dom)); - else /* (AS.Sign == AS_ASSUMPTION) */ - S = isl_set_params(isl_set_subtract(Dom, S)); - - addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB); - } -} - void Scop::invalidate(AssumptionKind Kind, DebugLoc Loc, BasicBlock *BB) { LLVM_DEBUG(dbgs() << "Invalidate SCoP because of reason " << Kind << "\n"); addAssumption(Kind, isl::set::empty(getParamSpace()), Loc, AS_ASSUMPTION, BB); From 0e534de4fef8d13116283a841d6d1875222a3ed3 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Tue, 16 Jul 2019 21:05:08 +0000 Subject: [PATCH 286/451] [Symbol] Remove unused fields from ClangASTContext llvm-svn: 366261 --- lldb/include/lldb/Symbol/ClangASTContext.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/lldb/include/lldb/Symbol/ClangASTContext.h b/lldb/include/lldb/Symbol/ClangASTContext.h index dda9f4347f0cd..d0a834e01f3aa 100644 --- a/lldb/include/lldb/Symbol/ClangASTContext.h +++ b/lldb/include/lldb/Symbol/ClangASTContext.h @@ -982,7 +982,6 @@ class ClangASTContext : public TypeSystem { std::unique_ptr m_ast_up; std::unique_ptr m_language_options_up; std::unique_ptr m_file_manager_up; - std::unique_ptr m_file_system_options_up; std::unique_ptr m_source_manager_up; std::unique_ptr m_diagnostics_engine_up; std::unique_ptr m_diagnostic_consumer_up; @@ -1001,7 +1000,6 @@ class ClangASTContext : public TypeSystem { clang::ExternalASTMerger::OriginMap m_origins; uint32_t m_pointer_byte_size; bool m_ast_owned; - bool m_can_evaluate_expressions; /// The sema associated that is currently used to build this ASTContext. /// May be null if we are already done parsing this ASTContext or the /// ASTContext wasn't created by parsing source code. 
From 588fc9e756d3c9981cf7b17f18bd199e7bcd4172 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Tue, 16 Jul 2019 21:10:45 +0000 Subject: [PATCH 287/451] [NFC][ScopBuilder] Move buildAliasChecks and its implementing methods to ScopBuilder Scope of changes: 1) Moved buildAliasChecks to ScopBuilder. 2) Moved buildAliasGroup to ScopBuilder. 3) Moved buildAliasGroups to ScopBuilder. 4) Moved buildAliasGroupsForAccesses to ScopBuilder. 5) Moved splitAliasGroupsByDomain to ScopBuilder. 6) Moved addNonEmptyDomainConstraints to ScopBuilder. 7) Moved buildMinMaxAccess to ScopBuilder. 8) Moved calculateMinMaxAccess to ScopBuilder. 9) Moved getAccessDomain to ScopBuilder. 10) Moved command line options used only by buildAliasChecks functions to ScopBuilder. 11) Refactored buildAliasGroup function. Added addAliasGroup function to Scop class for pushing back calculated min/max accesses. 12) Added function incrementNumberOfAliasingAssumptions which increments number of statistic variable AssumptionsAliasing. AssumptionsAliasing variable is defined by STATISTIC macro inside ScopInfo.cpp and it is also used by function trackAssumption from Scop class. 13) Added reference to OptimizationRemarkEmitter to ScopBuilder class. 14) Moved calculateMinMaxAccess function to ScopBuilder class. Differential Revision: https://reviews.llvm.org/D63693 llvm-svn: 366262 --- polly/include/polly/ScopBuilder.h | 56 ++++- polly/include/polly/ScopInfo.h | 60 +---- polly/lib/Analysis/ScopBuilder.cpp | 335 +++++++++++++++++++++++++- polly/lib/Analysis/ScopInfo.cpp | 371 ++--------------------------- 4 files changed, 420 insertions(+), 402 deletions(-) diff --git a/polly/include/polly/ScopBuilder.h b/polly/include/polly/ScopBuilder.h index 9cfdf7b3cbf44..0cb90e3ca0e6a 100644 --- a/polly/include/polly/ScopBuilder.h +++ b/polly/include/polly/ScopBuilder.h @@ -30,6 +30,7 @@ extern bool ModelReadOnlyScalars; /// Build the Polly IR (Scop and ScopStmt) on a Region. 
class ScopBuilder { + /// The AliasAnalysis to build AliasSetTracker. AliasAnalysis &AA; @@ -48,6 +49,9 @@ class ScopBuilder { /// The ScalarEvolution to help building Scop. ScalarEvolution &SE; + /// An optimization diagnostic interface to add optimization remarks. + OptimizationRemarkEmitter &ORE; + /// Set of instructions that might read any memory location. SmallVector, 16> GlobalReads; @@ -117,8 +121,7 @@ class ScopBuilder { // @} // Build the SCoP for Region @p R. - void buildScop(Region &R, AssumptionCache &AC, - OptimizationRemarkEmitter &ORE); + void buildScop(Region &R, AssumptionCache &AC); /// Create equivalence classes for required invariant accesses. /// @@ -175,6 +178,52 @@ class ScopBuilder { /// @param Stmt The parent statement of the instruction void buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt); + /// Build the alias checks for this SCoP. + bool buildAliasChecks(); + + /// A vector of memory accesses that belong to an alias group. + using AliasGroupTy = SmallVector; + + /// A vector of alias groups. + using AliasGroupVectorTy = SmallVector; + + /// Build a given alias group and its access data. + /// + /// @param AliasGroup The alias group to build. + /// @param HasWriteAccess A set of arrays through which memory is not only + /// read, but also written. + // + /// @returns True if __no__ error occurred, false otherwise. + bool buildAliasGroup(AliasGroupTy &AliasGroup, + DenseSet HasWriteAccess); + + /// Build all alias groups for this SCoP. + /// + /// @returns True if __no__ error occurred, false otherwise. + bool buildAliasGroups(); + + /// Build alias groups for all memory accesses in the Scop. + /// + /// Using the alias analysis and an alias set tracker we build alias sets + /// for all memory accesses inside the Scop. For each alias set we then map + /// the aliasing pointers back to the memory accesses we know, thus obtain + /// groups of memory accesses which might alias. 
We also collect the set of + /// arrays through which memory is written. + /// + /// @returns A pair consistent of a vector of alias groups and a set of arrays + /// through which memory is written. + std::tuple> + buildAliasGroupsForAccesses(); + + /// Split alias groups by iteration domains. + /// + /// We split each group based on the domains of the minimal/maximal accesses. + /// That means two minimal/maximal accesses are only in a group if their + /// access domains intersect. Otherwise, they are in different groups. + /// + /// @param AliasGroups The alias groups to split + void splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups); + /// Build an instance of MemoryAccess from the Load/Store instruction. /// /// @param Inst The Load/Store instruction that access the memory @@ -344,6 +393,9 @@ class ScopBuilder { /// @see MemoryKind void addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI); + /// Wrapper function to calculate minimal/maximal accesses to each array. + bool calculateMinMaxAccess(AliasGroupTy AliasGroup, + Scop::MinMaxVectorTy &MinMaxAccesses); /// Build the domain of @p Stmt. void buildDomain(ScopStmt &Stmt); diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index 30e8d0346ad58..c86513a5f62c9 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -2258,6 +2258,12 @@ class Scop { Scop &operator=(const Scop &) = delete; ~Scop(); + /// Increment actual number of aliasing assumptions taken + /// + /// @param Step Number of new aliasing assumptions which should be added to + /// the number of already taken assumptions. + static void incrementNumberOfAliasingAssumptions(unsigned Step); + /// Get the count of copy statements added to this Scop. /// /// @return The count of copy statements added to this Scop. @@ -2589,59 +2595,17 @@ class Scop { /// Return true if and only if the InvalidContext is trivial (=empty). 
bool hasTrivialInvalidContext() const { return InvalidContext.is_empty(); } - /// A vector of memory accesses that belong to an alias group. - using AliasGroupTy = SmallVector; - - /// A vector of alias groups. - using AliasGroupVectorTy = SmallVector; - - /// Build the alias checks for this SCoP. - bool buildAliasChecks(AliasAnalysis &AA); - - /// Build all alias groups for this SCoP. - /// - /// @returns True if __no__ error occurred, false otherwise. - bool buildAliasGroups(AliasAnalysis &AA); - - /// Build alias groups for all memory accesses in the Scop. - /// - /// Using the alias analysis and an alias set tracker we build alias sets - /// for all memory accesses inside the Scop. For each alias set we then map - /// the aliasing pointers back to the memory accesses we know, thus obtain - /// groups of memory accesses which might alias. We also collect the set of - /// arrays through which memory is written. - /// - /// @param AA A reference to the alias analysis. - /// - /// @returns A pair consistent of a vector of alias groups and a set of arrays - /// through which memory is written. - std::tuple> - buildAliasGroupsForAccesses(AliasAnalysis &AA); - - /// Split alias groups by iteration domains. - /// - /// We split each group based on the domains of the minimal/maximal accesses. - /// That means two minimal/maximal accesses are only in a group if their - /// access domains intersect. Otherwise, they are in different groups. - /// - /// @param AliasGroups The alias groups to split - void splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups); - - /// Build a given alias group and its access data. - /// - /// @param AliasGroup The alias group to build. - /// @param HasWriteAccess A set of arrays through which memory is not only - /// read, but also written. - /// - /// @returns True if __no__ error occurred, false otherwise. - bool buildAliasGroup(Scop::AliasGroupTy &AliasGroup, - DenseSet HasWriteAccess); - /// Return all alias groups for this SCoP. 
const MinMaxVectorPairVectorTy &getAliasGroups() const { return MinMaxAliasGroups; } + void addAliasGroup(MinMaxVectorTy &MinMaxAccessesReadWrite, + MinMaxVectorTy &MinMaxAccessesReadOnly) { + MinMaxAliasGroups.emplace_back(); + MinMaxAliasGroups.back().first = MinMaxAccessesReadWrite; + MinMaxAliasGroups.back().second = MinMaxAccessesReadOnly; + } /// Get an isl string representing the context. std::string getContextStr() const; diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp index 617bc81908273..f079a749dcb11 100644 --- a/polly/lib/Analysis/ScopBuilder.cpp +++ b/polly/lib/Analysis/ScopBuilder.cpp @@ -76,6 +76,13 @@ static cl::opt XModelReadOnlyScalars( cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); +static cl::opt + OptComputeOut("polly-analysis-computeout", + cl::desc("Bound the scop analysis by a maximal amount of " + "computational steps (0 means no bound)"), + cl::Hidden, cl::init(800000), cl::ZeroOrMore, + cl::cat(PollyCategory)); + static cl::opt PollyAllowDereferenceOfAllFunctionParams( "polly-allow-dereference-of-all-function-parameters", cl::desc( @@ -86,6 +93,22 @@ static cl::opt PollyAllowDereferenceOfAllFunctionParams( " their loads. 
"), cl::Hidden, cl::init(false), cl::cat(PollyCategory)); +static cl::opt RunTimeChecksMaxArraysPerGroup( + "polly-rtc-max-arrays-per-group", + cl::desc("The maximal number of arrays to compare in each alias group."), + cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory)); + +static cl::opt RunTimeChecksMaxAccessDisjuncts( + "polly-rtc-max-array-disjuncts", + cl::desc("The maximal number of disjunts allowed in memory accesses to " + "to build RTCs."), + cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory)); + +static cl::opt RunTimeChecksMaxParameters( + "polly-rtc-max-parameters", + cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden, + cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory)); + static cl::opt UnprofitableScalarAccs( "polly-unprofitable-scalar-accs", cl::desc("Count statements with scalar accesses as not optimizable"), @@ -1801,6 +1824,309 @@ void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) { } } +/// Add the minimal/maximal access in @p Set to @p User. +/// +/// @return True if more accesses should be added, false if we reached the +/// maximal number of run-time checks to be generated. +static bool buildMinMaxAccess(isl::set Set, + Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) { + isl::pw_multi_aff MinPMA, MaxPMA; + isl::pw_aff LastDimAff; + isl::aff OneAff; + unsigned Pos; + + Set = Set.remove_divs(); + polly::simplify(Set); + + if (Set.n_basic_set() > RunTimeChecksMaxAccessDisjuncts) + Set = Set.simple_hull(); + + // Restrict the number of parameters involved in the access as the lexmin/ + // lexmax computation will take too long if this number is high. 
+ // + // Experiments with a simple test case using an i7 4800MQ: + // + // #Parameters involved | Time (in sec) + // 6 | 0.01 + // 7 | 0.04 + // 8 | 0.12 + // 9 | 0.40 + // 10 | 1.54 + // 11 | 6.78 + // 12 | 30.38 + // + if (isl_set_n_param(Set.get()) > RunTimeChecksMaxParameters) { + unsigned InvolvedParams = 0; + for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++) + if (Set.involves_dims(isl::dim::param, u, 1)) + InvolvedParams++; + + if (InvolvedParams > RunTimeChecksMaxParameters) + return false; + } + + MinPMA = Set.lexmin_pw_multi_aff(); + MaxPMA = Set.lexmax_pw_multi_aff(); + + MinPMA = MinPMA.coalesce(); + MaxPMA = MaxPMA.coalesce(); + + // Adjust the last dimension of the maximal access by one as we want to + // enclose the accessed memory region by MinPMA and MaxPMA. The pointer + // we test during code generation might now point after the end of the + // allocated array but we will never dereference it anyway. + assert((!MaxPMA || MaxPMA.dim(isl::dim::out)) && + "Assumed at least one output dimension"); + + Pos = MaxPMA.dim(isl::dim::out) - 1; + LastDimAff = MaxPMA.get_pw_aff(Pos); + OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space())); + OneAff = OneAff.add_constant_si(1); + LastDimAff = LastDimAff.add(OneAff); + MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff); + + if (!MinPMA || !MaxPMA) + return false; + + MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA)); + + return true; +} + +/// Wrapper function to calculate minimal/maximal accesses to each array. 
+bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup, + Scop::MinMaxVectorTy &MinMaxAccesses) { + MinMaxAccesses.reserve(AliasGroup.size()); + + isl::union_set Domains = scop->getDomains(); + isl::union_map Accesses = isl::union_map::empty(scop->getParamSpace()); + + for (MemoryAccess *MA : AliasGroup) + Accesses = Accesses.add_map(MA->getAccessRelation()); + + Accesses = Accesses.intersect_domain(Domains); + isl::union_set Locations = Accesses.range(); + + bool LimitReached = false; + for (isl::set Set : Locations.get_set_list()) { + LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, *scop); + if (LimitReached) + break; + } + + return !LimitReached; +} + +static isl::set getAccessDomain(MemoryAccess *MA) { + isl::set Domain = MA->getStatement()->getDomain(); + Domain = Domain.project_out(isl::dim::set, 0, Domain.n_dim()); + return Domain.reset_tuple_id(); +} + +bool ScopBuilder::buildAliasChecks() { + if (!PollyUseRuntimeAliasChecks) + return true; + + if (buildAliasGroups()) { + // Aliasing assumptions do not go through addAssumption but we still want to + // collect statistics so we do it here explicitly. + if (scop->getAliasGroups().size()) + Scop::incrementNumberOfAliasingAssumptions(1); + return true; + } + + // If a problem occurs while building the alias groups we need to delete + // this SCoP and pretend it wasn't valid in the first place. To this end + // we make the assumed context infeasible. + scop->invalidate(ALIASING, DebugLoc()); + + LLVM_DEBUG( + dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr() + << " could not be created as the number of parameters involved " + "is too high. 
The SCoP will be " + "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust " + "the maximal number of parameters but be advised that the " + "compile time might increase exponentially.\n\n"); + return false; +} + +std::tuple> +ScopBuilder::buildAliasGroupsForAccesses() { + AliasSetTracker AST(AA); + + DenseMap PtrToAcc; + DenseSet HasWriteAccess; + for (ScopStmt &Stmt : *scop) { + + isl::set StmtDomain = Stmt.getDomain(); + bool StmtDomainEmpty = StmtDomain.is_empty(); + + // Statements with an empty domain will never be executed. + if (StmtDomainEmpty) + continue; + + for (MemoryAccess *MA : Stmt) { + if (MA->isScalarKind()) + continue; + if (!MA->isRead()) + HasWriteAccess.insert(MA->getScopArrayInfo()); + MemAccInst Acc(MA->getAccessInstruction()); + if (MA->isRead() && isa(Acc)) + PtrToAcc[cast(Acc)->getRawSource()] = MA; + else + PtrToAcc[Acc.getPointerOperand()] = MA; + AST.add(Acc); + } + } + + AliasGroupVectorTy AliasGroups; + for (AliasSet &AS : AST) { + if (AS.isMustAlias() || AS.isForwardingAliasSet()) + continue; + AliasGroupTy AG; + for (auto &PR : AS) + AG.push_back(PtrToAcc[PR.getValue()]); + if (AG.size() < 2) + continue; + AliasGroups.push_back(std::move(AG)); + } + + return std::make_tuple(AliasGroups, HasWriteAccess); +} + +bool ScopBuilder::buildAliasGroups() { + // To create sound alias checks we perform the following steps: + // o) We partition each group into read only and non read only accesses. + // o) For each group with more than one base pointer we then compute minimal + // and maximal accesses to each array of a group in read only and non + // read only partitions separately. 
+ AliasGroupVectorTy AliasGroups; + DenseSet HasWriteAccess; + + std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses(); + + splitAliasGroupsByDomain(AliasGroups); + + for (AliasGroupTy &AG : AliasGroups) { + if (!scop->hasFeasibleRuntimeContext()) + return false; + + { + IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut); + bool Valid = buildAliasGroup(AG, HasWriteAccess); + if (!Valid) + return false; + } + if (isl_ctx_last_error(scop->getIslCtx().get()) == isl_error_quota) { + scop->invalidate(COMPLEXITY, DebugLoc()); + return false; + } + } + + return true; +} + +bool ScopBuilder::buildAliasGroup( + AliasGroupTy &AliasGroup, DenseSet HasWriteAccess) { + AliasGroupTy ReadOnlyAccesses; + AliasGroupTy ReadWriteAccesses; + SmallPtrSet ReadWriteArrays; + SmallPtrSet ReadOnlyArrays; + + if (AliasGroup.size() < 2) + return true; + + for (MemoryAccess *Access : AliasGroup) { + ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias", + Access->getAccessInstruction()) + << "Possibly aliasing pointer, use restrict keyword."); + const ScopArrayInfo *Array = Access->getScopArrayInfo(); + if (HasWriteAccess.count(Array)) { + ReadWriteArrays.insert(Array); + ReadWriteAccesses.push_back(Access); + } else { + ReadOnlyArrays.insert(Array); + ReadOnlyAccesses.push_back(Access); + } + } + + // If there are no read-only pointers, and less than two read-write pointers, + // no alias check is needed. + if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1) + return true; + + // If there is no read-write pointer, no alias check is needed. + if (ReadWriteArrays.empty()) + return true; + + // For non-affine accesses, no alias check can be generated as we cannot + // compute a sufficiently tight lower and upper bound: bail out. 
+ for (MemoryAccess *MA : AliasGroup) { + if (!MA->isAffine()) { + scop->invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(), + MA->getAccessInstruction()->getParent()); + return false; + } + } + + // Ensure that for all memory accesses for which we generate alias checks, + // their base pointers are available. + for (MemoryAccess *MA : AliasGroup) { + if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA)) + scop->addRequiredInvariantLoad( + cast(BasePtrMA->getAccessInstruction())); + } + + // scop->getAliasGroups().emplace_back(); + // Scop::MinMaxVectorPairTy &pair = scop->getAliasGroups().back(); + Scop::MinMaxVectorTy MinMaxAccessesReadWrite; + Scop::MinMaxVectorTy MinMaxAccessesReadOnly; + + bool Valid; + + Valid = calculateMinMaxAccess(ReadWriteAccesses, MinMaxAccessesReadWrite); + + if (!Valid) + return false; + + // Bail out if the number of values we need to compare is too large. + // This is important as the number of comparisons grows quadratically with + // the number of values we need to compare. 
+ if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() > + RunTimeChecksMaxArraysPerGroup) + return false; + + Valid = calculateMinMaxAccess(ReadOnlyAccesses, MinMaxAccessesReadOnly); + + scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly); + if (!Valid) + return false; + + return true; +} + +void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) { + for (unsigned u = 0; u < AliasGroups.size(); u++) { + AliasGroupTy NewAG; + AliasGroupTy &AG = AliasGroups[u]; + AliasGroupTy::iterator AGI = AG.begin(); + isl::set AGDomain = getAccessDomain(*AGI); + while (AGI != AG.end()) { + MemoryAccess *MA = *AGI; + isl::set MADomain = getAccessDomain(MA); + if (AGDomain.is_disjoint(MADomain)) { + NewAG.push_back(MA); + AGI = AG.erase(AGI); + } else { + AGDomain = AGDomain.unite(MADomain); + AGI++; + } + } + if (NewAG.size() > 1) + AliasGroups.push_back(std::move(NewAG)); + } +} + #ifndef NDEBUG static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) { auto PhysUse = VirtualUse::create(S, Op, &LI, false); @@ -1879,8 +2205,7 @@ static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) { : RN->getNodeAs(); } -void ScopBuilder::buildScop(Region &R, AssumptionCache &AC, - OptimizationRemarkEmitter &ORE) { +void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) { scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE)); buildStmts(R); @@ -2009,7 +2334,7 @@ void ScopBuilder::buildScop(Region &R, AssumptionCache &AC, addRecordedAssumptions(); scop->simplifyContexts(); - if (!scop->buildAliasChecks(AA)) { + if (!buildAliasChecks()) { LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n"); return; } @@ -2035,7 +2360,7 @@ ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA, const DataLayout &DL, DominatorTree &DT, LoopInfo &LI, ScopDetection &SD, ScalarEvolution &SE, OptimizationRemarkEmitter &ORE) - : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE) { + : AA(AA), DL(DL), DT(DT), 
LI(LI), SD(SD), SE(SE), ORE(ORE) { DebugLoc Beg, End; auto P = getBBPairForRegion(R); getDebugLocations(P, Beg, End); @@ -2044,7 +2369,7 @@ ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA, ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first) << Msg); - buildScop(*R, AC, ORE); + buildScop(*R, AC); LLVM_DEBUG(dbgs() << *scop); diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 2b0d8052aa03e..9244796a23c6c 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -117,34 +117,11 @@ int const polly::MaxDisjunctsInDomain = 20; // number of disjunct when adding non-convex sets to the context. static int const MaxDisjunctsInContext = 4; -static cl::opt - OptComputeOut("polly-analysis-computeout", - cl::desc("Bound the scop analysis by a maximal amount of " - "computational steps (0 means no bound)"), - cl::Hidden, cl::init(800000), cl::ZeroOrMore, - cl::cat(PollyCategory)); - static cl::opt PollyRemarksMinimal( "polly-remarks-minimal", cl::desc("Do not emit remarks about assumptions that are known"), cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory)); -static cl::opt RunTimeChecksMaxAccessDisjuncts( - "polly-rtc-max-array-disjuncts", - cl::desc("The maximal number of disjunts allowed in memory accesses to " - "to build RTCs."), - cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory)); - -static cl::opt RunTimeChecksMaxParameters( - "polly-rtc-max-parameters", - cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden, - cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory)); - -static cl::opt RunTimeChecksMaxArraysPerGroup( - "polly-rtc-max-arrays-per-group", - cl::desc("The maximal number of arrays to compare in each alias group."), - cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory)); - static cl::opt UserContextStr( "polly-context", cl::value_desc("isl parameter set"), cl::desc("Provide additional 
constraints on the context parameters"), @@ -1963,11 +1940,6 @@ isl::id Scop::getIdForParam(const SCEV *Parameter) const { return ParameterIds.lookup(Parameter); } -isl::set Scop::addNonEmptyDomainConstraints(isl::set C) const { - isl::set DomainContext = getDomains().params(); - return C.intersect_params(DomainContext); -} - bool Scop::isDominatedBy(const DominatorTree &DT, BasicBlock *BB) const { return DT.dominates(BB, getEntry()); } @@ -2205,105 +2177,6 @@ void Scop::simplifyContexts() { InvalidContext = InvalidContext.align_params(getParamSpace()); } -/// Add the minimal/maximal access in @p Set to @p User. -/// -/// @return True if more accesses should be added, false if we reached the -/// maximal number of run-time checks to be generated. -static bool buildMinMaxAccess(isl::set Set, - Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) { - isl::pw_multi_aff MinPMA, MaxPMA; - isl::pw_aff LastDimAff; - isl::aff OneAff; - unsigned Pos; - - Set = Set.remove_divs(); - polly::simplify(Set); - - if (Set.n_basic_set() > RunTimeChecksMaxAccessDisjuncts) - Set = Set.simple_hull(); - - // Restrict the number of parameters involved in the access as the lexmin/ - // lexmax computation will take too long if this number is high. 
- // - // Experiments with a simple test case using an i7 4800MQ: - // - // #Parameters involved | Time (in sec) - // 6 | 0.01 - // 7 | 0.04 - // 8 | 0.12 - // 9 | 0.40 - // 10 | 1.54 - // 11 | 6.78 - // 12 | 30.38 - // - if (isl_set_n_param(Set.get()) > RunTimeChecksMaxParameters) { - unsigned InvolvedParams = 0; - for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++) - if (Set.involves_dims(isl::dim::param, u, 1)) - InvolvedParams++; - - if (InvolvedParams > RunTimeChecksMaxParameters) - return false; - } - - MinPMA = Set.lexmin_pw_multi_aff(); - MaxPMA = Set.lexmax_pw_multi_aff(); - - MinPMA = MinPMA.coalesce(); - MaxPMA = MaxPMA.coalesce(); - - // Adjust the last dimension of the maximal access by one as we want to - // enclose the accessed memory region by MinPMA and MaxPMA. The pointer - // we test during code generation might now point after the end of the - // allocated array but we will never dereference it anyway. - assert((!MaxPMA || MaxPMA.dim(isl::dim::out)) && - "Assumed at least one output dimension"); - - Pos = MaxPMA.dim(isl::dim::out) - 1; - LastDimAff = MaxPMA.get_pw_aff(Pos); - OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space())); - OneAff = OneAff.add_constant_si(1); - LastDimAff = LastDimAff.add(OneAff); - MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff); - - if (!MinPMA || !MaxPMA) - return false; - - MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA)); - - return true; -} - -static isl::set getAccessDomain(MemoryAccess *MA) { - isl::set Domain = MA->getStatement()->getDomain(); - Domain = Domain.project_out(isl::dim::set, 0, Domain.n_dim()); - return Domain.reset_tuple_id(); -} - -/// Wrapper function to calculate minimal/maximal accesses to each array. 
-static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup, Scop &S, - Scop::MinMaxVectorTy &MinMaxAccesses) { - MinMaxAccesses.reserve(AliasGroup.size()); - - isl::union_set Domains = S.getDomains(); - isl::union_map Accesses = isl::union_map::empty(S.getParamSpace()); - - for (MemoryAccess *MA : AliasGroup) - Accesses = Accesses.add_map(MA->getAccessRelation()); - - Accesses = Accesses.intersect_domain(Domains); - isl::union_set Locations = Accesses.range(); - - bool LimitReached = false; - for (isl::set Set : Locations.get_set_list()) { - LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, S); - if (LimitReached) - break; - } - - return !LimitReached; -} - /// Helper to treat non-affine regions and basic blocks the same. /// ///{ @@ -2960,225 +2833,6 @@ bool Scop::addLoopBoundsToHeaderDomain( return true; } -MemoryAccess *Scop::lookupBasePtrAccess(MemoryAccess *MA) { - Value *PointerBase = MA->getOriginalBaseAddr(); - - auto *PointerBaseInst = dyn_cast(PointerBase); - if (!PointerBaseInst) - return nullptr; - - auto *BasePtrStmt = getStmtFor(PointerBaseInst); - if (!BasePtrStmt) - return nullptr; - - return BasePtrStmt->getArrayAccessOrNULLFor(PointerBaseInst); -} - -bool Scop::buildAliasChecks(AliasAnalysis &AA) { - if (!PollyUseRuntimeAliasChecks) - return true; - - if (buildAliasGroups(AA)) { - // Aliasing assumptions do not go through addAssumption but we still want to - // collect statistics so we do it here explicitly. - if (MinMaxAliasGroups.size()) - AssumptionsAliasing++; - return true; - } - - // If a problem occurs while building the alias groups we need to delete - // this SCoP and pretend it wasn't valid in the first place. To this end - // we make the assumed context infeasible. - invalidate(ALIASING, DebugLoc()); - - LLVM_DEBUG( - dbgs() << "\n\nNOTE: Run time checks for " << getNameStr() - << " could not be created as the number of parameters involved " - "is too high. 
The SCoP will be " - "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust " - "the maximal number of parameters but be advised that the " - "compile time might increase exponentially.\n\n"); - return false; -} - -std::tuple> -Scop::buildAliasGroupsForAccesses(AliasAnalysis &AA) { - AliasSetTracker AST(AA); - - DenseMap PtrToAcc; - DenseSet HasWriteAccess; - for (ScopStmt &Stmt : *this) { - - isl::set StmtDomain = Stmt.getDomain(); - bool StmtDomainEmpty = StmtDomain.is_empty(); - - // Statements with an empty domain will never be executed. - if (StmtDomainEmpty) - continue; - - for (MemoryAccess *MA : Stmt) { - if (MA->isScalarKind()) - continue; - if (!MA->isRead()) - HasWriteAccess.insert(MA->getScopArrayInfo()); - MemAccInst Acc(MA->getAccessInstruction()); - if (MA->isRead() && isa(Acc)) - PtrToAcc[cast(Acc)->getRawSource()] = MA; - else - PtrToAcc[Acc.getPointerOperand()] = MA; - AST.add(Acc); - } - } - - AliasGroupVectorTy AliasGroups; - for (AliasSet &AS : AST) { - if (AS.isMustAlias() || AS.isForwardingAliasSet()) - continue; - AliasGroupTy AG; - for (auto &PR : AS) - AG.push_back(PtrToAcc[PR.getValue()]); - if (AG.size() < 2) - continue; - AliasGroups.push_back(std::move(AG)); - } - - return std::make_tuple(AliasGroups, HasWriteAccess); -} - -void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) { - for (unsigned u = 0; u < AliasGroups.size(); u++) { - AliasGroupTy NewAG; - AliasGroupTy &AG = AliasGroups[u]; - AliasGroupTy::iterator AGI = AG.begin(); - isl::set AGDomain = getAccessDomain(*AGI); - while (AGI != AG.end()) { - MemoryAccess *MA = *AGI; - isl::set MADomain = getAccessDomain(MA); - if (AGDomain.is_disjoint(MADomain)) { - NewAG.push_back(MA); - AGI = AG.erase(AGI); - } else { - AGDomain = AGDomain.unite(MADomain); - AGI++; - } - } - if (NewAG.size() > 1) - AliasGroups.push_back(std::move(NewAG)); - } -} - -bool Scop::buildAliasGroups(AliasAnalysis &AA) { - // To create sound alias checks we perform the following steps: - // 
o) We partition each group into read only and non read only accesses. - // o) For each group with more than one base pointer we then compute minimal - // and maximal accesses to each array of a group in read only and non - // read only partitions separately. - AliasGroupVectorTy AliasGroups; - DenseSet HasWriteAccess; - - std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses(AA); - - splitAliasGroupsByDomain(AliasGroups); - - for (AliasGroupTy &AG : AliasGroups) { - if (!hasFeasibleRuntimeContext()) - return false; - - { - IslMaxOperationsGuard MaxOpGuard(getIslCtx().get(), OptComputeOut); - bool Valid = buildAliasGroup(AG, HasWriteAccess); - if (!Valid) - return false; - } - if (isl_ctx_last_error(getIslCtx().get()) == isl_error_quota) { - invalidate(COMPLEXITY, DebugLoc()); - return false; - } - } - - return true; -} - -bool Scop::buildAliasGroup(Scop::AliasGroupTy &AliasGroup, - DenseSet HasWriteAccess) { - AliasGroupTy ReadOnlyAccesses; - AliasGroupTy ReadWriteAccesses; - SmallPtrSet ReadWriteArrays; - SmallPtrSet ReadOnlyArrays; - - if (AliasGroup.size() < 2) - return true; - - for (MemoryAccess *Access : AliasGroup) { - ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias", - Access->getAccessInstruction()) - << "Possibly aliasing pointer, use restrict keyword."); - const ScopArrayInfo *Array = Access->getScopArrayInfo(); - if (HasWriteAccess.count(Array)) { - ReadWriteArrays.insert(Array); - ReadWriteAccesses.push_back(Access); - } else { - ReadOnlyArrays.insert(Array); - ReadOnlyAccesses.push_back(Access); - } - } - - // If there are no read-only pointers, and less than two read-write pointers, - // no alias check is needed. - if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1) - return true; - - // If there is no read-write pointer, no alias check is needed. 
- if (ReadWriteArrays.empty()) - return true; - - // For non-affine accesses, no alias check can be generated as we cannot - // compute a sufficiently tight lower and upper bound: bail out. - for (MemoryAccess *MA : AliasGroup) { - if (!MA->isAffine()) { - invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(), - MA->getAccessInstruction()->getParent()); - return false; - } - } - - // Ensure that for all memory accesses for which we generate alias checks, - // their base pointers are available. - for (MemoryAccess *MA : AliasGroup) { - if (MemoryAccess *BasePtrMA = lookupBasePtrAccess(MA)) - addRequiredInvariantLoad( - cast(BasePtrMA->getAccessInstruction())); - } - - MinMaxAliasGroups.emplace_back(); - MinMaxVectorPairTy &pair = MinMaxAliasGroups.back(); - MinMaxVectorTy &MinMaxAccessesReadWrite = pair.first; - MinMaxVectorTy &MinMaxAccessesReadOnly = pair.second; - - bool Valid; - - Valid = - calculateMinMaxAccess(ReadWriteAccesses, *this, MinMaxAccessesReadWrite); - - if (!Valid) - return false; - - // Bail out if the number of values we need to compare is too large. - // This is important as the number of comparisons grows quadratically with - // the number of values we need to compare. - if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() > - RunTimeChecksMaxArraysPerGroup) - return false; - - Valid = - calculateMinMaxAccess(ReadOnlyAccesses, *this, MinMaxAccessesReadOnly); - - if (!Valid) - return false; - - return true; -} - /// Get the smallest loop that contains @p S but is not in @p S. 
static Loop *getLoopSurroundingScop(Scop &S, LoopInfo &LI) { // Start with the smallest loop containing the entry and expand that @@ -3647,11 +3301,30 @@ bool Scop::hasFeasibleRuntimeContext() const { auto DomainContext = getDomains().params(); IsFeasible = !DomainContext.is_subset(NegativeContext); - IsFeasible &= !Context.is_subset(NegativeContext); + IsFeasible &= !getContext().is_subset(NegativeContext); return IsFeasible; } +isl::set Scop::addNonEmptyDomainConstraints(isl::set C) const { + isl::set DomainContext = getDomains().params(); + return C.intersect_params(DomainContext); +} + +MemoryAccess *Scop::lookupBasePtrAccess(MemoryAccess *MA) { + Value *PointerBase = MA->getOriginalBaseAddr(); + + auto *PointerBaseInst = dyn_cast(PointerBase); + if (!PointerBaseInst) + return nullptr; + + auto *BasePtrStmt = getStmtFor(PointerBaseInst); + if (!BasePtrStmt) + return nullptr; + + return BasePtrStmt->getArrayAccessOrNULLFor(PointerBaseInst); +} + static std::string toString(AssumptionKind Kind) { switch (Kind) { case ALIASING: @@ -4380,6 +4053,10 @@ bool Scop::isEscaping(Instruction *Inst) { return false; } +void Scop::incrementNumberOfAliasingAssumptions(unsigned step) { + AssumptionsAliasing += step; +} + Scop::ScopStatistics Scop::getStatistics() const { ScopStatistics Result; #if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) From e559f62506423d5df23355862e11d233ff3c5242 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 16 Jul 2019 21:13:40 +0000 Subject: [PATCH 288/451] [libcxx] Rejigger test for destroying delete feature-test macros In r361572, we introduced library support for C++20 destroying delete and decided to only define the library feature-test macro when the compiler supports the underlying language feature. This patch reworks the tests to mirror that. 
llvm-svn: 366263 --- .../destroying_delete_t.pass.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp b/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp index 834222c06bfc0..fa4e77c71f759 100644 --- a/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/destroying_delete_t.pass.cpp @@ -52,10 +52,19 @@ void A::operator delete(A* a, std::destroying_delete_t) { ::operator delete(a); } -#ifndef __cpp_lib_destroying_delete -#error "Expected __cpp_lib_destroying_delete to be defined" -#elif __cpp_lib_destroying_delete < 201806L -#error "Unexpected value of __cpp_lib_destroying_delete" +// Only test the definition of the library feature-test macro when the compiler +// supports the feature -- otherwise we don't define the library feature-test +// macro. +#if defined(__cpp_impl_destroying_delete) +# if !defined(__cpp_lib_destroying_delete) +# error "Expected __cpp_lib_destroying_delete to be defined" +# elif __cpp_lib_destroying_delete < 201806L +# error "Unexpected value of __cpp_lib_destroying_delete" +# endif +#else +# if defined(__cpp_lib_destroying_delete) +# error "The library feature-test macro for destroying delete shouldn't be defined when the compiler doesn't support the language feature" +# endif #endif int main() { From 40580d36c4de7dfbff897ab72cc4e535c33d09c5 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 16 Jul 2019 21:15:19 +0000 Subject: [PATCH 289/451] DWARF: Skip zero column for inline call sites D64033 added DW_AT_call_column for inline sites. However, that change wasn't aware of "-gno-column-info". To avoid adding column info when "-gno-column-info" is used, now DW_AT_call_column is only added when we have non-zero column (when "-gno-column-info" is used, column will be zero). Patch by Wenlei He! 
Differential Revision: https://reviews.llvm.org/D64784 llvm-svn: 366264 --- llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 3 ++- llvm/test/DebugInfo/X86/fission-inline.ll | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 8862fa17e5b60..9548ad9918c1f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -543,7 +543,8 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, getOrCreateSourceID(IA->getFile())); addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine()); - addUInt(*ScopeDIE, dwarf::DW_AT_call_column, None, IA->getColumn()); + if (IA->getColumn()) + addUInt(*ScopeDIE, dwarf::DW_AT_call_column, None, IA->getColumn()); if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4) addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None, IA->getDiscriminator()); diff --git a/llvm/test/DebugInfo/X86/fission-inline.ll b/llvm/test/DebugInfo/X86/fission-inline.ll index 0702465e60e3f..0fb4b83bdf933 100644 --- a/llvm/test/DebugInfo/X86/fission-inline.ll +++ b/llvm/test/DebugInfo/X86/fission-inline.ll @@ -71,6 +71,8 @@ ; CHECK: DW_AT_call_file ; CHECK-NEXT: DW_AT_call_line {{.*}} (18) ; CHECK-NEXT: DW_AT_call_column {{.*}} (0x05) +; CHECK: DW_AT_call_file +; CHECK-NEXT: DW_AT_call_line {{.*}} (21) ; CHECK-NOT: DW_ ; CHECK: .debug_info.dwo contents: @@ -82,6 +84,7 @@ entry: call void @_Z2f1v(), !dbg !26 call void @_Z2f1v(), !dbg !25 call void @_Z2f1v(), !dbg !28 + call void @_Z2f1v(), !dbg !29 ret void, !dbg !29 } @@ -122,4 +125,5 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n !26 = !DILocation(line: 11, column: 3, scope: !11, inlinedAt: !27) !27 = !DILocation(line: 18, column: 5, scope: !20) !28 = !DILocation(line: 12, column: 3, scope: !11, inlinedAt: !27) -!29 = 
!DILocation(line: 21, column: 1, scope: !10) +!29 = !DILocation(line: 12, column: 3, scope: !11, inlinedAt: !30) +!30 = !DILocation(line: 21, column: 0, scope: !10) From ffca322266fcd431f7f17ae42ef3f665a3157d39 Mon Sep 17 00:00:00 2001 From: Matthias Gehre Date: Tue, 16 Jul 2019 21:19:00 +0000 Subject: [PATCH 290/451] [clang-tidy] initial version of readability-convert-member-functions-to-static Summary: Finds non-static member functions that can be made ``static``. I have run this check (repeatedly) over llvm-project. It made 1708 member functions ``static``. Out of those, I had to exclude 22 via ``NOLINT`` because their address was taken and stored in a variable of pointer-to-member type (e.g. passed to llvm::StringSwitch). It also made 243 member functions ``const``. (This is currently very conservative to have no false-positives and can hopefully be extended in the future.) You can find the results here: https://github.com/mgehre/llvm-project/commits/static_const_eval Reviewers: alexfh, aaron.ballman Subscribers: mgorny, xazax.hun, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D61749 llvm-svn: 366265 --- .../clang-tidy/readability/CMakeLists.txt | 1 + .../ConvertMemberFunctionsToStatic.cpp | 172 ++++++++++++++ .../ConvertMemberFunctionsToStatic.h | 37 +++ .../readability/ReadabilityTidyModule.cpp | 3 + clang-tools-extra/docs/ReleaseNotes.rst | 5 + .../docs/clang-tidy/checks/list.rst | 1 + ...ity-convert-member-functions-to-static.rst | 14 ++ ...ity-convert-member-functions-to-static.cpp | 218 ++++++++++++++++++ 8 files changed, 451 insertions(+) create mode 100644 clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.cpp create mode 100644 clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h create mode 100644 clang-tools-extra/docs/clang-tidy/checks/readability-convert-member-functions-to-static.rst create mode 100644 
clang-tools-extra/test/clang-tidy/readability-convert-member-functions-to-static.cpp diff --git a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt index b48e307e6153f..2d226b10334af 100644 --- a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt @@ -5,6 +5,7 @@ add_clang_library(clangTidyReadabilityModule BracesAroundStatementsCheck.cpp ConstReturnTypeCheck.cpp ContainerSizeEmptyCheck.cpp + ConvertMemberFunctionsToStatic.cpp DeleteNullPointerCheck.cpp DeletedDefaultCheck.cpp ElseAfterReturnCheck.cpp diff --git a/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.cpp b/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.cpp new file mode 100644 index 0000000000000..f0e5b29dcab60 --- /dev/null +++ b/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.cpp @@ -0,0 +1,172 @@ +//===--- ConvertMemberFunctionsToStatic.cpp - clang-tidy ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ConvertMemberFunctionsToStatic.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Basic/SourceLocation.h" + +using namespace clang::ast_matchers; + +namespace clang { +namespace tidy { +namespace readability { + +AST_MATCHER(CXXMethodDecl, isStatic) { return Node.isStatic(); } + +AST_MATCHER(CXXMethodDecl, hasTrivialBody) { return Node.hasTrivialBody(); } + +AST_MATCHER(CXXMethodDecl, isOverloadedOperator) { + return Node.isOverloadedOperator(); +} + +AST_MATCHER(CXXRecordDecl, hasAnyDependentBases) { + return Node.hasAnyDependentBases(); +} + +AST_MATCHER(CXXMethodDecl, isTemplate) { + return Node.getTemplatedKind() != FunctionDecl::TK_NonTemplate; +} + +AST_MATCHER(CXXMethodDecl, isDependentContext) { + return Node.isDependentContext(); +} + +AST_MATCHER(CXXMethodDecl, isInsideMacroDefinition) { + const ASTContext &Ctxt = Finder->getASTContext(); + return clang::Lexer::makeFileCharRange( + clang::CharSourceRange::getCharRange( + Node.getTypeSourceInfo()->getTypeLoc().getSourceRange()), + Ctxt.getSourceManager(), Ctxt.getLangOpts()) + .isInvalid(); +} + +AST_MATCHER_P(CXXMethodDecl, hasCanonicalDecl, + ast_matchers::internal::Matcher, InnerMatcher) { + return InnerMatcher.matches(*Node.getCanonicalDecl(), Finder, Builder); +} + +AST_MATCHER(CXXMethodDecl, usesThis) { + class FindUsageOfThis : public RecursiveASTVisitor { + public: + bool Used = false; + + bool VisitCXXThisExpr(const CXXThisExpr *E) { + Used = true; + return false; // Stop traversal. + } + } UsageOfThis; + + // TraverseStmt does not modify its argument. 
+ UsageOfThis.TraverseStmt(const_cast(Node.getBody())); + + return UsageOfThis.Used; +} + +void ConvertMemberFunctionsToStatic::registerMatchers(MatchFinder *Finder) { + Finder->addMatcher( + cxxMethodDecl( + isDefinition(), isUserProvided(), + unless(anyOf( + isExpansionInSystemHeader(), isVirtual(), isStatic(), + hasTrivialBody(), isOverloadedOperator(), cxxConstructorDecl(), + cxxDestructorDecl(), cxxConversionDecl(), isTemplate(), + isDependentContext(), + ofClass(anyOf( + isLambda(), + hasAnyDependentBases()) // Method might become virtual + // depending on template base class. + ), + isInsideMacroDefinition(), + hasCanonicalDecl(isInsideMacroDefinition()), usesThis()))) + .bind("x"), + this); +} + +/// \brief Obtain the original source code text from a SourceRange. +static StringRef getStringFromRange(SourceManager &SourceMgr, + const LangOptions &LangOpts, + SourceRange Range) { + if (SourceMgr.getFileID(Range.getBegin()) != + SourceMgr.getFileID(Range.getEnd())) + return {}; + + return Lexer::getSourceText(CharSourceRange(Range, true), SourceMgr, + LangOpts); +} + +static SourceRange getLocationOfConst(const TypeSourceInfo *TSI, + SourceManager &SourceMgr, + const LangOptions &LangOpts) { + assert(TSI); + const auto FTL = TSI->getTypeLoc().IgnoreParens().getAs(); + assert(FTL); + + SourceRange Range{FTL.getRParenLoc().getLocWithOffset(1), + FTL.getLocalRangeEnd()}; + // Inside Range, there might be other keywords and trailing return types. + // Find the exact position of "const". 
+ StringRef Text = getStringFromRange(SourceMgr, LangOpts, Range); + size_t Offset = Text.find("const"); + if (Offset == StringRef::npos) + return {}; + + SourceLocation Start = Range.getBegin().getLocWithOffset(Offset); + return {Start, Start.getLocWithOffset(strlen("const") - 1)}; +} + +void ConvertMemberFunctionsToStatic::check( + const MatchFinder::MatchResult &Result) { + const auto *Definition = Result.Nodes.getNodeAs("x"); + + // TODO: For out-of-line declarations, don't modify the source if the header + // is excluded by the -header-filter option. + DiagnosticBuilder Diag = + diag(Definition->getLocation(), "method %0 can be made static") + << Definition; + + // TODO: Would need to remove those in a fix-it. + if (Definition->getMethodQualifiers().hasVolatile() || + Definition->getMethodQualifiers().hasRestrict() || + Definition->getRefQualifier() != RQ_None) + return; + + const CXXMethodDecl *Declaration = Definition->getCanonicalDecl(); + + if (Definition->isConst()) { + // Make sure that we either remove 'const' on both declaration and + // definition or emit no fix-it at all. + SourceRange DefConst = getLocationOfConst(Definition->getTypeSourceInfo(), + *Result.SourceManager, + Result.Context->getLangOpts()); + + if (DefConst.isInvalid()) + return; + + if (Declaration != Definition) { + SourceRange DeclConst = getLocationOfConst( + Declaration->getTypeSourceInfo(), *Result.SourceManager, + Result.Context->getLangOpts()); + + if (DeclConst.isInvalid()) + return; + Diag << FixItHint::CreateRemoval(DeclConst); + } + + // Remove existing 'const' from both declaration and definition. 
+ Diag << FixItHint::CreateRemoval(DefConst); + } + Diag << FixItHint::CreateInsertion(Declaration->getBeginLoc(), "static "); +} + +} // namespace readability +} // namespace tidy +} // namespace clang diff --git a/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h b/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h new file mode 100644 index 0000000000000..d9947650ab539 --- /dev/null +++ b/clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h @@ -0,0 +1,37 @@ +//===--- ConvertMemberFunctionsToStatic.h - clang-tidy ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONVERTMEMFUNCTOSTATIC_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONVERTMEMFUNCTOSTATIC_H + +#include "../ClangTidy.h" + +namespace clang { +namespace tidy { +namespace readability { + +/// This check finds C++ class methods that can be made static +/// because they don't use the 'this' pointer.
+/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/ +/// readability-convert-member-functions-to-static.html +class ConvertMemberFunctionsToStatic : public ClangTidyCheck { +public: + ConvertMemberFunctionsToStatic(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; +}; + +} // namespace readability +} // namespace tidy +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONVERTMEMFUNCTOSTATIC_H diff --git a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp index 5b2aed421bf11..5005ba3df61fd 100644 --- a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp @@ -13,6 +13,7 @@ #include "BracesAroundStatementsCheck.h" #include "ConstReturnTypeCheck.h" #include "ContainerSizeEmptyCheck.h" +#include "ConvertMemberFunctionsToStatic.h" #include "DeleteNullPointerCheck.h" #include "DeletedDefaultCheck.h" #include "ElseAfterReturnCheck.h" @@ -57,6 +58,8 @@ class ReadabilityModule : public ClangTidyModule { "readability-const-return-type"); CheckFactories.registerCheck( "readability-container-size-empty"); + CheckFactories.registerCheck( + "readability-convert-member-functions-to-static"); CheckFactories.registerCheck( "readability-delete-null-pointer"); CheckFactories.registerCheck( diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index edc158499dcbd..be79ce7dc479a 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -230,6 +230,11 @@ Improvements to clang-tidy If set to true, the check will provide fix-its with literal initializers (``int i = 0;``) instead of 
curly braces (``int i{};``). +- New :doc:`readability-convert-member-functions-to-static + ` check. + + Finds non-static member functions that can be made ``static``. + Improvements to include-fixer ----------------------------- diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index f0349b9a90e9e..27521c86e9676 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -257,6 +257,7 @@ Clang-Tidy Checks readability-braces-around-statements readability-const-return-type readability-container-size-empty + readability-convert-member-functions-to-static readability-delete-null-pointer readability-deleted-default readability-else-after-return diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability-convert-member-functions-to-static.rst b/clang-tools-extra/docs/clang-tidy/checks/readability-convert-member-functions-to-static.rst new file mode 100644 index 0000000000000..891f6be637142 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/readability-convert-member-functions-to-static.rst @@ -0,0 +1,14 @@ +.. title:: clang-tidy - readability-convert-member-functions-to-static + +readability-convert-member-functions-to-static +============================================== + +Finds non-static member functions that can be made ``static`` +because the functions don't use ``this``. + +After applying modifications as suggested by the check, running the check again +might find more opportunities to mark member functions ``static``. + +After making a member function ``static``, you might want to run the check +`readability-static-accessed-through-instance` to replace calls like +``Instance.method()`` by ``Class::method()``.
diff --git a/clang-tools-extra/test/clang-tidy/readability-convert-member-functions-to-static.cpp b/clang-tools-extra/test/clang-tidy/readability-convert-member-functions-to-static.cpp new file mode 100644 index 0000000000000..9612fa9de8c20 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/readability-convert-member-functions-to-static.cpp @@ -0,0 +1,218 @@ +// RUN: %check_clang_tidy %s readability-convert-member-functions-to-static %t + +class DoNotMakeEmptyStatic { + void emptyMethod() {} + void empty_method_out_of_line(); +}; + +void DoNotMakeEmptyStatic::empty_method_out_of_line() {} + +class A { + int field; + const int const_field; + static int static_field; + + void no_use() { + // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'no_use' can be made static + // CHECK-FIXES: {{^}} static void no_use() { + int i = 1; + } + + int read_field() { + return field; + } + + void write_field() { + field = 1; + } + + int call_non_const_member() { return read_field(); } + + int call_static_member() { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: method 'call_static_member' can be made static + // CHECK-FIXES: {{^}} static int call_static_member() { + already_static(); + } + + int read_static() { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: method 'read_static' can be made static + // CHECK-FIXES: {{^}} static int read_static() { + return static_field; + } + void write_static() { + // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'write_static' can be made static + // CHECK-FIXES: {{^}} static void write_static() { + static_field = 1; + } + + static int already_static() { return static_field; } + + int already_const() const { return field; } + + int already_const_convert_to_static() const { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: method 'already_const_convert_to_static' can be made static + // CHECK-FIXES: {{^}} static int already_const_convert_to_static() { + return static_field; + } + + static int out_of_line_already_static(); + + void 
out_of_line_call_static(); + // CHECK-FIXES: {{^}} static void out_of_line_call_static(); + int out_of_line_const_to_static() const; + // CHECK-FIXES: {{^}} static int out_of_line_const_to_static() ; +}; + +int A::out_of_line_already_static() { return 0; } + +void A::out_of_line_call_static() { + // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: method 'out_of_line_call_static' can be made static + // CHECK-FIXES: {{^}}void A::out_of_line_call_static() { + already_static(); +} + +int A::out_of_line_const_to_static() const { + // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'out_of_line_const_to_static' can be made static + // CHECK-FIXES: {{^}}int A::out_of_line_const_to_static() { + return 0; +} + +struct KeepVirtual { + virtual int f() { return 0; } + virtual int h() const { return 0; } +}; + +struct KeepVirtualDerived : public KeepVirtual { + int f() { return 0; } + int h() const override { return 0; } +}; + +// Don't add 'static' to special member functions and operators. +struct KeepSpecial { + KeepSpecial() { int L = 0; } + ~KeepSpecial() { int L = 0; } + int operator+() { return 0; } + operator int() { return 0; } +}; + +void KeepLambdas() { + using FT = int (*)(); + auto F = static_cast([]() { return 0; }); + auto F2 = []() { return 0; }; +} + +template +struct KeepWithTemplateBase : public Base { + int i; + // We cannot make these methods static because they might need to override + // a function from Base. + int static_f() { return 0; } +}; + +template +struct KeepTemplateClass { + int i; + // We cannot make these methods static because a specialization + // might use *this differently. + int static_f() { return 0; } +}; + +struct KeepTemplateMethod { + int i; + // We cannot make these methods static because a specialization + // might use *this differently. 
+ template + static int static_f() { return 0; } +}; + +void instantiate() { + struct S {}; + KeepWithTemplateBase I1; + I1.static_f(); + + KeepTemplateClass I2; + I2.static_f(); + + KeepTemplateMethod I3; + I3.static_f(); +} + +struct Trailing { + auto g() const -> int { + // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'g' can be made static + // CHECK-FIXES: {{^}} static auto g() -> int { + return 0; + } + + void vol() volatile { + // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'vol' can be made static + return; + } + + void ref() const & { + // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'ref' can be made static + return; + } + void refref() const && { + // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'refref' can be made static + return; + } + + void restr() __restrict { + // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'restr' can be made static + return; + } +}; + +struct UnevaluatedContext { + void f() { sizeof(this); } + + void noex() noexcept(noexcept(this)); +}; + +struct LambdaCapturesThis { + int Field; + + int explicitCapture() { + return [this]() { return Field; }(); + } + + int implicitCapture() { + return [&]() { return Field; }(); + } +}; + +struct NoFixitInMacro { +#define CONST const + int no_use_macro_const() CONST { + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: method 'no_use_macro_const' can be made static + return 0; + } + +#define ADD_CONST(F) F const + int ADD_CONST(no_use_macro2()) { + return 0; + } + +#define FUN no_use_macro() + int i; + int FUN { + return i; + } + +#define T(FunctionName, Keyword) \ + Keyword int FunctionName() { return 0; } +#define EMPTY + T(A, EMPTY) + T(B, static) + +#define T2(FunctionName) \ + int FunctionName() { return 0; } + T2(A2) + +#define VOLATILE volatile + void volatileMacro() VOLATILE { + // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: method 'volatileMacro' can be made static + return; + } +}; From fdc61bce9470ca7e80125b54d6dfbf24a3cc68c5 Mon Sep 17 00:00:00 2001 From: Dominik Adamski 
Date: Tue, 16 Jul 2019 21:29:06 +0000 Subject: [PATCH 291/451] [NFC][ScopBuilder] Move addUserContext to ScopBuilder Scope of changes: 1) Moved addUserContext to ScopBuilder. 2) Moved command line option UserContextStr to ScopBuilder. Differential Revision: https://reviews.llvm.org/D63740 llvm-svn: 366266 --- polly/include/polly/ScopBuilder.h | 3 ++ polly/include/polly/ScopInfo.h | 3 -- polly/lib/Analysis/ScopBuilder.cpp | 46 +++++++++++++++++++++++++++++- polly/lib/Analysis/ScopInfo.cpp | 43 ---------------------------- 4 files changed, 48 insertions(+), 47 deletions(-) diff --git a/polly/include/polly/ScopBuilder.h b/polly/include/polly/ScopBuilder.h index 0cb90e3ca0e6a..c5cbcf8a8ab42 100644 --- a/polly/include/polly/ScopBuilder.h +++ b/polly/include/polly/ScopBuilder.h @@ -376,6 +376,9 @@ class ScopBuilder { BasicBlock *IncomingBlock, Value *IncomingValue, bool IsExitBlock); + /// Add user provided parameter constraints to context (command line). + void addUserContext(); + /// Add all recorded assumptions to the assumed context. void addRecordedAssumptions(); diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index c86513a5f62c9..e76442aadcefd 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -2044,9 +2044,6 @@ class Scop { void addUserAssumptions(AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI, DenseMap &InvalidDomainMap); - /// Add user provided parameter constraints to context (command line). - void addUserContext(); - /// Add the bounds of the parameters to the context. 
void addParameterBounds(); diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp index f079a749dcb11..ec078be083850 100644 --- a/polly/lib/Analysis/ScopBuilder.cpp +++ b/polly/lib/Analysis/ScopBuilder.cpp @@ -114,6 +114,11 @@ static cl::opt UnprofitableScalarAccs( cl::desc("Count statements with scalar accesses as not optimizable"), cl::Hidden, cl::init(false), cl::cat(PollyCategory)); +static cl::opt UserContextStr( + "polly-context", cl::value_desc("isl parameter set"), + cl::desc("Provide additional constraints on the context parameters"), + cl::init(""), cl::cat(PollyCategory)); + static cl::opt DetectFortranArrays( "polly-detect-fortran-arrays", cl::desc("Detect Fortran arrays and use this for code generation"), @@ -1454,6 +1459,45 @@ bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA, return false; } +void ScopBuilder::addUserContext() { + if (UserContextStr.empty()) + return; + + isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str()); + isl::space Space = scop->getParamSpace(); + if (Space.dim(isl::dim::param) != UserContext.dim(isl::dim::param)) { + std::string SpaceStr = Space.to_str(); + errs() << "Error: the context provided in -polly-context has not the same " + << "number of dimensions than the computed context. Due to this " + << "mismatch, the -polly-context option is ignored. Please provide " + << "the context in the parameter space: " << SpaceStr << ".\n"; + return; + } + + for (unsigned i = 0; i < Space.dim(isl::dim::param); i++) { + std::string NameContext = + scop->getContext().get_dim_name(isl::dim::param, i); + std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i); + + if (NameContext != NameUserContext) { + std::string SpaceStr = Space.to_str(); + errs() << "Error: the name of dimension " << i + << " provided in -polly-context " + << "is '" << NameUserContext << "', but the name in the computed " + << "context is '" << NameContext + << "'. 
Due to this name mismatch, " + << "the -polly-context option is ignored. Please provide " + << "the context in the parameter space: " << SpaceStr << ".\n"; + return; + } + + UserContext = UserContext.set_dim_id(isl::dim::param, i, + Space.get_dim_id(isl::dim::param, i)); + } + isl::set newContext = scop->getContext().intersect(UserContext); + scop->setContext(newContext); +} + isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes) { // TODO: Loads that are not loop carried, hence are in a statement with @@ -2326,7 +2370,7 @@ void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) { scop->finalizeAccesses(); scop->realignParams(); - scop->addUserContext(); + addUserContext(); // After the context was fully constructed, thus all our knowledge about // the parameters is in there, we add all recorded assumptions to the diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 9244796a23c6c..9ccd9bf1680fd 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -122,11 +122,6 @@ static cl::opt PollyRemarksMinimal( cl::desc("Do not emit remarks about assumptions that are known"), cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory)); -static cl::opt UserContextStr( - "polly-context", cl::value_desc("isl parameter set"), - cl::desc("Provide additional constraints on the context parameters"), - cl::init(""), cl::cat(PollyCategory)); - static cl::opt IslOnErrorAbort("polly-on-isl-error-abort", cl::desc("Abort if an isl error is encountered"), @@ -2017,44 +2012,6 @@ void Scop::addUserAssumptions( } } -void Scop::addUserContext() { - if (UserContextStr.empty()) - return; - - isl::set UserContext = isl::set(getIslCtx(), UserContextStr.c_str()); - isl::space Space = getParamSpace(); - if (Space.dim(isl::dim::param) != UserContext.dim(isl::dim::param)) { - std::string SpaceStr = Space.to_str(); - errs() << "Error: the context provided in -polly-context has not the same " - 
<< "number of dimensions than the computed context. Due to this " - << "mismatch, the -polly-context option is ignored. Please provide " - << "the context in the parameter space: " << SpaceStr << ".\n"; - return; - } - - for (unsigned i = 0; i < Space.dim(isl::dim::param); i++) { - std::string NameContext = Context.get_dim_name(isl::dim::param, i); - std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i); - - if (NameContext != NameUserContext) { - std::string SpaceStr = Space.to_str(); - errs() << "Error: the name of dimension " << i - << " provided in -polly-context " - << "is '" << NameUserContext << "', but the name in the computed " - << "context is '" << NameContext - << "'. Due to this name mismatch, " - << "the -polly-context option is ignored. Please provide " - << "the context in the parameter space: " << SpaceStr << ".\n"; - return; - } - - UserContext = UserContext.set_dim_id(isl::dim::param, i, - Space.get_dim_id(isl::dim::param, i)); - } - - Context = Context.intersect(UserContext); -} - void Scop::buildContext() { isl::space Space = isl::space::params_alloc(getIslCtx(), 0); Context = isl::set::universe(Space); From f4c2d57f767d870b4787c86b543ded8076fe108b Mon Sep 17 00:00:00 2001 From: Ben Hamilton Date: Tue, 16 Jul 2019 21:29:40 +0000 Subject: [PATCH 292/451] [clang-format] Don't detect call to ObjC class method as C++11 attribute specifier Summary: Previously, clang-format detected something like the following as a C++11 attribute specifier. @[[NSArray class]] instead of an array with an Objective-C method call inside. In general, when the attribute specifier checking runs, if it sees 2 identifiers in a row, it decides that the square brackets represent an Objective-C method call. However, here, `class` is tokenized as a keyword instead of an identifier, so this check fails. To fix this, the attribute specifier first checks whether the first square bracket has an "@" before it. 
If it does, then that square bracket is not the start of an attribute specifier because it is an Objective-C array literal. (The assumption is that @[[.*]] is not valid C/C++.) Contributed by rkgibson2. Reviewers: benhamilton Reviewed By: benhamilton Subscribers: aaron.ballman, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D64632 llvm-svn: 366267 --- clang/lib/Format/TokenAnnotator.cpp | 6 +++++- clang/unittests/Format/FormatTest.cpp | 6 ++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 6b698e24b5e50..490c4f46135e2 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -388,6 +388,10 @@ class AnnotatingParser { bool isCpp11AttributeSpecifier(const FormatToken &Tok) { if (!Style.isCpp() || !Tok.startsSequence(tok::l_square, tok::l_square)) return false; + // The first square bracket is part of an ObjC array literal + if (Tok.Previous && Tok.Previous->is(tok::at)) { + return false; + } const FormatToken *AttrTok = Tok.Next->Next; if (!AttrTok) return false; @@ -400,7 +404,7 @@ class AnnotatingParser { while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) { // ObjC message send. We assume nobody will use : in a C++11 attribute // specifier parameter, although this is technically valid: - // [[foo(:)]] + // [[foo(:)]]. if (AttrTok->is(tok::colon) || AttrTok->startsSequence(tok::identifier, tok::identifier) || AttrTok->startsSequence(tok::r_paren, tok::identifier)) diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index c4abad228d0a3..c1cec110137bd 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -7027,6 +7027,12 @@ TEST_F(FormatTest, UnderstandsSquareAttributes) { // On the other hand, we still need to correctly find array subscripts. 
verifyFormat("int a = std::vector{1, 2, 3}[0];"); + // Make sure that we do not mistake Objective-C method inside array literals + // as attributes, even if those method names are also keywords. + verifyFormat("@[ [foo bar] ];"); + verifyFormat("@[ [NSArray class] ];"); + verifyFormat("@[ [foo enum] ];"); + // Make sure we do not parse attributes as lambda introducers. FormatStyle MultiLineFunctions = getLLVMStyle(); MultiLineFunctions.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; From d746a210e16925d8c26bd8359598d95213712218 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 16 Jul 2019 21:30:41 +0000 Subject: [PATCH 293/451] [x86] use more phadd for reductions This is part of what is requested by PR42023: https://bugs.llvm.org/show_bug.cgi?id=42023 There's an extension needed for FP add, but exactly how we would specify that using flags is not clear to me, so I left that as a TODO. We're still missing patterns for partial reductions when the input vector is 256-bit or 512-bit, but I think that's a failure of vector narrowing. If we can reduce the widths, then this matching should work on those tests. Differential Revision: https://reviews.llvm.org/D64760 llvm-svn: 366268 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 54 +++++++++++++++++++ llvm/test/CodeGen/X86/phaddsub-extract.ll | 44 ++++++--------- .../CodeGen/X86/vector-reduce-add-widen.ll | 22 +++----- llvm/test/CodeGen/X86/vector-reduce-add.ll | 22 +++----- 4 files changed, 86 insertions(+), 56 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 62499a28dff85..59540211d5495 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35624,6 +35624,57 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) { llvm_unreachable("All opcodes should return within switch"); } +/// Try to convert a vector reduction sequence composed of binops and shuffles +/// into horizontal ops. 
+static SDValue combineReductionToHorizontal(SDNode *ExtElt, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unexpected caller"); + bool OptForSize = DAG.getMachineFunction().getFunction().hasOptSize(); + if (!Subtarget.hasFastHorizontalOps() && !OptForSize) + return SDValue(); + SDValue Index = ExtElt->getOperand(1); + if (!isNullConstant(Index)) + return SDValue(); + + // TODO: Allow FADD with reduction and/or reassociation and no-signed-zeros. + ISD::NodeType Opc; + SDValue Rdx = DAG.matchBinOpReduction(ExtElt, Opc, {ISD::ADD}); + if (!Rdx) + return SDValue(); + + EVT VT = ExtElt->getValueType(0); + EVT VecVT = ExtElt->getOperand(0).getValueType(); + if (VecVT.getScalarType() != VT) + return SDValue(); + + unsigned HorizOpcode = Opc == ISD::ADD ? X86ISD::HADD : X86ISD::FHADD; + SDLoc DL(ExtElt); + + // 256-bit horizontal instructions operate on 128-bit chunks rather than + // across the whole vector, so we need an extract + hop preliminary stage. + // This is the only step where the operands of the hop are not the same value. + // TODO: We could extend this to handle 512-bit or even longer vectors. 
+ if (((VecVT == MVT::v16i16 || VecVT == MVT::v8i32) && Subtarget.hasSSSE3()) || + ((VecVT == MVT::v8f32 || VecVT == MVT::v4f64) && Subtarget.hasSSE3())) { + unsigned NumElts = VecVT.getVectorNumElements(); + SDValue Hi = extract128BitVector(Rdx, NumElts / 2, DAG, DL); + SDValue Lo = extract128BitVector(Rdx, 0, DAG, DL); + VecVT = EVT::getVectorVT(*DAG.getContext(), VT, NumElts / 2); + Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Hi, Lo); + } + if (!((VecVT == MVT::v8i16 || VecVT == MVT::v4i32) && Subtarget.hasSSSE3()) && + !((VecVT == MVT::v4f32 || VecVT == MVT::v2f64) && Subtarget.hasSSE3())) + return SDValue(); + + // extract (add (shuf X), X), 0 --> extract (hadd X, X), 0 + assert(Rdx.getValueType() == VecVT && "Unexpected reduction match"); + unsigned ReductionSteps = Log2_32(VecVT.getVectorNumElements()); + for (unsigned i = 0; i != ReductionSteps; ++i) + Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Rdx, Rdx); + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index); +} + /// Detect vector gather/scatter index generation and convert it from being a /// bunch of shuffles and extracts into a somewhat faster sequence. 
/// For i686, the best sequence is apparently storing the value and loading @@ -35710,6 +35761,9 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, if (SDValue MinMax = combineHorizontalMinMaxResult(N, DAG, Subtarget)) return MinMax; + if (SDValue V = combineReductionToHorizontal(N, DAG, Subtarget)) + return V; + if (SDValue V = scalarizeExtEltFP(N, DAG)) return V; diff --git a/llvm/test/CodeGen/X86/phaddsub-extract.ll b/llvm/test/CodeGen/X86/phaddsub-extract.ll index e81952d331c25..2a7039e932c36 100644 --- a/llvm/test/CodeGen/X86/phaddsub-extract.ll +++ b/llvm/test/CodeGen/X86/phaddsub-extract.ll @@ -1903,10 +1903,8 @@ define i16 @hadd16_8(<8 x i16> %x223) { ; ; SSE3-FAST-LABEL: hadd16_8: ; SSE3-FAST: # %bb.0: -; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE3-FAST-NEXT: paddw %xmm0, %xmm1 -; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE3-FAST-NEXT: paddw %xmm1, %xmm0 +; SSE3-FAST-NEXT: phaddw %xmm0, %xmm0 +; SSE3-FAST-NEXT: phaddw %xmm0, %xmm0 ; SSE3-FAST-NEXT: phaddw %xmm0, %xmm0 ; SSE3-FAST-NEXT: movd %xmm0, %eax ; SSE3-FAST-NEXT: # kill: def $ax killed $ax killed $eax @@ -1926,10 +1924,8 @@ define i16 @hadd16_8(<8 x i16> %x223) { ; ; AVX-FAST-LABEL: hadd16_8: ; AVX-FAST: # %bb.0: -; AVX-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX-FAST-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX-FAST-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; AVX-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0 +; AVX-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vmovd %xmm0, %eax ; AVX-FAST-NEXT: # kill: def $ax killed $ax killed $eax @@ -1956,10 +1952,9 @@ define i32 @hadd32_4(<4 x i32> %x225) { ; ; SSE3-FAST-LABEL: hadd32_4: ; SSE3-FAST: # %bb.0: -; SSE3-FAST-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE3-FAST-NEXT: paddd %xmm0, %xmm1 -; SSE3-FAST-NEXT: phaddd %xmm1, %xmm1 -; SSE3-FAST-NEXT: movd %xmm1, %eax +; SSE3-FAST-NEXT: phaddd %xmm0, 
%xmm0 +; SSE3-FAST-NEXT: phaddd %xmm0, %xmm0 +; SSE3-FAST-NEXT: movd %xmm0, %eax ; SSE3-FAST-NEXT: retq ; ; AVX-SLOW-LABEL: hadd32_4: @@ -1973,8 +1968,7 @@ define i32 @hadd32_4(<4 x i32> %x225) { ; ; AVX-FAST-LABEL: hadd32_4: ; AVX-FAST: # %bb.0: -; AVX-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX-FAST-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX-FAST-NEXT: vmovd %xmm0, %eax ; AVX-FAST-NEXT: retq @@ -2097,10 +2091,8 @@ define i32 @hadd32_16(<16 x i32> %x225) { define i16 @hadd16_8_optsize(<8 x i16> %x223) optsize { ; SSE3-LABEL: hadd16_8_optsize: ; SSE3: # %bb.0: -; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE3-NEXT: paddw %xmm0, %xmm1 -; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE3-NEXT: paddw %xmm1, %xmm0 +; SSE3-NEXT: phaddw %xmm0, %xmm0 +; SSE3-NEXT: phaddw %xmm0, %xmm0 ; SSE3-NEXT: phaddw %xmm0, %xmm0 ; SSE3-NEXT: movd %xmm0, %eax ; SSE3-NEXT: # kill: def $ax killed $ax killed $eax @@ -2108,10 +2100,8 @@ define i16 @hadd16_8_optsize(<8 x i16> %x223) optsize { ; ; AVX-LABEL: hadd16_8_optsize: ; AVX: # %bb.0: -; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vphaddw %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vphaddw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vphaddw %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax @@ -2129,16 +2119,14 @@ define i16 @hadd16_8_optsize(<8 x i16> %x223) optsize { define i32 @hadd32_4_optsize(<4 x i32> %x225) optsize { ; SSE3-LABEL: hadd32_4_optsize: ; SSE3: # %bb.0: -; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE3-NEXT: paddd %xmm0, %xmm1 -; SSE3-NEXT: phaddd %xmm1, %xmm1 -; SSE3-NEXT: movd %xmm1, %eax +; SSE3-NEXT: phaddd %xmm0, %xmm0 +; SSE3-NEXT: phaddd %xmm0, %xmm0 +; SSE3-NEXT: movd %xmm0, %eax ; SSE3-NEXT: retq ; ; AVX-LABEL: 
hadd32_4_optsize: ; AVX: # %bb.0: -; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-reduce-add-widen.ll b/llvm/test/CodeGen/X86/vector-reduce-add-widen.ll index b886a745edc1c..6dc5a2b54b506 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-add-widen.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-add-widen.ll @@ -254,8 +254,7 @@ define i32 @test_v4i32(<4 x i32> %a0) { ; ; AVX1-FAST-LABEL: test_v4i32: ; AVX1-FAST: # %bb.0: -; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX1-FAST-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vmovd %xmm0, %eax ; AVX1-FAST-NEXT: retq @@ -307,9 +306,8 @@ define i32 @test_v8i32(<8 x i32> %a0) { ; AVX1-FAST-LABEL: test_v8i32: ; AVX1-FAST: # %bb.0: ; AVX1-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-FAST-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX1-FAST-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm1, %xmm0 +; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vmovd %xmm0, %eax ; AVX1-FAST-NEXT: vzeroupper @@ -635,10 +633,8 @@ define i16 @test_v8i16(<8 x i16> %a0) { ; ; AVX1-FAST-LABEL: test_v8i16: ; AVX1-FAST: # %bb.0: -; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX1-FAST-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX1-FAST-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; AVX1-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0 +; AVX1-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vmovd %xmm0, %eax ; AVX1-FAST-NEXT: # kill: def $ax killed $ax killed $eax @@ -704,11 +700,9 @@ define i16 
@test_v16i16(<16 x i16> %a0) { ; AVX1-FAST-LABEL: test_v16i16: ; AVX1-FAST: # %bb.0: ; AVX1-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-FAST-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX1-FAST-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX1-FAST-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; AVX1-FAST-NEXT: vphaddw %xmm0, %xmm1, %xmm0 +; AVX1-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0 +; AVX1-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vmovd %xmm0, %eax ; AVX1-FAST-NEXT: # kill: def $ax killed $ax killed $eax diff --git a/llvm/test/CodeGen/X86/vector-reduce-add.ll b/llvm/test/CodeGen/X86/vector-reduce-add.ll index 02fb375a318f5..630299a1824e0 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-add.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-add.ll @@ -241,8 +241,7 @@ define i32 @test_v4i32(<4 x i32> %a0) { ; ; AVX1-FAST-LABEL: test_v4i32: ; AVX1-FAST: # %bb.0: -; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX1-FAST-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vmovd %xmm0, %eax ; AVX1-FAST-NEXT: retq @@ -294,9 +293,8 @@ define i32 @test_v8i32(<8 x i32> %a0) { ; AVX1-FAST-LABEL: test_v8i32: ; AVX1-FAST: # %bb.0: ; AVX1-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-FAST-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX1-FAST-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm1, %xmm0 +; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vmovd %xmm0, %eax ; AVX1-FAST-NEXT: vzeroupper @@ -605,10 +603,8 @@ define i16 @test_v8i16(<8 x i16> %a0) { ; ; AVX1-FAST-LABEL: test_v8i16: ; AVX1-FAST: # %bb.0: -; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX1-FAST-NEXT: vpaddw %xmm1, 
%xmm0, %xmm0 -; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX1-FAST-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; AVX1-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0 +; AVX1-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vmovd %xmm0, %eax ; AVX1-FAST-NEXT: # kill: def $ax killed $ax killed $eax @@ -674,11 +670,9 @@ define i16 @test_v16i16(<16 x i16> %a0) { ; AVX1-FAST-LABEL: test_v16i16: ; AVX1-FAST: # %bb.0: ; AVX1-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-FAST-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; AVX1-FAST-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX1-FAST-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; AVX1-FAST-NEXT: vphaddw %xmm0, %xmm1, %xmm0 +; AVX1-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0 +; AVX1-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vphaddw %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vmovd %xmm0, %eax ; AVX1-FAST-NEXT: # kill: def $ax killed $ax killed $eax From 607cd44bdc6a5117241fdc59191bf78da7466b0c Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 16 Jul 2019 21:34:59 +0000 Subject: [PATCH 294/451] [ORC][docs] Trim ORCv1 to ORCv2 transition section, add a how-to section. llvm-svn: 366269 --- llvm/docs/ORCv2.rst | 409 +++++++++++++++++++++++++++++++------------- 1 file changed, 288 insertions(+), 121 deletions(-) diff --git a/llvm/docs/ORCv2.rst b/llvm/docs/ORCv2.rst index 4daa12f560869..2f37a2fbdad59 100644 --- a/llvm/docs/ORCv2.rst +++ b/llvm/docs/ORCv2.rst @@ -2,6 +2,9 @@ ORC Design and Implementation =============================== +.. contents:: + :local: + Introduction ============ @@ -9,9 +12,6 @@ This document aims to provide a high-level overview of the design and implementation of the ORC JIT APIs. Except where otherwise stated, all discussion applies to the design of the APIs as of LLVM verison 9 (ORCv2). -.. 
contents:: - :local: - Use-cases ========= @@ -158,7 +158,7 @@ common symbol definitions. To see how this works, imagine a program ``foo`` which links against a pair of dynamic libraries: ``libA`` and ``libB``. On the command line, building this -system might look like: +program might look like: .. code-block:: bash @@ -196,29 +196,30 @@ checking omitted for brevity) as: auto MainSym = ExitOnErr(ES.lookup({&ES.getMainJITDylib()}, "main")); auto *Main = (int(*)(int, char*[]))MainSym.getAddress(); - int Result = Main(...); - +v int Result = Main(...); This example tells us nothing about *how* or *when* compilation will happen. -That will depend on the implementation of the hypothetical CXXCompilingLayer, -but the linking rules will be the same regardless. For example, if a1.cpp and -a2.cpp both define a function "foo" the API should generate a duplicate -definition error. On the other hand, if a1.cpp and b1.cpp both define "foo" -there is no error (different dynamic libraries may define the same symbol). If -main.cpp refers to "foo", it should bind to the definition in LibA rather than -the one in LibB, since main.cpp is part of the "main" dylib, and the main dylib -links against LibA before LibB. +That will depend on the implementation of the hypothetical CXXCompilingLayer. +The same linker-based symbol resolution rules will apply regardless of that +implementation, however. For example, if a1.cpp and a2.cpp both define a +function "foo" then ORCv2 will generate a duplicate definition error. On the +other hand, if a1.cpp and b1.cpp both define "foo" there is no error (different +dynamic libraries may define the same symbol). If main.cpp refers to "foo", it +should bind to the definition in LibA rather than the one in LibB, since +main.cpp is part of the "main" dylib, and the main dylib links against LibA +before LibB. 
Many JIT clients will have no need for this strict adherence to the usual -ahead-of-time linking rules and should be able to get by just fine by putting +ahead-of-time linking rules, and should be able to get by just fine by putting all of their code in a single JITDylib. However, clients who want to JIT code for languages/projects that traditionally rely on ahead-of-time linking (e.g. C++) will find that this feature makes life much easier. -Symbol lookup in ORC serves two other important functions, beyond basic lookup: -(1) It triggers compilation of the symbol(s) searched for, and (2) it provides -the synchronization mechanism for concurrent compilation. The pseudo-code for -the lookup process is: +Symbol lookup in ORC serves two other important functions, beyond providing +addresses for symbols: (1) It triggers compilation of the symbol(s) searched for +(if they have not been compiled already), and (2) it provides the +synchronization mechanism for concurrent compilation. The pseudo-code for the +lookup process is: .. code-block:: none @@ -229,13 +230,13 @@ the lookup process is: dispatch materializers (if any) In this context a materializer is something that provides a working definition -of a symbol upon request. Generally materializers wrap compilers, but they may -also wrap a linker directly (if the program representation backing the -definitions is an object file), or even just a class that writes bits directly -into memory (if the definitions are stubs). Materialization is the blanket term -for any actions (compiling, linking, splatting bits, registering with runtimes, -etc.) that is requried to generate a symbol definition that is safe to call or -access. +of a symbol upon request. 
Usually materializers are just wrappers for compilers, +but they may also wrap a jit-linker directly (if the program representation +backing the definitions is an object file), or may even be a class that writes +bits directly into memory (for example, if the definitions are +stubs). Materialization is the blanket term for any actions (compiling, linking, +splatting bits, registering with runtimes, etc.) that are required to generate a +symbol definition that is safe to call or access. As each materializer completes its work it notifies the JITDylib, which in turn notifies any query objects that are waiting on the newly materialized @@ -314,126 +315,292 @@ TBD. Transitioning from ORCv1 to ORCv2 ================================= -Since LLVM 7.0 new ORC developement has focused on adding support for concurrent -compilation. In order to enable concurrency new APIs were introduced -(ExecutionSession, JITDylib, etc.) and new implementations of existing layers -were written. In LLVM 8.0 the old layer implementations, which do not support -concurrency, were renamed (with a "Legacy" prefix), but remained in tree. In -LLVM 9.0 we have added a deprecation warning for the old layers and utilities, -and in LLVM 10.0 the old layers and utilities will be removed. +Since LLVM 7.0, new ORC development work has focused on adding support for +concurrent JIT compilation. The new APIs (including new layer interfaces and +implementations, and new utilities) that support concurrency are collectively +referred to as ORCv2, and the original, non-concurrent layers and utilities +are now referred to as ORCv1. + +The majority of the ORCv1 layers and utilities were renamed with a 'Legacy' +prefix in LLVM 8.0, and have deprecation warnings attached in LLVM 9.0. In LLVM +10.0 ORCv1 will be removed entirely. + +Transitioning from ORCv1 to ORCv2 should be easy for most clients. Most of the +ORCv1 layers and utilities have ORCv2 counterparts[2]_ that can be directly +substituted. 
However there are some design differences between ORCv1 and ORCv2 +to be aware of: + + 1. ORCv2 fully adopts the JIT-as-linker model that began with MCJIT. Modules + (and other program representations, e.g. Object Files) are no longer added + directly to JIT classes or layers. Instead, they are added to ``JITDylib`` + instances *by* layers. The ``JITDylib`` determines *where* the definitions + reside, the layers determine *how* the definitions will be compiled. + Linkage relationships between ``JITDylibs`` determine how inter-module + references are resolved, and symbol resolvers are no longer used. See the + section `Design Overview`_ for more details. + + Unless multiple JITDylibs are needed to model linkage relationships, ORCv1 + clients should place all code in the main JITDylib (returned by + ``ExecutionSession::getMainJITDylib()``). MCJIT clients should use LLJIT + (see `LLJIT and LLLazyJIT`_). + + 2. All JIT stacks now need an ``ExecutionSession`` instance. ExecutionSession + manages the string pool, error reporting, synchronization, and symbol + lookup. + + 3. ORCv2 uses uniqued strings (``SymbolStringPtr`` instances) rather than + string values in order to reduce memory overhead and improve lookup + performance. See the subsection `How to manage symbol strings`_. -Clients currently using the legacy (ORCv1) layers and utilities will usually -find it easy to transition to the newer (ORCv2) variants. Most of the ORCv1 -layers and utilities have ORCv2 counterparts[2]_ that can be -substituted. However there are some differences between ORCv1 and ORCv2 to be -aware of: - - 1. All JIT stacks now need an ExecutionSession instance which manages the - string pool, error reporting, synchronization, and symbol lookup. + 4. IR layers require ThreadSafeModule instances, rather than + std::unique_ptr<Module>s. ThreadSafeModule is a wrapper that ensures that + Modules that use the same LLVMContext are not accessed concurrently. 
+ See `How to use ThreadSafeModule and ThreadSafeContext`_. - 2. ORCv2 uses uniqued strings (``SymbolStringPtr`` instances) to reduce memory - overhead and improve lookup performance. To get a uniqued string, call - ``intern`` on your ExecutionSession instance: + 5. Symbol lookup is no longer handled by layers. Instead, there is a + ``lookup`` method on JITDylib that takes a list of JITDylibs to scan. .. code-block:: c++ ExecutionSession ES; + JITDylib &JD1 = ...; + JITDylib &JD2 = ...; - /// ... + auto Sym = ES.lookup({&JD1, &JD2}, ES.intern("_main")); - auto MainSymbolName = ES.intern("main"); + 6. Module removal is not yet supported. There is no equivalent of the + layer concept removeModule/removeObject methods. Work on resource tracking + and removal in ORCv2 is ongoing. - 3. Program representations (Modules, Object Files, etc.) are no longer added - *to* layers. Instead they are added *to* JITDylibs *by* layers. The layer - determines how the program representation will be compiled if it is needed. - The JITDylib provides the symbol table, enforces linkage rules (e.g. - rejecting duplicate definitions), and synchronizes concurrent compiles. +For code examples and suggestions of how to use the ORCv2 APIs, please see +the section `How-tos`_. - Most ORCv1 clients (or MCJIT clients wanting to try out ORCv2) should - simply add code to the default *main* JITDylib provided by the - ExecutionSession: +How-tos +======= - .. code-block:: c++ +How to manage symbol strings +############################ - ExecutionSession ES; - RTDyldObjectLinkingLayer ObjLinkingLayer( - ES, []() { return llvm::make_unique(); }); - IRCompileLayer CompileLayer(ES, ObjLinkingLayer, SimpleIRCompiler(TM)); +Symbol strings in ORC are uniqued to improve lookup performance, reduce memory +overhead, and allow symbol names to function as efficient keys. To get the +unique ``SymbolStringPtr`` for a string value, call the +``ExecutionSession::intern`` method: - auto M = loadModule(...); + .. 
code-block:: c++ - if (auto Err = CompileLayer.add(ES.getMainJITDylib(), M)) - return Err; + ExecutionSession ES; + /// ... + auto MainSymbolName = ES.intern("main"); - 4. IR layers require ThreadSafeModule instances, rather than - std::unique_ptrs. A ThreadSafeModule instance is a pair of a - std::unique_ptr and a ThreadSafeContext, which is in turn a - pair of a std::unique_ptr and a lock. This allows the JIT - to ensure that the LLVMContext for a module is locked before the module - is accessed. Multiple ThreadSafeModules may share a ThreadSafeContext - value, but in that case the modules will not be able to be compiled - concurrently[3]_. +If you wish to perform lookup using the C/IR name of a symbol you will also +need to apply the platform linker-mangling before interning the string. On +Linux this mangling is a no-op, but on other platforms it usually involves +adding a prefix to the string (e.g. '_' on Darwin). The mangling scheme is +based on the DataLayout for the target. Given a DataLayout and an +ExecutionSession, you can create a MangleAndInterner function object that +will perform both jobs for you: - ThreadSafeContexts may be constructed explicitly: + .. code-block:: c++ - .. code-block:: c++ + ExecutionSession ES; + const DataLayout &DL = ...; + MangleAndInterner Mangle(ES, DL); - // ThreadSafeContext shared between two modules. - ThreadSafeContext TSCtx(llvm::make_unique()); - ThreadSafeModule TSM1( - llvm::make_unique("M1", *TSCtx.getContext()), TSCtx); - ThreadSafeModule TSM2( - llvm::make_unique("M2", *TSCtx.getContext()), TSCtx); + // ... - , or they can be created implicitly by passing a new LLVMContext to the - ThreadSafeModuleConstructor: + // Portable IR-symbol-name lookup: + auto Sym = ES.lookup({&ES.getMainJITDylib()}, Mangle("main")); - .. 
code-block:: c++ +How to create JITDylibs and set up linkage relationships +######################################################## - // Constructing a ThreadSafeModule (and implicitly a ThreadSafeContext) - // from a pair of a Module and a Context. - auto Ctx = llvm::make_unique(); - auto M = llvm::make_unique("M", *Ctx); - return ThreadSafeModule(std::move(M), std::move(Ctx)); - - 5. The symbol resolution and lookup scheme have been fundamentally changed. - Symbol lookup has been removed from the layer interface. Instead, - symbols are looked up via the ``ExecutionSession::lookup`` method by - scanning a list of JITDylibs. - - SymbolResolvers have been removed entirely. Resolution rules now follow the - linkage relationship between JITDylibs. For example, to resolve a reference - to a symbol *F* from a module *M* that has been added to JITDylib *J1* we - would first search for a definition of *F* in *J1* then (if no definition - was found) search each of the JITDylibs that *J1* links against. - - While the new resolution scheme is, strictly speaking, less flexible than - the old scheme of customizable resolvers this has not yet led to problems - in practice. Instead, using standard linker rules has removed a lot of - boilerplate while providing correct[4]_ behavior for common and weak symbols. - - One notable difference is in exposing in-process symbols to the JIT. To - support this (without requiring the set of symbols to be enumerated up - front), JITDylibs allow for a *GeneratorFunction* to be attached to - generate new definitions upon lookup. Reflecting the processes symbols into - the JIT can be done by writing: +In ORC, all symbol definitions reside in JITDylibs. JITDylibs are created by +calling the ``ExecutionSession::createJITDylib`` method with a unique name: - .. code-block:: c++ + .. 
code-block:: c++ - ExecutionSession ES; - const auto DataLayout &DL = ...; + ExecutionSession ES; + auto &JD = ES.createJITDylib("libFoo.dylib"); - { - auto ProcessSymbolsGenerator = - DynamicLibrarySearchGenerator::GetForCurrentProcess(DL.getGlobalPrefix()); - if (!ProcessSymbolsGenerator) - return ProcessSymbolsGenerator.takeError(); - ES.getMainJITDylib().setGenerator(std::move(*ProcessSymbolsGenerator)); - } +The JITDylib is owned by the ``ExecutionSession`` instance and will be freed +when it is destroyed. - 6. Module removal is not yet supported. There is no equivalent of the - layer concept removeModule/removeObject methods. Work on resource tracking - and removal in ORCv2 is ongoing. +A JITDylib representing the JIT main program is created by ExecutionSession by +default. A reference to it can be obtained by calling +``ExecutionSession::getMainJITDylib()``: + + .. code-block:: c++ + + ExecutionSession ES; + auto &MainJD = ES.getMainJITDylib(); + +How to use ThreadSafeModule and ThreadSafeContext +################################################# + +ThreadSafeModule and ThreadSafeContext are wrappers around Modules and +LLVMContexts respectively. A ThreadSafeModule is a pair of a +std::unique_ptr<Module> and a (possibly shared) ThreadSafeContext value. A +ThreadSafeContext is a pair of a std::unique_ptr<LLVMContext> and a lock. +This design serves two purposes: providing both a locking scheme and lifetime +management for LLVMContexts. The ThreadSafeContext may be locked to prevent +accidental concurrent access by two Modules that use the same LLVMContext. +The underlying LLVMContext is freed once all ThreadSafeContext values pointing +to it are destroyed, allowing the context memory to be reclaimed as soon as +the Modules referring to it are destroyed. + +ThreadSafeContexts can be explicitly constructed from a +std::unique_ptr<LLVMContext>: + + ..
code-block:: c++ + ThreadSafeContext TSCtx(llvm::make_unique<LLVMContext>()); + +ThreadSafeModules can be constructed from a pair of a std::unique_ptr<Module> +and a ThreadSafeContext value. ThreadSafeContext values may be shared between +multiple ThreadSafeModules: + + .. code-block:: c++ + + ThreadSafeModule TSM1( + llvm::make_unique<Module>("M1", *TSCtx.getContext()), TSCtx); + + ThreadSafeModule TSM2( + llvm::make_unique<Module>("M2", *TSCtx.getContext()), TSCtx); + +Before using a ThreadSafeContext, clients should ensure that either the context +is only accessible on the current thread, or that the context is locked. In the +example above (where the context is never locked) we rely on the fact that +``TSM1``, ``TSM2`` and ``TSCtx`` are all created on one thread. If a context is +going to be shared between threads then it must be locked before the context, +or any Modules attached to it, are accessed. When code is added to in-tree IR +layers this locking is done automatically by the +``BasicIRLayerMaterializationUnit::materialize`` method. In all other +situations, for example when writing a custom IR materialization unit, or +constructing a new ThreadSafeModule from higher-level program representations, +locking must be done explicitly: + + .. code-block:: c++ + + void HighLevelRepresentationLayer::emit(MaterializationResponsibility R, + HighLevelProgramRepresentation H) { + // Get or create a context value that may be shared between threads. + ThreadSafeContext TSCtx = getContext(); + + // Lock the context to prevent concurrent access. + auto Lock = TSCtx.getLock(); + + // IRGen a module onto the locked Context. + ThreadSafeModule TSM(IRGen(H, *TSCtx.getContext()), TSCtx); + + // Emit the module to the base layer with the context still locked. + BaseIRLayer.emit(std::move(R), std::move(TSM)); + } + +Clients wishing to maximize possibilities for concurrent compilation will want +to create every new ThreadSafeModule on a new ThreadSafeContext.
For this reason +a convenience constructor for ThreadSafeModule is provided that implicitly +constructs a new ThreadSafeContext value from a std::unique_ptr<LLVMContext>: + + .. code-block:: c++ + + // Maximize concurrency opportunities by loading every module on a + // separate context. + for (const auto &IRPath : IRPaths) { + auto Ctx = llvm::make_unique<LLVMContext>(); + auto M = llvm::make_unique<Module>("M", *Ctx); + CompileLayer.add(ES.getMainJITDylib(), + ThreadSafeModule(std::move(M), std::move(Ctx))); + } + +Clients who plan to run single-threaded may choose to save memory by loading +all modules on the same context: + + .. code-block:: c++ + + // Save memory by using one context for all Modules: + ThreadSafeContext TSCtx(llvm::make_unique<LLVMContext>()); + for (const auto &IRPath : IRPaths) { + ThreadSafeModule TSM(parsePath(IRPath, *TSCtx.getContext()), TSCtx); + CompileLayer.add(ES.getMainJITDylib(), ThreadSafeModule(std::move(TSM))); + } + +How to Add Process and Library Symbols to the JITDylibs +======================================================= + +JIT'd code typically needs access to symbols in the host program or in +supporting libraries. References to process symbols can be "baked in" to code +as it is compiled by turning external references into pre-resolved integer +constants, however this ties the JIT'd code to the current process's virtual +memory layout (meaning that it cannot be cached between runs) and makes +debugging lower level program representations difficult (as all external +references are opaque integer values). A better solution is to maintain symbolic +external references and let the JIT linker bind them for you at runtime. To +allow the JIT linker to find these external definitions their addresses must +be added to a JITDylib that the JIT'd definitions link against. + +Adding definitions for external symbols could be done using the absoluteSymbols +function: + + ..
code-block:: c++ + + const DataLayout &DL = getDataLayout(); + MangleAndInterner Mangle(ES, DL); + + auto &JD = ES.getMainJITDylib(); + + JD.define( + absoluteSymbols({ + { Mangle("puts"), pointerToJITTargetAddress(&puts)}, + { Mangle("gets"), pointerToJITTargetAddress(&gets)} + })); + +Manually adding absolute symbols for a large or changing interface is cumbersome, +however, so ORC provides an alternative to generate new definitions on demand: +*definition generators*. If a definition generator is attached to a JITDylib, +then any unsuccessful lookup on that JITDylib will fall back to calling the +definition generator, and the definition generator may choose to generate a new +definition for the missing symbols. Of particular use here is the +``DynamicLibrarySearchGenerator`` utility. This can be used to reflect the whole +exported symbol set of the process or a specific dynamic library, or a subset +of either of these determined by a predicate. + +For example, to load the whole interface of a runtime library: + + .. code-block:: c++ + + const DataLayout &DL = getDataLayout(); + auto &JD = ES.getMainJITDylib(); + + JD.setGenerator(DynamicLibrarySearchGenerator::Load("/path/to/lib", + DL.getGlobalPrefix())); + + // IR added to JD can now link against all symbols exported by the library + // at '/path/to/lib'. + CompileLayer.add(JD, loadModule(...)); + +Or, to expose a whitelisted set of symbols from the main process: + + .. code-block:: c++ + + const DataLayout &DL = getDataLayout(); + MangleAndInterner Mangle(ES, DL); + + auto &JD = ES.getMainJITDylib(); + + DenseSet<SymbolStringPtr> Whitelist({ + Mangle("puts"), + Mangle("gets") + }); + + // Use GetForCurrentProcess with a predicate function that checks the + // whitelist.
+ JD.setGenerator( + DynamicLibrarySearchGenerator::GetForCurrentProcess( + DL.getGlobalPrefix(), + [&](const SymbolStringPtr &S) { return Whitelist.count(S); })); + + // IR added to JD can now link against any symbols exported by the process + // and contained in the whitelist. + CompileLayer.add(JD, loadModule(...)); Future Features =============== From c23619b0c90056f9bd63f9b5d79caf5bf63618e8 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 16 Jul 2019 21:41:43 +0000 Subject: [PATCH 295/451] [ORC][docs] Fix an RST error: the code-block directive needs a newline after it. llvm-svn: 366270 --- llvm/docs/ORCv2.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/docs/ORCv2.rst b/llvm/docs/ORCv2.rst index 2f37a2fbdad59..7423c041d40a5 100644 --- a/llvm/docs/ORCv2.rst +++ b/llvm/docs/ORCv2.rst @@ -452,6 +452,7 @@ ThreadSafeContexts can be explicitly constructed from a std::unique_ptr<LLVMContext>: .. code-block:: c++ + ThreadSafeContext TSCtx(llvm::make_unique<LLVMContext>()); ThreadSafeModules can be constructed from a pair of a std::unique_ptr<Module> From 21f2858dcf3a556f01f6ae151bf7638b70f01c02 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 22:00:10 +0000 Subject: [PATCH 296/451] AMDGPU: Partially revert r366250 GCCBuiltin doesn't work for these, because they have a mangled type (although they arguably should not).
llvm-svn: 366271 --- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 4c67cdea4d580..1f835171386f7 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1249,15 +1249,12 @@ def int_amdgcn_ds_swizzle : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent, ImmArg<1>]>; -def int_amdgcn_ubfe : - GCCBuiltin<"__builtin_amdgcn_ubfe">, - Intrinsic<[llvm_anyint_ty], +def int_amdgcn_ubfe : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; -def int_amdgcn_sbfe : GCCBuiltin<"__builtin_amdgcn_sbfe">, - Intrinsic<[llvm_anyint_ty], +def int_amdgcn_sbfe : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable] >; From 42bba4b852b1a63db4043798bba7d9fcea61cbaf Mon Sep 17 00:00:00 2001 From: Guanzhong Chen Date: Tue, 16 Jul 2019 22:00:45 +0000 Subject: [PATCH 297/451] [WebAssembly] Implement thread-local storage (local-exec model) Summary: Thread local variables are placed inside a `.tdata` segment. Their symbols are offsets from the start of the segment. The address of a thread local variable is computed as `__tls_base` + the offset from the start of the segment. `.tdata` segment is a passive segment and `memory.init` is used once per thread to initialize the thread local storage. `__tls_base` is a wasm global. Since each thread has its own wasm instance, it is effectively thread local. Currently, `__tls_base` must be initialized at thread startup, and so cannot be used with dynamic libraries. `__tls_base` is to be initialized with a new linker-synthesized function, `__wasm_init_tls`, which takes as an argument a block of memory to use as the storage for thread locals. It then initializes the block of memory and sets `__tls_base`. 
As `__wasm_init_tls` will handle the memory initialization, the memory does not have to be zeroed. To help allocate memory for thread-local storage, a new compiler intrinsic is introduced: `__builtin_wasm_tls_size()`. This intrinsic function returns the size of the thread-local storage for the current function. The expected usage is to run something like the following upon thread startup: __wasm_init_tls(malloc(__builtin_wasm_tls_size())); Reviewers: tlively, aheejin, kripken, sbc100 Subscribers: dschuff, jgravelle-google, hiraditya, sunfish, jfb, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D64537 llvm-svn: 366272 --- .../clang/Basic/BuiltinsWebAssembly.def | 3 + clang/lib/CodeGen/CGBuiltin.cpp | 5 ++ clang/test/CodeGen/builtins-wasm.c | 6 ++ lld/test/wasm/data-segments.ll | 20 +++-- lld/test/wasm/tls.ll | 81 ++++++++++++++++++ lld/wasm/Driver.cpp | 25 ++++++ lld/wasm/Symbols.cpp | 11 ++- lld/wasm/Symbols.h | 13 +++ lld/wasm/Writer.cpp | 81 +++++++++++++++++- llvm/include/llvm/BinaryFormat/Wasm.h | 2 + llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 9 ++ llvm/include/llvm/MC/MCSectionWasm.h | 3 +- .../WebAssembly/WebAssemblyFastISel.cpp | 4 + .../WebAssembly/WebAssemblyISelDAGToDAG.cpp | 49 +++++++++++ .../WebAssembly/WebAssemblyMCInstLower.cpp | 8 +- .../WebAssembly/WebAssemblyTargetMachine.cpp | 23 +++-- .../WebAssembly/target-features-tls.ll | 32 +++---- llvm/test/CodeGen/WebAssembly/tls.ll | 85 ++++++++++++++++--- 18 files changed, 413 insertions(+), 47 deletions(-) create mode 100644 lld/test/wasm/tls.ll diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index 57ebb27ab4695..63177f016ac77 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -29,6 +29,9 @@ BUILTIN(__builtin_wasm_memory_grow, "zIiz", "n") TARGET_BUILTIN(__builtin_wasm_memory_init, "vIUiIUiv*UiUi", "",
"bulk-memory") TARGET_BUILTIN(__builtin_wasm_data_drop, "vIUi", "", "bulk-memory") +// Thread-local storage +TARGET_BUILTIN(__builtin_wasm_tls_size, "z", "nc", "bulk-memory") + // Floating point min/max BUILTIN(__builtin_wasm_min_f32, "fff", "nc") BUILTIN(__builtin_wasm_max_f32, "fff", "nc") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index acaa81ae8a9a6..1658be5a88e02 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -13913,6 +13913,11 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_data_drop); return Builder.CreateCall(Callee, {Arg}); } + case WebAssembly::BI__builtin_wasm_tls_size: { + llvm::Type *ResultType = ConvertType(E->getType()); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType); + return Builder.CreateCall(Callee); + } case WebAssembly::BI__builtin_wasm_throw: { Value *Tag = EmitScalarExpr(E->getArg(0)); Value *Obj = EmitScalarExpr(E->getArg(1)); diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index 4784d6ff86ebd..8a17fb39641b3 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -38,6 +38,12 @@ void data_drop() { // WEBASSEMBLY64: call void @llvm.wasm.data.drop(i32 3) } +__SIZE_TYPE__ tls_size() { + return __builtin_wasm_tls_size(); + // WEBASSEMBLY32: call i32 @llvm.wasm.tls.size.i32() + // WEBASSEMBLY64: call i64 @llvm.wasm.tls.size.i64() +} + void throw(void *obj) { return __builtin_wasm_throw(0, obj); // WEBASSEMBLY32: call void @llvm.wasm.throw(i32 0, i8* %{{.*}}) diff --git a/lld/test/wasm/data-segments.ll b/lld/test/wasm/data-segments.ll index a9a403f3c5f10..944895a0d39cc 100644 --- a/lld/test/wasm/data-segments.ll +++ b/lld/test/wasm/data-segments.ll @@ -4,11 +4,11 @@ ; atomics => active segments (TODO: error) ; RUN: wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 %t.atomics.o 
-o %t.atomics.wasm -; RUN: obj2yaml %t.atomics.wasm | FileCheck %s --check-prefix ACTIVE +; RUN: obj2yaml %t.atomics.wasm | FileCheck %s --check-prefixes ACTIVE,ACTIVE-TLS ; atomics, active segments => active segments ; RUN: wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 --active-segments %t.atomics.o -o %t.atomics.active.wasm -; RUN: obj2yaml %t.atomics.active.wasm | FileCheck %s --check-prefix ACTIVE +; RUN: obj2yaml %t.atomics.active.wasm | FileCheck %s --check-prefixes ACTIVE,ACTIVE-TLS ; atomics, passive segments => error ; RUN: not wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 --passive-segments %t.atomics.o -o %t.atomics.passive.wasm 2>&1 | FileCheck %s --check-prefix ERROR @@ -27,15 +27,15 @@ ; atomics, bulk memory => active segments (TODO: passive) ; RUN: wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 %t.atomics.bulk-mem.o -o %t.atomics.bulk-mem.wasm -; RUN: obj2yaml %t.atomics.bulk-mem.wasm | FileCheck %s --check-prefix ACTIVE +; RUN: obj2yaml %t.atomics.bulk-mem.wasm | FileCheck %s --check-prefixes ACTIVE,ACTIVE-TLS ; atomics, bulk memory, active segments => active segments ; RUN: wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 --active-segments %t.atomics.bulk-mem.o -o %t.atomics.bulk-mem.active.wasm -; RUN: obj2yaml %t.atomics.bulk-mem.active.wasm | FileCheck %s --check-prefix ACTIVE +; RUN: obj2yaml %t.atomics.bulk-mem.active.wasm | FileCheck %s --check-prefixes ACTIVE,ACTIVE-TLS ; atomics, bulk memory, passive segments => passive segments ; RUN: wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 --passive-segments %t.atomics.bulk-mem.o -o %t.atomics.bulk-mem.passive.wasm -; RUN: obj2yaml %t.atomics.bulk-mem.passive.wasm | FileCheck %s --check-prefix PASSIVE +; RUN: obj2yaml %t.atomics.bulk-mem.passive.wasm | FileCheck %s --check-prefixes PASSIVE,PASSIVE-TLS target triple = "wasm32-unknown-unknown" @@ -54,6 +54,9 @@ target triple = 
"wasm32-unknown-unknown" ; ACTIVE-NEXT: - Index: 0 ; ACTIVE-NEXT: Locals: [] ; ACTIVE-NEXT: Body: 0B +; ACTIVE-TLS-NEXT: - Index: 1 +; ACTIVE-TLS-NEXT: Locals: [] +; ACTIVE-TLS-NEXT: Body: 0B ; ACTIVE-NEXT: - Type: DATA ; ACTIVE-NEXT: Segments: ; ACTIVE-NEXT: - SectionOffset: 7 @@ -80,6 +83,8 @@ target triple = "wasm32-unknown-unknown" ; ACTIVE-NEXT: FunctionNames: ; ACTIVE-NEXT: - Index: 0 ; ACTIVE-NEXT: Name: __wasm_call_ctors +; ACTIVE-TLS-NEXT: - Index: 1 +; ACTIVE-TLS-NEXT: Name: __wasm_init_tls ; PASSIVE-LABEL: - Type: CODE ; PASSIVE-NEXT: Functions: @@ -89,6 +94,9 @@ target triple = "wasm32-unknown-unknown" ; PASSIVE-NEXT: - Index: 1 ; PASSIVE-NEXT: Locals: [] ; PASSIVE-NEXT: Body: 41800841004114FC080000FC090041940841004190CE00FC080100FC090141A4D6004100410DFC080200FC09020B +; PASSIVE-TLS-NEXT: - Index: 2 +; PASSIVE-TLS-NEXT: Locals: [] +; PASSIVE-TLS-NEXT: Body: 0B ; PASSIVE-NEXT: - Type: DATA ; PASSIVE-NEXT: Segments: ; PASSIVE-NEXT: - SectionOffset: 3 @@ -108,3 +116,5 @@ target triple = "wasm32-unknown-unknown" ; PASSIVE-NEXT: Name: __wasm_call_ctors ; PASSIVE-NEXT: - Index: 1 ; PASSIVE-NEXT: Name: __wasm_init_memory +; PASSIVE-TLS-NEXT: - Index: 2 +; PASSIVE-TLS-NEXT: Name: __wasm_init_tls diff --git a/lld/test/wasm/tls.ll b/lld/test/wasm/tls.ll new file mode 100644 index 0000000000000..b570d46756875 --- /dev/null +++ b/lld/test/wasm/tls.ll @@ -0,0 +1,81 @@ +; RUN: llc -mattr=+bulk-memory -filetype=obj %s -o %t.o + +target triple = "wasm32-unknown-unknown" + +@tls1 = thread_local(localexec) global i32 1, align 4 +@no_tls = global i32 0, align 4 +@tls2 = thread_local(localexec) global i32 1, align 4 + +define i32* @tls1_addr() { + ret i32* @tls1 +} + +define i32* @tls2_addr() { + ret i32* @tls2 +} + +; RUN: wasm-ld -no-gc-sections --shared-memory --max-memory=131072 --no-entry -o %t.wasm %t.o +; RUN: obj2yaml %t.wasm | FileCheck %s + +; RUN: wasm-ld -no-gc-sections --shared-memory --max-memory=131072 --no-merge-data-segments --no-entry -o %t.wasm %t.o +; 
RUN: obj2yaml %t.wasm | FileCheck %s + +; CHECK: - Type: GLOBAL +; CHECK-NEXT: Globals: +; CHECK-NEXT: - Index: 0 +; CHECK-NEXT: Type: I32 +; CHECK-NEXT: Mutable: true +; CHECK-NEXT: InitExpr: +; CHECK-NEXT: Opcode: I32_CONST +; CHECK-NEXT: Value: 66576 +; CHECK-NEXT: - Index: 1 +; CHECK-NEXT: Type: I32 +; CHECK-NEXT: Mutable: true +; CHECK-NEXT: InitExpr: +; CHECK-NEXT: Opcode: I32_CONST +; CHECK-NEXT: Value: 0 +; CHECK-NEXT: - Index: 2 +; CHECK-NEXT: Type: I32 +; CHECK-NEXT: Mutable: false +; CHECK-NEXT: InitExpr: +; CHECK-NEXT: Opcode: I32_CONST +; CHECK-NEXT: Value: 8 + + +; CHECK: - Type: CODE +; CHECK-NEXT: Functions: +; CHECK-NEXT: - Index: 0 +; CHECK-NEXT: Locals: [] +; CHECK-NEXT: Body: 0B +; CHECK-NEXT: - Index: 1 +; CHECK-NEXT: Locals: [] +; CHECK-NEXT: Body: 20002401200041004108FC0800000B + +; Expected body of __wasm_init_tls: +; local.get 0 +; global.set 1 +; local.get 0 +; i32.const 0 +; i32.const 8 +; memory.init 0, 0 +; end + +; CHECK-NEXT: - Index: 2 +; CHECK-NEXT: Locals: [] +; CHECK-NEXT: Body: 2381808080004180808080006A0B + +; Expected body of tls1_addr: +; global.get 1 +; i32.const 0 +; i32.add +; end + +; CHECK-NEXT: - Index: 3 +; CHECK-NEXT: Locals: [] +; CHECK-NEXT: Body: 2381808080004184808080006A0B + +; Expected body of tls2_addr: +; global.get 1 +; i32.const 4 +; i32.add +; end diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 3de69954e0a1a..900cd051dcfb8 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -454,6 +454,7 @@ createUndefinedGlobal(StringRef name, llvm::wasm::WasmGlobalType *type) { // Create ABI-defined synthetic symbols static void createSyntheticSymbols() { static WasmSignature nullSignature = {{}, {}}; + static WasmSignature i32ArgSignature = {{}, {ValType::I32}}; static llvm::wasm::WasmGlobalType globalTypeI32 = {WASM_TYPE_I32, false}; static llvm::wasm::WasmGlobalType mutableGlobalTypeI32 = {WASM_TYPE_I32, true}; @@ -516,6 +517,30 @@ static void createSyntheticSymbols() { WasmSym::heapBase =
symtab->addOptionalDataSymbol("__heap_base"); } + if (config->sharedMemory && !config->shared) { + llvm::wasm::WasmGlobal tlsBaseGlobal; + tlsBaseGlobal.Type = {WASM_TYPE_I32, true}; + tlsBaseGlobal.InitExpr.Value.Int32 = 0; + tlsBaseGlobal.InitExpr.Opcode = WASM_OPCODE_I32_CONST; + tlsBaseGlobal.SymbolName = "__tls_base"; + WasmSym::tlsBase = + symtab->addSyntheticGlobal("__tls_base", WASM_SYMBOL_VISIBILITY_HIDDEN, + make(tlsBaseGlobal, nullptr)); + + llvm::wasm::WasmGlobal tlsSizeGlobal; + tlsSizeGlobal.Type = {WASM_TYPE_I32, false}; + tlsSizeGlobal.InitExpr.Value.Int32 = 0; + tlsSizeGlobal.InitExpr.Opcode = WASM_OPCODE_I32_CONST; + tlsSizeGlobal.SymbolName = "__tls_size"; + WasmSym::tlsSize = + symtab->addSyntheticGlobal("__tls_size", WASM_SYMBOL_VISIBILITY_HIDDEN, + make(tlsSizeGlobal, nullptr)); + + WasmSym::initTLS = symtab->addSyntheticFunction( + "__wasm_init_tls", WASM_SYMBOL_VISIBILITY_HIDDEN, + make(i32ArgSignature, "__wasm_init_tls")); + } + WasmSym::dsoHandle = symtab->addSyntheticDataSymbol( "__dso_handle", WASM_SYMBOL_VISIBILITY_HIDDEN); } diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp index 61868f37577fb..7d8d532c8ce16 100644 --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -27,11 +27,14 @@ using namespace lld::wasm; DefinedFunction *WasmSym::callCtors; DefinedFunction *WasmSym::initMemory; DefinedFunction *WasmSym::applyRelocs; +DefinedFunction *WasmSym::initTLS; DefinedData *WasmSym::dsoHandle; DefinedData *WasmSym::dataEnd; DefinedData *WasmSym::globalBase; DefinedData *WasmSym::heapBase; GlobalSymbol *WasmSym::stackPointer; +GlobalSymbol *WasmSym::tlsBase; +GlobalSymbol *WasmSym::tlsSize; UndefinedGlobal *WasmSym::tableBase; UndefinedGlobal *WasmSym::memoryBase; @@ -200,8 +203,14 @@ DefinedFunction::DefinedFunction(StringRef name, uint32_t flags, InputFile *f, uint32_t DefinedData::getVirtualAddress() const { LLVM_DEBUG(dbgs() << "getVirtualAddress: " << getName() << "\n"); - if (segment) + if (segment) { + // For thread local 
data, the symbol location is relative to the start of + // the .tdata section, since they are used as offsets from __tls_base. + // Hence, we do not add in segment->outputSeg->startVA. + if (segment->outputSeg->name == ".tdata") + return segment->outputSegmentOffset + offset; return segment->outputSeg->startVA + segment->outputSegmentOffset + offset; + } return offset; } diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h index 499a265be739f..f4816aae70551 100644 --- a/lld/wasm/Symbols.h +++ b/lld/wasm/Symbols.h @@ -426,6 +426,15 @@ struct WasmSym { // linear memory. static GlobalSymbol *stackPointer; + // __tls_base + // Global that holds the address of the base of the current thread's + // TLS block. + static GlobalSymbol *tlsBase; + + // __tls_size + // Symbol whose value is the size of the TLS block. + static GlobalSymbol *tlsSize; + // __data_end // Symbol marking the end of the data and bss. static DefinedData *dataEnd; @@ -448,6 +457,10 @@ struct WasmSym { // Function that applies relocations to data segment post-instantiation. static DefinedFunction *applyRelocs; + // __wasm_init_tls + // Function that initializes a caller-provided TLS block and sets __tls_base.
+ static DefinedFunction *initTLS; + // __dso_handle // Symbol used in calls to __cxa_atexit to determine current DLL static DefinedData *dsoHandle; diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 77a29a2d99ef4..23a63edee7cca 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -57,6 +57,7 @@ class Writer { void createInitMemoryFunction(); void createApplyRelocationsFunction(); void createCallCtorsFunction(); + void createInitTLSFunction(); void assignIndexes(); void populateSymtab(); @@ -242,6 +243,11 @@ void Writer::layoutMemory() { log(formatv("mem: {0,-15} offset={1,-8} size={2,-8} align={3}", seg->name, memoryPtr, seg->size, seg->alignment)); memoryPtr += seg->size; + + if (WasmSym::tlsSize && seg->name == ".tdata") { + auto *tlsSize = cast(WasmSym::tlsSize); + tlsSize->global->global.InitExpr.Value.Int32 = seg->size; + } } // TODO: Add .bss space here. @@ -353,6 +359,7 @@ void Writer::populateTargetFeatures() { StringMap used; StringMap required; StringMap disallowed; + bool tlsUsed = false; // Only infer used features if user did not specify features bool inferFeatures = !config->features.hasValue(); @@ -385,6 +392,14 @@ void Writer::populateTargetFeatures() { std::to_string(feature.Prefix)); } } + + for (InputSegment *segment : file->segments) { + if (!segment->live) + continue; + StringRef name = segment->getName(); + if (name.startswith(".tdata") || name.startswith(".tbss")) + tlsUsed = true; + } } if (inferFeatures) @@ -411,6 +426,10 @@ void Writer::populateTargetFeatures() { error("'bulk-memory' feature must be used in order to emit passive " "segments"); + if (!used.count("bulk-memory") && tlsUsed) + error("'bulk-memory' feature must be used in order to use thread-local " + "storage"); + // Validate that used features are allowed in output if (!inferFeatures) { for (auto &feature : used.keys()) { @@ -492,8 +511,8 @@ void Writer::calculateExports() { // implement in all major browsers. 
// See: https://github.com/WebAssembly/mutable-global if (g->getGlobalType()->Mutable) { - // Only the __stack_pointer should ever be create as mutable. - assert(g == WasmSym::stackPointer); + // Only __stack_pointer and __tls_base should ever be created as mutable. + assert(g == WasmSym::stackPointer || g == WasmSym::tlsBase); continue; } export_ = {name, WASM_EXTERNAL_GLOBAL, g->getGlobalIndex()}; @@ -602,6 +621,11 @@ static StringRef getOutputDataSegmentName(StringRef name) { // we only have a single __memory_base to use as our base address. if (config->isPic) return ".data"; + // We only support one thread-local segment, so we must merge the segments + // despite --no-merge-data-segments. + // We also need to merge .tbss into .tdata so they share the same offsets. + if (name.startswith(".tdata") || name.startswith(".tbss")) + return ".tdata"; if (!config->mergeDataSegments) return name; if (name.startswith(".text.")) @@ -625,7 +649,7 @@ void Writer::createOutputSegments() { if (s == nullptr) { LLVM_DEBUG(dbgs() << "new segment: " << name << "\n"); s = make(name, segments.size()); - if (config->passiveSegments) + if (config->passiveSegments || name == ".tdata") s->initFlags = WASM_SEGMENT_IS_PASSIVE; segments.push_back(s); } @@ -655,7 +679,7 @@ void Writer::createInitMemoryFunction() { // initialize passive data segments for (const OutputSegment *s : segments) { - if (s->initFlags & WASM_SEGMENT_IS_PASSIVE) { + if (s->initFlags & WASM_SEGMENT_IS_PASSIVE && s->name != ".tdata") { // destination address writeU8(os, WASM_OPCODE_I32_CONST, "i32.const"); writeSleb128(os, s->startVA, "destination address"); @@ -737,6 +761,49 @@ void Writer::createCallCtorsFunction() { createFunction(WasmSym::callCtors, bodyContent); } +void Writer::createInitTLSFunction() { + if (!WasmSym::initTLS->isLive()) + return; + + std::string bodyContent; + { + raw_string_ostream os(bodyContent); + + OutputSegment *tlsSeg = nullptr; + for (auto *seg : segments) { + if (seg->name == ".tdata") +
tlsSeg = seg; + break; + } + + writeUleb128(os, 0, "num locals"); + if (tlsSeg) { + writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); + writeUleb128(os, 0, "local index"); + + writeU8(os, WASM_OPCODE_GLOBAL_SET, "global.set"); + writeUleb128(os, WasmSym::tlsBase->getGlobalIndex(), "global index"); + + writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); + writeUleb128(os, 0, "local index"); + + writeU8(os, WASM_OPCODE_I32_CONST, "i32.const"); + writeSleb128(os, 0, "segment offset"); + + writeU8(os, WASM_OPCODE_I32_CONST, "i32.const"); + writeSleb128(os, tlsSeg->size, "memory region size"); + + writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix"); + writeUleb128(os, WASM_OPCODE_MEMORY_INIT, "MEMORY.INIT"); + writeUleb128(os, tlsSeg->index, "segment index immediate"); + writeU8(os, 0, "memory index immediate"); + } + writeU8(os, WASM_OPCODE_END, "end function"); + } + + createFunction(WasmSym::initTLS, bodyContent); +} + // Populate InitFunctions vector with init functions from all input objects. // This is then used either when creating the output linking section or to // synthesize the "__wasm_call_ctors" function. 
@@ -829,6 +896,12 @@ void Writer::run() { createCallCtorsFunction(); } + if (config->sharedMemory && !config->shared) + createInitTLSFunction(); + + if (errorCount()) + return; + log("-- calculateTypes"); calculateTypes(); log("-- calculateExports"); diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h index 4f6c24bbc68df..0f22bfe610c6c 100644 --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -242,7 +242,9 @@ enum : unsigned { enum : unsigned { WASM_OPCODE_END = 0x0b, WASM_OPCODE_CALL = 0x10, + WASM_OPCODE_LOCAL_GET = 0x20, WASM_OPCODE_GLOBAL_GET = 0x23, + WASM_OPCODE_GLOBAL_SET = 0x24, WASM_OPCODE_I32_STORE = 0x36, WASM_OPCODE_I32_CONST = 0x41, WASM_OPCODE_I64_CONST = 0x42, diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index 1731995b28734..1b892727547dc 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -124,4 +124,13 @@ def int_wasm_data_drop : [llvm_i32_ty], [IntrNoDuplicate, IntrHasSideEffects, ImmArg<0>]>; +//===----------------------------------------------------------------------===// +// Thread-local storage intrinsics +//===----------------------------------------------------------------------===// + +def int_wasm_tls_size : + Intrinsic<[llvm_anyint_ty], + [], + [IntrNoMem, IntrSpeculatable]>; + } // TargetPrefix = "wasm" diff --git a/llvm/include/llvm/MC/MCSectionWasm.h b/llvm/include/llvm/MC/MCSectionWasm.h index 1adc812649232..2941a40f3b8c6 100644 --- a/llvm/include/llvm/MC/MCSectionWasm.h +++ b/llvm/include/llvm/MC/MCSectionWasm.h @@ -66,7 +66,8 @@ class MCSectionWasm final : public MCSection { bool isVirtualSection() const override; bool isWasmData() const { - return Kind.isGlobalWriteableData() || Kind.isReadOnly(); + return Kind.isGlobalWriteableData() || Kind.isReadOnly() || + Kind.isThreadLocal(); } bool isUnique() const { return UniqueID != ~0U; } 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 312b203859d51..2552e91508334 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -233,6 +233,8 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) { return false; if (Addr.getGlobalValue()) return false; + if (GV->isThreadLocal()) + return false; Addr.setGlobalValue(GV); return true; } @@ -614,6 +616,8 @@ unsigned WebAssemblyFastISel::fastMaterializeConstant(const Constant *C) { if (const GlobalValue *GV = dyn_cast(C)) { if (TLI.isPositionIndependent()) return 0; + if (GV->isThreadLocal()) + return 0; unsigned ResultReg = createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass : &WebAssembly::I32RegClass); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index bd699d92f76c7..1efbb3b067b85 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -15,6 +15,7 @@ #include "WebAssembly.h" #include "WebAssemblyTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" // To access function attributes. 
#include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" @@ -171,6 +172,54 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { } } + case ISD::GlobalTLSAddress: { + const auto *GA = cast(Node); + + if (!MF.getSubtarget().hasBulkMemory()) + report_fatal_error("cannot use thread-local storage without bulk memory", + false); + + if (GA->getGlobal()->getThreadLocalMode() != + GlobalValue::LocalExecTLSModel) { + report_fatal_error("only -ftls-model=local-exec is supported for now", + false); + } + + MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); + assert(PtrVT == MVT::i32 && "only wasm32 is supported for now"); + + SDValue TLSBaseSym = CurDAG->getTargetExternalSymbol("__tls_base", PtrVT); + SDValue TLSOffsetSym = CurDAG->getTargetGlobalAddress( + GA->getGlobal(), DL, PtrVT, GA->getOffset(), 0); + + MachineSDNode *TLSBase = CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32, + DL, MVT::i32, TLSBaseSym); + MachineSDNode *TLSOffset = CurDAG->getMachineNode( + WebAssembly::CONST_I32, DL, MVT::i32, TLSOffsetSym); + MachineSDNode *TLSAddress = + CurDAG->getMachineNode(WebAssembly::ADD_I32, DL, MVT::i32, + SDValue(TLSBase, 0), SDValue(TLSOffset, 0)); + ReplaceNode(Node, TLSAddress); + return; + } + + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntNo = cast(Node->getOperand(0))->getZExtValue(); + switch (IntNo) { + case Intrinsic::wasm_tls_size: { + MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); + assert(PtrVT == MVT::i32 && "only wasm32 is supported for now"); + + MachineSDNode *TLSSize = CurDAG->getMachineNode( + WebAssembly::GLOBAL_GET_I32, DL, PtrVT, + CurDAG->getTargetExternalSymbol("__tls_size", MVT::i32)); + ReplaceNode(Node, TLSSize); + return; + } + } + break; + } + default: break; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index 611f05f949691..288b991ae2c54 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ 
b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -77,9 +77,11 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol( // functions. It's OK to hardcode knowledge of specific symbols here; this // method is precisely there for fetching the signatures of known // Clang-provided symbols. - if (strcmp(Name, "__stack_pointer") == 0 || - strcmp(Name, "__memory_base") == 0 || strcmp(Name, "__table_base") == 0) { - bool Mutable = strcmp(Name, "__stack_pointer") == 0; + if (strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0 || + strcmp(Name, "__memory_base") == 0 || strcmp(Name, "__table_base") == 0 || + strcmp(Name, "__tls_size") == 0) { + bool Mutable = + strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0; WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); WasmSym->setGlobalType(wasm::WasmGlobalType{ uint8_t(Subtarget.hasAddr64() ? wasm::WASM_TYPE_I64 diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index a75df34979bd9..7e65368e671a5 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -186,13 +186,21 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass { for (auto &F : M) replaceFeatures(F, FeatureStr); - bool Stripped = false; - if (!Features[WebAssembly::FeatureAtomics]) { - Stripped |= stripAtomics(M); - Stripped |= stripThreadLocals(M); - } + bool StrippedAtomics = false; + bool StrippedTLS = false; + + if (!Features[WebAssembly::FeatureAtomics]) + StrippedAtomics = stripAtomics(M); + + if (!Features[WebAssembly::FeatureBulkMemory]) + StrippedTLS = stripThreadLocals(M); + + if (StrippedAtomics && !StrippedTLS) + stripThreadLocals(M); + else if (StrippedTLS && !StrippedAtomics) + stripAtomics(M); - recordFeatures(M, Features, Stripped); + recordFeatures(M, Features, StrippedAtomics || StrippedTLS); // Conservatively assume we have made 
some change return true; @@ -271,7 +279,8 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass { // "atomics" is special: code compiled without atomics may have had its // atomics lowered to nonatomic operations. In that case, atomics is // disallowed to prevent unsafe linking with atomics-enabled objects. - assert(!Features[WebAssembly::FeatureAtomics]); + assert(!Features[WebAssembly::FeatureAtomics] || + !Features[WebAssembly::FeatureBulkMemory]); M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey, wasm::WASM_FEATURE_PREFIX_DISALLOWED); } else if (Features[KV.Value]) { diff --git a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll index a5c08f850e228..c25b9e59b1b22 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes CHECK,NO-ATOMICS -; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes CHECK,ATOMICS +; RUN: llc < %s -mattr=-bulk-memory | FileCheck %s --check-prefixes NO-BULK-MEM +; RUN: llc < %s -mattr=+bulk-memory | FileCheck %s --check-prefixes BULK-MEM ; Test that the target features section contains -atomics or +atomics ; for modules that have thread local storage in their source. 
@@ -9,18 +9,18 @@ target triple = "wasm32-unknown-unknown" @foo = internal thread_local global i32 0 -; CHECK-LABEL: .custom_section.target_features,"",@ +; -bulk-memory +; NO-BULK-MEM-LABEL: .custom_section.target_features,"",@ +; NO-BULK-MEM-NEXT: .int8 1 +; NO-BULK-MEM-NEXT: .int8 45 +; NO-BULK-MEM-NEXT: .int8 7 +; NO-BULK-MEM-NEXT: .ascii "atomics" +; NO-BULK-MEM-NEXT: .bss.foo,"",@ -; -atomics -; NO-ATOMICS-NEXT: .int8 1 -; NO-ATOMICS-NEXT: .int8 45 -; NO-ATOMICS-NEXT: .int8 7 -; NO-ATOMICS-NEXT: .ascii "atomics" -; NO-ATOMICS-NEXT: .bss.foo,"",@ - -; +atomics -; ATOMICS-NEXT: .int8 1 -; ATOMICS-NEXT: .int8 43 -; ATOMICS-NEXT: .int8 7 -; ATOMICS-NEXT: .ascii "atomics" -; ATOMICS-NEXT: .tbss.foo,"",@ +; +bulk-memory +; BULK-MEM-LABEL: .custom_section.target_features,"",@ +; BULK-MEM-NEXT: .int8 1 +; BULK-MEM-NEXT: .int8 43 +; BULK-MEM-NEXT: .int8 11 +; BULK-MEM-NEXT: .ascii "bulk-memory" +; BULK-MEM-NEXT: .tbss.foo,"",@ diff --git a/llvm/test/CodeGen/WebAssembly/tls.ll b/llvm/test/CodeGen/WebAssembly/tls.ll index 21e84f9fa9799..02979a28af99b 100644 --- a/llvm/test/CodeGen/WebAssembly/tls.ll +++ b/llvm/test/CodeGen/WebAssembly/tls.ll @@ -1,17 +1,82 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck --check-prefix=SINGLE %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory | FileCheck %s --check-prefixes=CHECK,TLS +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory -fast-isel | FileCheck %s --check-prefixes=CHECK,TLS +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=-bulk-memory | FileCheck %s --check-prefixes=CHECK,NO-TLS target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" -; SINGLE-LABEL: address_of_tls: +; CHECK-LABEL: 
address_of_tls: +; CHECK-NEXT: .functype address_of_tls () -> (i32) define i32 @address_of_tls() { - ; SINGLE: i32.const $push0=, tls - ; SINGLE-NEXT: return $pop0 + ; TLS-DAG: global.get __tls_base + ; TLS-DAG: i32.const tls + ; TLS-NEXT: i32.add + ; TLS-NEXT: return + + ; NO-TLS-NEXT: i32.const tls + ; NO-TLS-NEXT: return ret i32 ptrtoint(i32* @tls to i32) } -; SINGLE: .type tls,@object -; SINGLE-NEXT: .section .bss.tls,"",@ -; SINGLE-NEXT: .p2align 2 -; SINGLE-NEXT: tls: -; SINGLE-NEXT: .int32 0 -@tls = internal thread_local global i32 0 +; CHECK-LABEL: ptr_to_tls: +; CHECK-NEXT: .functype ptr_to_tls () -> (i32) +define i32* @ptr_to_tls() { + ; TLS-DAG: global.get __tls_base + ; TLS-DAG: i32.const tls + ; TLS-NEXT: i32.add + ; TLS-NEXT: return + + ; NO-TLS-NEXT: i32.const tls + ; NO-TLS-NEXT: return + ret i32* @tls +} + +; CHECK-LABEL: tls_load: +; CHECK-NEXT: .functype tls_load () -> (i32) +define i32 @tls_load() { + ; TLS-DAG: global.get __tls_base + ; TLS-DAG: i32.const tls + ; TLS-NEXT: i32.add + ; TLS-NEXT: i32.load 0 + ; TLS-NEXT: return + + ; NO-TLS-NEXT: i32.const 0 + ; NO-TLS-NEXT: i32.load tls + ; NO-TLS-NEXT: return + %tmp = load i32, i32* @tls, align 4 + ret i32 %tmp +} + +; CHECK-LABEL: tls_store: +; CHECK-NEXT: .functype tls_store (i32) -> () +define void @tls_store(i32 %x) { + ; TLS-DAG: global.get __tls_base + ; TLS-DAG: i32.const tls + ; TLS-NEXT: i32.add + ; TLS-NEXT: i32.store 0 + ; TLS-NEXT: return + + ; NO-TLS-NEXT: i32.const 0 + ; NO-TLS-NEXT: i32.store tls + ; NO-TLS-NEXT: return + store i32 %x, i32* @tls, align 4 + ret void +} + +; CHECK-LABEL: tls_size: +; CHECK-NEXT: .functype tls_size () -> (i32) +define i32 @tls_size() { +; CHECK-NEXT: global.get __tls_size +; CHECK-NEXT: return + %1 = call i32 @llvm.wasm.tls.size.i32() + ret i32 %1 +} + +; CHECK: .type tls,@object +; TLS-NEXT: .section .tbss.tls,"",@ +; NO-TLS-NEXT: .section .bss.tls,"",@ +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: tls: +; CHECK-NEXT: .int32 0 +@tls = internal 
thread_local(localexec) global i32 0 + +declare i32 @llvm.wasm.tls.size.i32() From fa575839225a0b04ed0ba923e8eacdf6d369d7fe Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Tue, 16 Jul 2019 22:01:30 +0000 Subject: [PATCH 298/451] Add REQUIRES: x86 to safeseh-no.s test for x86 llvm-svn: 366273 --- lld/test/COFF/safeseh-no.s | 1 + 1 file changed, 1 insertion(+) diff --git a/lld/test/COFF/safeseh-no.s b/lld/test/COFF/safeseh-no.s index 2a301a3ba9b83..70d6f56e88803 100644 --- a/lld/test/COFF/safeseh-no.s +++ b/lld/test/COFF/safeseh-no.s @@ -1,3 +1,4 @@ +# REQUIRES: x86 # RUN: llvm-mc -triple i686-windows-msvc %s -filetype=obj -o %t.obj # RUN: not lld-link %t.obj -safeseh -out:%t.exe -entry:main 2>&1 | FileCheck %s --check-prefix=ERROR # safe seh should be on by default. From fe66fdb8f3076a5146c274bba1258b402cf4e726 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Tue, 16 Jul 2019 22:10:16 +0000 Subject: [PATCH 299/451] [TableGen] Add "getOperandType" to get operand types from opcode/opidx The InstrInfoEmitter outputs an enum called "OperandType" which gives numerical IDs to each operand type. This patch makes use of this enum to define a function called "getOperandType", which allows looking up the type of an operand given its opcode and operand index. Patch by Nicolas Guillemot. Thanks! 
Differential Revision: https://reviews.llvm.org/D63320 llvm-svn: 366274 --- llvm/test/TableGen/get-operand-type.td | 40 ++++++++++++++++ llvm/utils/TableGen/InstrInfoEmitter.cpp | 61 ++++++++++++++++++++++-- 2 files changed, 97 insertions(+), 4 deletions(-) create mode 100644 llvm/test/TableGen/get-operand-type.td diff --git a/llvm/test/TableGen/get-operand-type.td b/llvm/test/TableGen/get-operand-type.td new file mode 100644 index 0000000000000..5be2c777c8ae9 --- /dev/null +++ b/llvm/test/TableGen/get-operand-type.td @@ -0,0 +1,40 @@ +// RUN: llvm-tblgen -gen-instr-info -I %p/../../include %s | FileCheck %s + +// Check that getOperandType has the expected info in it + +include "llvm/Target/Target.td" + +def archInstrInfo : InstrInfo { } + +def arch : Target { + let InstructionSet = archInstrInfo; +} + +def Reg : Register<"reg">; +def RegClass : RegisterClass<"foo", [i32], 0, (add Reg)>; + +def OpA : Operand; +def OpB : Operand; + +def InstA : Instruction { + let Size = 1; + let OutOperandList = (outs OpA:$a); + let InOperandList = (ins OpB:$b, i32imm:$c); + field bits<8> Inst; + field bits<8> SoftFail = 0; + let Namespace = "MyNamespace"; +} + +def InstB : Instruction { + let Size = 1; + let OutOperandList = (outs i32imm:$d); + let InOperandList = (ins unknown:$x); + field bits<8> Inst; + field bits<8> SoftFail = 0; + let Namespace = "MyNamespace"; +} + +// CHECK: #ifdef GET_INSTRINFO_OPERAND_TYPE +// CHECK: { OpTypes::OpA, OpTypes::OpB, OpTypes::i32imm, } +// CHECK-NEXT: { OpTypes::i32imm, -1, } +// CHECK: #endif //GET_INSTRINFO_OPERAND_TYPE diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index a4d66bb871cc6..d92585685e145 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -76,7 +76,9 @@ class InstrInfoEmitter { std::map, unsigned> &EL, const OperandInfoMapTy &OpInfo, raw_ostream &OS); - void emitOperandTypesEnum(raw_ostream &OS, const CodeGenTarget &Target); + 
void emitOperandTypeMappings( + raw_ostream &OS, const CodeGenTarget &Target, + ArrayRef NumberedInstructions); void initOperandMapData( ArrayRef NumberedInstructions, StringRef Namespace, @@ -324,8 +326,9 @@ void InstrInfoEmitter::emitOperandNameMappings(raw_ostream &OS, /// Generate an enum for all the operand types for this target, under the /// llvm::TargetNamespace::OpTypes namespace. /// Operand types are all definitions derived of the Operand Target.td class. -void InstrInfoEmitter::emitOperandTypesEnum(raw_ostream &OS, - const CodeGenTarget &Target) { +void InstrInfoEmitter::emitOperandTypeMappings( + raw_ostream &OS, const CodeGenTarget &Target, + ArrayRef NumberedInstructions) { StringRef Namespace = Target.getInstNamespace(); std::vector Operands = Records.getAllDerivedDefinitions("Operand"); @@ -349,6 +352,56 @@ void InstrInfoEmitter::emitOperandTypesEnum(raw_ostream &OS, OS << "} // end namespace " << Namespace << "\n"; OS << "} // end namespace llvm\n"; OS << "#endif // GET_INSTRINFO_OPERAND_TYPES_ENUM\n\n"; + + OS << "#ifdef GET_INSTRINFO_OPERAND_TYPE\n"; + OS << "#undef GET_INSTRINFO_OPERAND_TYPE\n"; + OS << "namespace llvm {\n"; + OS << "namespace " << Namespace << " {\n"; + OS << "LLVM_READONLY\n"; + OS << "int getOperandType(uint16_t Opcode, uint16_t OpIdx) {\n"; + if (!NumberedInstructions.empty()) { + OS << " static const std::initializer_list OpcodeOperandTypes[] = " + "{\n"; + for (const CodeGenInstruction *Inst : NumberedInstructions) { + OS << " { "; + for (const auto &Op : Inst->Operands) { + // Handle aggregate operands and normal operands the same way by + // expanding either case into a list of operands for this op. + std::vector OperandList; + + const DagInit *MIOI = Op.MIOperandInfo; + if (!MIOI || MIOI->getNumArgs() == 0) { + // Single, anonymous, operand. 
+ OperandList.push_back(Op); + } else { + for (unsigned j = 0, e = Op.MINumOperands; j != e; ++j) { + OperandList.push_back(Op); + + auto *OpR = cast(MIOI->getArg(j))->getDef(); + OperandList.back().Rec = OpR; + } + } + + for (unsigned j = 0, e = OperandList.size(); j != e; ++j) { + Record *OpR = OperandList[j].Rec; + if (OpR->isSubClassOf("Operand") && !OpR->isAnonymous()) + OS << "OpTypes::" << OpR->getName(); + else + OS << -1; + OS << ", "; + } + } + OS << "},\n"; + } + OS << " };\n"; + OS << " return OpcodeOperandTypes[Opcode].begin()[OpIdx];\n"; + } else { + OS << " llvm_unreachable(\"No instructions defined\");\n"; + } + OS << "}\n"; + OS << "} // end namespace " << Namespace << "\n"; + OS << "} // end namespace llvm\n"; + OS << "#endif //GET_INSTRINFO_OPERAND_TYPE\n\n"; } void InstrInfoEmitter::emitMCIIHelperMethods(raw_ostream &OS, @@ -560,7 +613,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) { emitOperandNameMappings(OS, Target, NumberedInstructions); - emitOperandTypesEnum(OS, Target); + emitOperandTypeMappings(OS, Target, NumberedInstructions); emitMCIIHelperMethods(OS, TargetName); } From 0a8d4df7999eaa876ff7c7510c964f6ec127ffa8 Mon Sep 17 00:00:00 2001 From: Guanzhong Chen Date: Tue, 16 Jul 2019 22:22:08 +0000 Subject: [PATCH 300/451] [WebAssembly] Compile all TLS on Emscripten as local-exec Summary: Currently, on Emscripten, dynamic linking is not supported with threads. This means that if thread-local storage is used, it must be used in a statically-linked executable. Hence, local-exec is the only possible model. This diff compiles all TLS variables to use local-exec on Emscripten as a temporary measure until dynamic linking is supported with threads. The goal for this is to allow C++ types with constructors to be thread-local. 
Currently, when `clang` compiles a `thread_local` variable with a constructor, it generates `__tls_guard` variable: @__tls_guard = internal thread_local global i8 0, align 1 As no TLS model is specified, this is treated as general-dynamic, which we do not support (and cannot support without implementing dynamic linking support with threads in Emscripten). As a result, any C++ constructor in `thread_local` variables would not compile. By compiling all `thread_local` as local-exec, `__tls_guard` will compile and we can support C++ constructors with TLS without implementing dynamic linking with threads. Depends on D64537 Reviewers: tlively, aheejin, sbc100 Reviewed By: aheejin Subscribers: dschuff, jgravelle-google, hiraditya, sunfish, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64776 llvm-svn: 366275 --- .../WebAssembly/WebAssemblyISelDAGToDAG.cpp | 12 ++- .../WebAssembly/tls-general-dynamic.ll | 86 +++++++++++++++++++ .../WebAssembly/{tls.ll => tls-local-exec.ll} | 0 3 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll rename llvm/test/CodeGen/WebAssembly/{tls.ll => tls-local-exec.ll} (100%) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index 1efbb3b067b85..26339eaef37db 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -179,9 +179,17 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { report_fatal_error("cannot use thread-local storage without bulk memory", false); + // Currently Emscripten does not support dynamic linking with threads. + // Therefore, if we have thread-local storage, only the local-exec model + // is possible. + // TODO: remove this and implement proper TLS models once Emscripten + // supports dynamic linking with threads. 
if (GA->getGlobal()->getThreadLocalMode() != - GlobalValue::LocalExecTLSModel) { - report_fatal_error("only -ftls-model=local-exec is supported for now", + GlobalValue::LocalExecTLSModel && + !Subtarget->getTargetTriple().isOSEmscripten()) { + report_fatal_error("only -ftls-model=local-exec is supported for now on " + "non-Emscripten OSes: variable " + + GA->getGlobal()->getName(), false); } diff --git a/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll b/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll new file mode 100644 index 0000000000000..3f6d9d325c68c --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/tls-general-dynamic.ll @@ -0,0 +1,86 @@ +; RUN: not llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory 2>&1 | FileCheck %s --check-prefix=ERROR +; RUN: not llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory -fast-isel 2>&1 | FileCheck %s --check-prefix=ERROR +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory --mtriple wasm32-unknown-emscripten | FileCheck %s --check-prefixes=CHECK,TLS +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory --mtriple wasm32-unknown-emscripten -fast-isel | FileCheck %s --check-prefixes=CHECK,TLS +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=-bulk-memory | FileCheck %s --check-prefixes=CHECK,NO-TLS +target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown" + +; ERROR: LLVM ERROR: only -ftls-model=local-exec is supported for now on non-Emscripten OSes: variable tls + +; CHECK-LABEL: address_of_tls: +; CHECK-NEXT: .functype address_of_tls () -> (i32) +define i32 @address_of_tls() { + ; TLS-DAG: global.get __tls_base + ; TLS-DAG: i32.const tls + ; TLS-NEXT: i32.add 
+ ; TLS-NEXT: return + + ; NO-TLS-NEXT: i32.const tls + ; NO-TLS-NEXT: return + ret i32 ptrtoint(i32* @tls to i32) +} + +; CHECK-LABEL: ptr_to_tls: +; CHECK-NEXT: .functype ptr_to_tls () -> (i32) +define i32* @ptr_to_tls() { + ; TLS-DAG: global.get __tls_base + ; TLS-DAG: i32.const tls + ; TLS-NEXT: i32.add + ; TLS-NEXT: return + + ; NO-TLS-NEXT: i32.const tls + ; NO-TLS-NEXT: return + ret i32* @tls +} + +; CHECK-LABEL: tls_load: +; CHECK-NEXT: .functype tls_load () -> (i32) +define i32 @tls_load() { + ; TLS-DAG: global.get __tls_base + ; TLS-DAG: i32.const tls + ; TLS-NEXT: i32.add + ; TLS-NEXT: i32.load 0 + ; TLS-NEXT: return + + ; NO-TLS-NEXT: i32.const 0 + ; NO-TLS-NEXT: i32.load tls + ; NO-TLS-NEXT: return + %tmp = load i32, i32* @tls, align 4 + ret i32 %tmp +} + +; CHECK-LABEL: tls_store: +; CHECK-NEXT: .functype tls_store (i32) -> () +define void @tls_store(i32 %x) { + ; TLS-DAG: global.get __tls_base + ; TLS-DAG: i32.const tls + ; TLS-NEXT: i32.add + ; TLS-NEXT: i32.store 0 + ; TLS-NEXT: return + + ; NO-TLS-NEXT: i32.const 0 + ; NO-TLS-NEXT: i32.store tls + ; NO-TLS-NEXT: return + store i32 %x, i32* @tls, align 4 + ret void +} + +; CHECK-LABEL: tls_size: +; CHECK-NEXT: .functype tls_size () -> (i32) +define i32 @tls_size() { +; CHECK-NEXT: global.get __tls_size +; CHECK-NEXT: return + %1 = call i32 @llvm.wasm.tls.size.i32() + ret i32 %1 +} + +; CHECK: .type tls,@object +; TLS-NEXT: .section .tbss.tls,"",@ +; NO-TLS-NEXT: .section .bss.tls,"",@ +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: tls: +; CHECK-NEXT: .int32 0 +@tls = internal thread_local global i32 0 + +declare i32 @llvm.wasm.tls.size.i32() diff --git a/llvm/test/CodeGen/WebAssembly/tls.ll b/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll similarity index 100% rename from llvm/test/CodeGen/WebAssembly/tls.ll rename to llvm/test/CodeGen/WebAssembly/tls-local-exec.ll From fdeed837edf354558cf0c6b0a0dd3af2124906b8 Mon Sep 17 00:00:00 2001 From: George Burgess IV Date: Tue, 16 Jul 2019 22:32:17 +0000 
Subject: [PATCH 301/451] Fix a typo in target features There was a slight typo in r364352 that ended up causing our backend to complain on some x86 Android builds. This CL fixes that. Differential Revision: https://reviews.llvm.org/D64781 llvm-svn: 366276 --- clang/lib/Driver/ToolChains/Arch/X86.cpp | 2 +- clang/test/Driver/clang-translation.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp index 2e75039bf0d65..34be226b69e98 100644 --- a/clang/lib/Driver/ToolChains/Arch/X86.cpp +++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp @@ -135,7 +135,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, if (ArchType == llvm::Triple::x86_64) { Features.push_back("+sse4.2"); Features.push_back("+popcnt"); - Features.push_back("+mcx16"); + Features.push_back("+cx16"); } else Features.push_back("+ssse3"); } diff --git a/clang/test/Driver/clang-translation.c b/clang/test/Driver/clang-translation.c index 0054535115aad..766e779382692 100644 --- a/clang/test/Driver/clang-translation.c +++ b/clang/test/Driver/clang-translation.c @@ -318,7 +318,7 @@ // ANDROID-X86_64: "-target-cpu" "x86-64" // ANDROID-X86_64: "-target-feature" "+sse4.2" // ANDROID-X86_64: "-target-feature" "+popcnt" -// ANDROID-X86_64: "-target-feature" "+mcx16" +// ANDROID-X86_64: "-target-feature" "+cx16" // RUN: %clang -target mips-linux-gnu -### -S %s 2>&1 | \ // RUN: FileCheck -check-prefix=MIPS %s From 418516c7b8658994622273a2a44a9fba3280dcfc Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Tue, 16 Jul 2019 22:39:18 +0000 Subject: [PATCH 302/451] [TableGen] Generate offsets into a flat array for getOperandType Rather than an array of std::initializer_list, generate a table of offsets and a flat array of the operands for getOperandType. 
This is a bit more efficient on platforms that don't manage to get the array of initializer_lists initialized at link time (I'm looking at you macOS). It's also quite a bit faster to compile. llvm-svn: 366278 --- llvm/test/TableGen/get-operand-type.td | 4 +- llvm/utils/TableGen/InstrInfoEmitter.cpp | 63 ++++++++++++++---------- 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/llvm/test/TableGen/get-operand-type.td b/llvm/test/TableGen/get-operand-type.td index 5be2c777c8ae9..69bcde38c7ef2 100644 --- a/llvm/test/TableGen/get-operand-type.td +++ b/llvm/test/TableGen/get-operand-type.td @@ -35,6 +35,6 @@ def InstB : Instruction { } // CHECK: #ifdef GET_INSTRINFO_OPERAND_TYPE -// CHECK: { OpTypes::OpA, OpTypes::OpB, OpTypes::i32imm, } -// CHECK-NEXT: { OpTypes::i32imm, -1, } +// CHECK: OpTypes::OpA, OpTypes::OpB, OpTypes::i32imm, +// CHECK-NEXT: OpTypes::i32imm, -1, // CHECK: #endif //GET_INSTRINFO_OPERAND_TYPE diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index d92585685e145..2d367f538b71f 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -213,7 +213,7 @@ void InstrInfoEmitter::EmitOperandInfo(raw_ostream &OS, } /// Initialize data structures for generating operand name mappings. -/// +/// /// \param Operands [out] A map used to generate the OpName enum with operand /// names as its keys and operand enum values as its values. 
/// \param OperandMap [out] A map for representing the operand name mappings for @@ -360,41 +360,54 @@ void InstrInfoEmitter::emitOperandTypeMappings( OS << "LLVM_READONLY\n"; OS << "int getOperandType(uint16_t Opcode, uint16_t OpIdx) {\n"; if (!NumberedInstructions.empty()) { - OS << " static const std::initializer_list OpcodeOperandTypes[] = " - "{\n"; + std::vector OperandOffsets; + std::vector OperandRecords; + int CurrentOffset = 0; for (const CodeGenInstruction *Inst : NumberedInstructions) { - OS << " { "; + OperandOffsets.push_back(CurrentOffset); for (const auto &Op : Inst->Operands) { - // Handle aggregate operands and normal operands the same way by - // expanding either case into a list of operands for this op. - std::vector OperandList; - const DagInit *MIOI = Op.MIOperandInfo; if (!MIOI || MIOI->getNumArgs() == 0) { // Single, anonymous, operand. - OperandList.push_back(Op); + OperandRecords.push_back(Op.Rec); + ++CurrentOffset; } else { - for (unsigned j = 0, e = Op.MINumOperands; j != e; ++j) { - OperandList.push_back(Op); - - auto *OpR = cast(MIOI->getArg(j))->getDef(); - OperandList.back().Rec = OpR; + for (Init *Arg : make_range(MIOI->arg_begin(), MIOI->arg_end())) { + OperandRecords.push_back(cast(Arg)->getDef()); + ++CurrentOffset; } } - - for (unsigned j = 0, e = OperandList.size(); j != e; ++j) { - Record *OpR = OperandList[j].Rec; - if (OpR->isSubClassOf("Operand") && !OpR->isAnonymous()) - OS << "OpTypes::" << OpR->getName(); - else - OS << -1; - OS << ", "; - } } - OS << "},\n"; } + + // Emit the table of offsets for the opcode lookup. + OS << " const int Offsets[] = {\n"; + for (int I = 0, E = OperandOffsets.size(); I != E; ++I) + OS << " " << OperandOffsets[I] << ",\n"; OS << " };\n"; - OS << " return OpcodeOperandTypes[Opcode].begin()[OpIdx];\n"; + + // Add an entry for the end so that we don't need to special case it below. + OperandOffsets.push_back(OperandRecords.size()); + // Emit the actual operand types in a flat table. 
+ OS << " const int OpcodeOperandTypes[] = {\n "; + for (int I = 0, E = OperandRecords.size(), CurOffset = 1; I != E; ++I) { + // We print each Opcode's operands in its own row. + if (I == OperandOffsets[CurOffset]) { + OS << "\n "; + // If there are empty rows, mark them with an empty comment. + while (OperandOffsets[++CurOffset] == I) + OS << "/**/\n "; + } + Record *OpR = OperandRecords[I]; + if (OpR->isSubClassOf("Operand") && !OpR->isAnonymous()) + OS << "OpTypes::" << OpR->getName(); + else + OS << -1; + OS << ", "; + } + OS << "\n };\n"; + + OS << " return OpcodeOperandTypes[Offsets[Opcode] + OpIdx];\n"; } else { OS << " llvm_unreachable(\"No instructions defined\");\n"; } From 1c3f4ec7fc1d0c58ec4024a064c055462448e50f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 22:41:34 +0000 Subject: [PATCH 303/451] GlobalISel: Add overload of handleAssignments with CCState AMDGPU needs to allocate special argument registers separately from the user function argument list, so needs direct control over the CCState. The ArgLocs argument is only really necessary because CCState doesn't allow access to it. llvm-svn: 366279 --- llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h | 6 +++++- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 13 +++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index d8d15bd0713ad..d717121ad78ec 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -27,6 +27,7 @@ namespace llvm { +class CCState; class DataLayout; class Function; class MachineIRBuilder; @@ -163,7 +164,10 @@ class CallLowering { /// \return True if everything has succeeded, false otherwise. 
bool handleAssignments(MachineIRBuilder &MIRBuilder, ArrayRef Args, ValueHandler &Handler) const; - + bool handleAssignments(CCState &CCState, + SmallVectorImpl &ArgLocs, + MachineIRBuilder &MIRBuilder, ArrayRef Args, + ValueHandler &Handler) const; public: CallLowering(const TargetLowering *TLI) : TLI(TLI) {} virtual ~CallLowering() = default; diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 342fb18d9d613..a5d8205a34a8e 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -163,10 +163,19 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, ValueHandler &Handler) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); - const DataLayout &DL = F.getParent()->getDataLayout(); - SmallVector ArgLocs; CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext()); + return handleAssignments(CCInfo, ArgLocs, MIRBuilder, Args, Handler); +} + +bool CallLowering::handleAssignments(CCState &CCInfo, + SmallVectorImpl &ArgLocs, + MachineIRBuilder &MIRBuilder, + ArrayRef Args, + ValueHandler &Handler) const { + MachineFunction &MF = MIRBuilder.getMF(); + const Function &F = MF.getFunction(); + const DataLayout &DL = F.getParent()->getDataLayout(); unsigned NumArgs = Args.size(); for (unsigned i = 0; i != NumArgs; ++i) { From 1bd9c6547f92e694d8d724efb757bc9e8b1f3607 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 22:41:38 +0000 Subject: [PATCH 304/451] ARM: Fix missing immarg for space intrinsic llvm-svn: 366280 --- llvm/include/llvm/IR/IntrinsicsARM.td | 2 +- llvm/test/Verifier/ARM/intrinsic-immarg.ll | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 886f1d7fd1bc6..4792af097d95d 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ 
b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -19,7 +19,7 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.". // A space-consuming intrinsic primarily for testing ARMConstantIslands. The // first argument is the number of bytes this "instruction" takes up, the second // and return value are essentially chains, used to force ordering during ISel. -def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; +def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>; // 16-bit multiplications def int_arm_smulbb : GCCBuiltin<"__builtin_arm_smulbb">, diff --git a/llvm/test/Verifier/ARM/intrinsic-immarg.ll b/llvm/test/Verifier/ARM/intrinsic-immarg.ll index b578c6d76195c..d069dd682fdb5 100644 --- a/llvm/test/Verifier/ARM/intrinsic-immarg.ll +++ b/llvm/test/Verifier/ARM/intrinsic-immarg.ll @@ -100,3 +100,12 @@ define void @mcrr2(i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4) { call void @llvm.arm.mcrr2(i32 0, i32 1, i32 2, i32 3, i32 %arg4) ret void } + +declare i32 @llvm.arm.space(i32, i32) nounwind +define i32 @space(i32 %arg0, i32 %arg1) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %arg0 + ; CHECK-NEXT: call i32 @llvm.arm.space(i32 %arg0, i32 %arg1) + %space = call i32 @llvm.arm.space(i32 %arg0, i32 %arg1) + ret i32 %space +} From e71679082c0ed1598a924aa974376e8ce21c1cea Mon Sep 17 00:00:00 2001 From: Nathan Lanza Date: Tue, 16 Jul 2019 23:01:59 +0000 Subject: [PATCH 305/451] add a workaround in GetLine to account for ReadFile not reporting error Summary: ReadFile on Windows is supposed to set ERROR_OPERATION_ABORTED according to the docs on MSDN. However, this has evidently been a known bug since Windows 8. Therefore, we can't detect if a signal interrupted in the fgets. So pressing ctrl-c causes the repl to end and the process to exit. A temporary workaround is just to attempt to fgets twice until this bug is fixed.
A possible alternative would be to set a flag in the `sigint_handler` and simply check that flag in the true part of the if statement. However, signal handlers on Windows are asynchronous and this would require sleeping on the repl loop thread while still not necessarily guaranteeing that you caught the sigint. Reviewers: jfb Differential Revision: https://reviews.llvm.org/D64660 llvm-svn: 366281 --- lldb/source/Core/IOHandler.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lldb/source/Core/IOHandler.cpp b/lldb/source/Core/IOHandler.cpp index 3a7a75e8ae56b..b30308490cca5 100644 --- a/lldb/source/Core/IOHandler.cpp +++ b/lldb/source/Core/IOHandler.cpp @@ -374,7 +374,18 @@ bool IOHandlerEditline::GetLine(std::string &line, bool &interrupted) { bool got_line = false; m_editing = true; while (!done) { +#ifdef _WIN32 + // ReadFile on Windows is supposed to set ERROR_OPERATION_ABORTED + // according to the docs on MSDN. However, this has evidently been a + // known bug since Windows 8. Therefore, we can't detect if a signal + // interrupted in the fgets. So pressing ctrl-c causes the repl to end + // and the process to exit. A temporary workaround is just to attempt to + // fgets twice until this bug is fixed.
+ if (fgets(buffer, sizeof(buffer), in) == nullptr && + fgets(buffer, sizeof(buffer), in) == nullptr) { +#else if (fgets(buffer, sizeof(buffer), in) == nullptr) { +#endif const int saved_errno = errno; if (feof(in)) done = true; From 2ecca781a15915b82333e90f92ac8c9b7d48560d Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Tue, 16 Jul 2019 23:38:05 +0000 Subject: [PATCH 306/451] Fix darwin-ld.c if dsymutil.exe exists on PATH llvm-svn: 366282 --- clang/test/Driver/darwin-ld.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/Driver/darwin-ld.c b/clang/test/Driver/darwin-ld.c index f01eeb4ea28e0..eb357a9819ffc 100644 --- a/clang/test/Driver/darwin-ld.c +++ b/clang/test/Driver/darwin-ld.c @@ -5,9 +5,9 @@ // Make sure we run dsymutil on source input files. // RUN: %clang -target i386-apple-darwin9 -### -g %s -o BAR 2> %t.log -// RUN: grep '".*dsymutil" "-o" "BAR.dSYM" "BAR"' %t.log +// RUN: grep '".*dsymutil\(.exe\)\?" "-o" "BAR.dSYM" "BAR"' %t.log // RUN: %clang -target i386-apple-darwin9 -### -g -filelist FOO %s -o BAR 2> %t.log -// RUN: grep '".*dsymutil" "-o" "BAR.dSYM" "BAR"' %t.log +// RUN: grep '".*dsymutil\(.exe\)\?" "-o" "BAR.dSYM" "BAR"' %t.log // Check linker changes that came with new linkedit format. 
// RUN: touch %t.o From e5012ab308200792ea5e12e54a36be13380882ea Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 16 Jul 2019 23:44:21 +0000 Subject: [PATCH 307/451] [AMDGPU] Autogenerate register asm names Differential Revision: https://reviews.llvm.org/D64839 llvm-svn: 366283 --- .../Target/AMDGPU/AMDGPURegAsmNames.inc.cpp | 593 ------------------ llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 - .../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h | 4 +- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 79 +-- llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 183 ++++-- 5 files changed, 139 insertions(+), 721 deletions(-) delete mode 100644 llvm/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp deleted file mode 100644 index eb0cb911b841f..0000000000000 --- a/llvm/lib/Target/AMDGPU/AMDGPURegAsmNames.inc.cpp +++ /dev/null @@ -1,593 +0,0 @@ -//===-- AMDGPURegAsmNames.inc - Register asm names ----------*- C++ -*-----===// - -#ifdef AMDGPU_REG_ASM_NAMES - -static const char *const VGPR32RegNames[] = { - "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", - "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", - "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", - "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35", - "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44", - "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53", - "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62", - "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71", - "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80", - "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89", - "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98", - "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107", - "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116", - "v117", "v118", "v119", "v120", 
"v121", "v122", "v123", "v124", "v125", - "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134", - "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143", - "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152", - "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161", - "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170", - "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179", - "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188", - "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197", - "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206", - "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215", - "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224", - "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233", - "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242", - "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251", - "v252", "v253", "v254", "v255" -}; - -static const char *const SGPR32RegNames[] = { - "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", - "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", - "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29", - "s30", "s31", "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", - "s40", "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49", - "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58", "s59", - "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67", "s68", "s69", - "s70", "s71", "s72", "s73", "s74", "s75", "s76", "s77", "s78", "s79", - "s80", "s81", "s82", "s83", "s84", "s85", "s86", "s87", "s88", "s89", - "s90", "s91", "s92", "s93", "s94", "s95", "s96", "s97", "s98", "s99", - "s100", "s101", "s102", "s103", "s104", "s105" -}; - -static const char *const VGPR64RegNames[] = { - "v[0:1]", "v[1:2]", 
"v[2:3]", "v[3:4]", "v[4:5]", - "v[5:6]", "v[6:7]", "v[7:8]", "v[8:9]", "v[9:10]", - "v[10:11]", "v[11:12]", "v[12:13]", "v[13:14]", "v[14:15]", - "v[15:16]", "v[16:17]", "v[17:18]", "v[18:19]", "v[19:20]", - "v[20:21]", "v[21:22]", "v[22:23]", "v[23:24]", "v[24:25]", - "v[25:26]", "v[26:27]", "v[27:28]", "v[28:29]", "v[29:30]", - "v[30:31]", "v[31:32]", "v[32:33]", "v[33:34]", "v[34:35]", - "v[35:36]", "v[36:37]", "v[37:38]", "v[38:39]", "v[39:40]", - "v[40:41]", "v[41:42]", "v[42:43]", "v[43:44]", "v[44:45]", - "v[45:46]", "v[46:47]", "v[47:48]", "v[48:49]", "v[49:50]", - "v[50:51]", "v[51:52]", "v[52:53]", "v[53:54]", "v[54:55]", - "v[55:56]", "v[56:57]", "v[57:58]", "v[58:59]", "v[59:60]", - "v[60:61]", "v[61:62]", "v[62:63]", "v[63:64]", "v[64:65]", - "v[65:66]", "v[66:67]", "v[67:68]", "v[68:69]", "v[69:70]", - "v[70:71]", "v[71:72]", "v[72:73]", "v[73:74]", "v[74:75]", - "v[75:76]", "v[76:77]", "v[77:78]", "v[78:79]", "v[79:80]", - "v[80:81]", "v[81:82]", "v[82:83]", "v[83:84]", "v[84:85]", - "v[85:86]", "v[86:87]", "v[87:88]", "v[88:89]", "v[89:90]", - "v[90:91]", "v[91:92]", "v[92:93]", "v[93:94]", "v[94:95]", - "v[95:96]", "v[96:97]", "v[97:98]", "v[98:99]", "v[99:100]", - "v[100:101]", "v[101:102]", "v[102:103]", "v[103:104]", "v[104:105]", - "v[105:106]", "v[106:107]", "v[107:108]", "v[108:109]", "v[109:110]", - "v[110:111]", "v[111:112]", "v[112:113]", "v[113:114]", "v[114:115]", - "v[115:116]", "v[116:117]", "v[117:118]", "v[118:119]", "v[119:120]", - "v[120:121]", "v[121:122]", "v[122:123]", "v[123:124]", "v[124:125]", - "v[125:126]", "v[126:127]", "v[127:128]", "v[128:129]", "v[129:130]", - "v[130:131]", "v[131:132]", "v[132:133]", "v[133:134]", "v[134:135]", - "v[135:136]", "v[136:137]", "v[137:138]", "v[138:139]", "v[139:140]", - "v[140:141]", "v[141:142]", "v[142:143]", "v[143:144]", "v[144:145]", - "v[145:146]", "v[146:147]", "v[147:148]", "v[148:149]", "v[149:150]", - "v[150:151]", "v[151:152]", "v[152:153]", "v[153:154]", "v[154:155]", - 
"v[155:156]", "v[156:157]", "v[157:158]", "v[158:159]", "v[159:160]", - "v[160:161]", "v[161:162]", "v[162:163]", "v[163:164]", "v[164:165]", - "v[165:166]", "v[166:167]", "v[167:168]", "v[168:169]", "v[169:170]", - "v[170:171]", "v[171:172]", "v[172:173]", "v[173:174]", "v[174:175]", - "v[175:176]", "v[176:177]", "v[177:178]", "v[178:179]", "v[179:180]", - "v[180:181]", "v[181:182]", "v[182:183]", "v[183:184]", "v[184:185]", - "v[185:186]", "v[186:187]", "v[187:188]", "v[188:189]", "v[189:190]", - "v[190:191]", "v[191:192]", "v[192:193]", "v[193:194]", "v[194:195]", - "v[195:196]", "v[196:197]", "v[197:198]", "v[198:199]", "v[199:200]", - "v[200:201]", "v[201:202]", "v[202:203]", "v[203:204]", "v[204:205]", - "v[205:206]", "v[206:207]", "v[207:208]", "v[208:209]", "v[209:210]", - "v[210:211]", "v[211:212]", "v[212:213]", "v[213:214]", "v[214:215]", - "v[215:216]", "v[216:217]", "v[217:218]", "v[218:219]", "v[219:220]", - "v[220:221]", "v[221:222]", "v[222:223]", "v[223:224]", "v[224:225]", - "v[225:226]", "v[226:227]", "v[227:228]", "v[228:229]", "v[229:230]", - "v[230:231]", "v[231:232]", "v[232:233]", "v[233:234]", "v[234:235]", - "v[235:236]", "v[236:237]", "v[237:238]", "v[238:239]", "v[239:240]", - "v[240:241]", "v[241:242]", "v[242:243]", "v[243:244]", "v[244:245]", - "v[245:246]", "v[246:247]", "v[247:248]", "v[248:249]", "v[249:250]", - "v[250:251]", "v[251:252]", "v[252:253]", "v[253:254]", "v[254:255]" -}; - -static const char *const VGPR96RegNames[] = { - "v[0:2]", "v[1:3]", "v[2:4]", "v[3:5]", "v[4:6]", - "v[5:7]", "v[6:8]", "v[7:9]", "v[8:10]", "v[9:11]", - "v[10:12]", "v[11:13]", "v[12:14]", "v[13:15]", "v[14:16]", - "v[15:17]", "v[16:18]", "v[17:19]", "v[18:20]", "v[19:21]", - "v[20:22]", "v[21:23]", "v[22:24]", "v[23:25]", "v[24:26]", - "v[25:27]", "v[26:28]", "v[27:29]", "v[28:30]", "v[29:31]", - "v[30:32]", "v[31:33]", "v[32:34]", "v[33:35]", "v[34:36]", - "v[35:37]", "v[36:38]", "v[37:39]", "v[38:40]", "v[39:41]", - "v[40:42]", "v[41:43]", 
"v[42:44]", "v[43:45]", "v[44:46]", - "v[45:47]", "v[46:48]", "v[47:49]", "v[48:50]", "v[49:51]", - "v[50:52]", "v[51:53]", "v[52:54]", "v[53:55]", "v[54:56]", - "v[55:57]", "v[56:58]", "v[57:59]", "v[58:60]", "v[59:61]", - "v[60:62]", "v[61:63]", "v[62:64]", "v[63:65]", "v[64:66]", - "v[65:67]", "v[66:68]", "v[67:69]", "v[68:70]", "v[69:71]", - "v[70:72]", "v[71:73]", "v[72:74]", "v[73:75]", "v[74:76]", - "v[75:77]", "v[76:78]", "v[77:79]", "v[78:80]", "v[79:81]", - "v[80:82]", "v[81:83]", "v[82:84]", "v[83:85]", "v[84:86]", - "v[85:87]", "v[86:88]", "v[87:89]", "v[88:90]", "v[89:91]", - "v[90:92]", "v[91:93]", "v[92:94]", "v[93:95]", "v[94:96]", - "v[95:97]", "v[96:98]", "v[97:99]", "v[98:100]", "v[99:101]", - "v[100:102]", "v[101:103]", "v[102:104]", "v[103:105]", "v[104:106]", - "v[105:107]", "v[106:108]", "v[107:109]", "v[108:110]", "v[109:111]", - "v[110:112]", "v[111:113]", "v[112:114]", "v[113:115]", "v[114:116]", - "v[115:117]", "v[116:118]", "v[117:119]", "v[118:120]", "v[119:121]", - "v[120:122]", "v[121:123]", "v[122:124]", "v[123:125]", "v[124:126]", - "v[125:127]", "v[126:128]", "v[127:129]", "v[128:130]", "v[129:131]", - "v[130:132]", "v[131:133]", "v[132:134]", "v[133:135]", "v[134:136]", - "v[135:137]", "v[136:138]", "v[137:139]", "v[138:140]", "v[139:141]", - "v[140:142]", "v[141:143]", "v[142:144]", "v[143:145]", "v[144:146]", - "v[145:147]", "v[146:148]", "v[147:149]", "v[148:150]", "v[149:151]", - "v[150:152]", "v[151:153]", "v[152:154]", "v[153:155]", "v[154:156]", - "v[155:157]", "v[156:158]", "v[157:159]", "v[158:160]", "v[159:161]", - "v[160:162]", "v[161:163]", "v[162:164]", "v[163:165]", "v[164:166]", - "v[165:167]", "v[166:168]", "v[167:169]", "v[168:170]", "v[169:171]", - "v[170:172]", "v[171:173]", "v[172:174]", "v[173:175]", "v[174:176]", - "v[175:177]", "v[176:178]", "v[177:179]", "v[178:180]", "v[179:181]", - "v[180:182]", "v[181:183]", "v[182:184]", "v[183:185]", "v[184:186]", - "v[185:187]", "v[186:188]", "v[187:189]", 
"v[188:190]", "v[189:191]", - "v[190:192]", "v[191:193]", "v[192:194]", "v[193:195]", "v[194:196]", - "v[195:197]", "v[196:198]", "v[197:199]", "v[198:200]", "v[199:201]", - "v[200:202]", "v[201:203]", "v[202:204]", "v[203:205]", "v[204:206]", - "v[205:207]", "v[206:208]", "v[207:209]", "v[208:210]", "v[209:211]", - "v[210:212]", "v[211:213]", "v[212:214]", "v[213:215]", "v[214:216]", - "v[215:217]", "v[216:218]", "v[217:219]", "v[218:220]", "v[219:221]", - "v[220:222]", "v[221:223]", "v[222:224]", "v[223:225]", "v[224:226]", - "v[225:227]", "v[226:228]", "v[227:229]", "v[228:230]", "v[229:231]", - "v[230:232]", "v[231:233]", "v[232:234]", "v[233:235]", "v[234:236]", - "v[235:237]", "v[236:238]", "v[237:239]", "v[238:240]", "v[239:241]", - "v[240:242]", "v[241:243]", "v[242:244]", "v[243:245]", "v[244:246]", - "v[245:247]", "v[246:248]", "v[247:249]", "v[248:250]", "v[249:251]", - "v[250:252]", "v[251:253]", "v[252:254]", "v[253:255]" -}; - -static const char *const VGPR128RegNames[] = { - "v[0:3]", "v[1:4]", "v[2:5]", "v[3:6]", "v[4:7]", - "v[5:8]", "v[6:9]", "v[7:10]", "v[8:11]", "v[9:12]", - "v[10:13]", "v[11:14]", "v[12:15]", "v[13:16]", "v[14:17]", - "v[15:18]", "v[16:19]", "v[17:20]", "v[18:21]", "v[19:22]", - "v[20:23]", "v[21:24]", "v[22:25]", "v[23:26]", "v[24:27]", - "v[25:28]", "v[26:29]", "v[27:30]", "v[28:31]", "v[29:32]", - "v[30:33]", "v[31:34]", "v[32:35]", "v[33:36]", "v[34:37]", - "v[35:38]", "v[36:39]", "v[37:40]", "v[38:41]", "v[39:42]", - "v[40:43]", "v[41:44]", "v[42:45]", "v[43:46]", "v[44:47]", - "v[45:48]", "v[46:49]", "v[47:50]", "v[48:51]", "v[49:52]", - "v[50:53]", "v[51:54]", "v[52:55]", "v[53:56]", "v[54:57]", - "v[55:58]", "v[56:59]", "v[57:60]", "v[58:61]", "v[59:62]", - "v[60:63]", "v[61:64]", "v[62:65]", "v[63:66]", "v[64:67]", - "v[65:68]", "v[66:69]", "v[67:70]", "v[68:71]", "v[69:72]", - "v[70:73]", "v[71:74]", "v[72:75]", "v[73:76]", "v[74:77]", - "v[75:78]", "v[76:79]", "v[77:80]", "v[78:81]", "v[79:82]", - "v[80:83]", 
"v[81:84]", "v[82:85]", "v[83:86]", "v[84:87]", - "v[85:88]", "v[86:89]", "v[87:90]", "v[88:91]", "v[89:92]", - "v[90:93]", "v[91:94]", "v[92:95]", "v[93:96]", "v[94:97]", - "v[95:98]", "v[96:99]", "v[97:100]", "v[98:101]", "v[99:102]", - "v[100:103]", "v[101:104]", "v[102:105]", "v[103:106]", "v[104:107]", - "v[105:108]", "v[106:109]", "v[107:110]", "v[108:111]", "v[109:112]", - "v[110:113]", "v[111:114]", "v[112:115]", "v[113:116]", "v[114:117]", - "v[115:118]", "v[116:119]", "v[117:120]", "v[118:121]", "v[119:122]", - "v[120:123]", "v[121:124]", "v[122:125]", "v[123:126]", "v[124:127]", - "v[125:128]", "v[126:129]", "v[127:130]", "v[128:131]", "v[129:132]", - "v[130:133]", "v[131:134]", "v[132:135]", "v[133:136]", "v[134:137]", - "v[135:138]", "v[136:139]", "v[137:140]", "v[138:141]", "v[139:142]", - "v[140:143]", "v[141:144]", "v[142:145]", "v[143:146]", "v[144:147]", - "v[145:148]", "v[146:149]", "v[147:150]", "v[148:151]", "v[149:152]", - "v[150:153]", "v[151:154]", "v[152:155]", "v[153:156]", "v[154:157]", - "v[155:158]", "v[156:159]", "v[157:160]", "v[158:161]", "v[159:162]", - "v[160:163]", "v[161:164]", "v[162:165]", "v[163:166]", "v[164:167]", - "v[165:168]", "v[166:169]", "v[167:170]", "v[168:171]", "v[169:172]", - "v[170:173]", "v[171:174]", "v[172:175]", "v[173:176]", "v[174:177]", - "v[175:178]", "v[176:179]", "v[177:180]", "v[178:181]", "v[179:182]", - "v[180:183]", "v[181:184]", "v[182:185]", "v[183:186]", "v[184:187]", - "v[185:188]", "v[186:189]", "v[187:190]", "v[188:191]", "v[189:192]", - "v[190:193]", "v[191:194]", "v[192:195]", "v[193:196]", "v[194:197]", - "v[195:198]", "v[196:199]", "v[197:200]", "v[198:201]", "v[199:202]", - "v[200:203]", "v[201:204]", "v[202:205]", "v[203:206]", "v[204:207]", - "v[205:208]", "v[206:209]", "v[207:210]", "v[208:211]", "v[209:212]", - "v[210:213]", "v[211:214]", "v[212:215]", "v[213:216]", "v[214:217]", - "v[215:218]", "v[216:219]", "v[217:220]", "v[218:221]", "v[219:222]", - "v[220:223]", "v[221:224]", 
"v[222:225]", "v[223:226]", "v[224:227]", - "v[225:228]", "v[226:229]", "v[227:230]", "v[228:231]", "v[229:232]", - "v[230:233]", "v[231:234]", "v[232:235]", "v[233:236]", "v[234:237]", - "v[235:238]", "v[236:239]", "v[237:240]", "v[238:241]", "v[239:242]", - "v[240:243]", "v[241:244]", "v[242:245]", "v[243:246]", "v[244:247]", - "v[245:248]", "v[246:249]", "v[247:250]", "v[248:251]", "v[249:252]", - "v[250:253]", "v[251:254]", "v[252:255]" -}; - -static const char *const VGPR256RegNames[] = { - "v[0:7]", "v[1:8]", "v[2:9]", "v[3:10]", "v[4:11]", - "v[5:12]", "v[6:13]", "v[7:14]", "v[8:15]", "v[9:16]", - "v[10:17]", "v[11:18]", "v[12:19]", "v[13:20]", "v[14:21]", - "v[15:22]", "v[16:23]", "v[17:24]", "v[18:25]", "v[19:26]", - "v[20:27]", "v[21:28]", "v[22:29]", "v[23:30]", "v[24:31]", - "v[25:32]", "v[26:33]", "v[27:34]", "v[28:35]", "v[29:36]", - "v[30:37]", "v[31:38]", "v[32:39]", "v[33:40]", "v[34:41]", - "v[35:42]", "v[36:43]", "v[37:44]", "v[38:45]", "v[39:46]", - "v[40:47]", "v[41:48]", "v[42:49]", "v[43:50]", "v[44:51]", - "v[45:52]", "v[46:53]", "v[47:54]", "v[48:55]", "v[49:56]", - "v[50:57]", "v[51:58]", "v[52:59]", "v[53:60]", "v[54:61]", - "v[55:62]", "v[56:63]", "v[57:64]", "v[58:65]", "v[59:66]", - "v[60:67]", "v[61:68]", "v[62:69]", "v[63:70]", "v[64:71]", - "v[65:72]", "v[66:73]", "v[67:74]", "v[68:75]", "v[69:76]", - "v[70:77]", "v[71:78]", "v[72:79]", "v[73:80]", "v[74:81]", - "v[75:82]", "v[76:83]", "v[77:84]", "v[78:85]", "v[79:86]", - "v[80:87]", "v[81:88]", "v[82:89]", "v[83:90]", "v[84:91]", - "v[85:92]", "v[86:93]", "v[87:94]", "v[88:95]", "v[89:96]", - "v[90:97]", "v[91:98]", "v[92:99]", "v[93:100]", "v[94:101]", - "v[95:102]", "v[96:103]", "v[97:104]", "v[98:105]", "v[99:106]", - "v[100:107]", "v[101:108]", "v[102:109]", "v[103:110]", "v[104:111]", - "v[105:112]", "v[106:113]", "v[107:114]", "v[108:115]", "v[109:116]", - "v[110:117]", "v[111:118]", "v[112:119]", "v[113:120]", "v[114:121]", - "v[115:122]", "v[116:123]", "v[117:124]", 
"v[118:125]", "v[119:126]", - "v[120:127]", "v[121:128]", "v[122:129]", "v[123:130]", "v[124:131]", - "v[125:132]", "v[126:133]", "v[127:134]", "v[128:135]", "v[129:136]", - "v[130:137]", "v[131:138]", "v[132:139]", "v[133:140]", "v[134:141]", - "v[135:142]", "v[136:143]", "v[137:144]", "v[138:145]", "v[139:146]", - "v[140:147]", "v[141:148]", "v[142:149]", "v[143:150]", "v[144:151]", - "v[145:152]", "v[146:153]", "v[147:154]", "v[148:155]", "v[149:156]", - "v[150:157]", "v[151:158]", "v[152:159]", "v[153:160]", "v[154:161]", - "v[155:162]", "v[156:163]", "v[157:164]", "v[158:165]", "v[159:166]", - "v[160:167]", "v[161:168]", "v[162:169]", "v[163:170]", "v[164:171]", - "v[165:172]", "v[166:173]", "v[167:174]", "v[168:175]", "v[169:176]", - "v[170:177]", "v[171:178]", "v[172:179]", "v[173:180]", "v[174:181]", - "v[175:182]", "v[176:183]", "v[177:184]", "v[178:185]", "v[179:186]", - "v[180:187]", "v[181:188]", "v[182:189]", "v[183:190]", "v[184:191]", - "v[185:192]", "v[186:193]", "v[187:194]", "v[188:195]", "v[189:196]", - "v[190:197]", "v[191:198]", "v[192:199]", "v[193:200]", "v[194:201]", - "v[195:202]", "v[196:203]", "v[197:204]", "v[198:205]", "v[199:206]", - "v[200:207]", "v[201:208]", "v[202:209]", "v[203:210]", "v[204:211]", - "v[205:212]", "v[206:213]", "v[207:214]", "v[208:215]", "v[209:216]", - "v[210:217]", "v[211:218]", "v[212:219]", "v[213:220]", "v[214:221]", - "v[215:222]", "v[216:223]", "v[217:224]", "v[218:225]", "v[219:226]", - "v[220:227]", "v[221:228]", "v[222:229]", "v[223:230]", "v[224:231]", - "v[225:232]", "v[226:233]", "v[227:234]", "v[228:235]", "v[229:236]", - "v[230:237]", "v[231:238]", "v[232:239]", "v[233:240]", "v[234:241]", - "v[235:242]", "v[236:243]", "v[237:244]", "v[238:245]", "v[239:246]", - "v[240:247]", "v[241:248]", "v[242:249]", "v[243:250]", "v[244:251]", - "v[245:252]", "v[246:253]", "v[247:254]", "v[248:255]" -}; - -static const char *const VGPR512RegNames[] = { - "v[0:15]", "v[1:16]", "v[2:17]", "v[3:18]", "v[4:19]", - 
"v[5:20]", "v[6:21]", "v[7:22]", "v[8:23]", "v[9:24]", - "v[10:25]", "v[11:26]", "v[12:27]", "v[13:28]", "v[14:29]", - "v[15:30]", "v[16:31]", "v[17:32]", "v[18:33]", "v[19:34]", - "v[20:35]", "v[21:36]", "v[22:37]", "v[23:38]", "v[24:39]", - "v[25:40]", "v[26:41]", "v[27:42]", "v[28:43]", "v[29:44]", - "v[30:45]", "v[31:46]", "v[32:47]", "v[33:48]", "v[34:49]", - "v[35:50]", "v[36:51]", "v[37:52]", "v[38:53]", "v[39:54]", - "v[40:55]", "v[41:56]", "v[42:57]", "v[43:58]", "v[44:59]", - "v[45:60]", "v[46:61]", "v[47:62]", "v[48:63]", "v[49:64]", - "v[50:65]", "v[51:66]", "v[52:67]", "v[53:68]", "v[54:69]", - "v[55:70]", "v[56:71]", "v[57:72]", "v[58:73]", "v[59:74]", - "v[60:75]", "v[61:76]", "v[62:77]", "v[63:78]", "v[64:79]", - "v[65:80]", "v[66:81]", "v[67:82]", "v[68:83]", "v[69:84]", - "v[70:85]", "v[71:86]", "v[72:87]", "v[73:88]", "v[74:89]", - "v[75:90]", "v[76:91]", "v[77:92]", "v[78:93]", "v[79:94]", - "v[80:95]", "v[81:96]", "v[82:97]", "v[83:98]", "v[84:99]", - "v[85:100]", "v[86:101]", "v[87:102]", "v[88:103]", "v[89:104]", - "v[90:105]", "v[91:106]", "v[92:107]", "v[93:108]", "v[94:109]", - "v[95:110]", "v[96:111]", "v[97:112]", "v[98:113]", "v[99:114]", - "v[100:115]", "v[101:116]", "v[102:117]", "v[103:118]", "v[104:119]", - "v[105:120]", "v[106:121]", "v[107:122]", "v[108:123]", "v[109:124]", - "v[110:125]", "v[111:126]", "v[112:127]", "v[113:128]", "v[114:129]", - "v[115:130]", "v[116:131]", "v[117:132]", "v[118:133]", "v[119:134]", - "v[120:135]", "v[121:136]", "v[122:137]", "v[123:138]", "v[124:139]", - "v[125:140]", "v[126:141]", "v[127:142]", "v[128:143]", "v[129:144]", - "v[130:145]", "v[131:146]", "v[132:147]", "v[133:148]", "v[134:149]", - "v[135:150]", "v[136:151]", "v[137:152]", "v[138:153]", "v[139:154]", - "v[140:155]", "v[141:156]", "v[142:157]", "v[143:158]", "v[144:159]", - "v[145:160]", "v[146:161]", "v[147:162]", "v[148:163]", "v[149:164]", - "v[150:165]", "v[151:166]", "v[152:167]", "v[153:168]", "v[154:169]", - "v[155:170]", 
"v[156:171]", "v[157:172]", "v[158:173]", "v[159:174]", - "v[160:175]", "v[161:176]", "v[162:177]", "v[163:178]", "v[164:179]", - "v[165:180]", "v[166:181]", "v[167:182]", "v[168:183]", "v[169:184]", - "v[170:185]", "v[171:186]", "v[172:187]", "v[173:188]", "v[174:189]", - "v[175:190]", "v[176:191]", "v[177:192]", "v[178:193]", "v[179:194]", - "v[180:195]", "v[181:196]", "v[182:197]", "v[183:198]", "v[184:199]", - "v[185:200]", "v[186:201]", "v[187:202]", "v[188:203]", "v[189:204]", - "v[190:205]", "v[191:206]", "v[192:207]", "v[193:208]", "v[194:209]", - "v[195:210]", "v[196:211]", "v[197:212]", "v[198:213]", "v[199:214]", - "v[200:215]", "v[201:216]", "v[202:217]", "v[203:218]", "v[204:219]", - "v[205:220]", "v[206:221]", "v[207:222]", "v[208:223]", "v[209:224]", - "v[210:225]", "v[211:226]", "v[212:227]", "v[213:228]", "v[214:229]", - "v[215:230]", "v[216:231]", "v[217:232]", "v[218:233]", "v[219:234]", - "v[220:235]", "v[221:236]", "v[222:237]", "v[223:238]", "v[224:239]", - "v[225:240]", "v[226:241]", "v[227:242]", "v[228:243]", "v[229:244]", - "v[230:245]", "v[231:246]", "v[232:247]", "v[233:248]", "v[234:249]", - "v[235:250]", "v[236:251]", "v[237:252]", "v[238:253]", "v[239:254]", - "v[240:255]" -}; - -static const char *const SGPR64RegNames[] = { - "s[0:1]", "s[2:3]", "s[4:5]", "s[6:7]", "s[8:9]", "s[10:11]", - "s[12:13]", "s[14:15]", "s[16:17]", "s[18:19]", "s[20:21]", "s[22:23]", - "s[24:25]", "s[26:27]", "s[28:29]", "s[30:31]", "s[32:33]", "s[34:35]", - "s[36:37]", "s[38:39]", "s[40:41]", "s[42:43]", "s[44:45]", "s[46:47]", - "s[48:49]", "s[50:51]", "s[52:53]", "s[54:55]", "s[56:57]", "s[58:59]", - "s[60:61]", "s[62:63]", "s[64:65]", "s[66:67]", "s[68:69]", "s[70:71]", - "s[72:73]", "s[74:75]", "s[76:77]", "s[78:79]", "s[80:81]", "s[82:83]", - "s[84:85]", "s[86:87]", "s[88:89]", "s[90:91]", "s[92:93]", "s[94:95]", - "s[96:97]", "s[98:99]", "s[100:101]", "s[102:103]", "s[104:105]" -}; - -static const char *const SGPR128RegNames[] = { - "s[0:3]", 
"s[4:7]", "s[8:11]", "s[12:15]", "s[16:19]", "s[20:23]", - "s[24:27]", "s[28:31]", "s[32:35]", "s[36:39]", "s[40:43]", "s[44:47]", - "s[48:51]", "s[52:55]", "s[56:59]", "s[60:63]", "s[64:67]", "s[68:71]", - "s[72:75]", "s[76:79]", "s[80:83]", "s[84:87]", "s[88:91]", "s[92:95]", - "s[96:99]", "s[100:103]" -}; - -static const char *const SGPR256RegNames[] = { - "s[0:7]", "s[4:11]", "s[8:15]", "s[12:19]", "s[16:23]", - "s[20:27]", "s[24:31]", "s[28:35]", "s[32:39]", "s[36:43]", - "s[40:47]", "s[44:51]", "s[48:55]", "s[52:59]", "s[56:63]", - "s[60:67]", "s[64:71]", "s[68:75]", "s[72:79]", "s[76:83]", - "s[80:87]", "s[84:91]", "s[88:95]", "s[92:99]", "s[96:103]" -}; - -static const char *const SGPR512RegNames[] = { - "s[0:15]", "s[4:19]", "s[8:23]", "s[12:27]", "s[16:31]", "s[20:35]", - "s[24:39]", "s[28:43]", "s[32:47]", "s[36:51]", "s[40:55]", "s[44:59]", - "s[48:63]", "s[52:67]", "s[56:71]", "s[60:75]", "s[64:79]", "s[68:83]", - "s[72:87]", "s[76:91]", "s[80:95]", "s[84:99]", "s[88:103]" -}; - -static const char *const AGPR32RegNames[] = { - "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", - "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17", - "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26", - "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35", - "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44", - "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53", - "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62", - "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71", - "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", - "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", - "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", - "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107", - "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116", - "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125", - "a126", "a127", 
"a128", "a129", "a130", "a131", "a132", "a133", "a134", - "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143", - "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152", - "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161", - "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170", - "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179", - "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188", - "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197", - "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206", - "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215", - "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224", - "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233", - "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242", - "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251", - "a252", "a253", "a254", "a255" -}; - -static const char *const AGPR64RegNames[] = { - "a[0:1]", "a[1:2]", "a[2:3]", "a[3:4]", "a[4:5]", - "a[5:6]", "a[6:7]", "a[7:8]", "a[8:9]", "a[9:10]", - "a[10:11]", "a[11:12]", "a[12:13]", "a[13:14]", "a[14:15]", - "a[15:16]", "a[16:17]", "a[17:18]", "a[18:19]", "a[19:20]", - "a[20:21]", "a[21:22]", "a[22:23]", "a[23:24]", "a[24:25]", - "a[25:26]", "a[26:27]", "a[27:28]", "a[28:29]", "a[29:30]", - "a[30:31]", "a[31:32]", "a[32:33]", "a[33:34]", "a[34:35]", - "a[35:36]", "a[36:37]", "a[37:38]", "a[38:39]", "a[39:40]", - "a[40:41]", "a[41:42]", "a[42:43]", "a[43:44]", "a[44:45]", - "a[45:46]", "a[46:47]", "a[47:48]", "a[48:49]", "a[49:50]", - "a[50:51]", "a[51:52]", "a[52:53]", "a[53:54]", "a[54:55]", - "a[55:56]", "a[56:57]", "a[57:58]", "a[58:59]", "a[59:60]", - "a[60:61]", "a[61:62]", "a[62:63]", "a[63:64]", "a[64:65]", - "a[65:66]", "a[66:67]", "a[67:68]", "a[68:69]", "a[69:70]", - "a[70:71]", "a[71:72]", "a[72:73]", 
"a[73:74]", "a[74:75]", - "a[75:76]", "a[76:77]", "a[77:78]", "a[78:79]", "a[79:80]", - "a[80:81]", "a[81:82]", "a[82:83]", "a[83:84]", "a[84:85]", - "a[85:86]", "a[86:87]", "a[87:88]", "a[88:89]", "a[89:90]", - "a[90:91]", "a[91:92]", "a[92:93]", "a[93:94]", "a[94:95]", - "a[95:96]", "a[96:97]", "a[97:98]", "a[98:99]", "a[99:100]", - "a[100:101]", "a[101:102]", "a[102:103]", "a[103:104]", "a[104:105]", - "a[105:106]", "a[106:107]", "a[107:108]", "a[108:109]", "a[109:110]", - "a[110:111]", "a[111:112]", "a[112:113]", "a[113:114]", "a[114:115]", - "a[115:116]", "a[116:117]", "a[117:118]", "a[118:119]", "a[119:120]", - "a[120:121]", "a[121:122]", "a[122:123]", "a[123:124]", "a[124:125]", - "a[125:126]", "a[126:127]", "a[127:128]", "a[128:129]", "a[129:130]", - "a[130:131]", "a[131:132]", "a[132:133]", "a[133:134]", "a[134:135]", - "a[135:136]", "a[136:137]", "a[137:138]", "a[138:139]", "a[139:140]", - "a[140:141]", "a[141:142]", "a[142:143]", "a[143:144]", "a[144:145]", - "a[145:146]", "a[146:147]", "a[147:148]", "a[148:149]", "a[149:150]", - "a[150:151]", "a[151:152]", "a[152:153]", "a[153:154]", "a[154:155]", - "a[155:156]", "a[156:157]", "a[157:158]", "a[158:159]", "a[159:160]", - "a[160:161]", "a[161:162]", "a[162:163]", "a[163:164]", "a[164:165]", - "a[165:166]", "a[166:167]", "a[167:168]", "a[168:169]", "a[169:170]", - "a[170:171]", "a[171:172]", "a[172:173]", "a[173:174]", "a[174:175]", - "a[175:176]", "a[176:177]", "a[177:178]", "a[178:179]", "a[179:180]", - "a[180:181]", "a[181:182]", "a[182:183]", "a[183:184]", "a[184:185]", - "a[185:186]", "a[186:187]", "a[187:188]", "a[188:189]", "a[189:190]", - "a[190:191]", "a[191:192]", "a[192:193]", "a[193:194]", "a[194:195]", - "a[195:196]", "a[196:197]", "a[197:198]", "a[198:199]", "a[199:200]", - "a[200:201]", "a[201:202]", "a[202:203]", "a[203:204]", "a[204:205]", - "a[205:206]", "a[206:207]", "a[207:208]", "a[208:209]", "a[209:210]", - "a[210:211]", "a[211:212]", "a[212:213]", "a[213:214]", "a[214:215]", - 
"a[215:216]", "a[216:217]", "a[217:218]", "a[218:219]", "a[219:220]", - "a[220:221]", "a[221:222]", "a[222:223]", "a[223:224]", "a[224:225]", - "a[225:226]", "a[226:227]", "a[227:228]", "a[228:229]", "a[229:230]", - "a[230:231]", "a[231:232]", "a[232:233]", "a[233:234]", "a[234:235]", - "a[235:236]", "a[236:237]", "a[237:238]", "a[238:239]", "a[239:240]", - "a[240:241]", "a[241:242]", "a[242:243]", "a[243:244]", "a[244:245]", - "a[245:246]", "a[246:247]", "a[247:248]", "a[248:249]", "a[249:250]", - "a[250:251]", "a[251:252]", "a[252:253]", "a[253:254]", "a[254:255]" -}; - -static const char *const AGPR128RegNames[] = { - "a[0:3]", "a[1:4]", "a[2:5]", "a[3:6]", "a[4:7]", - "a[5:8]", "a[6:9]", "a[7:10]", "a[8:11]", "a[9:12]", - "a[10:13]", "a[11:14]", "a[12:15]", "a[13:16]", "a[14:17]", - "a[15:18]", "a[16:19]", "a[17:20]", "a[18:21]", "a[19:22]", - "a[20:23]", "a[21:24]", "a[22:25]", "a[23:26]", "a[24:27]", - "a[25:28]", "a[26:29]", "a[27:30]", "a[28:31]", "a[29:32]", - "a[30:33]", "a[31:34]", "a[32:35]", "a[33:36]", "a[34:37]", - "a[35:38]", "a[36:39]", "a[37:40]", "a[38:41]", "a[39:42]", - "a[40:43]", "a[41:44]", "a[42:45]", "a[43:46]", "a[44:47]", - "a[45:48]", "a[46:49]", "a[47:50]", "a[48:51]", "a[49:52]", - "a[50:53]", "a[51:54]", "a[52:55]", "a[53:56]", "a[54:57]", - "a[55:58]", "a[56:59]", "a[57:60]", "a[58:61]", "a[59:62]", - "a[60:63]", "a[61:64]", "a[62:65]", "a[63:66]", "a[64:67]", - "a[65:68]", "a[66:69]", "a[67:70]", "a[68:71]", "a[69:72]", - "a[70:73]", "a[71:74]", "a[72:75]", "a[73:76]", "a[74:77]", - "a[75:78]", "a[76:79]", "a[77:80]", "a[78:81]", "a[79:82]", - "a[80:83]", "a[81:84]", "a[82:85]", "a[83:86]", "a[84:87]", - "a[85:88]", "a[86:89]", "a[87:90]", "a[88:91]", "a[89:92]", - "a[90:93]", "a[91:94]", "a[92:95]", "a[93:96]", "a[94:97]", - "a[95:98]", "a[96:99]", "a[97:100]", "a[98:101]", "a[99:102]", - "a[100:103]", "a[101:104]", "a[102:105]", "a[103:106]", "a[104:107]", - "a[105:108]", "a[106:109]", "a[107:110]", "a[108:111]", "a[109:112]", - 
"a[110:113]", "a[111:114]", "a[112:115]", "a[113:116]", "a[114:117]", - "a[115:118]", "a[116:119]", "a[117:120]", "a[118:121]", "a[119:122]", - "a[120:123]", "a[121:124]", "a[122:125]", "a[123:126]", "a[124:127]", - "a[125:128]", "a[126:129]", "a[127:130]", "a[128:131]", "a[129:132]", - "a[130:133]", "a[131:134]", "a[132:135]", "a[133:136]", "a[134:137]", - "a[135:138]", "a[136:139]", "a[137:140]", "a[138:141]", "a[139:142]", - "a[140:143]", "a[141:144]", "a[142:145]", "a[143:146]", "a[144:147]", - "a[145:148]", "a[146:149]", "a[147:150]", "a[148:151]", "a[149:152]", - "a[150:153]", "a[151:154]", "a[152:155]", "a[153:156]", "a[154:157]", - "a[155:158]", "a[156:159]", "a[157:160]", "a[158:161]", "a[159:162]", - "a[160:163]", "a[161:164]", "a[162:165]", "a[163:166]", "a[164:167]", - "a[165:168]", "a[166:169]", "a[167:170]", "a[168:171]", "a[169:172]", - "a[170:173]", "a[171:174]", "a[172:175]", "a[173:176]", "a[174:177]", - "a[175:178]", "a[176:179]", "a[177:180]", "a[178:181]", "a[179:182]", - "a[180:183]", "a[181:184]", "a[182:185]", "a[183:186]", "a[184:187]", - "a[185:188]", "a[186:189]", "a[187:190]", "a[188:191]", "a[189:192]", - "a[190:193]", "a[191:194]", "a[192:195]", "a[193:196]", "a[194:197]", - "a[195:198]", "a[196:199]", "a[197:200]", "a[198:201]", "a[199:202]", - "a[200:203]", "a[201:204]", "a[202:205]", "a[203:206]", "a[204:207]", - "a[205:208]", "a[206:209]", "a[207:210]", "a[208:211]", "a[209:212]", - "a[210:213]", "a[211:214]", "a[212:215]", "a[213:216]", "a[214:217]", - "a[215:218]", "a[216:219]", "a[217:220]", "a[218:221]", "a[219:222]", - "a[220:223]", "a[221:224]", "a[222:225]", "a[223:226]", "a[224:227]", - "a[225:228]", "a[226:229]", "a[227:230]", "a[228:231]", "a[229:232]", - "a[230:233]", "a[231:234]", "a[232:235]", "a[233:236]", "a[234:237]", - "a[235:238]", "a[236:239]", "a[237:240]", "a[238:241]", "a[239:242]", - "a[240:243]", "a[241:244]", "a[242:245]", "a[243:246]", "a[244:247]", - "a[245:248]", "a[246:249]", "a[247:250]", "a[248:251]", 
"a[249:252]", - "a[250:253]", "a[251:254]", "a[252:255]" -}; - -static const char *const AGPR512RegNames[] = { - "a[0:15]", "a[1:16]", "a[2:17]", "a[3:18]", "a[4:19]", - "a[5:20]", "a[6:21]", "a[7:22]", "a[8:23]", "a[9:24]", - "a[10:25]", "a[11:26]", "a[12:27]", "a[13:28]", "a[14:29]", - "a[15:30]", "a[16:31]", "a[17:32]", "a[18:33]", "a[19:34]", - "a[20:35]", "a[21:36]", "a[22:37]", "a[23:38]", "a[24:39]", - "a[25:40]", "a[26:41]", "a[27:42]", "a[28:43]", "a[29:44]", - "a[30:45]", "a[31:46]", "a[32:47]", "a[33:48]", "a[34:49]", - "a[35:50]", "a[36:51]", "a[37:52]", "a[38:53]", "a[39:54]", - "a[40:55]", "a[41:56]", "a[42:57]", "a[43:58]", "a[44:59]", - "a[45:60]", "a[46:61]", "a[47:62]", "a[48:63]", "a[49:64]", - "a[50:65]", "a[51:66]", "a[52:67]", "a[53:68]", "a[54:69]", - "a[55:70]", "a[56:71]", "a[57:72]", "a[58:73]", "a[59:74]", - "a[60:75]", "a[61:76]", "a[62:77]", "a[63:78]", "a[64:79]", - "a[65:80]", "a[66:81]", "a[67:82]", "a[68:83]", "a[69:84]", - "a[70:85]", "a[71:86]", "a[72:87]", "a[73:88]", "a[74:89]", - "a[75:90]", "a[76:91]", "a[77:92]", "a[78:93]", "a[79:94]", - "a[80:95]", "a[81:96]", "a[82:97]", "a[83:98]", "a[84:99]", - "a[85:100]", "a[86:101]", "a[87:102]", "a[88:103]", "a[89:104]", - "a[90:105]", "a[91:106]", "a[92:107]", "a[93:108]", "a[94:109]", - "a[95:110]", "a[96:111]", "a[97:112]", "a[98:113]", "a[99:114]", - "a[100:115]", "a[101:116]", "a[102:117]", "a[103:118]", "a[104:119]", - "a[105:120]", "a[106:121]", "a[107:122]", "a[108:123]", "a[109:124]", - "a[110:125]", "a[111:126]", "a[112:127]", "a[113:128]", "a[114:129]", - "a[115:130]", "a[116:131]", "a[117:132]", "a[118:133]", "a[119:134]", - "a[120:135]", "a[121:136]", "a[122:137]", "a[123:138]", "a[124:139]", - "a[125:140]", "a[126:141]", "a[127:142]", "a[128:143]", "a[129:144]", - "a[130:145]", "a[131:146]", "a[132:147]", "a[133:148]", "a[134:149]", - "a[135:150]", "a[136:151]", "a[137:152]", "a[138:153]", "a[139:154]", - "a[140:155]", "a[141:156]", "a[142:157]", "a[143:158]", 
"a[144:159]", - "a[145:160]", "a[146:161]", "a[147:162]", "a[148:163]", "a[149:164]", - "a[150:165]", "a[151:166]", "a[152:167]", "a[153:168]", "a[154:169]", - "a[155:170]", "a[156:171]", "a[157:172]", "a[158:173]", "a[159:174]", - "a[160:175]", "a[161:176]", "a[162:177]", "a[163:178]", "a[164:179]", - "a[165:180]", "a[166:181]", "a[167:182]", "a[168:183]", "a[169:184]", - "a[170:185]", "a[171:186]", "a[172:187]", "a[173:188]", "a[174:189]", - "a[175:190]", "a[176:191]", "a[177:192]", "a[178:193]", "a[179:194]", - "a[180:195]", "a[181:196]", "a[182:197]", "a[183:198]", "a[184:199]", - "a[185:200]", "a[186:201]", "a[187:202]", "a[188:203]", "a[189:204]", - "a[190:205]", "a[191:206]", "a[192:207]", "a[193:208]", "a[194:209]", - "a[195:210]", "a[196:211]", "a[197:212]", "a[198:213]", "a[199:214]", - "a[200:215]", "a[201:216]", "a[202:217]", "a[203:218]", "a[204:219]", - "a[205:220]", "a[206:221]", "a[207:222]", "a[208:223]", "a[209:224]", - "a[210:225]", "a[211:226]", "a[212:227]", "a[213:228]", "a[214:229]", - "a[215:230]", "a[216:231]", "a[217:232]", "a[218:233]", "a[219:234]", - "a[220:235]", "a[221:236]", "a[222:237]", "a[223:238]", "a[224:239]", - "a[225:240]", "a[226:241]", "a[227:242]", "a[228:243]", "a[229:244]", - "a[230:245]", "a[231:246]", "a[232:247]", "a[233:248]", "a[234:249]", - "a[235:250]", "a[236:251]", "a[237:252]", "a[238:253]", "a[239:254]", - "a[240:255]" -}; - -static const char *const AGPR1024RegNames[] = { - "a[0:31]", "a[1:32]", "a[2:33]", "a[3:34]", "a[4:35]", - "a[5:36]", "a[6:37]", "a[7:38]", "a[8:39]", "a[9:40]", - "a[10:41]", "a[11:42]", "a[12:43]", "a[13:44]", "a[14:45]", - "a[15:46]", "a[16:47]", "a[17:48]", "a[18:49]", "a[19:50]", - "a[20:51]", "a[21:52]", "a[22:53]", "a[23:54]", "a[24:55]", - "a[25:56]", "a[26:57]", "a[27:58]", "a[28:59]", "a[29:60]", - "a[30:61]", "a[31:62]", "a[32:63]", "a[33:64]", "a[34:65]", - "a[35:66]", "a[36:67]", "a[37:68]", "a[38:69]", "a[39:70]", - "a[40:71]", "a[41:72]", "a[42:73]", "a[43:74]", "a[44:75]", 
- "a[45:76]", "a[46:77]", "a[47:78]", "a[48:79]", "a[49:80]", - "a[50:81]", "a[51:82]", "a[52:83]", "a[53:84]", "a[54:85]", - "a[55:86]", "a[56:87]", "a[57:88]", "a[58:89]", "a[59:90]", - "a[60:91]", "a[61:92]", "a[62:93]", "a[63:94]", "a[64:95]", - "a[65:96]", "a[66:97]", "a[67:98]", "a[68:99]", "a[69:100]", - "a[70:101]", "a[71:102]", "a[72:103]", "a[73:104]", "a[74:105]", - "a[75:106]", "a[76:107]", "a[77:108]", "a[78:109]", "a[79:110]", - "a[80:111]", "a[81:112]", "a[82:113]", "a[83:114]", "a[84:115]", - "a[85:116]", "a[86:117]", "a[87:118]", "a[88:119]", "a[89:120]", - "a[90:121]", "a[91:122]", "a[92:123]", "a[93:124]", "a[94:125]", - "a[95:126]", "a[96:127]", "a[97:128]", "a[98:129]", "a[99:130]", - "a[100:131]", "a[101:132]", "a[102:133]", "a[103:134]", "a[104:135]", - "a[105:136]", "a[106:137]", "a[107:138]", "a[108:139]", "a[109:140]", - "a[110:141]", "a[111:142]", "a[112:143]", "a[113:144]", "a[114:145]", - "a[115:146]", "a[116:147]", "a[117:148]", "a[118:149]", "a[119:150]", - "a[120:151]", "a[121:152]", "a[122:153]", "a[123:154]", "a[124:155]", - "a[125:156]", "a[126:157]", "a[127:158]", "a[128:159]", "a[129:160]", - "a[130:161]", "a[131:162]", "a[132:163]", "a[133:164]", "a[134:165]", - "a[135:166]", "a[136:167]", "a[137:168]", "a[138:169]", "a[139:170]", - "a[140:171]", "a[141:172]", "a[142:173]", "a[143:174]", "a[144:175]", - "a[145:176]", "a[146:177]", "a[147:178]", "a[148:179]", "a[149:180]", - "a[150:181]", "a[151:182]", "a[152:183]", "a[153:184]", "a[154:185]", - "a[155:186]", "a[156:187]", "a[157:188]", "a[158:189]", "a[159:190]", - "a[160:191]", "a[161:192]", "a[162:193]", "a[163:194]", "a[164:195]", - "a[165:196]", "a[166:197]", "a[167:198]", "a[168:199]", "a[169:200]", - "a[170:201]", "a[171:202]", "a[172:203]", "a[173:204]", "a[174:205]", - "a[175:206]", "a[176:207]", "a[177:208]", "a[178:209]", "a[179:210]", - "a[180:211]", "a[181:212]", "a[182:213]", "a[183:214]", "a[184:215]", - "a[185:216]", "a[186:217]", "a[187:218]", "a[188:219]", 
"a[189:220]", - "a[190:221]", "a[191:222]", "a[192:223]", "a[193:224]", "a[194:225]", - "a[195:226]", "a[196:227]", "a[197:228]", "a[198:229]", "a[199:230]", - "a[200:231]", "a[201:232]", "a[202:233]", "a[203:234]", "a[204:235]", - "a[205:236]", "a[206:237]", "a[207:238]", "a[208:239]", "a[209:240]", - "a[210:241]", "a[211:242]", "a[212:243]", "a[213:244]", "a[214:245]", - "a[215:246]", "a[216:247]", "a[217:248]", "a[218:249]", "a[219:250]", - "a[220:251]", "a[221:252]", "a[222:253]", "a[223:254]", "a[224:255]" -}; - -#endif diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index 5dbb63dea467f..ab82ae4a6653d 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -59,7 +59,6 @@ add_llvm_target(AMDGPUCodeGen AMDGPUOpenCLEnqueuedBlockLowering.cpp AMDGPUPromoteAlloca.cpp AMDGPUPropagateAttributes.cpp - AMDGPURegAsmNames.inc.cpp AMDGPURegisterBankInfo.cpp AMDGPURegisterInfo.cpp AMDGPURewriteOutArguments.cpp diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index 0f62f039763ef..b544d1ef36053 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -12,6 +12,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUINSTPRINTER_H +#include "AMDGPUMCTargetDesc.h" #include "llvm/MC/MCInstPrinter.h" namespace llvm { @@ -25,7 +26,8 @@ class AMDGPUInstPrinter : public MCInstPrinter { //Autogenerated by tblgen void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo); + static const char *getRegisterName(unsigned RegNo, + unsigned AltIdx = AMDGPU::NoRegAltName); void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, const MCSubtargetInfo &STI) override; diff --git 
a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 7c2839ccb4c09..483793fe4dcbb 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -16,6 +16,7 @@ #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" +#include "MCTargetDesc/AMDGPUInstPrinter.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineDominators.h" @@ -1346,65 +1347,6 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, } StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const { - #define AMDGPU_REG_ASM_NAMES - #include "AMDGPURegAsmNames.inc.cpp" - - #define REG_RANGE(BeginReg, EndReg, RegTable) \ - if (Reg >= BeginReg && Reg <= EndReg) { \ - unsigned Index = Reg - BeginReg; \ - assert(Index < array_lengthof(RegTable)); \ - return RegTable[Index]; \ - } - - REG_RANGE(AMDGPU::VGPR0, AMDGPU::VGPR255, VGPR32RegNames); - REG_RANGE(AMDGPU::SGPR0, AMDGPU::SGPR105, SGPR32RegNames); - REG_RANGE(AMDGPU::AGPR0, AMDGPU::AGPR255, AGPR32RegNames); - REG_RANGE(AMDGPU::VGPR0_VGPR1, AMDGPU::VGPR254_VGPR255, VGPR64RegNames); - REG_RANGE(AMDGPU::SGPR0_SGPR1, AMDGPU::SGPR104_SGPR105, SGPR64RegNames); - REG_RANGE(AMDGPU::AGPR0_AGPR1, AMDGPU::AGPR254_AGPR255, AGPR64RegNames); - REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2, AMDGPU::VGPR253_VGPR254_VGPR255, - VGPR96RegNames); - - REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3, - AMDGPU::VGPR252_VGPR253_VGPR254_VGPR255, - VGPR128RegNames); - REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, - AMDGPU::SGPR100_SGPR101_SGPR102_SGPR103, - SGPR128RegNames); - REG_RANGE(AMDGPU::AGPR0_AGPR1_AGPR2_AGPR3, - AMDGPU::AGPR252_AGPR253_AGPR254_AGPR255, - AGPR128RegNames); - - REG_RANGE(AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7, - AMDGPU::VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255, - VGPR256RegNames); - - REG_RANGE( - 
AMDGPU::VGPR0_VGPR1_VGPR2_VGPR3_VGPR4_VGPR5_VGPR6_VGPR7_VGPR8_VGPR9_VGPR10_VGPR11_VGPR12_VGPR13_VGPR14_VGPR15, - AMDGPU::VGPR240_VGPR241_VGPR242_VGPR243_VGPR244_VGPR245_VGPR246_VGPR247_VGPR248_VGPR249_VGPR250_VGPR251_VGPR252_VGPR253_VGPR254_VGPR255, - VGPR512RegNames); - REG_RANGE( - AMDGPU::AGPR0_AGPR1_AGPR2_AGPR3_AGPR4_AGPR5_AGPR6_AGPR7_AGPR8_AGPR9_AGPR10_AGPR11_AGPR12_AGPR13_AGPR14_AGPR15, - AMDGPU::AGPR240_AGPR241_AGPR242_AGPR243_AGPR244_AGPR245_AGPR246_AGPR247_AGPR248_AGPR249_AGPR250_AGPR251_AGPR252_AGPR253_AGPR254_AGPR255, - AGPR512RegNames); - - REG_RANGE(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7, - AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103, - SGPR256RegNames); - - REG_RANGE( - AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7_SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15, - AMDGPU::SGPR88_SGPR89_SGPR90_SGPR91_SGPR92_SGPR93_SGPR94_SGPR95_SGPR96_SGPR97_SGPR98_SGPR99_SGPR100_SGPR101_SGPR102_SGPR103, - SGPR512RegNames - ); - - REG_RANGE( - AMDGPU::AGPR0_AGPR1_AGPR2_AGPR3_AGPR4_AGPR5_AGPR6_AGPR7_AGPR8_AGPR9_AGPR10_AGPR11_AGPR12_AGPR13_AGPR14_AGPR15_AGPR16_AGPR17_AGPR18_AGPR19_AGPR20_AGPR21_AGPR22_AGPR23_AGPR24_AGPR25_AGPR26_AGPR27_AGPR28_AGPR29_AGPR30_AGPR31, - AMDGPU::AGPR224_AGPR225_AGPR226_AGPR227_AGPR228_AGPR229_AGPR230_AGPR231_AGPR232_AGPR233_AGPR234_AGPR235_AGPR236_AGPR237_AGPR238_AGPR239_AGPR240_AGPR241_AGPR242_AGPR243_AGPR244_AGPR245_AGPR246_AGPR247_AGPR248_AGPR249_AGPR250_AGPR251_AGPR252_AGPR253_AGPR254_AGPR255, - AGPR1024RegNames); - -#undef REG_RANGE - // FIXME: Rename flat_scr so we don't need to special case this. switch (Reg) { case AMDGPU::FLAT_SCR: @@ -1414,9 +1356,24 @@ StringRef SIRegisterInfo::getRegAsmName(unsigned Reg) const { case AMDGPU::FLAT_SCR_HI: return "flat_scratch_hi"; default: - // For the special named registers the default is fine. 
- return TargetRegisterInfo::getRegAsmName(Reg); + break; + } + + const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg); + unsigned Size = getRegSizeInBits(*RC); + unsigned AltName = AMDGPU::NoRegAltName; + + switch (Size) { + case 32: AltName = AMDGPU::Reg32; break; + case 64: AltName = AMDGPU::Reg64; break; + case 96: AltName = AMDGPU::Reg96; break; + case 128: AltName = AMDGPU::Reg128; break; + case 160: AltName = AMDGPU::Reg160; break; + case 256: AltName = AMDGPU::Reg256; break; + case 512: AltName = AMDGPU::Reg512; break; + case 1024: AltName = AMDGPU::Reg1024; break; } + return AMDGPUInstPrinter::getRegisterName(Reg, AltName); } // FIXME: This is very slow. It might be worth creating a map from physreg to diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 4767f3c30ed32..353347073b877 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -37,31 +37,63 @@ class getSubRegs { !if(!eq(size, 16), ret16, ret32)))))); } +let Namespace = "AMDGPU" in { +defset list AllRegAltNameIndices = { + def Reg32 : RegAltNameIndex; + def Reg64 : RegAltNameIndex; + def Reg96 : RegAltNameIndex; + def Reg128 : RegAltNameIndex; + def Reg160 : RegAltNameIndex; + def Reg256 : RegAltNameIndex; + def Reg512 : RegAltNameIndex; + def Reg1024 : RegAltNameIndex; +} +} + //===----------------------------------------------------------------------===// // Declarations that describe the SI registers //===----------------------------------------------------------------------===// -class SIReg regIdx = 0> : Register, +class SIReg regIdx = 0, string prefix = "", + int regNo = !cast(regIdx)> : + Register, DwarfRegNum<[!cast(HWEncoding)]> { let Namespace = "AMDGPU"; + let RegAltNameIndices = AllRegAltNameIndices; // This is the not yet the complete register encoding. An additional // bit is set for VGPRs. 
let HWEncoding = regIdx; } +class SIRegisterWithSubRegs subregs> : + RegisterWithSubRegs { + let RegAltNameIndices = AllRegAltNameIndices; + let AltNames = [ n, n, n, n, n, n, n, n ]; +} + // Special Registers def VCC_LO : SIReg<"vcc_lo", 106>; def VCC_HI : SIReg<"vcc_hi", 107>; // Pseudo-registers: Used as placeholders during isel and immediately // replaced, never seeing the verifier. -def PRIVATE_RSRC_REG : SIReg<"", 0>; -def FP_REG : SIReg<"", 0>; -def SP_REG : SIReg<"", 0>; -def SCRATCH_WAVE_OFFSET_REG : SIReg<"", 0>; +def PRIVATE_RSRC_REG : SIReg<"private_rsrc", 0>; +def FP_REG : SIReg<"fp", 0>; +def SP_REG : SIReg<"sp", 0>; +def SCRATCH_WAVE_OFFSET_REG : SIReg<"scratch_wave_offset", 0>; // VCC for 64-bit instructions -def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>, +def VCC : SIRegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -71,7 +103,7 @@ def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>, def EXEC_LO : SIReg<"exec_lo", 126>; def EXEC_HI : SIReg<"exec_hi", 127>; -def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>, +def EXEC : SIRegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -86,7 +118,7 @@ def SRC_SCC : SIReg<"src_scc", 253>; // 1-bit pseudo register, for codegen only. // Should never be emitted. 
-def SCC : SIReg<"">; +def SCC : SIReg<"scc">; def M0 : SIReg <"m0", 124>; def SGPR_NULL : SIReg<"null", 125>; @@ -102,7 +134,7 @@ def LDS_DIRECT : SIReg <"lds_direct", 254>; def XNACK_MASK_LO : SIReg<"xnack_mask_lo", 104>; def XNACK_MASK_HI : SIReg<"xnack_mask_hi", 105>; -def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>, +def XNACK_MASK : SIRegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -113,7 +145,7 @@ def XNACK_MASK : RegisterWithSubRegs<"xnack_mask", [XNACK_MASK_LO, XNACK_MASK_HI def TBA_LO : SIReg<"tba_lo", 108>; def TBA_HI : SIReg<"tba_hi", 109>; -def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>, +def TBA : SIRegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -123,7 +155,7 @@ def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>, def TMA_LO : SIReg<"tma_lo", 110>; def TMA_HI : SIReg<"tma_hi", 111>; -def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>, +def TMA : SIRegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -133,7 +165,7 @@ def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>, foreach Index = 0-15 in { def TTMP#Index#_vi : SIReg<"ttmp"#Index, !add(112, Index)>; def TTMP#Index#_gfx9_gfx10 : SIReg<"ttmp"#Index, !add(108, Index)>; - def TTMP#Index : SIReg<"", 0>; + def TTMP#Index : SIReg<"ttmp"#Index, 0>; } multiclass FLAT_SCR_LOHI_m ci_e, bits<16> vi_e> { @@ -143,7 +175,7 @@ multiclass FLAT_SCR_LOHI_m ci_e, bits<16> vi_e> { } class FlatReg encoding> : - RegisterWithSubRegs<"flat_scratch", [lo, hi]>, + SIRegisterWithSubRegs<"flat_scratch", [lo, hi]>, DwarfRegAlias { let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; @@ -159,19 +191,19 @@ def FLAT_SCR : FlatReg; // SGPR registers foreach Index = 0-105 in { - def SGPR#Index : SIReg <"SGPR"#Index, Index>; + def 
SGPR#Index : SIReg <"SGPR"#Index, Index, "S">; } // VGPR registers foreach Index = 0-255 in { - def VGPR#Index : SIReg <"VGPR"#Index, Index> { + def VGPR#Index : SIReg <"VGPR"#Index, Index, "V"> { let HWEncoding{8} = 1; } } // AccVGPR registers foreach Index = 0-255 in { - def AGPR#Index : SIReg <"AGPR"#Index, Index> { + def AGPR#Index : SIReg <"AGPR"#Index, Index, "A"> { let HWEncoding{8} = 1; } } @@ -194,7 +226,7 @@ def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> { // SGPR 32-bit registers def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add (sequence "SGPR%u", 0, 105))> { + (add (sequence "SGPR%u", 0, 105)), Reg32> { // Give all SGPR classes higher priority than VGPR classes, because // we want to spill SGPRs to VGPRs. let AllocationPriority = 9; @@ -342,7 +374,7 @@ class TmpRegTuplesBase indices = getSubRegs.ret, int index1 = !add(index, !add(size, -1)), string name = "ttmp["#index#":"#index1#"]"> : - RegisterWithSubRegs { + SIRegisterWithSubRegs { let HWEncoding = subRegs[0].HWEncoding; let SubRegIndices = indices; } @@ -419,7 +451,7 @@ def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TT // VGPR 32-bit registers // i16/f16 only on VI+ def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add (sequence "VGPR%u", 0, 255))> { + (add (sequence "VGPR%u", 0, 255)), Reg32> { let AllocationPriority = 1; let Size = 32; } @@ -517,7 +549,7 @@ def VGPR_1024 : RegisterTuples.ret, // AccVGPR 32-bit registers def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add (sequence "AGPR%u", 0, 255))> { + (add (sequence "AGPR%u", 0, 255)), Reg32> { let AllocationPriority = 1; let Size = 32; } @@ -593,19 +625,19 @@ def AGPR_1024 : RegisterTuples.ret, //===----------------------------------------------------------------------===// def Pseudo_SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add FP_REG, SP_REG, 
SCRATCH_WAVE_OFFSET_REG)> { + (add FP_REG, SP_REG, SCRATCH_WAVE_OFFSET_REG), Reg32> { let isAllocatable = 0; let CopyCost = -1; } def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32, - (add PRIVATE_RSRC_REG)> { + (add PRIVATE_RSRC_REG), Reg128> { let isAllocatable = 0; let CopyCost = -1; } def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add LDS_DIRECT)> { + (add LDS_DIRECT), Reg32> { let isAllocatable = 0; let CopyCost = -1; } @@ -616,54 +648,58 @@ def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f1 (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI, SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID, - SRC_VCCZ, SRC_EXECZ, SRC_SCC)> { + SRC_VCCZ, SRC_EXECZ, SRC_SCC), Reg32> { let AllocationPriority = 10; } def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, - (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> { + (add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS), Reg32> { let AllocationPriority = 10; } def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, - (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> { + (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI), Reg32> { let AllocationPriority = 10; } // Register class for all scalar registers (SGPRs + Special Registers) def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, - (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> { + (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI), Reg32> { let AllocationPriority = 10; } def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32, - (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS)> { + (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI, LDS_DIRECT_CLASS), + Reg32> { let isAllocatable 
= 0; } -def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, (add SGPR_64Regs)> { +def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32, + (add SGPR_64Regs), Reg64> { let CopyCost = 1; let AllocationPriority = 11; } // CCR (call clobbered registers) SGPR 64-bit registers -def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, (add (trunc SGPR_64, 16))> { +def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, + (add (trunc SGPR_64, 16)), Reg64> { let CopyCost = SGPR_64.CopyCost; let AllocationPriority = SGPR_64.AllocationPriority; } -def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, (add TTMP_64Regs)> { +def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, + (add TTMP_64Regs)> { let isAllocatable = 0; } def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, - (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> { + (add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA), Reg64> { let CopyCost = 1; let AllocationPriority = 13; } def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32, - (add SReg_64_XEXEC, EXEC)> { + (add SReg_64_XEXEC, EXEC), Reg64> { let CopyCost = 1; let AllocationPriority = 13; } @@ -686,25 +722,27 @@ let CopyCost = 2 in { // There are no 3-component scalar instructions, but this is needed // for symmetry with VGPRs. 
def SGPR_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, - (add SGPR_96Regs)> { + (add SGPR_96Regs), Reg96> { let AllocationPriority = 14; } def SReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, - (add SGPR_96)> { + (add SGPR_96), Reg96> { let AllocationPriority = 14; } -def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, (add SGPR_128Regs)> { +def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, + (add SGPR_128Regs), Reg128> { let AllocationPriority = 15; } -def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, (add TTMP_128Regs)> { +def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64], 32, + (add TTMP_128Regs)> { let isAllocatable = 0; } def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, - (add SGPR_128, TTMP_128)> { + (add SGPR_128, TTMP_128), Reg128> { let AllocationPriority = 15; } @@ -713,16 +751,17 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, // There are no 5-component scalar instructions, but this is needed // for symmetry with VGPRs. 
def SGPR_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, - (add SGPR_160Regs)> { + (add SGPR_160Regs), Reg160> { let AllocationPriority = 16; } def SReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, - (add SGPR_160)> { + (add SGPR_160), Reg160> { let AllocationPriority = 16; } -def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs)> { +def SGPR_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256Regs), + Reg256> { let AllocationPriority = 17; } @@ -731,44 +770,48 @@ def TTMP_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add TTMP_256Regs)> { } def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, - (add SGPR_256, TTMP_256)> { + (add SGPR_256, TTMP_256), Reg256> { // Requires 4 s_mov_b64 to copy let CopyCost = 4; let AllocationPriority = 17; } -def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512Regs)> { +def SGPR_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, + (add SGPR_512Regs), Reg512> { let AllocationPriority = 18; } -def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add TTMP_512Regs)> { +def TTMP_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, + (add TTMP_512Regs)> { let isAllocatable = 0; } def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, - (add SGPR_512, TTMP_512)> { + (add SGPR_512, TTMP_512), Reg512> { // Requires 8 s_mov_b64 to copy let CopyCost = 8; let AllocationPriority = 18; } def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add VGPR_32, LDS_DIRECT_CLASS)> { + (add VGPR_32, LDS_DIRECT_CLASS), Reg32> { let isAllocatable = 0; } -def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, (add SGPR_1024Regs)> { +def SGPR_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, + (add SGPR_1024Regs), Reg1024> { let AllocationPriority = 19; } def SReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, - (add SGPR_1024)> { + (add SGPR_1024), Reg1024> { let CopyCost = 16; let AllocationPriority = 19; } // Register class 
for all vector registers (VGPRs + Interploation Registers) -def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, (add VGPR_64)> { +def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, + (add VGPR_64), Reg64> { let Size = 64; // Requires 2 v_mov_b32 to copy @@ -776,7 +819,7 @@ def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32 let AllocationPriority = 2; } -def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96)> { +def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96), Reg96> { let Size = 96; // Requires 3 v_mov_b32 to copy @@ -784,7 +827,8 @@ def VReg_96 : RegisterClass<"AMDGPU", [v3i32, v3f32], 32, (add VGPR_96)> { let AllocationPriority = 3; } -def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VGPR_128)> { +def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, + (add VGPR_128), Reg128> { let Size = 128; // Requires 4 v_mov_b32 to copy @@ -792,7 +836,8 @@ def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VG let AllocationPriority = 4; } -def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, (add VGPR_160)> { +def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, + (add VGPR_160), Reg160> { let Size = 160; // Requires 5 v_mov_b32 to copy @@ -800,32 +845,37 @@ def VReg_160 : RegisterClass<"AMDGPU", [v5i32, v5f32], 32, (add VGPR_160)> { let AllocationPriority = 5; } -def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> { +def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, + (add VGPR_256), Reg256> { let Size = 256; let CopyCost = 8; let AllocationPriority = 6; } -def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> { +def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, + (add VGPR_512), Reg512> { let Size = 512; let CopyCost = 16; let AllocationPriority = 7; } -def VReg_1024 : RegisterClass<"AMDGPU", 
[v32i32, v32f32], 32, (add VGPR_1024)> { +def VReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, + (add VGPR_1024), Reg1024> { let Size = 1024; let CopyCost = 32; let AllocationPriority = 8; } -def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, (add AGPR_64)> { +def AReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32, v4f16, v4i16], 32, + (add AGPR_64), Reg64> { let Size = 64; let CopyCost = 5; let AllocationPriority = 2; } -def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add AGPR_128)> { +def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, + (add AGPR_128), Reg128> { let Size = 128; // Requires 4 v_accvgpr_write and 4 v_accvgpr_read to copy + burn 1 vgpr @@ -833,38 +883,41 @@ def AReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add AG let AllocationPriority = 4; } -def AReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add AGPR_512)> { +def AReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, + (add AGPR_512), Reg512> { let Size = 512; let CopyCost = 33; let AllocationPriority = 7; } -// TODO: add v32f32 value type -def AReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, (add AGPR_1024)> { +def AReg_1024 : RegisterClass<"AMDGPU", [v32i32, v32f32], 32, + (add AGPR_1024), Reg1024> { let Size = 1024; let CopyCost = 65; let AllocationPriority = 8; } -def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> { +def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32), Reg32> { let Size = 32; } def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, - (add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> { + (add VGPR_32, SReg_32, LDS_DIRECT_CLASS), Reg32> { let isAllocatable = 0; } -def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> { +def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64), + Reg64> { let isAllocatable = 0; } def AV_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, 
v2i16, v2f16], 32, - (add AGPR_32, VGPR_32)> { + (add AGPR_32, VGPR_32), Reg32> { let isAllocatable = 0; } -def AV_64 : RegisterClass<"AMDGPU", [i64, f64, v4f16], 32, (add AReg_64, VReg_64)> { +def AV_64 : RegisterClass<"AMDGPU", [i64, f64, v4f16], 32, + (add AReg_64, VReg_64), Reg64> { let isAllocatable = 0; } From 9304e59c013122aeb8f5bad8f85a89770301b8f1 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Tue, 16 Jul 2019 23:44:33 +0000 Subject: [PATCH 308/451] Fix OpenCLCXX test on 32-bit Windows where thiscall is present llvm-svn: 366284 --- clang/test/SemaOpenCLCXX/address-space-deduction.cl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/test/SemaOpenCLCXX/address-space-deduction.cl b/clang/test/SemaOpenCLCXX/address-space-deduction.cl index f66d224e25414..08668951dbca6 100644 --- a/clang/test/SemaOpenCLCXX/address-space-deduction.cl +++ b/clang/test/SemaOpenCLCXX/address-space-deduction.cl @@ -30,8 +30,8 @@ struct c2 { template struct x1 { -//CHECK: -CXXMethodDecl {{.*}} operator= 'x1 &(const x1 &) __generic' -//CHECK: -CXXMethodDecl {{.*}} operator= '__generic x1 &(const __generic x1 &) __generic' +//CHECK: -CXXMethodDecl {{.*}} operator= 'x1 &(const x1 &){{( __attribute__.*)?}} __generic' +//CHECK: -CXXMethodDecl {{.*}} operator= '__generic x1 &(const __generic x1 &){{( __attribute__.*)?}} __generic' x1& operator=(const x1& xx) { y = xx.y; return *this; @@ -41,8 +41,8 @@ struct x1 { template struct x2 { -//CHECK: -CXXMethodDecl {{.*}} foo 'void (x1 *) __generic' -//CHECK: -CXXMethodDecl {{.*}} foo 'void (__generic x1 *) __generic' +//CHECK: -CXXMethodDecl {{.*}} foo 'void (x1 *){{( __attribute__.*)?}} __generic' +//CHECK: -CXXMethodDecl {{.*}} foo 'void (__generic x1 *){{( __attribute__.*)?}} __generic' void foo(x1* xx) { m[0] = *xx; } @@ -57,9 +57,9 @@ void bar(__global x1 *xx, __global x2 *bar) { template class x3 : public T { public: - //CHECK: -CXXConstructorDecl {{.*}} x3 'void (const x3 &) __generic' + //CHECK: 
-CXXConstructorDecl {{.*}} x3 'void (const x3 &){{( __attribute__.*)?}} __generic' x3(const x3 &t); }; -//CHECK: -CXXConstructorDecl {{.*}} x3 'void (const x3 &) __generic' +//CHECK: -CXXConstructorDecl {{.*}} x3 'void (const x3 &){{( __attribute__.*)?}} __generic' template x3::x3(const x3 &t) {} From 98a48794ecdfb131c73abc7ef0dd45435863ff4b Mon Sep 17 00:00:00 2001 From: Nathan Lanza Date: Tue, 16 Jul 2019 23:54:17 +0000 Subject: [PATCH 309/451] Don't require python exe and lib versions to match while crosscompiling Summary: While cross compiling, the python executable is used to run a handful of scripts while the libraries are linked and headers are included. Theoretically it's possible for the versions to match completely, but requiring the build to match 2.7.10 to 2.7.15 is unnecessary. Subscribers: mgorny Differential Revision: https://reviews.llvm.org/D64822 llvm-svn: 366285 --- lldb/cmake/modules/LLDBConfig.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index ad83153a18898..ccb5de568b43c 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -198,7 +198,8 @@ if (NOT LLDB_DISABLE_PYTHON) find_package(PythonLibs REQUIRED) endif() - if (NOT PYTHON_VERSION_STRING VERSION_EQUAL PYTHONLIBS_VERSION_STRING) + if (NOT PYTHON_VERSION_STRING VERSION_EQUAL PYTHONLIBS_VERSION_STRING AND + NOT CMAKE_CROSSCOMPILING) message(FATAL_ERROR "Found incompatible Python interpreter (${PYTHON_VERSION_STRING}) and Python libraries (${PYTHONLIBS_VERSION_STRING})") endif() From e56865d40c1691fdd215179c827be66f0cd5bae0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 17 Jul 2019 00:01:03 +0000 Subject: [PATCH 310/451] AMDGPU: Add some missing builtins llvm-svn: 366286 --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 17 +++++ clang/lib/CodeGen/CGBuiltin.cpp | 6 ++ .../CodeGenOpenCL/builtins-amdgcn-gfx10.cl | 24 +++++++ 
clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 64 +++++++++++++++++++ .../builtins-amdgcn-error-gfx10-param.cl | 18 ++++++ .../SemaOpenCL/builtins-amdgcn-error-gfx10.cl | 15 +++++ 6 files changed, 144 insertions(+) create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index e882d3b87c666..2f8fb9000a76a 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -108,6 +108,16 @@ BUILTIN(__builtin_amdgcn_ds_fminf, "ff*3fIiIiIb", "n") BUILTIN(__builtin_amdgcn_ds_fmaxf, "ff*3fIiIiIb", "n") BUILTIN(__builtin_amdgcn_ds_append, "ii*3", "n") BUILTIN(__builtin_amdgcn_ds_consume, "ii*3", "n") +BUILTIN(__builtin_amdgcn_alignbit, "UiUiUiUi", "nc") +BUILTIN(__builtin_amdgcn_alignbyte, "UiUiUiUi", "nc") +BUILTIN(__builtin_amdgcn_ubfe, "UiUiUiUi", "nc") +BUILTIN(__builtin_amdgcn_sbfe, "UiUiUiUi", "nc") +BUILTIN(__builtin_amdgcn_cvt_pkrtz, "E2hff", "nc") +BUILTIN(__builtin_amdgcn_cvt_pknorm_i16, "E2sff", "nc") +BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc") +BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc") +BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc") +BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc") //===----------------------------------------------------------------------===// // CI+ only builtins. @@ -162,6 +172,13 @@ TARGET_BUILTIN(__builtin_amdgcn_udot4, "UiUiUiUiIb", "nc", "dot2-insts") TARGET_BUILTIN(__builtin_amdgcn_sdot8, "SiSiSiSiIb", "nc", "dot1-insts") TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUiIb", "nc", "dot2-insts") +//===----------------------------------------------------------------------===// +// GFX10+ only builtins. 
+//===----------------------------------------------------------------------===// +TARGET_BUILTIN(__builtin_amdgcn_permlane16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts") +TARGET_BUILTIN(__builtin_amdgcn_permlanex16, "UiUiUiUiUiIbIb", "nc", "gfx10-insts") +TARGET_BUILTIN(__builtin_amdgcn_mov_dpp8, "UiUiIUi", "nc", "gfx10-insts") + //===----------------------------------------------------------------------===// // Special builtins. //===----------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 1658be5a88e02..a300bab49f9c8 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -12679,6 +12679,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ds_swizzle: return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle); + case AMDGPU::BI__builtin_amdgcn_mov_dpp8: + return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8); case AMDGPU::BI__builtin_amdgcn_mov_dpp: case AMDGPU::BI__builtin_amdgcn_update_dpp: { llvm::SmallVector Args; @@ -12744,6 +12746,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); case AMDGPU::BI__builtin_amdgcn_lerp: return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp); + case AMDGPU::BI__builtin_amdgcn_ubfe: + return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe); + case AMDGPU::BI__builtin_amdgcn_sbfe: + return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe); case AMDGPU::BI__builtin_amdgcn_uicmp: case AMDGPU::BI__builtin_amdgcn_uicmpl: case AMDGPU::BI__builtin_amdgcn_sicmp: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl new file mode 100644 index 0000000000000..3921cb90c3a57 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl @@ -0,0 +1,24 @@ +// REQUIRES: amdgpu-registered-target 
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck %s + +typedef unsigned int uint; + +// CHECK-LABEL: @test_permlane16( +// CHECK: call i32 @llvm.amdgcn.permlane16(i32 %a, i32 %b, i32 %c, i32 %d, i1 true, i1 true) +void test_permlane16(global uint* out, uint a, uint b, uint c, uint d) { + *out = __builtin_amdgcn_permlane16(a, b, c, d, 1, 1); +} + +// CHECK-LABEL: @test_permlanex16( +// CHECK: call i32 @llvm.amdgcn.permlanex16(i32 %a, i32 %b, i32 %c, i32 %d, i1 true, i1 true) +void test_permlanex16(global uint* out, uint a, uint b, uint c, uint d) { + *out = __builtin_amdgcn_permlanex16(a, b, c, d, 1, 1); +} + +// CHECK-LABEL: @test_mov_dpp8( +// CHECK: call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %a, i32 1) +void test_mov_dpp8(global uint* out, uint a) { + *out = __builtin_amdgcn_mov_dpp8(a, 1); +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index e4c40d92266fd..bbae5ea24be0a 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -5,6 +5,10 @@ typedef unsigned long ulong; typedef unsigned int uint; +typedef unsigned short ushort; +typedef half __attribute__((ext_vector_type(2))) half2; +typedef short __attribute__((ext_vector_type(2))) short2; +typedef ushort __attribute__((ext_vector_type(2))) ushort2; // CHECK-LABEL: @test_div_scale_f64 // CHECK: call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) @@ -590,6 +594,66 @@ kernel void test_mbcnt_hi(global uint* out, uint src0, uint src1) { *out = __builtin_amdgcn_mbcnt_hi(src0, src1); } +// CHECK-LABEL: @test_alignbit( +// CHECK: tail call i32 @llvm.amdgcn.alignbit(i32 %src0, i32 %src1, i32 %src2) +kernel void 
test_alignbit(global uint* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_alignbit(src0, src1, src2); +} + +// CHECK-LABEL: @test_alignbyte( +// CHECK: tail call i32 @llvm.amdgcn.alignbyte(i32 %src0, i32 %src1, i32 %src2) +kernel void test_alignbyte(global uint* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_alignbyte(src0, src1, src2); +} + +// CHECK-LABEL: @test_ubfe( +// CHECK: tail call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src2) +kernel void test_ubfe(global uint* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_ubfe(src0, src1, src2); +} + +// CHECK-LABEL: @test_sbfe( +// CHECK: tail call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src2) +kernel void test_sbfe(global uint* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_sbfe(src0, src1, src2); +} + +// CHECK-LABEL: @test_cvt_pkrtz( +// CHECK: tail call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %src0, float %src1) +kernel void test_cvt_pkrtz(global half2* out, float src0, float src1) { + *out = __builtin_amdgcn_cvt_pkrtz(src0, src1); +} + +// CHECK-LABEL: @test_cvt_pknorm_i16( +// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %src0, float %src1) +kernel void test_cvt_pknorm_i16(global short2* out, float src0, float src1) { + *out = __builtin_amdgcn_cvt_pknorm_i16(src0, src1); +} + +// CHECK-LABEL: @test_cvt_pknorm_u16( +// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %src0, float %src1) +kernel void test_cvt_pknorm_u16(global ushort2* out, float src0, float src1) { + *out = __builtin_amdgcn_cvt_pknorm_u16(src0, src1); +} + +// CHECK-LABEL: @test_cvt_pk_i16( +// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %src0, i32 %src1) +kernel void test_cvt_pk_i16(global short2* out, int src0, int src1) { + *out = __builtin_amdgcn_cvt_pk_i16(src0, src1); +} + +// CHECK-LABEL: @test_cvt_pk_u16( +// CHECK: tail call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %src0, i32 %src1) +kernel void 
test_cvt_pk_u16(global ushort2* out, uint src0, uint src1) { + *out = __builtin_amdgcn_cvt_pk_u16(src0, src1); +} + +// CHECK-LABEL: @test_cvt_pk_u8_f32 +// CHECK: tail call i32 @llvm.amdgcn.cvt.pk.u8.f32(float %src0, i32 %src1, i32 %src2) +kernel void test_cvt_pk_u8_f32(global uint* out, float src0, uint src1, uint src2) { + *out = __builtin_amdgcn_cvt_pk_u8_f32(src0, src1, src2); +} + // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent } diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl new file mode 100644 index 0000000000000..75d9cd3831c55 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10-param.cl @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -verify -S -o - %s + +typedef unsigned int uint; + + +void test_permlane16(global uint* out, uint a, uint b, uint c, uint d, uint e) { + *out = __builtin_amdgcn_permlane16(a, b, c, d, e, 1); // expected-error{{argument to '__builtin_amdgcn_permlane16' must be a constant integer}} + *out = __builtin_amdgcn_permlane16(a, b, c, d, 1, e); // expected-error{{argument to '__builtin_amdgcn_permlane16' must be a constant integer}} +} + +void test_permlanex16(global uint* out, uint a, uint b, uint c, uint d, uint e) { + *out = __builtin_amdgcn_permlanex16(a, b, c, d, e, 1); // expected-error{{argument to '__builtin_amdgcn_permlanex16' must be a constant integer}} + *out = __builtin_amdgcn_permlanex16(a, b, c, d, 1, e); // expected-error{{argument to '__builtin_amdgcn_permlanex16' must be a constant integer}} +} + +void test_mov_dpp8(global uint* out, uint a, uint b) { + *out = __builtin_amdgcn_mov_dpp8(a, b); // expected-error{{argument to '__builtin_amdgcn_mov_dpp8' must be a constant integer}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl 
b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl new file mode 100644 index 0000000000000..02c8dc8c1339e --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx10.cl @@ -0,0 +1,15 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu hawaii -verify -S -o - %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu fiji -verify -S -o - %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify -S -o - %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx908 -verify -S -o - %s + +typedef unsigned int uint; + + +void test(global uint* out, uint a, uint b, uint c, uint d) { + *out = __builtin_amdgcn_permlane16(a, b, c, d, 1, 1); // expected-error {{'__builtin_amdgcn_permlane16' needs target feature gfx10-insts}} + *out = __builtin_amdgcn_permlanex16(a, b, c, d, 1, 1); // expected-error {{'__builtin_amdgcn_permlanex16' needs target feature gfx10-insts}} + *out = __builtin_amdgcn_mov_dpp8(a, 1); // expected-error {{'__builtin_amdgcn_mov_dpp8' needs target feature gfx10-insts}} +} From 420f3f6444ac393a3c65382d4c9a7b4f2bb23ed4 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 17 Jul 2019 01:19:30 +0000 Subject: [PATCH 311/451] gn build: Merge r366216 llvm-svn: 366288 --- llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn index 59d15041a5268..19510c1629d36 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn @@ -6,6 +6,7 @@ static_library("Remarks") { sources = [ "Remark.cpp", + "RemarkFormat.cpp", "RemarkParser.cpp", "RemarkStringTable.cpp", "YAMLRemarkParser.cpp", From 67cf3d61ee3c12595afa264c6c079382cb33f8de Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 17 Jul 2019 01:21:14 +0000 Subject: [PATCH 312/451] gn build: 
Merge r366265 llvm-svn: 366289 --- .../secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn index 2bd2a69b4e6ae..b82db708cc894 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn @@ -16,6 +16,7 @@ static_library("readability") { "BracesAroundStatementsCheck.cpp", "ConstReturnTypeCheck.cpp", "ContainerSizeEmptyCheck.cpp", + "ConvertMemberFunctionsToStatic.cpp", "DeleteNullPointerCheck.cpp", "DeletedDefaultCheck.cpp", "ElseAfterReturnCheck.cpp", From 4c77a696ae4d42d791b7443ce387d9f42197e10d Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Wed, 17 Jul 2019 07:02:02 +0000 Subject: [PATCH 313/451] Update email address. llvm-svn: 366291 --- llvm/CREDITS.TXT | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/CREDITS.TXT b/llvm/CREDITS.TXT index dab633c7e3b11..e998fa6bb257d 100644 --- a/llvm/CREDITS.TXT +++ b/llvm/CREDITS.TXT @@ -412,6 +412,10 @@ W: http://vladimir_prus.blogspot.com E: ghost@cs.msu.su D: Made inst_iterator behave like a proper iterator, LowerConstantExprs pass +N: QIU Chaofan +E: qiucofan@cn.ibm.com +D: PowerPC Backend Developer + N: Kalle Raiskila E: kalle.rasikila@nokia.com D: Some bugfixes to CellSPU @@ -530,10 +534,6 @@ N: Zheng Chen E: czhengsz@cn.ibm.com D: PowerPC Backend Developer -N: Qiu Chaofan -E: qiucf@cn.ibm.com -D: PowerPC Backend Developer - N: Djordje Todorovic E: djordje.todorovic@rt-rk.com D: Debug Information From e574f8b3d8910205ff89cf3562088ef50f384638 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Wed, 17 Jul 2019 07:03:17 +0000 Subject: [PATCH 314/451] [Target][NFCI] Rename variable This variable doesn't have anything to do with clang. 
llvm-svn: 366292 --- lldb/source/Target/ABI.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lldb/source/Target/ABI.cpp b/lldb/source/Target/ABI.cpp index d71cee7f58054..28cd9aec665cb 100644 --- a/lldb/source/Target/ABI.cpp +++ b/lldb/source/Target/ABI.cpp @@ -124,11 +124,11 @@ ValueObjectSP ABI::GetReturnValueObject(Thread &thread, CompilerType &ast_type, return_valobj_sp = const_valobj_sp; - ExpressionVariableSP clang_expr_variable_sp( + ExpressionVariableSP expr_variable_sp( persistent_expression_state->CreatePersistentVariable( return_valobj_sp)); - assert(clang_expr_variable_sp); + assert(expr_variable_sp); // Set flags and live data as appropriate @@ -141,21 +141,21 @@ ValueObjectSP ABI::GetReturnValueObject(Thread &thread, CompilerType &ast_type, break; case Value::eValueTypeScalar: case Value::eValueTypeVector: - clang_expr_variable_sp->m_flags |= + expr_variable_sp->m_flags |= ExpressionVariable::EVIsFreezeDried; - clang_expr_variable_sp->m_flags |= + expr_variable_sp->m_flags |= ExpressionVariable::EVIsLLDBAllocated; - clang_expr_variable_sp->m_flags |= + expr_variable_sp->m_flags |= ExpressionVariable::EVNeedsAllocation; break; case Value::eValueTypeLoadAddress: - clang_expr_variable_sp->m_live_sp = live_valobj_sp; - clang_expr_variable_sp->m_flags |= + expr_variable_sp->m_live_sp = live_valobj_sp; + expr_variable_sp->m_flags |= ExpressionVariable::EVIsProgramReference; break; } - return_valobj_sp = clang_expr_variable_sp->GetValueObject(); + return_valobj_sp = expr_variable_sp->GetValueObject(); } return return_valobj_sp; } From 06bf5d853881420757146602521b288afd943f3c Mon Sep 17 00:00:00 2001 From: Nathan Lanza Date: Wed, 17 Jul 2019 07:05:49 +0000 Subject: [PATCH 315/451] [NativePDB] Make GetTranslationUnitDecl return an lldb CompilerDeclCtx Summary: We intend to make PdbAstBuilder abstract and implement PdbAstBuilderClang along with any other languages that wish to use PDBs. This is the first step. 
Differential Revision: https://reviews.llvm.org/D64852 llvm-svn: 366293 --- .../SymbolFile/NativePDB/PdbAstBuilder.cpp | 19 ++++++++++--------- .../SymbolFile/NativePDB/PdbAstBuilder.h | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp index e8fd59c7b7453..eb8da2a51b6cb 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp @@ -214,8 +214,8 @@ PdbAstBuilder::PdbAstBuilder(ObjectFile &obj, PdbIndex &index) BuildParentMap(); } -clang::DeclContext &PdbAstBuilder::GetTranslationUnitDecl() { - return *m_clang.GetTranslationUnitDecl(); +lldb_private::CompilerDeclContext PdbAstBuilder::GetTranslationUnitDecl() { + return ToCompilerDeclContext(*m_clang.GetTranslationUnitDecl()); } std::pair @@ -492,7 +492,7 @@ clang::Decl *PdbAstBuilder::GetOrCreateDeclForUid(PdbSymUid uid) { clang::DeclContext *PdbAstBuilder::GetOrCreateDeclContextForUid(PdbSymUid uid) { if (uid.kind() == PdbSymUidKind::CompilandSym) { if (uid.asCompilandSym().offset == 0) - return &GetTranslationUnitDecl(); + return FromCompilerDeclContext(GetTranslationUnitDecl()); } clang::Decl *decl = GetOrCreateDeclForUid(uid); @@ -507,7 +507,7 @@ PdbAstBuilder::CreateDeclInfoForUndecoratedName(llvm::StringRef name) { MSVCUndecoratedNameParser parser(name); llvm::ArrayRef specs = parser.GetSpecifiers(); - clang::DeclContext *context = &GetTranslationUnitDecl(); + auto context = FromCompilerDeclContext(GetTranslationUnitDecl()); llvm::StringRef uname = specs.back().GetBaseName(); specs = specs.drop_back(); @@ -548,7 +548,7 @@ PdbAstBuilder::GetParentDeclContextForSymbol(const CVSymbol &sym) { StringView name{pub->Name.begin(), pub->Name.size()}; llvm::ms_demangle::SymbolNode *node = demangler.parse(name); if (!node) - return &GetTranslationUnitDecl(); + return 
FromCompilerDeclContext(GetTranslationUnitDecl()); llvm::ArrayRef name_components{ node->Name->Components->Nodes, node->Name->Components->Count - 1}; @@ -569,7 +569,7 @@ PdbAstBuilder::GetParentDeclContextForSymbol(const CVSymbol &sym) { } // It's not a type. It must be a series of namespaces. - clang::DeclContext *context = &GetTranslationUnitDecl(); + auto context = FromCompilerDeclContext(GetTranslationUnitDecl()); while (!name_components.empty()) { std::string ns = name_components.front()->toString(); context = GetOrCreateNamespaceDecl(ns.c_str(), *context); @@ -597,7 +597,7 @@ clang::DeclContext *PdbAstBuilder::GetParentDeclContext(PdbSymUid uid) { PdbTypeSymId type_id = uid.asTypeSym(); auto iter = m_parent_types.find(type_id.index); if (iter == m_parent_types.end()) - return &GetTranslationUnitDecl(); + return FromCompilerDeclContext(GetTranslationUnitDecl()); return GetOrCreateDeclContextForUid(PdbTypeSymId(iter->second)); } case PdbSymUidKind::FieldListMember: @@ -635,7 +635,7 @@ clang::DeclContext *PdbAstBuilder::GetParentDeclContext(PdbSymUid uid) { default: break; } - return &GetTranslationUnitDecl(); + return FromCompilerDeclContext(GetTranslationUnitDecl()); } bool PdbAstBuilder::CompleteType(clang::QualType qt) { @@ -866,7 +866,8 @@ clang::VarDecl *PdbAstBuilder::GetOrCreateVariableDecl(PdbGlobalSymId var_id) { return llvm::dyn_cast(decl); CVSymbol sym = m_index.ReadSymbolRecord(var_id); - return CreateVariableDecl(PdbSymUid(var_id), sym, GetTranslationUnitDecl()); + auto context = FromCompilerDeclContext(GetTranslationUnitDecl()); + return CreateVariableDecl(PdbSymUid(var_id), sym, *context); } clang::TypedefNameDecl * diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.h b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.h index e4241594845a9..8331abeaf47da 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.h +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.h @@ -53,7 +53,7 @@ class PdbAstBuilder { 
// Constructors and Destructors PdbAstBuilder(ObjectFile &obj, PdbIndex &index); - clang::DeclContext &GetTranslationUnitDecl(); + lldb_private::CompilerDeclContext GetTranslationUnitDecl(); clang::Decl *GetOrCreateDeclForUid(PdbSymUid uid); clang::DeclContext *GetOrCreateDeclContextForUid(PdbSymUid uid); From 587d13d4c23500f517a99b8c0517e5e2b5aa0889 Mon Sep 17 00:00:00 2001 From: Serguei Katkov Date: Wed, 17 Jul 2019 07:09:20 +0000 Subject: [PATCH 316/451] [LoopInfo] Fix getUniqueNonLatchExitBlocks It is possible that exit block has two predecessors and one of them is a latch block while another is not. Current algorithm is based on the assumption that all exits are dedicated and therefore we can check only first predecessor of loop exit to find all unique exits. However if we do not consider latch block and it is first predecessor of some exit then this exit will be found. Regression test is added. As a side effect of algorithm re-writing, the restriction that all exits are dedicated is eliminated. Reviewers: reames, fhahn, efriedma Reviewed By: efriedma Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D64787 llvm-svn: 366294 --- llvm/include/llvm/Analysis/LoopInfo.h | 4 -- llvm/include/llvm/Analysis/LoopInfoImpl.h | 47 ++++------------------- llvm/unittests/Analysis/LoopInfoTest.cpp | 43 +++++++++++++++++++++ 3 files changed, 50 insertions(+), 44 deletions(-) diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index f8f0a0c339d58..584eb3a8c854c 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -270,16 +270,12 @@ template class LoopBase { /// Return all unique successor blocks of this loop. /// These are the blocks _outside of the current loop_ which are branched to. - /// This assumes that loop exits are in canonical form, i.e. all exits are - /// dedicated exits. 
void getUniqueExitBlocks(SmallVectorImpl &ExitBlocks) const; /// Return all unique successor blocks of this loop except successors from /// Latch block are not considered. If the exit comes from Latch has also /// non Latch predecessor in a loop it will be added to ExitBlocks. /// These are the blocks _outside of the current loop_ which are branched to. - /// This assumes that loop exits are in canonical form, i.e. all exits are - /// dedicated exits. void getUniqueNonLatchExitBlocks(SmallVectorImpl &ExitBlocks) const; /// If getUniqueExitBlocks would return exactly one block, return that block. diff --git a/llvm/include/llvm/Analysis/LoopInfoImpl.h b/llvm/include/llvm/Analysis/LoopInfoImpl.h index c9f690dac65eb..4c33dac9e21e1 100644 --- a/llvm/include/llvm/Analysis/LoopInfoImpl.h +++ b/llvm/include/llvm/Analysis/LoopInfoImpl.h @@ -101,47 +101,14 @@ template void getUniqueExitBlocksHelper(const LoopT *L, SmallVectorImpl &ExitBlocks, PredicateT Pred) { - typedef GraphTraits BlockTraits; - typedef GraphTraits> InvBlockTraits; - - assert(L->hasDedicatedExits() && - "getUniqueExitBlocks assumes the loop has canonical form exits!"); - - SmallVector SwitchExitBlocks; + assert(!L->isInvalid() && "Loop not in a valid state!"); + SmallPtrSet Visited; auto Filtered = make_filter_range(L->blocks(), Pred); - for (BlockT *Block : Filtered) { - SwitchExitBlocks.clear(); - for (BlockT *Successor : children(Block)) { - // If block is inside the loop then it is not an exit block. - if (L->contains(Successor)) - continue; - - BlockT *FirstPred = *InvBlockTraits::child_begin(Successor); - - // If current basic block is this exit block's first predecessor then only - // insert exit block in to the output ExitBlocks vector. This ensures that - // same exit block is not inserted twice into ExitBlocks vector. 
- if (Block != FirstPred) - continue; - - // If a terminator has more then two successors, for example SwitchInst, - // then it is possible that there are multiple edges from current block to - // one exit block. - if (std::distance(BlockTraits::child_begin(Block), - BlockTraits::child_end(Block)) <= 2) { - ExitBlocks.push_back(Successor); - continue; - } - - // In case of multiple edges from current block to exit block, collect - // only one edge in ExitBlocks. Use switchExitBlocks to keep track of - // duplicate edges. - if (!is_contained(SwitchExitBlocks, Successor)) { - SwitchExitBlocks.push_back(Successor); - ExitBlocks.push_back(Successor); - } - } - } + for (BlockT *BB : Filtered) + for (BlockT *Successor : children(BB)) + if (!L->contains(Successor)) + if (Visited.insert(Successor).second) + ExitBlocks.push_back(Successor); } template diff --git a/llvm/unittests/Analysis/LoopInfoTest.cpp b/llvm/unittests/Analysis/LoopInfoTest.cpp index 953a72aee8e36..4f0047f2fd7fc 100644 --- a/llvm/unittests/Analysis/LoopInfoTest.cpp +++ b/llvm/unittests/Analysis/LoopInfoTest.cpp @@ -1156,3 +1156,46 @@ TEST(LoopInfoTest, LoopUniqueExitBlocks) { EXPECT_TRUE(Exits.size() == 1); }); } + +// Regression test for getUniqueNonLatchExitBlocks functions. +// It should detect the exit if it comes from both latch and non-latch blocks. +TEST(LoopInfoTest, LoopNonLatchUniqueExitBlocks) { + const char *ModuleStr = + "target datalayout = \"e-m:o-i64:64-f80:128-n8:16:32:64-S128\"\n" + "define void @foo(i32 %n, i1 %cond) {\n" + "entry:\n" + " br label %for.cond\n" + "for.cond:\n" + " %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]\n" + " %cmp = icmp slt i32 %i.0, %n\n" + " br i1 %cond, label %for.inc, label %for.end\n" + "for.inc:\n" + " %inc = add nsw i32 %i.0, 1\n" + " br i1 %cmp, label %for.cond, label %for.end, !llvm.loop !0\n" + "for.end:\n" + " ret void\n" + "}\n" + "!0 = distinct !{!0, !1}\n" + "!1 = !{!\"llvm.loop.distribute.enable\", i1 true}\n"; + + // Parse the module. 
+ LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleStr); + + runWithLoopInfo(*M, "foo", [&](Function &F, LoopInfo &LI) { + Function::iterator FI = F.begin(); + // First basic block is entry - skip it. + BasicBlock *Header = &*(++FI); + assert(Header->getName() == "for.cond"); + Loop *L = LI.getLoopFor(Header); + + SmallVector Exits; + // This loop has 1 unique exit. + L->getUniqueExitBlocks(Exits); + EXPECT_TRUE(Exits.size() == 1); + // And one unique non latch exit. + Exits.clear(); + L->getUniqueNonLatchExitBlocks(Exits); + EXPECT_TRUE(Exits.size() == 1); + }); +} From fc1c8f5d7d47139492ed9d6ef0df3c47eb1fbfd4 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Wed, 17 Jul 2019 07:13:42 +0000 Subject: [PATCH 317/451] [Target][NFCI] Remove commented out code llvm-svn: 366295 --- lldb/source/Target/ThreadPlanTracer.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/lldb/source/Target/ThreadPlanTracer.cpp b/lldb/source/Target/ThreadPlanTracer.cpp index 129c0896d491c..4e79b6b1e59d0 100644 --- a/lldb/source/Target/ThreadPlanTracer.cpp +++ b/lldb/source/Target/ThreadPlanTracer.cpp @@ -187,8 +187,6 @@ void ThreadPlanAssemblyTracer::Log() { for (int arg_index = 0; arg_index < num_args; ++arg_index) { Value value; value.SetValueType(Value::eValueTypeScalar); - // value.SetContext (Value::eContextTypeClangType, - // intptr_type.GetOpaqueQualType()); value.SetCompilerType(intptr_type); value_list.PushValue(value); } From 7f308af5eeea2d1b24aee0361d39dc43bac4cfe5 Mon Sep 17 00:00:00 2001 From: Simon Atanasyan Date: Wed, 17 Jul 2019 08:11:15 +0000 Subject: [PATCH 318/451] [mips] Support the "o" inline asm constraint As well as other LLVM targets we do not handle "offsettable" memory addresses in any special way. In other words, the "o" constraint is an exact equivalent of the "m" one. But some existing code require the "o" constraint support. This fixes PR42589. 
Differential Revision: https://reviews.llvm.org/D64792 llvm-svn: 366299 --- llvm/lib/Target/Mips/MipsISelLowering.h | 2 + llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 1 + .../CodeGen/Mips/inlineasm_constraint_o.ll | 61 +++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 llvm/test/CodeGen/Mips/inlineasm_constraint_o.ll diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index 27bf18c8ba90d..2db60e9801f1b 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -653,6 +653,8 @@ class TargetRegisterClass; unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override { + if (ConstraintCode == "o") + return InlineAsm::Constraint_o; if (ConstraintCode == "R") return InlineAsm::Constraint_R; if (ConstraintCode == "ZC") diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index c50e4c215a4df..703f99f37dd1b 100644 --- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -1237,6 +1237,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32)); return false; case InlineAsm::Constraint_m: + case InlineAsm::Constraint_o: if (selectAddrRegImm16(Op, Base, Offset)) { OutOps.push_back(Base); OutOps.push_back(Offset); diff --git a/llvm/test/CodeGen/Mips/inlineasm_constraint_o.ll b/llvm/test/CodeGen/Mips/inlineasm_constraint_o.ll new file mode 100644 index 0000000000000..de677cbcc6813 --- /dev/null +++ b/llvm/test/CodeGen/Mips/inlineasm_constraint_o.ll @@ -0,0 +1,61 @@ +; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s + +@data = global [8193 x i32] zeroinitializer + +define void @o(i32 *%p) nounwind { +entry: + ; CHECK-LABEL: o: + + call void asm sideeffect "lw $$1, $0", "*o,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 0)) + + ; 
CHECK: lw $[[BASEPTR:[0-9]+]], %got(data)( + ; CHECK: #APP + ; CHECK: lw $1, 0($[[BASEPTR]]) + ; CHECK: #NO_APP + + ret void +} + +define void @o_offset_4(i32 *%p) nounwind { +entry: + ; CHECK-LABEL: o_offset_4: + + call void asm sideeffect "lw $$1, $0", "*o,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 1)) + + ; CHECK: lw $[[BASEPTR:[0-9]+]], %got(data)( + ; CHECK: #APP + ; CHECK: lw $1, 4($[[BASEPTR]]) + ; CHECK: #NO_APP + + ret void +} + +define void @o_offset_32764(i32 *%p) nounwind { +entry: + ; CHECK-LABEL: o_offset_32764: + + call void asm sideeffect "lw $$1, $0", "*o,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8191)) + + ; CHECK-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)( + ; CHECK: #APP + ; CHECK: lw $1, 32764($[[BASEPTR]]) + ; CHECK: #NO_APP + + ret void +} + +define void @o_offset_32768(i32 *%p) nounwind { +entry: + ; CHECK-LABEL: o_offset_32768: + + call void asm sideeffect "lw $$1, $0", "*o,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8192)) + + ; CHECK-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)( + ; CHECK-DAG: ori $[[T0:[0-9]+]], $zero, 32768 + ; CHECK: addu $[[BASEPTR2:[0-9]+]], $[[BASEPTR]], $[[T0]] + ; CHECK: #APP + ; CHECK: lw $1, 0($[[BASEPTR2]]) + ; CHECK: #NO_APP + + ret void +} From a884afb6f8ea18f89de767f56f54b43c93fa8b85 Mon Sep 17 00:00:00 2001 From: Simon Atanasyan Date: Wed, 17 Jul 2019 08:11:31 +0000 Subject: [PATCH 319/451] [mips] Implement .cplocal directive This directive forces to use the alternate register for context pointer. 
For example, this code: .cplocal $4 jal foo expands to: ld $25, %call16(foo)($4) jalr $25 Differential Revision: https://reviews.llvm.org/D64743 llvm-svn: 366300 --- .../Target/Mips/AsmParser/MipsAsmParser.cpp | 67 +++++++++++++++---- .../Mips/MCTargetDesc/MipsTargetStreamer.cpp | 67 +++++++++++++------ llvm/lib/Target/Mips/MipsTargetStreamer.h | 4 ++ llvm/test/MC/Mips/cplocal-bad.s | 20 ++++++ llvm/test/MC/Mips/cplocal.s | 45 +++++++++++++ 5 files changed, 170 insertions(+), 33 deletions(-) create mode 100644 llvm/test/MC/Mips/cplocal-bad.s create mode 100644 llvm/test/MC/Mips/cplocal.s diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 0d968674faa85..1f7d095bf49b5 100644 --- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -146,6 +146,7 @@ class MipsAsmParser : public MCTargetAsmParser { bool IsPicEnabled; bool IsCpRestoreSet; int CpRestoreOffset; + unsigned GPReg; unsigned CpSaveLocation; /// If true, then CpSaveLocation is a register, otherwise it's an offset. bool CpSaveLocationIsRegister; @@ -334,6 +335,7 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseSetFeature(uint64_t Feature); bool isPicAndNotNxxAbi(); // Used by .cpload, .cprestore, and .cpsetup. 
bool parseDirectiveCpLoad(SMLoc Loc); + bool parseDirectiveCpLocal(SMLoc Loc); bool parseDirectiveCpRestore(SMLoc Loc); bool parseDirectiveCPSetup(); bool parseDirectiveCPReturn(); @@ -527,6 +529,7 @@ class MipsAsmParser : public MCTargetAsmParser { IsCpRestoreSet = false; CpRestoreOffset = -1; + GPReg = ABI.GetGlobalPtr(); const Triple &TheTriple = sti.getTargetTriple(); IsLittleEndian = TheTriple.isLittleEndian(); @@ -2040,7 +2043,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, const MCExpr *Lo16RelocExpr = MipsMCExpr::create(MipsMCExpr::MEK_LO, JalExpr, getContext()); - TOut.emitRRX(Mips::LW, Mips::T9, Mips::GP, + TOut.emitRRX(Mips::LW, Mips::T9, GPReg, MCOperand::createExpr(Got16RelocExpr), IDLoc, STI); TOut.emitRRX(Mips::ADDiu, Mips::T9, Mips::T9, MCOperand::createExpr(Lo16RelocExpr), IDLoc, STI); @@ -2054,7 +2057,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, MipsMCExpr::create(MipsMCExpr::MEK_GOT_DISP, JalExpr, getContext()); TOut.emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9, - Mips::GP, MCOperand::createExpr(GotDispRelocExpr), IDLoc, + GPReg, MCOperand::createExpr(GotDispRelocExpr), IDLoc, STI); } } else { @@ -2065,7 +2068,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, const MCExpr *Call16RelocExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, JalExpr, getContext()); - TOut.emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9, Mips::GP, + TOut.emitRRX(ABI.ArePtrs64bit() ? 
Mips::LD : Mips::LW, Mips::T9, GPReg, MCOperand::createExpr(Call16RelocExpr), IDLoc, STI); } @@ -2893,8 +2896,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, ELF::STB_LOCAL))) { const MCExpr *CallExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext()); - TOut.emitRRX(Mips::LW, DstReg, ABI.GetGlobalPtr(), - MCOperand::createExpr(CallExpr), IDLoc, STI); + TOut.emitRRX(Mips::LW, DstReg, GPReg, MCOperand::createExpr(CallExpr), + IDLoc, STI); return false; } @@ -2933,8 +2936,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, TmpReg = ATReg; } - TOut.emitRRX(Mips::LW, TmpReg, ABI.GetGlobalPtr(), - MCOperand::createExpr(GotExpr), IDLoc, STI); + TOut.emitRRX(Mips::LW, TmpReg, GPReg, MCOperand::createExpr(GotExpr), IDLoc, + STI); if (LoExpr) TOut.emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), @@ -2969,8 +2972,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, ELF::STB_LOCAL))) { const MCExpr *CallExpr = MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, SymExpr, getContext()); - TOut.emitRRX(Mips::LD, DstReg, ABI.GetGlobalPtr(), - MCOperand::createExpr(CallExpr), IDLoc, STI); + TOut.emitRRX(Mips::LD, DstReg, GPReg, MCOperand::createExpr(CallExpr), + IDLoc, STI); return false; } @@ -3012,8 +3015,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr, TmpReg = ATReg; } - TOut.emitRRX(Mips::LD, TmpReg, ABI.GetGlobalPtr(), - MCOperand::createExpr(GotExpr), IDLoc, STI); + TOut.emitRRX(Mips::LD, TmpReg, GPReg, MCOperand::createExpr(GotExpr), IDLoc, + STI); if (LoExpr) TOut.emitRRX(Mips::DADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), @@ -3243,10 +3246,10 @@ bool MipsAsmParser::emitPartialAddress(MipsTargetStreamer &TOut, SMLoc IDLoc, MipsMCExpr::create(MipsMCExpr::MEK_GOT, GotSym, getContext()); if(isABI_O32() || isABI_N32()) { - TOut.emitRRX(Mips::LW, ATReg, Mips::GP, MCOperand::createExpr(GotExpr), + TOut.emitRRX(Mips::LW, ATReg, GPReg, 
MCOperand::createExpr(GotExpr), IDLoc, STI); } else { //isABI_N64() - TOut.emitRRX(Mips::LD, ATReg, Mips::GP, MCOperand::createExpr(GotExpr), + TOut.emitRRX(Mips::LD, ATReg, GPReg, MCOperand::createExpr(GotExpr), IDLoc, STI); } } else { //!IsPicEnabled @@ -7241,6 +7244,40 @@ bool MipsAsmParser::parseDirectiveCpLoad(SMLoc Loc) { return false; } +bool MipsAsmParser::parseDirectiveCpLocal(SMLoc Loc) { + if (!isABI_N32() && !isABI_N64()) { + reportParseError(".cplocal is allowed only in N32 or N64 mode"); + return false; + } + + SmallVector, 1> Reg; + OperandMatchResultTy ResTy = parseAnyRegister(Reg); + if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) { + reportParseError("expected register containing global pointer"); + return false; + } + + MipsOperand &RegOpnd = static_cast(*Reg[0]); + if (!RegOpnd.isGPRAsmReg()) { + reportParseError(RegOpnd.getStartLoc(), "invalid register"); + return false; + } + + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + getParser().Lex(); // Consume the EndOfStatement. 
+ + unsigned NewReg = RegOpnd.getGPR32Reg(); + if (IsPicEnabled) + GPReg = NewReg; + + getTargetStreamer().emitDirectiveCpLocal(NewReg); + return false; +} + bool MipsAsmParser::parseDirectiveCpRestore(SMLoc Loc) { MCAsmParser &Parser = getParser(); @@ -8091,6 +8128,10 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { parseDirectiveCpRestore(DirectiveID.getLoc()); return false; } + if (IDVal == ".cplocal") { + parseDirectiveCpLocal(DirectiveID.getLoc()); + return false; + } if (IDVal == ".ent") { StringRef SymbolName; diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index bd978e9e6ae58..e3bdb3b140a8f 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -35,7 +35,7 @@ static cl::opt RoundSectionSizes( } // end anonymous namespace MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S) - : MCTargetStreamer(S), ModuleDirectiveAllowed(true) { + : MCTargetStreamer(S), GPReg(Mips::GP), ModuleDirectiveAllowed(true) { GPRInfoSet = FPRInfoSet = FrameInfoSet = false; } void MipsTargetStreamer::emitDirectiveSetMicroMips() {} @@ -106,6 +106,23 @@ void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetDspr2() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {} +void MipsTargetStreamer::emitDirectiveCpLocal(unsigned RegNo) { + // .cplocal $reg + // This directive forces to use the alternate register for context pointer. 
+ // For example + // .cplocal $4 + // jal foo + // expands to + // ld $25, %call16(foo)($4) + // jalr $25 + + if (!getABI().IsN32() && !getABI().IsN64()) + return; + + GPReg = RegNo; + + forbidModuleDirective(); +} bool MipsTargetStreamer::emitDirectiveCpRestore( int Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI) { @@ -257,8 +274,7 @@ void MipsTargetStreamer::emitNop(SMLoc IDLoc, const MCSubtargetInfo *STI) { /// Emit the $gp restore operation for .cprestore. void MipsTargetStreamer::emitGPRestore(int Offset, SMLoc IDLoc, const MCSubtargetInfo *STI) { - emitLoadWithImmOffset(Mips::LW, Mips::GP, Mips::SP, Offset, Mips::GP, IDLoc, - STI); + emitLoadWithImmOffset(Mips::LW, GPReg, Mips::SP, Offset, GPReg, IDLoc, STI); } /// Emit a store instruction with an immediate offset. @@ -665,6 +681,12 @@ void MipsTargetAsmStreamer::emitDirectiveCpLoad(unsigned RegNo) { forbidModuleDirective(); } +void MipsTargetAsmStreamer::emitDirectiveCpLocal(unsigned RegNo) { + OS << "\t.cplocal\t$" + << StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << "\n"; + MipsTargetStreamer::emitDirectiveCpLocal(RegNo); +} + bool MipsTargetAsmStreamer::emitDirectiveCpRestore( int Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI) { @@ -1135,7 +1157,7 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) { MCInst TmpInst; TmpInst.setOpcode(Mips::LUi); - TmpInst.addOperand(MCOperand::createReg(Mips::GP)); + TmpInst.addOperand(MCOperand::createReg(GPReg)); const MCExpr *HiSym = MipsMCExpr::create( MipsMCExpr::MEK_HI, MCSymbolRefExpr::create("_gp_disp", MCSymbolRefExpr::VK_None, @@ -1147,8 +1169,8 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) { TmpInst.clear(); TmpInst.setOpcode(Mips::ADDiu); - TmpInst.addOperand(MCOperand::createReg(Mips::GP)); - TmpInst.addOperand(MCOperand::createReg(Mips::GP)); + TmpInst.addOperand(MCOperand::createReg(GPReg)); + TmpInst.addOperand(MCOperand::createReg(GPReg)); const MCExpr 
*LoSym = MipsMCExpr::create( MipsMCExpr::MEK_LO, MCSymbolRefExpr::create("_gp_disp", MCSymbolRefExpr::VK_None, @@ -1160,14 +1182,19 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) { TmpInst.clear(); TmpInst.setOpcode(Mips::ADDu); - TmpInst.addOperand(MCOperand::createReg(Mips::GP)); - TmpInst.addOperand(MCOperand::createReg(Mips::GP)); + TmpInst.addOperand(MCOperand::createReg(GPReg)); + TmpInst.addOperand(MCOperand::createReg(GPReg)); TmpInst.addOperand(MCOperand::createReg(RegNo)); getStreamer().EmitInstruction(TmpInst, STI); forbidModuleDirective(); } +void MipsTargetELFStreamer::emitDirectiveCpLocal(unsigned RegNo) { + if (Pic) + MipsTargetStreamer::emitDirectiveCpLocal(RegNo); +} + bool MipsTargetELFStreamer::emitDirectiveCpRestore( int Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI) { @@ -1184,7 +1211,7 @@ bool MipsTargetELFStreamer::emitDirectiveCpRestore( return true; // Store the $gp on the stack. - emitStoreWithImmOffset(Mips::SW, Mips::GP, Mips::SP, Offset, GetATReg, IDLoc, + emitStoreWithImmOffset(Mips::SW, GPReg, Mips::SP, Offset, GetATReg, IDLoc, STI); return true; } @@ -1205,10 +1232,10 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, // Either store the old $gp in a register or on the stack if (IsReg) { // move $save, $gpreg - emitRRR(Mips::OR64, RegOrOffset, Mips::GP, Mips::ZERO, SMLoc(), &STI); + emitRRR(Mips::OR64, RegOrOffset, GPReg, Mips::ZERO, SMLoc(), &STI); } else { // sd $gpreg, offset($sp) - emitRRI(Mips::SD, Mips::GP, Mips::SP, RegOrOffset, SMLoc(), &STI); + emitRRI(Mips::SD, GPReg, Mips::SP, RegOrOffset, SMLoc(), &STI); } if (getABI().IsN32()) { @@ -1221,11 +1248,11 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, MCA.getContext()); // lui $gp, %hi(__gnu_local_gp) - emitRX(Mips::LUi, Mips::GP, MCOperand::createExpr(HiExpr), SMLoc(), &STI); + emitRX(Mips::LUi, GPReg, MCOperand::createExpr(HiExpr), SMLoc(), &STI); // addiu $gp, $gp, %lo(__gnu_local_gp) - 
emitRRX(Mips::ADDiu, Mips::GP, Mips::GP, MCOperand::createExpr(LoExpr), - SMLoc(), &STI); + emitRRX(Mips::ADDiu, GPReg, GPReg, MCOperand::createExpr(LoExpr), SMLoc(), + &STI); return; } @@ -1238,14 +1265,14 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, MCA.getContext()); // lui $gp, %hi(%neg(%gp_rel(funcSym))) - emitRX(Mips::LUi, Mips::GP, MCOperand::createExpr(HiExpr), SMLoc(), &STI); + emitRX(Mips::LUi, GPReg, MCOperand::createExpr(HiExpr), SMLoc(), &STI); // addiu $gp, $gp, %lo(%neg(%gp_rel(funcSym))) - emitRRX(Mips::ADDiu, Mips::GP, Mips::GP, MCOperand::createExpr(LoExpr), - SMLoc(), &STI); + emitRRX(Mips::ADDiu, GPReg, GPReg, MCOperand::createExpr(LoExpr), SMLoc(), + &STI); // daddu $gp, $gp, $funcreg - emitRRR(Mips::DADDu, Mips::GP, Mips::GP, RegNo, SMLoc(), &STI); + emitRRR(Mips::DADDu, GPReg, GPReg, RegNo, SMLoc(), &STI); } void MipsTargetELFStreamer::emitDirectiveCpreturn(unsigned SaveLocation, @@ -1258,12 +1285,12 @@ void MipsTargetELFStreamer::emitDirectiveCpreturn(unsigned SaveLocation, // Either restore the old $gp from a register or on the stack if (SaveLocationIsRegister) { Inst.setOpcode(Mips::OR); - Inst.addOperand(MCOperand::createReg(Mips::GP)); + Inst.addOperand(MCOperand::createReg(GPReg)); Inst.addOperand(MCOperand::createReg(SaveLocation)); Inst.addOperand(MCOperand::createReg(Mips::ZERO)); } else { Inst.setOpcode(Mips::LD); - Inst.addOperand(MCOperand::createReg(Mips::GP)); + Inst.addOperand(MCOperand::createReg(GPReg)); Inst.addOperand(MCOperand::createReg(Mips::SP)); Inst.addOperand(MCOperand::createImm(SaveLocation)); } diff --git a/llvm/lib/Target/Mips/MipsTargetStreamer.h b/llvm/lib/Target/Mips/MipsTargetStreamer.h index 697a5c4193ea2..1fa8ebadd6435 100644 --- a/llvm/lib/Target/Mips/MipsTargetStreamer.h +++ b/llvm/lib/Target/Mips/MipsTargetStreamer.h @@ -91,6 +91,7 @@ class MipsTargetStreamer : public MCTargetStreamer { // PIC support virtual void emitDirectiveCpLoad(unsigned RegNo); + virtual void 
emitDirectiveCpLocal(unsigned RegNo); virtual bool emitDirectiveCpRestore(int Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI); @@ -199,6 +200,7 @@ class MipsTargetStreamer : public MCTargetStreamer { bool FrameInfoSet; int FrameOffset; unsigned FrameReg; + unsigned GPReg; unsigned ReturnReg; private: @@ -274,6 +276,7 @@ class MipsTargetAsmStreamer : public MipsTargetStreamer { // PIC support void emitDirectiveCpLoad(unsigned RegNo) override; + void emitDirectiveCpLocal(unsigned RegNo) override; /// Emit a .cprestore directive. If the offset is out of range then it will /// be synthesized using the assembler temporary. @@ -345,6 +348,7 @@ class MipsTargetELFStreamer : public MipsTargetStreamer { // PIC support void emitDirectiveCpLoad(unsigned RegNo) override; + void emitDirectiveCpLocal(unsigned RegNo) override; bool emitDirectiveCpRestore(int Offset, function_ref GetATReg, SMLoc IDLoc, const MCSubtargetInfo *STI) override; void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, diff --git a/llvm/test/MC/Mips/cplocal-bad.s b/llvm/test/MC/Mips/cplocal-bad.s new file mode 100644 index 0000000000000..0236fdb34db12 --- /dev/null +++ b/llvm/test/MC/Mips/cplocal-bad.s @@ -0,0 +1,20 @@ +# RUN: not llvm-mc -triple=mips-unknown-linux-gnu %s 2>&1 \ +# RUN: | FileCheck -check-prefix=O32 %s +# RUN: not llvm-mc -triple=mips64-unknown-linux-gnuabin32 %s 2>&1 \ +# RUN: | FileCheck -check-prefix=NABI %s +# RUN: not llvm-mc -triple=mips64-unknown-linux-gnu %s 2>&1 \ +# RUN: | FileCheck -check-prefix=NABI %s + + .text + .cplocal $32 +# O32: :[[@LINE-1]]:{{[0-9]+}}: error: .cplocal is allowed only in N32 or N64 mode +# NABI: :[[@LINE-2]]:{{[0-9]+}}: error: invalid register + .cplocal $foo +# O32: :[[@LINE-1]]:{{[0-9]+}}: error: .cplocal is allowed only in N32 or N64 mode +# NABI: :[[@LINE-2]]:{{[0-9]+}}: error: expected register containing global pointer + .cplocal bar +# O32: :[[@LINE-1]]:{{[0-9]+}}: error: .cplocal is allowed only in N32 or N64 mode +# 
NABI: :[[@LINE-2]]:{{[0-9]+}}: error: expected register containing global pointer + .cplocal $25 foobar +# O32: :[[@LINE-1]]:{{[0-9]+}}: error: .cplocal is allowed only in N32 or N64 mode +# NABI: :[[@LINE-2]]:{{[0-9]+}}: error: unexpected token, expected end of statement diff --git a/llvm/test/MC/Mips/cplocal.s b/llvm/test/MC/Mips/cplocal.s new file mode 100644 index 0000000000000..bfd9ba560b5de --- /dev/null +++ b/llvm/test/MC/Mips/cplocal.s @@ -0,0 +1,45 @@ +# RUN: llvm-mc -triple=mips64-unknown-linux-gnuabin32 -position-independent %s \ +# RUN: | FileCheck -check-prefix=ASM-PIC32 %s +# RUN: llvm-mc -triple=mips64-unknown-linux-gnu -position-independent %s \ +# RUN: | FileCheck -check-prefix=ASM-PIC64 %s +# RUN: llvm-mc -triple=mips64-unknown-linux-gnuabin32 %s \ +# RUN: | FileCheck -check-prefix=ASM-NPIC %s +# RUN: llvm-mc -triple=mips64-unknown-linux-gnu %s \ +# RUN: | FileCheck -check-prefix=ASM-NPIC %s + +# RUN: llvm-mc -triple=mips64-unknown-linux-gnuabin32 \ +# RUN: -position-independent -filetype=obj -o - %s \ +# RUN: | llvm-objdump -d -r - | FileCheck -check-prefix=OBJ32 %s +# RUN: llvm-mc -triple=mips64-unknown-linux-gnu \ +# RUN: -position-independent -filetype=obj -o - %s \ +# RUN: | llvm-objdump -d -r - | FileCheck -check-prefix=OBJ64 %s + +# ASM-PIC32: .text +# ASM-PIC32: .cplocal $4 +# ASM-PIC32: lw $25, %call16(foo)($4) +# ASM-PIC32: jalr $25 + +# ASM-PIC64: .text +# ASM-PIC64: .cplocal $4 +# ASM-PIC64: ld $25, %call16(foo)($4) +# ASM-PIC64: jalr $25 + +# ASM-NPIC: .text +# ASM-NPIC: .cplocal $4 +# ASM-NPIC: jal foo + +# OBJ32: lw $25, 0($4) +# OBJ32: R_MIPS_CALL16 foo +# OBJ32: jalr $25 +# OBJ32: R_MIPS_JALR foo + +# OBJ64: ld $25, 0($4) +# OBJ64: R_MIPS_CALL16/R_MIPS_NONE/R_MIPS_NONE foo +# OBJ64: jalr $25 +# OBJ64: R_MIPS_JALR/R_MIPS_NONE/R_MIPS_NONE foo + + .text + .cplocal $4 + jal foo +foo: + nop From 4c1e4408927e0c260beb30114ccaf48b017a5ecb Mon Sep 17 00:00:00 2001 From: Simon Atanasyan Date: Wed, 17 Jul 2019 08:11:40 +0000 Subject: [PATCH 
320/451] [mips] Use mult/mflo pattern on 64-bit targets prior to MIPS64 The `MUL` instruction is available starting from the MIPS32/MIPS64 targets. llvm-svn: 366301 --- llvm/lib/Target/Mips/Mips64InstrInfo.td | 2 +- llvm/test/CodeGen/Mips/llvm-ir/mul.ll | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/Mips/Mips64InstrInfo.td b/llvm/lib/Target/Mips/Mips64InstrInfo.td index b5711004f7007..7f35280f79363 100644 --- a/llvm/lib/Target/Mips/Mips64InstrInfo.td +++ b/llvm/lib/Target/Mips/Mips64InstrInfo.td @@ -853,7 +853,7 @@ def : MipsPat<(i64 (sext (i32 (sub GPR32:$src, GPR32:$src2)))), (SUBu GPR32:$src, GPR32:$src2), sub_32)>; def : MipsPat<(i64 (sext (i32 (mul GPR32:$src, GPR32:$src2)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (MUL GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS3_NOT_32R6_64R6; + (MUL GPR32:$src, GPR32:$src2), sub_32)>, ISA_MIPS32_NOT_32R6_64R6; def : MipsPat<(i64 (sext (i32 (MipsMFHI ACC64:$src)))), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (PseudoMFHI ACC64:$src), sub_32)>; diff --git a/llvm/test/CodeGen/Mips/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/llvm-ir/mul.ll index b6f535abdee35..c75bda3f394ad 100644 --- a/llvm/test/CodeGen/Mips/llvm-ir/mul.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/mul.ll @@ -155,6 +155,9 @@ entry: ; M2: mult $4, $5 ; M2: mflo $2 + ; M4: mult $4, $5 + ; M4: mflo $1 + ; 32R1-R5: mul $2, $4, $5 ; 32R6: mul $2, $4, $5 From 6d4b1c046b821179f0cdf126234a12704a2fbed7 Mon Sep 17 00:00:00 2001 From: Simon Atanasyan Date: Wed, 17 Jul 2019 08:11:57 +0000 Subject: [PATCH 321/451] [mips] Name inline asm constraint test cases in a uniform manner. 
NFC llvm-svn: 366302 --- .../Mips/{inlineasm_constraint_R.ll => inlineasm-constraint-R.ll} | 0 .../{inlineasm_constraint_ZC.ll => inlineasm-constraint-ZC-1.ll} | 0 ...{inlineasm-constraint_ZC_2.ll => inlineasm-constraint-ZC-2.ll} | 0 ...lineasm-cnstrnt-bad-I-1.ll => inlineasm-constraint-bad-I-1.ll} | 0 .../{inlineasm-cnstrnt-bad-J.ll => inlineasm-constraint-bad-J.ll} | 0 .../{inlineasm-cnstrnt-bad-K.ll => inlineasm-constraint-bad-K.ll} | 0 .../{inlineasm-cnstrnt-bad-L.ll => inlineasm-constraint-bad-L.ll} | 0 .../{inlineasm-cnstrnt-bad-N.ll => inlineasm-constraint-bad-N.ll} | 0 .../{inlineasm-cnstrnt-bad-O.ll => inlineasm-constraint-bad-O.ll} | 0 .../{inlineasm-cnstrnt-bad-P.ll => inlineasm-constraint-bad-P.ll} | 0 ...inlineasm-cnstrnt-bad-l1.ll => inlineasm-constraint-bad-l1.ll} | 0 .../{inline-asm-i-constraint-i1.ll => inlineasm-constraint-i1.ll} | 0 .../{inlineasm_constraint_m.ll => inlineasm-constraint-m-1.ll} | 0 .../Mips/{inlineasmmemop.ll => inlineasm-constraint-m-2.ll} | 0 .../Mips/{inlineasm_constraint_o.ll => inlineasm-constraint-o.ll} | 0 .../{inlineasm-cnstrnt-reg.ll => inlineasm-constraint-reg.ll} | 0 .../{inlineasm-cnstrnt-reg64.ll => inlineasm-constraint-reg64.ll} | 0 .../Mips/{inlineasm_constraint.ll => inlineasm-constraint.ll} | 0 18 files changed, 0 insertions(+), 0 deletions(-) rename llvm/test/CodeGen/Mips/{inlineasm_constraint_R.ll => inlineasm-constraint-R.ll} (100%) rename llvm/test/CodeGen/Mips/{inlineasm_constraint_ZC.ll => inlineasm-constraint-ZC-1.ll} (100%) rename llvm/test/CodeGen/Mips/{inlineasm-constraint_ZC_2.ll => inlineasm-constraint-ZC-2.ll} (100%) rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-bad-I-1.ll => inlineasm-constraint-bad-I-1.ll} (100%) rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-bad-J.ll => inlineasm-constraint-bad-J.ll} (100%) rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-bad-K.ll => inlineasm-constraint-bad-K.ll} (100%) rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-bad-L.ll => 
inlineasm-constraint-bad-L.ll} (100%) rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-bad-N.ll => inlineasm-constraint-bad-N.ll} (100%) rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-bad-O.ll => inlineasm-constraint-bad-O.ll} (100%) rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-bad-P.ll => inlineasm-constraint-bad-P.ll} (100%) rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-bad-l1.ll => inlineasm-constraint-bad-l1.ll} (100%) rename llvm/test/CodeGen/Mips/{inline-asm-i-constraint-i1.ll => inlineasm-constraint-i1.ll} (100%) rename llvm/test/CodeGen/Mips/{inlineasm_constraint_m.ll => inlineasm-constraint-m-1.ll} (100%) rename llvm/test/CodeGen/Mips/{inlineasmmemop.ll => inlineasm-constraint-m-2.ll} (100%) rename llvm/test/CodeGen/Mips/{inlineasm_constraint_o.ll => inlineasm-constraint-o.ll} (100%) rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-reg.ll => inlineasm-constraint-reg.ll} (100%) rename llvm/test/CodeGen/Mips/{inlineasm-cnstrnt-reg64.ll => inlineasm-constraint-reg64.ll} (100%) rename llvm/test/CodeGen/Mips/{inlineasm_constraint.ll => inlineasm-constraint.ll} (100%) diff --git a/llvm/test/CodeGen/Mips/inlineasm_constraint_R.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-R.ll similarity index 100% rename from llvm/test/CodeGen/Mips/inlineasm_constraint_R.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint-R.ll diff --git a/llvm/test/CodeGen/Mips/inlineasm_constraint_ZC.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-ZC-1.ll similarity index 100% rename from llvm/test/CodeGen/Mips/inlineasm_constraint_ZC.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint-ZC-1.ll diff --git a/llvm/test/CodeGen/Mips/inlineasm-constraint_ZC_2.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-ZC-2.ll similarity index 100% rename from llvm/test/CodeGen/Mips/inlineasm-constraint_ZC_2.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint-ZC-2.ll diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll 
b/llvm/test/CodeGen/Mips/inlineasm-constraint-bad-I-1.ll similarity index 100% rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-I-1.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint-bad-I-1.ll diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-bad-J.ll similarity index 100% rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-J.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint-bad-J.ll diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-K.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-bad-K.ll similarity index 100% rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-K.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint-bad-K.ll diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-bad-L.ll similarity index 100% rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-L.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint-bad-L.ll diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-bad-N.ll similarity index 100% rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-N.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint-bad-N.ll diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-bad-O.ll similarity index 100% rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-O.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint-bad-O.ll diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-bad-P.ll similarity index 100% rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-P.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint-bad-P.ll diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-l1.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-bad-l1.ll similarity index 100% rename from 
llvm/test/CodeGen/Mips/inlineasm-cnstrnt-bad-l1.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint-bad-l1.ll diff --git a/llvm/test/CodeGen/Mips/inline-asm-i-constraint-i1.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-i1.ll similarity index 100% rename from llvm/test/CodeGen/Mips/inline-asm-i-constraint-i1.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint-i1.ll diff --git a/llvm/test/CodeGen/Mips/inlineasm_constraint_m.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-m-1.ll similarity index 100% rename from llvm/test/CodeGen/Mips/inlineasm_constraint_m.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint-m-1.ll diff --git a/llvm/test/CodeGen/Mips/inlineasmmemop.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-m-2.ll similarity index 100% rename from llvm/test/CodeGen/Mips/inlineasmmemop.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint-m-2.ll diff --git a/llvm/test/CodeGen/Mips/inlineasm_constraint_o.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-o.ll similarity index 100% rename from llvm/test/CodeGen/Mips/inlineasm_constraint_o.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint-o.ll diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-reg.ll similarity index 100% rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint-reg.ll diff --git a/llvm/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint-reg64.ll similarity index 100% rename from llvm/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint-reg64.ll diff --git a/llvm/test/CodeGen/Mips/inlineasm_constraint.ll b/llvm/test/CodeGen/Mips/inlineasm-constraint.ll similarity index 100% rename from llvm/test/CodeGen/Mips/inlineasm_constraint.ll rename to llvm/test/CodeGen/Mips/inlineasm-constraint.ll From 1292464125fd257114fc03384ac9cdf185ee1831 Mon Sep 17 00:00:00 2001 From: Simon Atanasyan 
Date: Wed, 17 Jul 2019 08:12:03 +0000 Subject: [PATCH 322/451] [mips] Remove redundant test case. NFC The `inlineasm-constraint-reg64.ll` test checks the same functionality. llvm-svn: 366303 --- llvm/test/CodeGen/Mips/inlineasm64.ll | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 llvm/test/CodeGen/Mips/inlineasm64.ll diff --git a/llvm/test/CodeGen/Mips/inlineasm64.ll b/llvm/test/CodeGen/Mips/inlineasm64.ll deleted file mode 100644 index 82abdf82a3ed3..0000000000000 --- a/llvm/test/CodeGen/Mips/inlineasm64.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi=n64 < %s | FileCheck %s - -@gl2 = external global i64 -@gl1 = external global i64 -@gl0 = external global i64 - -define void @foo1() nounwind { -entry: -; CHECK: foo1 -; CHECK: daddu - %0 = load i64, i64* @gl1, align 8 - %1 = load i64, i64* @gl0, align 8 - %2 = tail call i64 asm "daddu $0, $1, $2", "=r,r,r"(i64 %0, i64 %1) nounwind - store i64 %2, i64* @gl2, align 8 - ret void -} - From 42cfbafc1fe9813720d6456c91547432d33c2570 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Wed, 17 Jul 2019 08:31:51 +0000 Subject: [PATCH 323/451] Replace CRLF with LF. 
llvm-svn: 366304 --- .../ClangTidyDiagnosticConsumerTest.cpp | 188 +++++++++--------- 1 file changed, 94 insertions(+), 94 deletions(-) diff --git a/clang-tools-extra/unittests/clang-tidy/ClangTidyDiagnosticConsumerTest.cpp b/clang-tools-extra/unittests/clang-tidy/ClangTidyDiagnosticConsumerTest.cpp index 5547f98ffd030..67f1dfdf029ec 100644 --- a/clang-tools-extra/unittests/clang-tidy/ClangTidyDiagnosticConsumerTest.cpp +++ b/clang-tools-extra/unittests/clang-tidy/ClangTidyDiagnosticConsumerTest.cpp @@ -1,94 +1,94 @@ -#include "ClangTidy.h" -#include "ClangTidyTest.h" -#include "gtest/gtest.h" - -namespace clang { -namespace tidy { -namespace test { - -class TestCheck : public ClangTidyCheck { -public: - TestCheck(StringRef Name, ClangTidyContext *Context) - : ClangTidyCheck(Name, Context) {} - void registerMatchers(ast_matchers::MatchFinder *Finder) override { - Finder->addMatcher(ast_matchers::varDecl().bind("var"), this); - } - void check(const ast_matchers::MatchFinder::MatchResult &Result) override { - const auto *Var = Result.Nodes.getNodeAs("var"); - // Add diagnostics in the wrong order. 
- diag(Var->getLocation(), "variable"); - diag(Var->getTypeSpecStartLoc(), "type specifier"); - } -}; - -TEST(ClangTidyDiagnosticConsumer, SortsErrors) { - std::vector Errors; - runCheckOnCode("int a;", &Errors); - EXPECT_EQ(2ul, Errors.size()); - EXPECT_EQ("type specifier", Errors[0].Message.Message); - EXPECT_EQ("variable", Errors[1].Message.Message); -} - -TEST(GlobList, Empty) { - GlobList Filter(""); - - EXPECT_TRUE(Filter.contains("")); - EXPECT_FALSE(Filter.contains("aaa")); -} - -TEST(GlobList, Nothing) { - GlobList Filter("-*"); - - EXPECT_FALSE(Filter.contains("")); - EXPECT_FALSE(Filter.contains("a")); - EXPECT_FALSE(Filter.contains("-*")); - EXPECT_FALSE(Filter.contains("-")); - EXPECT_FALSE(Filter.contains("*")); -} - -TEST(GlobList, Everything) { - GlobList Filter("*"); - - EXPECT_TRUE(Filter.contains("")); - EXPECT_TRUE(Filter.contains("aaaa")); - EXPECT_TRUE(Filter.contains("-*")); - EXPECT_TRUE(Filter.contains("-")); - EXPECT_TRUE(Filter.contains("*")); -} - -TEST(GlobList, Simple) { - GlobList Filter("aaa"); - - EXPECT_TRUE(Filter.contains("aaa")); - EXPECT_FALSE(Filter.contains("")); - EXPECT_FALSE(Filter.contains("aa")); - EXPECT_FALSE(Filter.contains("aaaa")); - EXPECT_FALSE(Filter.contains("bbb")); -} - -TEST(GlobList, WhitespacesAtBegin) { - GlobList Filter("-*, a.b.*"); - - EXPECT_TRUE(Filter.contains("a.b.c")); - EXPECT_FALSE(Filter.contains("b.c")); -} - -TEST(GlobList, Complex) { - GlobList Filter("*,-a.*, -b.*, \r \n a.1.* ,-a.1.A.*,-..,-...,-..+,-*$, -*qwe* "); - - EXPECT_TRUE(Filter.contains("aaa")); - EXPECT_TRUE(Filter.contains("qqq")); - EXPECT_FALSE(Filter.contains("a.")); - EXPECT_FALSE(Filter.contains("a.b")); - EXPECT_FALSE(Filter.contains("b.")); - EXPECT_FALSE(Filter.contains("b.b")); - EXPECT_TRUE(Filter.contains("a.1.b")); - EXPECT_FALSE(Filter.contains("a.1.A.a")); - EXPECT_FALSE(Filter.contains("qwe")); - EXPECT_FALSE(Filter.contains("asdfqweasdf")); - EXPECT_TRUE(Filter.contains("asdfqwEasdf")); -} - -} // namespace test 
-} // namespace tidy -} // namespace clang +#include "ClangTidy.h" +#include "ClangTidyTest.h" +#include "gtest/gtest.h" + +namespace clang { +namespace tidy { +namespace test { + +class TestCheck : public ClangTidyCheck { +public: + TestCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + void registerMatchers(ast_matchers::MatchFinder *Finder) override { + Finder->addMatcher(ast_matchers::varDecl().bind("var"), this); + } + void check(const ast_matchers::MatchFinder::MatchResult &Result) override { + const auto *Var = Result.Nodes.getNodeAs("var"); + // Add diagnostics in the wrong order. + diag(Var->getLocation(), "variable"); + diag(Var->getTypeSpecStartLoc(), "type specifier"); + } +}; + +TEST(ClangTidyDiagnosticConsumer, SortsErrors) { + std::vector Errors; + runCheckOnCode("int a;", &Errors); + EXPECT_EQ(2ul, Errors.size()); + EXPECT_EQ("type specifier", Errors[0].Message.Message); + EXPECT_EQ("variable", Errors[1].Message.Message); +} + +TEST(GlobList, Empty) { + GlobList Filter(""); + + EXPECT_TRUE(Filter.contains("")); + EXPECT_FALSE(Filter.contains("aaa")); +} + +TEST(GlobList, Nothing) { + GlobList Filter("-*"); + + EXPECT_FALSE(Filter.contains("")); + EXPECT_FALSE(Filter.contains("a")); + EXPECT_FALSE(Filter.contains("-*")); + EXPECT_FALSE(Filter.contains("-")); + EXPECT_FALSE(Filter.contains("*")); +} + +TEST(GlobList, Everything) { + GlobList Filter("*"); + + EXPECT_TRUE(Filter.contains("")); + EXPECT_TRUE(Filter.contains("aaaa")); + EXPECT_TRUE(Filter.contains("-*")); + EXPECT_TRUE(Filter.contains("-")); + EXPECT_TRUE(Filter.contains("*")); +} + +TEST(GlobList, Simple) { + GlobList Filter("aaa"); + + EXPECT_TRUE(Filter.contains("aaa")); + EXPECT_FALSE(Filter.contains("")); + EXPECT_FALSE(Filter.contains("aa")); + EXPECT_FALSE(Filter.contains("aaaa")); + EXPECT_FALSE(Filter.contains("bbb")); +} + +TEST(GlobList, WhitespacesAtBegin) { + GlobList Filter("-*, a.b.*"); + + EXPECT_TRUE(Filter.contains("a.b.c")); + 
EXPECT_FALSE(Filter.contains("b.c")); +} + +TEST(GlobList, Complex) { + GlobList Filter("*,-a.*, -b.*, \r \n a.1.* ,-a.1.A.*,-..,-...,-..+,-*$, -*qwe* "); + + EXPECT_TRUE(Filter.contains("aaa")); + EXPECT_TRUE(Filter.contains("qqq")); + EXPECT_FALSE(Filter.contains("a.")); + EXPECT_FALSE(Filter.contains("a.b")); + EXPECT_FALSE(Filter.contains("b.")); + EXPECT_FALSE(Filter.contains("b.b")); + EXPECT_TRUE(Filter.contains("a.1.b")); + EXPECT_FALSE(Filter.contains("a.1.A.a")); + EXPECT_FALSE(Filter.contains("qwe")); + EXPECT_FALSE(Filter.contains("asdfqweasdf")); + EXPECT_TRUE(Filter.contains("asdfqwEasdf")); +} + +} // namespace test +} // namespace tidy +} // namespace clang From 96627215729080f42488185b29e03e73cee08204 Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Wed, 17 Jul 2019 08:37:00 +0000 Subject: [PATCH 324/451] [Driver] Enable __cxa_atexit on Solaris Starting with Solaris 11.4 (which is now the required minimal version), Solaris does support __cxa_atexit. This patch reflects that. One might consider removing the affected tests altogether instead of inverting them, as is done on other targets. Besides, this lets two ASan tests PASS: AddressSanitizer-i386-sunos :: TestCases/init-order-atexit.cc AddressSanitizer-i386-sunos-dynamic :: TestCases/init-order-atexit.cc Tested on x86_64-pc-solaris2.11 and sparcv9-sun-solaris2.11. 
Differential Revision: https://reviews.llvm.org/D64491 llvm-svn: 366305 --- clang/lib/Driver/ToolChains/Clang.cpp | 1 - clang/test/Driver/cxa-atexit.cpp | 2 +- clang/test/Driver/solaris-opts.c | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index edc64581172ff..cb861f27aedab 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4749,7 +4749,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (!Args.hasFlag( options::OPT_fuse_cxa_atexit, options::OPT_fno_use_cxa_atexit, !RawTriple.isOSWindows() && - RawTriple.getOS() != llvm::Triple::Solaris && TC.getArch() != llvm::Triple::xcore && ((RawTriple.getVendor() != llvm::Triple::MipsTechnologies) || RawTriple.hasEnvironment())) || diff --git a/clang/test/Driver/cxa-atexit.cpp b/clang/test/Driver/cxa-atexit.cpp index ae955ea5a7dfc..336756dedcecd 100644 --- a/clang/test/Driver/cxa-atexit.cpp +++ b/clang/test/Driver/cxa-atexit.cpp @@ -19,7 +19,7 @@ // RUN: %clang -### -target sparc-sun-solaris -c %s -o /dev/null 2>&1 | FileCheck %s -check-prefix CHECK-SOLARIS // CHECK-WINDOWS: "-fno-use-cxa-atexit" -// CHECK-SOLARIS: "-fno-use-cxa-atexit" +// CHECK-SOLARIS-NOT: "-fno-use-cxa-atexit" // CHECK-HEXAGON-NOT: "-fno-use-cxa-atexit" // CHECK-XCORE: "-fno-use-cxa-atexit" // CHECK-MTI: "-fno-use-cxa-atexit" diff --git a/clang/test/Driver/solaris-opts.c b/clang/test/Driver/solaris-opts.c index 8c54ae0a628d2..33d769efaa179 100644 --- a/clang/test/Driver/solaris-opts.c +++ b/clang/test/Driver/solaris-opts.c @@ -1,4 +1,4 @@ // RUN: %clang %s --target=sparc-sun-solaris2.11 -### -o %t.o 2>&1 | FileCheck %s -// CHECK: "-fno-use-cxa-atexit" +// CHECK-NOT: "-fno-use-cxa-atexit" From 9eb95903da4575b3b95d1a743d48e51bb5026ccd Mon Sep 17 00:00:00 2001 From: Marco Antognini Date: Wed, 17 Jul 2019 08:52:09 +0000 Subject: [PATCH 325/451] [OpenCL][Sema] Minor refactoring and constraint 
checking Summary: Simplify code a bit and add assertion to address post-landing comments from D64083. Subscribers: yaxunl, Anastasia, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D64804 llvm-svn: 366306 --- clang/lib/Sema/SemaExprCXX.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index df8638a013623..cdca2e8cac6b7 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -4216,17 +4216,12 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, break; case ICK_Block_Pointer_Conversion: { - QualType LHSType = Context.getCanonicalType(ToType).getUnqualifiedType(); - QualType RHSType = Context.getCanonicalType(FromType).getUnqualifiedType(); - - // Assumptions based on Sema::IsBlockPointerConversion. - assert(isa(LHSType) && "BlockPointerType expected"); - assert(isa(RHSType) && "BlockPointerType expected"); - LangAS AddrSpaceL = - LHSType->getAs()->getPointeeType().getAddressSpace(); + ToType->castAs()->getPointeeType().getAddressSpace(); LangAS AddrSpaceR = - RHSType->getAs()->getPointeeType().getAddressSpace(); + FromType->castAs()->getPointeeType().getAddressSpace(); + assert(Qualifiers::isAddressSpaceSupersetOf(AddrSpaceL, AddrSpaceR) && + "Invalid cast"); CastKind Kind = AddrSpaceL != AddrSpaceR ? CK_AddressSpaceConversion : CK_BitCast; From = ImpCastExprToType(From, ToType.getUnqualifiedType(), Kind, From 2be0ebb0d80d75241ac7ef033153466317ff13a0 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 17 Jul 2019 09:23:04 +0000 Subject: [PATCH 326/451] [ELF] Delete redundant pageAlign at PT_GNU_RELRO boundaries after D58892 Summary: After D58892 split the RW PT_LOAD on the PT_GNU_RELRO boundary, the new layout is: PT_LOAD(PT_GNU_RELRO(.data.rel.ro .bss.rel.ro)) PT_LOAD(.data. .bss) The two pageAlign() calls at PT_GNU_RELRO boundaries are redundant due to the existence of PT_LOAD. 
Reviewers: grimar, peter.smith, ruiu, espindola Reviewed By: ruiu Subscribers: sfertile, atanasyan, emaste, arichardson, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64854 llvm-svn: 366307 --- lld/ELF/Writer.cpp | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 3cf7b056064f3..b8c8891648a46 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -2223,25 +2223,6 @@ template void Writer::fixSectionAlignments() { for (const PhdrEntry *p : part.phdrs) if (p->p_type == PT_LOAD && p->firstSec) pageAlign(p->firstSec); - - for (const PhdrEntry *p : part.phdrs) { - if (p->p_type != PT_GNU_RELRO) - continue; - - if (p->firstSec) - pageAlign(p->firstSec); - - // Find the first section after PT_GNU_RELRO. If it is in a PT_LOAD we - // have to align it to a page. - auto end = outputSections.end(); - auto i = llvm::find(outputSections, p->lastSec); - if (i == end || (i + 1) == end) - continue; - - OutputSection *cmd = (*(i + 1)); - if (needsPtLoad(cmd)) - pageAlign(cmd); - } } } From 37e403d18c1a6dc0121a2e67e56fa348934f2018 Mon Sep 17 00:00:00 2001 From: Diana Picus Date: Wed, 17 Jul 2019 10:01:27 +0000 Subject: [PATCH 327/451] [ARM GlobalISel] Cleanup CallLowering. NFC Migrate CallLowering::lowerReturnVal to use the same infrastructure as lowerCall/FormalArguments and remove the now obsolete code path from splitToValueTypes. Forgot to push this earlier. 
llvm-svn: 366308 --- llvm/lib/Target/ARM/ARMCallLowering.cpp | 84 ++++++------------------- llvm/lib/Target/ARM/ARMCallLowering.h | 7 +-- 2 files changed, 20 insertions(+), 71 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp index fdd94ab25ef81..0cbe6e1871e4b 100644 --- a/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -184,22 +184,21 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { } // end anonymous namespace -void ARMCallLowering::splitToValueTypes( - const ArgInfo &OrigArg, SmallVectorImpl &SplitArgs, - MachineFunction &MF, const SplitArgTy &PerformArgSplit) const { +void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg, + SmallVectorImpl &SplitArgs, + MachineFunction &MF) const { const ARMTargetLowering &TLI = *getTLI(); LLVMContext &Ctx = OrigArg.Ty->getContext(); const DataLayout &DL = MF.getDataLayout(); - MachineRegisterInfo &MRI = MF.getRegInfo(); const Function &F = MF.getFunction(); SmallVector SplitVTs; ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, nullptr, nullptr, 0); + assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch"); if (SplitVTs.size() == 1) { // Even if there is no splitting to do, we still want to replace the // original type (e.g. pointer type -> integer). - assert(OrigArg.Regs.size() == 1 && "Regs / types mismatch"); auto Flags = OrigArg.Flags; unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty); Flags.setOrigAlign(OriginalAlignment); @@ -208,34 +207,7 @@ void ARMCallLowering::splitToValueTypes( return; } - if (OrigArg.Regs.size() > 1) { - // Create one ArgInfo for each virtual register. 
- assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch"); - for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) { - EVT SplitVT = SplitVTs[i]; - Type *SplitTy = SplitVT.getTypeForEVT(Ctx); - auto Flags = OrigArg.Flags; - - unsigned OriginalAlignment = DL.getABITypeAlignment(SplitTy); - Flags.setOrigAlign(OriginalAlignment); - - bool NeedsConsecutiveRegisters = - TLI.functionArgumentNeedsConsecutiveRegisters( - SplitTy, F.getCallingConv(), F.isVarArg()); - if (NeedsConsecutiveRegisters) { - Flags.setInConsecutiveRegs(); - if (i == e - 1) - Flags.setInConsecutiveRegsLast(); - } - - // FIXME: We also want to split SplitTy further. - Register PartReg = OrigArg.Regs[i]; - SplitArgs.emplace_back(PartReg, SplitTy, Flags, OrigArg.IsFixed); - } - - return; - } - + // Create one ArgInfo for each virtual register. for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) { EVT SplitVT = SplitVTs[i]; Type *SplitTy = SplitVT.getTypeForEVT(Ctx); @@ -253,10 +225,9 @@ void ARMCallLowering::splitToValueTypes( Flags.setInConsecutiveRegsLast(); } - Register PartReg = - MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)); - SplitArgs.push_back(ArgInfo{PartReg, SplitTy, Flags, OrigArg.IsFixed}); - PerformArgSplit(PartReg); + // FIXME: We also want to split SplitTy further. 
+ Register PartReg = OrigArg.Regs[i]; + SplitArgs.emplace_back(PartReg, SplitTy, Flags, OrigArg.IsFixed); } } @@ -277,29 +248,17 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder, if (!isSupportedType(DL, TLI, Val->getType())) return false; - SmallVector SplitEVTs; - ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs); - assert(VRegs.size() == SplitEVTs.size() && - "For each split Type there should be exactly one VReg."); - - SmallVector SplitVTs; - LLVMContext &Ctx = Val->getType()->getContext(); - for (unsigned i = 0; i < SplitEVTs.size(); ++i) { - ArgInfo CurArgInfo(VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx)); - setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F); - - SmallVector Regs; - splitToValueTypes(CurArgInfo, SplitVTs, MF, - [&](Register Reg) { Regs.push_back(Reg); }); - if (Regs.size() > 1) - MIRBuilder.buildUnmerge(Regs, VRegs[i]); - } + ArgInfo OrigRetInfo(VRegs, Val->getType()); + setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F); + + SmallVector SplitRetInfos; + splitToValueTypes(OrigRetInfo, SplitRetInfos, MF); CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg()); OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret, AssignFn); - return handleAssignments(MIRBuilder, SplitVTs, RetHandler); + return handleAssignments(MIRBuilder, SplitRetInfos, RetHandler); } bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, @@ -489,11 +448,9 @@ bool ARMCallLowering::lowerFormalArguments( unsigned Idx = 0; for (auto &Arg : F.args()) { ArgInfo OrigArgInfo(VRegs[Idx], Arg.getType()); - setArgFlags(OrigArgInfo, Idx + AttributeList::FirstArgIndex, DL, F); - splitToValueTypes(OrigArgInfo, SplitArgInfos, MF, [&](Register Reg) { - llvm_unreachable("Args should already be split"); - }); + setArgFlags(OrigArgInfo, Idx + AttributeList::FirstArgIndex, DL, F); + splitToValueTypes(OrigArgInfo, SplitArgInfos, MF); Idx++; } @@ -596,9 +553,7 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder 
&MIRBuilder, if (Arg.Flags.isByVal()) return false; - splitToValueTypes(Arg, ArgInfos, MF, [&](Register Reg) { - llvm_unreachable("Function args should already be split"); - }); + splitToValueTypes(Arg, ArgInfos, MF); } auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, IsVarArg); @@ -614,10 +569,7 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, return false; ArgInfos.clear(); - splitToValueTypes(OrigRet, ArgInfos, MF, [&](Register Reg) { - llvm_unreachable("Call results should already be split"); - }); - + splitToValueTypes(OrigRet, ArgInfos, MF); auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, IsVarArg); CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn); if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler)) diff --git a/llvm/lib/Target/ARM/ARMCallLowering.h b/llvm/lib/Target/ARM/ARMCallLowering.h index d0f204491921d..794127b5ebc7e 100644 --- a/llvm/lib/Target/ARM/ARMCallLowering.h +++ b/llvm/lib/Target/ARM/ARMCallLowering.h @@ -47,14 +47,11 @@ class ARMCallLowering : public CallLowering { ArrayRef VRegs, MachineInstrBuilder &Ret) const; - using SplitArgTy = std::function; - /// Split an argument into one or more arguments that the CC lowering can cope - /// with (e.g. replace pointers with integers). + /// with. void splitToValueTypes(const ArgInfo &OrigArg, SmallVectorImpl &SplitArgs, - MachineFunction &MF, - const SplitArgTy &PerformArgSplit) const; + MachineFunction &MF) const; }; } // end namespace llvm From 247add6f3a37e73bfb01ab20d9fa3529655f4311 Mon Sep 17 00:00:00 2001 From: Owen Reynolds Date: Wed, 17 Jul 2019 10:16:44 +0000 Subject: [PATCH 328/451] [llvm-ar][test] Add coverage for replace and update key letters Some more tests to increase llvm-ar test coverage, this time for replace 'r' and update 'u'. 
Differential Revision: https://reviews.llvm.org/D64803 llvm-svn: 366309 --- llvm/test/tools/llvm-ar/replace-update.test | 189 ++++++++++++++++++++ llvm/test/tools/llvm-ar/replace.test | 178 ++++++++++++++++++ 2 files changed, 367 insertions(+) create mode 100644 llvm/test/tools/llvm-ar/replace-update.test create mode 100644 llvm/test/tools/llvm-ar/replace.test diff --git a/llvm/test/tools/llvm-ar/replace-update.test b/llvm/test/tools/llvm-ar/replace-update.test new file mode 100644 index 0000000000000..1b51dba1bca00 --- /dev/null +++ b/llvm/test/tools/llvm-ar/replace-update.test @@ -0,0 +1,189 @@ +## Test that the replace command with "u" updates the relevant members. + +# RUN: rm -rf %t && mkdir -p %t/new/other + +# RUN: yaml2obj %s -o %t/1.o --docnum=1 +# RUN: yaml2obj %s -o %t/2.o --docnum=2 +# RUN: yaml2obj %s -o %t/3.o --docnum=3 + +# RUN: env TZ=GMT touch -t 200001020304 %t/1.o +# RUN: env TZ=GMT touch -t 200001020304 %t/2.o +# RUN: env TZ=GMT touch -t 200001020304 %t/3.o + +# RUN: yaml2obj %s -o %t/new/1.o --docnum=4 +# RUN: yaml2obj %s -o %t/new/3.o --docnum=5 + +# RUN: yaml2obj %s -o %t/new/other/1.o --docnum=6 + +# RUN: env TZ=GMT touch -t 200001020304 %t/new/other/1.o + +## Replace single member with newer file: +# RUN: llvm-ar rcU %t/single.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar ruU %t/single.a %t/new/1.o +# RUN: llvm-ar t %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE --implicit-check-not {{.}} + +# SINGLE: 1.o +# SINGLE-NEXT: 2.o +# SINGLE-NEXT: 3.o + +# RUN: llvm-nm --print-armap %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE-SYM + +# SINGLE-SYM: symbolnew1 +# SINGLE-SYM-NEXT: symbol2 +# SINGLE-SYM-NEXT: symbol3 + +## Replace new single member with older file: +# RUN: llvm-ar ruU %t/single.a %t/1.o +# RUN: llvm-ar t %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE --implicit-check-not {{.}} + +# RUN: llvm-nm --print-armap %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE-SYM + +## Replace multiple members 
with newer files: +# RUN: llvm-ar rcuU %t/multiple.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar ruU %t/multiple.a %t/new/1.o %t/new/3.o +# RUN: llvm-ar t %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}} + +# MULTIPLE: 1.o +# MULTIPLE-NEXT: 2.o +# MULTIPLE-NEXT: 3.o + +# RUN: llvm-nm --print-armap %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM + +# MULTIPLE-SYM: symbolnew1 +# MULTIPLE-SYM-NEXT: symbol2 +# MULTIPLE-SYM-NEXT: symbolnew3 + +## Replace newer members with multiple older files: +# RUN: llvm-ar ruU %t/multiple.a %t/1.o %t/2.o +# RUN: llvm-ar t %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}} + +# RUN: llvm-nm --print-armap %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM + +## Replace same member with newer files: +# RUN: llvm-ar rcuU %t/same.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar ruU %t/same.a %t/new/1.o %t/new/other/1.o +# RUN: llvm-ar t %t/same.a \ +# RUN: | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o --implicit-check-not {{.}} + +# SAME: 1.o +# SAME-NEXT: 2.o +# SAME-NEXT: 3.o +# SAME-NEXT: 1.o + +# RUN: llvm-nm --print-armap %t/same.a \ +# RUN: | FileCheck %s --check-prefix=SAME-SYM + +# SAME-SYM: symbolnew1 +# SAME-SYM-NEXT: symbol2 +# SAME-SYM-NEXT: symbol3 +# SAME-SYM-NEXT: symbolother1 + +## Replace multiple members with an older file and a newer file: +# RUN: llvm-ar rcuU %t/old-new.a %t/new/1.o %t/2.o %t/3.o +# RUN: llvm-ar ruU %t/old-new.a %t/1.o %t/new/3.o +# RUN: llvm-ar t %t/old-new.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}} + +# RUN: llvm-nm --print-armap %t/old-new.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM + +## Replace same member with an older file and a newer file: +# RUN: llvm-ar rcuU %t/old-new-same.a %t/new/1.o %t/2.o %t/3.o +# RUN: llvm-ar ruU %t/old-new-same.a %t/1.o %t/new/other/1.o +# RUN: llvm-ar t %t/old-new-same.a \ +# RUN: | FileCheck %s --check-prefix=SAME
-DFILE=%t/2.o --implicit-check-not {{.}} + +# RUN: llvm-nm --print-armap %t/old-new-same.a \ +# RUN: | FileCheck %s --check-prefix=SAME-SYM + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol1 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol2 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol3 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbolnew1 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbolnew3 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbolother1 + Binding: STB_GLOBAL + Section: .text diff --git a/llvm/test/tools/llvm-ar/replace.test b/llvm/test/tools/llvm-ar/replace.test new file mode 100644 index 0000000000000..2a7cddfdeb535 --- /dev/null +++ b/llvm/test/tools/llvm-ar/replace.test @@ -0,0 +1,178 @@ +## Test the replace command without modifiers replaces the relevant members.
+ +# RUN: rm -rf %t && mkdir -p %t/new/other +# RUN: yaml2obj %s -o %t/1.o --docnum=1 +# RUN: yaml2obj %s -o %t/2.o --docnum=2 +# RUN: yaml2obj %s -o %t/3.o --docnum=3 + +# RUN: yaml2obj %s -o %t/new/1.o --docnum=4 +# RUN: yaml2obj %s -o %t/new/3.o --docnum=5 + +# RUN: yaml2obj %s -o %t/new/other/1.o --docnum=6 + +## Replace single member: +# RUN: llvm-ar rc %t/single.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar r %t/single.a %t/new/1.o +# RUN: llvm-ar t %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE --implicit-check-not {{.}} + +# SINGLE: 1.o +# SINGLE-NEXT: 2.o +# SINGLE-NEXT: 3.o + +# RUN: llvm-nm --print-armap %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE-SYM + +# SINGLE-SYM: symbolnew1 +# SINGLE-SYM-NEXT: symbol2 +# SINGLE-SYM-NEXT: symbol3 + +## Replace multiple members: +# RUN: llvm-ar rc %t/multiple.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar r %t/multiple.a %t/new/1.o %t/new/3.o +# RUN: llvm-ar t %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}} + +# MULTIPLE: 1.o +# MULTIPLE-NEXT: 2.o +# MULTIPLE-NEXT: 3.o + +# RUN: llvm-nm --print-armap %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM + +# MULTIPLE-SYM: symbolnew1 +# MULTIPLE-SYM-NEXT: symbol2 +# MULTIPLE-SYM-NEXT: symbolnew3 + +## Replace same member: +# RUN: llvm-ar rc %t/same.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar r %t/same.a %t/new/1.o %t/new/other/1.o +# RUN: llvm-ar t %t/same.a \ +# RUN: | FileCheck %s --check-prefix=SAME --implicit-check-not {{.}} + +# SAME: 1.o +# SAME-NEXT: 2.o +# SAME-NEXT: 3.o +# SAME-NEXT: 1.o + +# RUN: llvm-nm --print-armap %t/same.a \ +# RUN: | FileCheck %s --check-prefix=SAME-SYM + +# SAME-SYM: symbolnew1 +# SAME-SYM-NEXT: symbol2 +# SAME-SYM-NEXT: symbol3 +# SAME-SYM-NEXT: symbolother1 + +## Replace without member: +# RUN: llvm-ar rc %t/without.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar r %t/without.a +# RUN: llvm-ar t %t/without.a \ +# RUN: | FileCheck %s --check-prefix=WITHOUT --implicit-check-not {{.}} 
+ +# WITHOUT: 1.o +# WITHOUT-NEXT: 2.o +# WITHOUT-NEXT: 3.o + +# RUN: llvm-nm --print-armap %t/without.a \ +# RUN: | FileCheck %s --check-prefix=WITHOUT-SYM + +# WITHOUT-SYM: symbol1 +# WITHOUT-SYM-NEXT: symbol2 +# WITHOUT-SYM-NEXT: symbol3 + +## No archive: +# RUN: not llvm-ar r 2>&1 \ +# RUN: | FileCheck %s --check-prefix=NO-ARCHIVE + +# NO-ARCHIVE: error: An archive name must be specified. + +## Member does not exist: +# RUN: llvm-ar rc %t/missing.a %t/1.o %t/2.o %t/3.o +# RUN: not llvm-ar r %t/missing.a %t/missing.txt 2>&1 \ +# RUN: | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt + +# MISSING-FILE: error: [[FILE]]: {{[Nn]}}o such file or directory. + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol1 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol2 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol3 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbolnew1 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbolnew3 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbolother1 + Binding: STB_GLOBAL + Section: .text From 
11b06242a7e554f489c10bc9613be3e3c9e0c70a Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 17 Jul 2019 10:17:47 +0000 Subject: [PATCH 329/451] [clangd] Fix error message in tweaktests to be useful. NFC llvm-svn: 366311 --- clang-tools-extra/clangd/unittests/TweakTests.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/clangd/unittests/TweakTests.cpp b/clang-tools-extra/clangd/unittests/TweakTests.cpp index 7a0d88405b4f2..69f74e9d41b97 100644 --- a/clang-tools-extra/clangd/unittests/TweakTests.cpp +++ b/clang-tools-extra/clangd/unittests/TweakTests.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" #include "llvm/Testing/Support/Error.h" +#include "gmock/gmock-matchers.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include @@ -134,10 +135,9 @@ void checkApplyContainsError(llvm::StringRef ID, llvm::StringRef Input, auto Result = apply(ID, Input); ASSERT_FALSE(Result) << "expected error message:\n " << ErrorMessage << "\non input:" << Input; - EXPECT_NE(std::string::npos, - llvm::toString(Result.takeError()).find(ErrorMessage)) - << "Wrong error message:\n " << llvm::toString(Result.takeError()) - << "\nexpected:\n " << ErrorMessage; + EXPECT_THAT(llvm::toString(Result.takeError()), + testing::HasSubstr(ErrorMessage)) + << Input; } TEST(TweakTest, SwapIfBranches) { From 52c39396151978ca946e2a80d9118c8672bace14 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Wed, 17 Jul 2019 10:53:13 +0000 Subject: [PATCH 330/451] [TableGen] Do not set ReadNone attribute on intrinsics with side effects If an intrinsic is defined without outputs, but having side effects, it still can be removed completely from the program. This patch makes TableGen not set Attribute::ReadNone for intrinsics which are declared with IntrHasSideEffects. 
Differential Revision: https://reviews.llvm.org/D64414 llvm-svn: 366312 --- llvm/test/TableGen/intrin-side-effects.td | 39 ++++++++++++++++++++++ llvm/utils/TableGen/CodeGenDAGPatterns.cpp | 2 +- llvm/utils/TableGen/IntrinsicEmitter.cpp | 4 ++- 3 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 llvm/test/TableGen/intrin-side-effects.td diff --git a/llvm/test/TableGen/intrin-side-effects.td b/llvm/test/TableGen/intrin-side-effects.td new file mode 100644 index 0000000000000..7588855830fae --- /dev/null +++ b/llvm/test/TableGen/intrin-side-effects.td @@ -0,0 +1,39 @@ +// RUN: llvm-tblgen -gen-intrinsic-impl -I %p/../../include %s | FileCheck %s + +// Get the minimum blurb necessary to process ... +include "llvm/CodeGen/ValueTypes.td" +include "llvm/CodeGen/SDNodeProperties.td" + +class LLVMType { + ValueType VT = vt; + int isAny = 0; +} + +def llvm_i32_ty : LLVMType; + +class IntrinsicProperty; +def IntrNoMem : IntrinsicProperty; +def IntrHasSideEffects : IntrinsicProperty; + + +class Intrinsic ret_types, + list param_types = [], + list intr_properties = [], + string name = "", + list sd_properties = []> : SDPatternOperator { + string LLVMName = name; + string TargetPrefix = ""; + list RetTypes = ret_types; + list ParamTypes = param_types; + list IntrProperties = intr_properties; + let Properties = sd_properties; + + bit isTarget = 0; +} + +// ... this intrinsic. +def int_random_gen : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrHasSideEffects]>; + +// CHECK: 1, // llvm.random.gen +// CHECK: case 1: +// CHECK-NEXT: Atts[] = {Attribute::NoUnwind} diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp index a0e8696001b0e..c8f710d66a036 100644 --- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp @@ -2807,7 +2807,7 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit, // chain. 
if (Int.IS.RetVTs.empty()) Operator = getDAGPatterns().get_intrinsic_void_sdnode(); - else if (Int.ModRef != CodeGenIntrinsic::NoMem) + else if (Int.ModRef != CodeGenIntrinsic::NoMem || Int.hasSideEffects) // Has side-effects, requires chain. Operator = getDAGPatterns().get_intrinsic_w_chain_sdnode(); else // Otherwise, no chain. diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp index bcb8af2fc56bf..6bcdc3d777a2d 100644 --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -685,7 +685,7 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, } if (!intrinsic.canThrow || - intrinsic.ModRef != CodeGenIntrinsic::ReadWriteMem || + (intrinsic.ModRef != CodeGenIntrinsic::ReadWriteMem && !intrinsic.hasSideEffects) || intrinsic.isNoReturn || intrinsic.isCold || intrinsic.isNoDuplicate || intrinsic.isConvergent || intrinsic.isSpeculatable) { OS << " const Attribute::AttrKind Atts[] = {"; @@ -727,6 +727,8 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, switch (intrinsic.ModRef) { case CodeGenIntrinsic::NoMem: + if (intrinsic.hasSideEffects) + break; if (addComma) OS << ","; OS << "Attribute::ReadNone"; From a256b8b7d77cdc9cf4675dcd784d008cb545b10b Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Wed, 17 Jul 2019 11:22:19 +0000 Subject: [PATCH 331/451] AMDGPU: Improve alias analysis for GDS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: GDS cannot alias anything else. 
Original patch by: Marek Olšák Reviewers: arsenm, mareko Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, Petar.Avramovic, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64114 Change-Id: I07bfbd96f5d5c37a6dfba7997df12f291dd794b0 llvm-svn: 366313 --- .../lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp | 8 +-- .../CodeGen/AMDGPU/amdgpu-alias-analysis.ll | 51 ++++++++++++++++--- 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp index de54db3b1b02d..bba132c3bc46f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp @@ -56,13 +56,13 @@ void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { // These arrays are indexed by address space value enum elements 0 ... to 7 static const AliasResult ASAliasRules[8][8] = { /* Flat Global Region Group Constant Private Constant 32-bit Buffer Fat Ptr */ - /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}, + /* Flat */ {MayAlias, MayAlias, NoAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}, /* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, MayAlias}, - /* Region */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, NoAlias}, + /* Region */ {NoAlias, NoAlias , MayAlias, NoAlias , NoAlias, NoAlias , NoAlias, NoAlias}, /* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias , NoAlias , NoAlias}, - /* Constant */ {MayAlias, MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}, + /* Constant */ {MayAlias, MayAlias, NoAlias, NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}, /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias , NoAlias}, - /* Constant 32-bit */ {MayAlias, MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , NoAlias , MayAlias}, + /* Constant 
32-bit */ {MayAlias, MayAlias, NoAlias, NoAlias , MayAlias, NoAlias , NoAlias , MayAlias}, /* Buffer Fat Ptr */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, MayAlias} }; diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll index 1f64208cf99d3..25ec7af9d2318 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll @@ -51,42 +51,77 @@ define void @test_999_1(i8 addrspace(999)* %p, i8 addrspace(1)* %p1) { ret void } +; CHECK: NoAlias: i8 addrspace(2)* %p, i8* %p1 +define void @test_region_vs_flat(i8 addrspace(2)* %p, i8 addrspace(0)* %p1) { + ret void +} + +; CHECK: NoAlias: i8 addrspace(1)* %p1, i8 addrspace(2)* %p +define void @test_region_vs_global(i8 addrspace(2)* %p, i8 addrspace(1)* %p1) { + ret void +} + +; CHECK: MayAlias: i8 addrspace(2)* %p, i8 addrspace(2)* %p1 +define void @test_region(i8 addrspace(2)* %p, i8 addrspace(2)* %p1) { + ret void +} + +; CHECK: NoAlias: i8 addrspace(2)* %p, i8 addrspace(3)* %p1 +define void @test_region_vs_group(i8 addrspace(2)* %p, i8 addrspace(3)* %p1) { + ret void +} + +; CHECK: NoAlias: i8 addrspace(2)* %p, i8 addrspace(4)* %p1 +define void @test_region_vs_constant(i8 addrspace(2)* %p, i8 addrspace(4)* %p1) { + ret void +} + +; CHECK: NoAlias: i8 addrspace(2)* %p, i8 addrspace(5)* %p1 +define void @test_region_vs_private(i8 addrspace(2)* %p, i8 addrspace(5)* %p1) { + ret void +} + +; CHECK: NoAlias: i8 addrspace(2)* %p, i8 addrspace(6)* %p1 +define void @test_region_vs_const32(i8 addrspace(2)* %p, i8 addrspace(6)* %p1) { + ret void +} + ; CHECK: MayAlias: i8 addrspace(7)* %p, i8* %p1 define void @test_7_0(i8 addrspace(7)* %p, i8 addrspace(0)* %p1) { - ret void + ret void } ; CHECK: MayAlias: i8 addrspace(1)* %p1, i8 addrspace(7)* %p define void @test_7_1(i8 addrspace(7)* %p, i8 addrspace(1)* %p1) { - ret void + ret void } ; CHECK: NoAlias: i8 addrspace(2)* %p1, i8 addrspace(7)* 
%p define void @test_7_2(i8 addrspace(7)* %p, i8 addrspace(2)* %p1) { - ret void + ret void } ; CHECK: NoAlias: i8 addrspace(3)* %p1, i8 addrspace(7)* %p define void @test_7_3(i8 addrspace(7)* %p, i8 addrspace(3)* %p1) { - ret void + ret void } ; CHECK: MayAlias: i8 addrspace(4)* %p1, i8 addrspace(7)* %p define void @test_7_4(i8 addrspace(7)* %p, i8 addrspace(4)* %p1) { - ret void + ret void } ; CHECK: NoAlias: i8 addrspace(5)* %p1, i8 addrspace(7)* %p define void @test_7_5(i8 addrspace(7)* %p, i8 addrspace(5)* %p1) { - ret void + ret void } ; CHECK: MayAlias: i8 addrspace(6)* %p1, i8 addrspace(7)* %p define void @test_7_6(i8 addrspace(7)* %p, i8 addrspace(6)* %p1) { - ret void + ret void } ; CHECK: MayAlias: i8 addrspace(7)* %p, i8 addrspace(7)* %p1 define void @test_7_7(i8 addrspace(7)* %p, i8 addrspace(7)* %p1) { - ret void + ret void } From 8b7041a5c6f0a373d4886ca807d89790ad6dedab Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Wed, 17 Jul 2019 11:22:57 +0000 Subject: [PATCH 332/451] AMDGPU/GFX10: Apply the VMEM-to-scalar-write hazard also to writes to EXEC Summary: Change-Id: I854fbf7d48e937bef9f8f3f5d0c8aeb970652630 Reviewers: rampitec, mareko Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64807 Change-Id: I4405b3a7f84186acea5a78d291bff71056e745fc llvm-svn: 366314 --- llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 2 +- .../AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll | 1 + llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir | 14 ++++++++++++++ llvm/test/CodeGen/AMDGPU/wave32.ll | 2 ++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index a23348e18f92d..885239e2faed3 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -920,7 +920,7 @@ bool 
GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) { for (const MachineOperand &Def : MI->defs()) { MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI); - if (!Op || (Op->isImplicit() && Op->getReg() == AMDGPU::EXEC)) + if (!Op) continue; return true; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll index f35b0b43d3694..0f04c0c445f53 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll @@ -14,6 +14,7 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) { ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: global_store_dword v[0:1], v0, off ; GCN-NEXT: BB0_2: ; %bb +; GCN-NEXT: v_nop ; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: global_store_dword v[0:1], v0, off diff --git a/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir b/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir index 630070c13a310..9d45c5b19e656 100644 --- a/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir @@ -92,6 +92,7 @@ body: | ... # GCN-LABEL: name: vmem_write_exec_impread # GCN: BUFFER_LOAD_DWORD_OFFEN +# GCN: V_NOP # GCN-NEXT: S_MOV_B64 --- name: vmem_write_exec_impread @@ -208,3 +209,16 @@ body: | $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec S_BRANCH %bb.0 ... +# GCN-LABEL: name: ds_write_exec +# GCN: DS_WRITE_B32_gfx9 +# GCN-NEXT: V_NOP +# GCN-NEXT: S_MOV_B32 +--- +name: ds_write_exec +body: | + bb.0: + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + DS_WRITE_B32_gfx9 $vgpr0, $vgpr1, 0, 0, implicit $exec + $exec_lo = S_MOV_B32 -1 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index 781ebbb268fec..a71ca5db7658d 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -1073,6 +1073,7 @@ declare void @external_void_func_void() #1 ; GFX1064-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GFX1032-NEXT: s_or_saveexec_b32 [[COPY_EXEC0:s[0-9]]], -1{{$}} ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-NEXT: v_nop ; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC0]] @@ -1095,6 +1096,7 @@ declare void @external_void_func_void() #1 ; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}} ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-NEXT: v_nop ; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) From e14cfe2d2ea2d0ef7eccc7905dbe01e943fea9e8 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Wed, 17 Jul 2019 11:24:37 +0000 Subject: [PATCH 333/451] [AArch64] Consistent types and naming for AArch64 target features (NFC) Differential Revision: https://reviews.llvm.org/D64415 Committed as obvious. 
llvm-svn: 366315 --- clang/lib/Basic/Targets/AArch64.cpp | 34 ++++++++++++++--------------- clang/lib/Basic/Targets/AArch64.h | 15 +++++++------ 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index a02530ad06756..74ac69ab8946a 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -199,13 +199,13 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, if (FPU & SveMode) Builder.defineMacro("__ARM_FEATURE_SVE", "1"); - if (CRC) + if (HasCRC) Builder.defineMacro("__ARM_FEATURE_CRC32", "1"); - if (Crypto) + if (HasCrypto) Builder.defineMacro("__ARM_FEATURE_CRYPTO", "1"); - if (Unaligned) + if (HasUnaligned) Builder.defineMacro("__ARM_FEATURE_UNALIGNED", "1"); if ((FPU & NeonMode) && HasFullFP16) @@ -263,13 +263,13 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const { bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, DiagnosticsEngine &Diags) { FPU = FPUMode; - CRC = 0; - Crypto = 0; - Unaligned = 1; - HasFullFP16 = 0; - HasDotProd = 0; - HasFP16FML = 0; - HasMTE = 0; + HasCRC = false; + HasCrypto = false; + HasUnaligned = true; + HasFullFP16 = false; + HasDotProd = false; + HasFP16FML = false; + HasMTE = false; ArchKind = llvm::AArch64::ArchKind::ARMV8A; for (const auto &Feature : Features) { @@ -278,11 +278,11 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, if (Feature == "+sve") FPU |= SveMode; if (Feature == "+crc") - CRC = 1; + HasCRC = true; if (Feature == "+crypto") - Crypto = 1; + HasCrypto = true; if (Feature == "+strict-align") - Unaligned = 0; + HasUnaligned = false; if (Feature == "+v8.1a") ArchKind = llvm::AArch64::ArchKind::ARMV8_1A; if (Feature == "+v8.2a") @@ -294,13 +294,13 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, if (Feature == "+v8.5a") ArchKind = llvm::AArch64::ArchKind::ARMV8_5A; if (Feature == "+fullfp16") - HasFullFP16 = 1; + 
HasFullFP16 = true; if (Feature == "+dotprod") - HasDotProd = 1; + HasDotProd = true; if (Feature == "+fp16fml") - HasFP16FML = 1; + HasFP16FML = true; if (Feature == "+mte") - HasMTE = 1; + HasMTE = true; } setDataLayout(); diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index de0aed78e037e..5833c146003b0 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -28,13 +28,14 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { enum FPUModeEnum { FPUMode, NeonMode = (1 << 0), SveMode = (1 << 1) }; unsigned FPU; - unsigned CRC; - unsigned Crypto; - unsigned Unaligned; - unsigned HasFullFP16; - unsigned HasDotProd; - unsigned HasFP16FML; - unsigned HasMTE; + bool HasCRC; + bool HasCrypto; + bool HasUnaligned; + bool HasFullFP16; + bool HasDotProd; + bool HasFP16FML; + bool HasMTE; + llvm::AArch64::ArchKind ArchKind; static const Builtin::Info BuiltinInfo[]; From 60bd7a9c13bdb6da854c4a7b595407c0ce18b55d Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Wed, 17 Jul 2019 11:48:29 +0000 Subject: [PATCH 334/451] [lldb][NFC] Tablegenify watchpoint commands Part of the project that migrates these struct initializers to our new lldb-tablegen. 
llvm-svn: 366316 --- .../Commands/CommandObjectWatchpoint.cpp | 17 +++------ .../CommandObjectWatchpointCommand.cpp | 8 +--- lldb/source/Commands/Options.td | 38 +++++++++++++++++++ 3 files changed, 46 insertions(+), 17 deletions(-) diff --git a/lldb/source/Commands/CommandObjectWatchpoint.cpp b/lldb/source/Commands/CommandObjectWatchpoint.cpp index 3c3bd2d12095a..98e758b7ef6a7 100644 --- a/lldb/source/Commands/CommandObjectWatchpoint.cpp +++ b/lldb/source/Commands/CommandObjectWatchpoint.cpp @@ -148,11 +148,8 @@ bool CommandObjectMultiwordWatchpoint::VerifyWatchpointIDs( #pragma mark List::CommandOptions static constexpr OptionDefinition g_watchpoint_list_options[] = { - // clang-format off - { LLDB_OPT_SET_1, false, "brief", 'b', OptionParser::eNoArgument, nullptr, {}, 0, eArgTypeNone, "Give a brief description of the watchpoint (no location info)." }, - { LLDB_OPT_SET_2, false, "full", 'f', OptionParser::eNoArgument, nullptr, {}, 0, eArgTypeNone, "Give a full description of the watchpoint and its locations." }, - { LLDB_OPT_SET_3, false, "verbose", 'v', OptionParser::eNoArgument, nullptr, {}, 0, eArgTypeNone, "Explain everything we know about the watchpoint (for debugging debugger bugs)." } - // clang-format on +#define LLDB_OPTIONS_watchpoint_list +#include "CommandOptions.inc" }; #pragma mark List @@ -511,9 +508,8 @@ class CommandObjectWatchpointDelete : public CommandObjectParsed { #pragma mark Ignore::CommandOptions static constexpr OptionDefinition g_watchpoint_ignore_options[] = { - // clang-format off - { LLDB_OPT_SET_ALL, true, "ignore-count", 'i', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeCount, "Set the number of times this watchpoint is skipped before stopping." 
} - // clang-format on +#define LLDB_OPTIONS_watchpoint_ignore +#include "CommandOptions.inc" }; class CommandObjectWatchpointIgnore : public CommandObjectParsed { @@ -631,9 +627,8 @@ class CommandObjectWatchpointIgnore : public CommandObjectParsed { #pragma mark Modify::CommandOptions static constexpr OptionDefinition g_watchpoint_modify_options[] = { - // clang-format off - { LLDB_OPT_SET_ALL, false, "condition", 'c', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeExpression, "The watchpoint stops only if this condition expression evaluates to true." } - // clang-format on +#define LLDB_OPTIONS_watchpoint_modify +#include "CommandOptions.inc" }; #pragma mark Modify diff --git a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp index 8be6688fc3a69..2be0b5b154e0f 100644 --- a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp +++ b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp @@ -43,12 +43,8 @@ static constexpr OptionEnumValues ScriptOptionEnum() { } static constexpr OptionDefinition g_watchpoint_command_add_options[] = { - // clang-format off - { LLDB_OPT_SET_1, false, "one-liner", 'o', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeOneLiner, "Specify a one-line watchpoint command inline. Be sure to surround it with quotes." }, - { LLDB_OPT_SET_ALL, false, "stop-on-error", 'e', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypeBoolean, "Specify whether watchpoint command execution should terminate on error." }, - { LLDB_OPT_SET_ALL, false, "script-type", 's', OptionParser::eRequiredArgument, nullptr, ScriptOptionEnum(), 0, eArgTypeNone, "Specify the language for the commands - if none is specified, the lldb command interpreter will be used." }, - { LLDB_OPT_SET_2, false, "python-function", 'F', OptionParser::eRequiredArgument, nullptr, {}, 0, eArgTypePythonFunction, "Give the name of a Python function to run as command for this watchpoint. 
Be sure to give a module name if appropriate." } - // clang-format on +#define LLDB_OPTIONS_watchpoint_command_add +#include "CommandOptions.inc" }; class CommandObjectWatchpointCommandAdd : public CommandObjectParsed, diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index 1f4c09c386096..1d1bbbf7b7041 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -51,3 +51,41 @@ let Command = "breakpoint list" in { Desc<"List Dummy breakpoints - i.e. breakpoints set before a file is " "provided, which prime new targets.">; } + +let Command = "watchpoint list" in { + def watchpoint_list_brief : Option<"brief", "b">, Group<1>, Desc<"Give a " + "brief description of the watchpoint (no location info).">; + def watchpoint_list_full : Option<"full", "f">, Group<2>, Desc<"Give a full " + "description of the watchpoint and its locations.">; + def watchpoint_list_verbose : Option<"verbose", "v">, Group<3>, Desc<"Explain" + "everything we know about the watchpoint (for debugging debugger bugs).">; +} + +let Command = "watchpoint ignore" in { + def watchpoint_ignore_ignore_count : Option<"ignore-count", "i">, + Arg<"Count">, Required, Desc<"Set the number of times this watchpoint is" + " skipped before stopping.">; +} + +let Command = "watchpoint modify" in { + def watchpoint_modify_condition : Option<"condition", "c">, Arg<"Expression">, + Desc<"The watchpoint stops only if this condition expression evaluates " + "to true.">; +} + +let Command = "watchpoint command add" in { + def watchpoint_command_add_one_liner : Option<"one-liner", "o">, Group<1>, + Arg<"OneLiner">, Desc<"Specify a one-line watchpoint command inline. 
Be " + "sure to surround it with quotes.">; + def watchpoint_command_add_stop_on_error : Option<"stop-on-error", "e">, + Arg<"Boolean">, Desc<"Specify whether watchpoint command execution should " + "terminate on error.">; + def watchpoint_command_add_script_type : Option<"script-type", "s">, + EnumArg<"None", "ScriptOptionEnum()">, Desc<"Specify the language for the" + " commands - if none is specified, the lldb command interpreter will be " + "used.">; + def watchpoint_command_add_python_function : Option<"python-function", "F">, + Group<2>, Arg<"PythonFunction">, Desc<"Give the name of a Python function " + "to run as command for this watchpoint. Be sure to give a module name if " + "appropriate.">; +} From 1e62635d0551578bf3899d2a1f4c835e30f2eed8 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Wed, 17 Jul 2019 12:08:01 +0000 Subject: [PATCH 335/451] [MIPS GlobalISel] ClampScalar and select pointer G_ICMP Add narrowScalar to half of original size for G_ICMP. ClampScalar G_ICMP's operands 2 and 3 to s32. Select G_ICMP for pointers for MIPS32. Pointer compare is same as for integers, it is enough to declare them as legal type.
Differential Revision: https://reviews.llvm.org/D64856 llvm-svn: 366317 --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 36 ++ llvm/lib/Target/Mips/MipsLegalizerInfo.cpp | 3 +- .../GlobalISel/instruction-select/icmp.mir | 168 +++--- .../Mips/GlobalISel/legalizer/icmp.mir | 487 +++++++++++++----- .../CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll | 335 +++++++++--- .../Mips/GlobalISel/regbankselect/icmp.mir | 280 +--------- 6 files changed, 754 insertions(+), 555 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index bf3cca4115a13..958e9b59cf443 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -747,6 +747,42 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_ICMP: { + uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); + if (NarrowSize * 2 != SrcSize) + return UnableToLegalize; + + Observer.changingInstr(MI); + Register LHSL = MRI.createGenericVirtualRegister(NarrowTy); + Register LHSH = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg()); + + Register RHSL = MRI.createGenericVirtualRegister(NarrowTy); + Register RHSH = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg()); + + CmpInst::Predicate Pred = + static_cast(MI.getOperand(1).getPredicate()); + + if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { + MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL); + MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH); + MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH); + MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0); + MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero); + } else { + const LLT s1 = 
LLT::scalar(1); + MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, s1, LHSH, RHSH); + MachineInstrBuilder CmpHEQ = + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, s1, LHSH, RHSH); + MachineInstrBuilder CmpLU = MIRBuilder.buildICmp( + ICmpInst::getUnsignedPredicate(Pred), s1, LHSL, RHSL); + MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH); + } + Observer.changedInstr(MI); + MI.eraseFromParent(); + return Legalized; + } } } diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp index 621f3e54a04bf..e442a81837edf 100644 --- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp +++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp @@ -81,7 +81,8 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) { .minScalar(1, s32); getActionDefinitionsBuilder(G_ICMP) - .legalFor({{s32, s32}}) + .legalForCartesianProduct({s32}, {s32, p0}) + .clampScalar(1, s32, s32) .minScalar(0, s32); getActionDefinitionsBuilder(G_CONSTANT) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/icmp.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/icmp.mir index 02086b0c86f61..0e6f1211b2b81 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/icmp.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/icmp.mir @@ -2,20 +2,22 @@ # RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32 --- | - define void @eq() {entry: ret void} - define void @ne() {entry: ret void} - define void @sgt() {entry: ret void} - define void @sge() {entry: ret void} - define void @slt() {entry: ret void} - define void @sle() {entry: ret void} - define void @ugt() {entry: ret void} - define void @uge() {entry: ret void} - define void @ult() {entry: ret void} - define void @ule() {entry: ret void} + define void @eq_i32() {entry: ret void} + define void @ne_i32() {entry: ret void} + define void @sgt_i32() {entry: ret void} + define 
void @sge_i32() {entry: ret void} + define void @slt_i32() {entry: ret void} + define void @sle_i32() {entry: ret void} + define void @ugt_i32() {entry: ret void} + define void @uge_i32() {entry: ret void} + define void @ult_i32() {entry: ret void} + define void @ule_i32() {entry: ret void} + define void @eq_ptr() {entry: ret void} + ... --- -name: eq +name: eq_i32 alignment: 2 legalized: true regBankSelected: true @@ -24,28 +26,24 @@ body: | bb.1.entry: liveins: $a0, $a1 - ; MIPS32-LABEL: name: eq + ; MIPS32-LABEL: name: eq_i32 ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; MIPS32: [[XOR:%[0-9]+]]:gpr32 = XOR [[COPY]], [[COPY1]] ; MIPS32: [[SLTiu:%[0-9]+]]:gpr32 = SLTiu [[XOR]], 1 - ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1 - ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[SLTiu]], [[ORi]] - ; MIPS32: $v0 = COPY [[AND]] + ; MIPS32: $v0 = COPY [[SLTiu]] ; MIPS32: RetRA implicit $v0 %0:gprb(s32) = COPY $a0 %1:gprb(s32) = COPY $a1 %4:gprb(s32) = G_ICMP intpred(eq), %0(s32), %1 - %5:gprb(s32) = G_CONSTANT i32 1 - %6:gprb(s32) = COPY %4(s32) - %3:gprb(s32) = G_AND %6, %5 + %3:gprb(s32) = COPY %4(s32) $v0 = COPY %3(s32) RetRA implicit $v0 ... 
--- -name: ne +name: ne_i32 alignment: 2 legalized: true regBankSelected: true @@ -54,28 +52,24 @@ body: | bb.1.entry: liveins: $a0, $a1 - ; MIPS32-LABEL: name: ne + ; MIPS32-LABEL: name: ne_i32 ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; MIPS32: [[XOR:%[0-9]+]]:gpr32 = XOR [[COPY]], [[COPY1]] ; MIPS32: [[SLTu:%[0-9]+]]:gpr32 = SLTu $zero, [[XOR]] - ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1 - ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[SLTu]], [[ORi]] - ; MIPS32: $v0 = COPY [[AND]] + ; MIPS32: $v0 = COPY [[SLTu]] ; MIPS32: RetRA implicit $v0 %0:gprb(s32) = COPY $a0 %1:gprb(s32) = COPY $a1 %4:gprb(s32) = G_ICMP intpred(ne), %0(s32), %1 - %5:gprb(s32) = G_CONSTANT i32 1 - %6:gprb(s32) = COPY %4(s32) - %3:gprb(s32) = G_AND %6, %5 + %3:gprb(s32) = COPY %4(s32) $v0 = COPY %3(s32) RetRA implicit $v0 ... --- -name: sgt +name: sgt_i32 alignment: 2 legalized: true regBankSelected: true @@ -84,27 +78,23 @@ body: | bb.1.entry: liveins: $a0, $a1 - ; MIPS32-LABEL: name: sgt + ; MIPS32-LABEL: name: sgt_i32 ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; MIPS32: [[SLT:%[0-9]+]]:gpr32 = SLT [[COPY1]], [[COPY]] - ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1 - ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[SLT]], [[ORi]] - ; MIPS32: $v0 = COPY [[AND]] + ; MIPS32: $v0 = COPY [[SLT]] ; MIPS32: RetRA implicit $v0 %0:gprb(s32) = COPY $a0 %1:gprb(s32) = COPY $a1 %4:gprb(s32) = G_ICMP intpred(sgt), %0(s32), %1 - %5:gprb(s32) = G_CONSTANT i32 1 - %6:gprb(s32) = COPY %4(s32) - %3:gprb(s32) = G_AND %6, %5 + %3:gprb(s32) = COPY %4(s32) $v0 = COPY %3(s32) RetRA implicit $v0 ... 
--- -name: sge +name: sge_i32 alignment: 2 legalized: true regBankSelected: true @@ -113,28 +103,24 @@ body: | bb.1.entry: liveins: $a0, $a1 - ; MIPS32-LABEL: name: sge + ; MIPS32-LABEL: name: sge_i32 ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; MIPS32: [[SLT:%[0-9]+]]:gpr32 = SLT [[COPY]], [[COPY1]] ; MIPS32: [[XORi:%[0-9]+]]:gpr32 = XORi [[SLT]], 1 - ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1 - ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[XORi]], [[ORi]] - ; MIPS32: $v0 = COPY [[AND]] + ; MIPS32: $v0 = COPY [[XORi]] ; MIPS32: RetRA implicit $v0 %0:gprb(s32) = COPY $a0 %1:gprb(s32) = COPY $a1 %4:gprb(s32) = G_ICMP intpred(sge), %0(s32), %1 - %5:gprb(s32) = G_CONSTANT i32 1 - %6:gprb(s32) = COPY %4(s32) - %3:gprb(s32) = G_AND %6, %5 + %3:gprb(s32) = COPY %4(s32) $v0 = COPY %3(s32) RetRA implicit $v0 ... --- -name: slt +name: slt_i32 alignment: 2 legalized: true regBankSelected: true @@ -143,27 +129,23 @@ body: | bb.1.entry: liveins: $a0, $a1 - ; MIPS32-LABEL: name: slt + ; MIPS32-LABEL: name: slt_i32 ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; MIPS32: [[SLT:%[0-9]+]]:gpr32 = SLT [[COPY]], [[COPY1]] - ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1 - ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[SLT]], [[ORi]] - ; MIPS32: $v0 = COPY [[AND]] + ; MIPS32: $v0 = COPY [[SLT]] ; MIPS32: RetRA implicit $v0 %0:gprb(s32) = COPY $a0 %1:gprb(s32) = COPY $a1 %4:gprb(s32) = G_ICMP intpred(slt), %0(s32), %1 - %5:gprb(s32) = G_CONSTANT i32 1 - %6:gprb(s32) = COPY %4(s32) - %3:gprb(s32) = G_AND %6, %5 + %3:gprb(s32) = COPY %4(s32) $v0 = COPY %3(s32) RetRA implicit $v0 ... 
--- -name: sle +name: sle_i32 alignment: 2 legalized: true regBankSelected: true @@ -172,28 +154,24 @@ body: | bb.1.entry: liveins: $a0, $a1 - ; MIPS32-LABEL: name: sle + ; MIPS32-LABEL: name: sle_i32 ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; MIPS32: [[SLT:%[0-9]+]]:gpr32 = SLT [[COPY1]], [[COPY]] ; MIPS32: [[XORi:%[0-9]+]]:gpr32 = XORi [[SLT]], 1 - ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1 - ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[XORi]], [[ORi]] - ; MIPS32: $v0 = COPY [[AND]] + ; MIPS32: $v0 = COPY [[XORi]] ; MIPS32: RetRA implicit $v0 %0:gprb(s32) = COPY $a0 %1:gprb(s32) = COPY $a1 %4:gprb(s32) = G_ICMP intpred(sle), %0(s32), %1 - %5:gprb(s32) = G_CONSTANT i32 1 - %6:gprb(s32) = COPY %4(s32) - %3:gprb(s32) = G_AND %6, %5 + %3:gprb(s32) = COPY %4(s32) $v0 = COPY %3(s32) RetRA implicit $v0 ... --- -name: ugt +name: ugt_i32 alignment: 2 legalized: true regBankSelected: true @@ -202,27 +180,23 @@ body: | bb.1.entry: liveins: $a0, $a1 - ; MIPS32-LABEL: name: ugt + ; MIPS32-LABEL: name: ugt_i32 ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; MIPS32: [[SLTu:%[0-9]+]]:gpr32 = SLTu [[COPY1]], [[COPY]] - ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1 - ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[SLTu]], [[ORi]] - ; MIPS32: $v0 = COPY [[AND]] + ; MIPS32: $v0 = COPY [[SLTu]] ; MIPS32: RetRA implicit $v0 %0:gprb(s32) = COPY $a0 %1:gprb(s32) = COPY $a1 %4:gprb(s32) = G_ICMP intpred(ugt), %0(s32), %1 - %5:gprb(s32) = G_CONSTANT i32 1 - %6:gprb(s32) = COPY %4(s32) - %3:gprb(s32) = G_AND %6, %5 + %3:gprb(s32) = COPY %4(s32) $v0 = COPY %3(s32) RetRA implicit $v0 ... 
--- -name: uge +name: uge_i32 alignment: 2 legalized: true regBankSelected: true @@ -231,28 +205,24 @@ body: | bb.1.entry: liveins: $a0, $a1 - ; MIPS32-LABEL: name: uge + ; MIPS32-LABEL: name: uge_i32 ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; MIPS32: [[SLTu:%[0-9]+]]:gpr32 = SLTu [[COPY]], [[COPY1]] ; MIPS32: [[XORi:%[0-9]+]]:gpr32 = XORi [[SLTu]], 1 - ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1 - ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[XORi]], [[ORi]] - ; MIPS32: $v0 = COPY [[AND]] + ; MIPS32: $v0 = COPY [[XORi]] ; MIPS32: RetRA implicit $v0 %0:gprb(s32) = COPY $a0 %1:gprb(s32) = COPY $a1 %4:gprb(s32) = G_ICMP intpred(uge), %0(s32), %1 - %5:gprb(s32) = G_CONSTANT i32 1 - %6:gprb(s32) = COPY %4(s32) - %3:gprb(s32) = G_AND %6, %5 + %3:gprb(s32) = COPY %4(s32) $v0 = COPY %3(s32) RetRA implicit $v0 ... --- -name: ult +name: ult_i32 alignment: 2 legalized: true regBankSelected: true @@ -261,27 +231,23 @@ body: | bb.1.entry: liveins: $a0, $a1 - ; MIPS32-LABEL: name: ult + ; MIPS32-LABEL: name: ult_i32 ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; MIPS32: [[SLTu:%[0-9]+]]:gpr32 = SLTu [[COPY]], [[COPY1]] - ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1 - ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[SLTu]], [[ORi]] - ; MIPS32: $v0 = COPY [[AND]] + ; MIPS32: $v0 = COPY [[SLTu]] ; MIPS32: RetRA implicit $v0 %0:gprb(s32) = COPY $a0 %1:gprb(s32) = COPY $a1 %4:gprb(s32) = G_ICMP intpred(ult), %0(s32), %1 - %5:gprb(s32) = G_CONSTANT i32 1 - %6:gprb(s32) = COPY %4(s32) - %3:gprb(s32) = G_AND %6, %5 + %3:gprb(s32) = COPY %4(s32) $v0 = COPY %3(s32) RetRA implicit $v0 ... 
--- -name: ule +name: ule_i32 alignment: 2 legalized: true regBankSelected: true @@ -290,22 +256,44 @@ body: | bb.1.entry: liveins: $a0, $a1 - ; MIPS32-LABEL: name: ule + ; MIPS32-LABEL: name: ule_i32 ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 ; MIPS32: [[SLTu:%[0-9]+]]:gpr32 = SLTu [[COPY1]], [[COPY]] ; MIPS32: [[XORi:%[0-9]+]]:gpr32 = XORi [[SLTu]], 1 - ; MIPS32: [[ORi:%[0-9]+]]:gpr32 = ORi $zero, 1 - ; MIPS32: [[AND:%[0-9]+]]:gpr32 = AND [[XORi]], [[ORi]] - ; MIPS32: $v0 = COPY [[AND]] + ; MIPS32: $v0 = COPY [[XORi]] ; MIPS32: RetRA implicit $v0 %0:gprb(s32) = COPY $a0 %1:gprb(s32) = COPY $a1 %4:gprb(s32) = G_ICMP intpred(ule), %0(s32), %1 - %5:gprb(s32) = G_CONSTANT i32 1 - %6:gprb(s32) = COPY %4(s32) - %3:gprb(s32) = G_AND %6, %5 + %3:gprb(s32) = COPY %4(s32) + $v0 = COPY %3(s32) + RetRA implicit $v0 + +... +--- +name: eq_ptr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; MIPS32-LABEL: name: eq_ptr + ; MIPS32: liveins: $a0, $a1 + ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; MIPS32: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1 + ; MIPS32: [[XOR:%[0-9]+]]:gpr32 = XOR [[COPY]], [[COPY1]] + ; MIPS32: [[SLTiu:%[0-9]+]]:gpr32 = SLTiu [[XOR]], 1 + ; MIPS32: $v0 = COPY [[SLTiu]] + ; MIPS32: RetRA implicit $v0 + %0:gprb(p0) = COPY $a0 + %1:gprb(p0) = COPY $a1 + %4:gprb(s32) = G_ICMP intpred(eq), %0(p0), %1 + %3:gprb(s32) = COPY %4(s32) $v0 = COPY %3(s32) RetRA implicit $v0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/icmp.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/icmp.mir index 7f0196f902267..c60767a1afc9a 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/icmp.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/icmp.mir @@ -2,276 +2,503 @@ # RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32 --- | - define void @eq() 
{entry: ret void} - define void @ne() {entry: ret void} - define void @sgt() {entry: ret void} - define void @sge() {entry: ret void} - define void @slt() {entry: ret void} - define void @sle() {entry: ret void} - define void @ugt() {entry: ret void} - define void @uge() {entry: ret void} - define void @ult() {entry: ret void} - define void @ule() {entry: ret void} + define void @ne_i32() {entry: ret void} + define void @eq_ptr() {entry: ret void} + define void @ult_i8() {entry: ret void} + define void @slt_i16() {entry: ret void} + define void @eq_i64() {entry: ret void} + define void @ne_i64() {entry: ret void} + define void @sgt_i64() {entry: ret void} + define void @sge_i64() {entry: ret void} + define void @slt_i64() {entry: ret void} + define void @sle_i64() {entry: ret void} + define void @ugt_i64() {entry: ret void} + define void @uge_i64() {entry: ret void} + define void @ult_i64() {entry: ret void} + define void @ule_i64() {entry: ret void} ... --- -name: eq +name: ne_i32 alignment: 2 tracksRegLiveness: true body: | bb.1.entry: liveins: $a0, $a1 - ; MIPS32-LABEL: name: eq + ; MIPS32-LABEL: name: ne_i32 ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) + ; MIPS32: $v0 = COPY [[COPY2]](s32) ; MIPS32: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 - %2:_(s1) = G_ICMP intpred(eq), %0(s32), %1 - %3:_(s32) = G_ZEXT %2(s1) + %2:_(s1) = G_ICMP intpred(ne), %0(s32), %1 + %3:_(s32) = G_ANYEXT %2(s1) $v0 = COPY %3(s32) RetRA implicit $v0 ... 
--- -name: ne +name: eq_ptr alignment: 2 tracksRegLiveness: true body: | bb.1.entry: liveins: $a0, $a1 - ; MIPS32-LABEL: name: ne + ; MIPS32-LABEL: name: eq_ptr ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](p0), [[COPY1]] ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) + ; MIPS32: $v0 = COPY [[COPY2]](s32) ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %2:_(s1) = G_ICMP intpred(ne), %0(s32), %1 - %3:_(s32) = G_ZEXT %2(s1) + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %2:_(s1) = G_ICMP intpred(eq), %0(p0), %1 + %3:_(s32) = G_ANYEXT %2(s1) $v0 = COPY %3(s32) RetRA implicit $v0 ... 
--- -name: sgt +name: ult_i8 alignment: 2 tracksRegLiveness: true body: | bb.1.entry: liveins: $a0, $a1 - ; MIPS32-LABEL: name: sgt + ; MIPS32-LABEL: name: ult_i8 ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]] - ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) + ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) + ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]] + ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) + ; MIPS32: $v0 = COPY [[COPY4]](s32) ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %2:_(s1) = G_ICMP intpred(sgt), %0(s32), %1 - %3:_(s32) = G_ZEXT %2(s1) - $v0 = COPY %3(s32) + %2:_(s32) = COPY $a0 + %0:_(s8) = G_TRUNC %2(s32) + %3:_(s32) = COPY $a1 + %1:_(s8) = G_TRUNC %3(s32) + %4:_(s1) = G_ICMP intpred(ult), %0(s8), %1 + %5:_(s32) = G_ANYEXT %4(s1) + $v0 = COPY %5(s32) RetRA implicit $v0 ... 
--- -name: sge +name: slt_i16 alignment: 2 tracksRegLiveness: true body: | bb.1.entry: liveins: $a0, $a1 - ; MIPS32-LABEL: name: sge + ; MIPS32-LABEL: name: slt_i16 ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), [[COPY]](s32), [[COPY1]] + ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; MIPS32: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; MIPS32: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]](s32) + ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; MIPS32: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C]](s32) + ; MIPS32: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[C]](s32) + ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[ASHR]](s32), [[ASHR1]] + ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) + ; MIPS32: $v0 = COPY [[COPY4]](s32) + ; MIPS32: RetRA implicit $v0 + %2:_(s32) = COPY $a0 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $a1 + %1:_(s16) = G_TRUNC %3(s32) + %4:_(s1) = G_ICMP intpred(slt), %0(s16), %1 + %5:_(s32) = G_ANYEXT %4(s1) + $v0 = COPY %5(s32) + RetRA implicit $v0 + +... 
+--- +name: eq_i64 +alignment: 2 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2, $a3 + + ; MIPS32-LABEL: name: eq_i64 + ; MIPS32: liveins: $a0, $a1, $a2, $a3 + ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY2]] + ; MIPS32: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY3]] + ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[XOR1]] + ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s32), [[C]] + ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) + ; MIPS32: $v0 = COPY [[COPY4]](s32) + ; MIPS32: RetRA implicit $v0 + %2:_(s32) = COPY $a0 + %3:_(s32) = COPY $a1 + %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %4:_(s32) = COPY $a2 + %5:_(s32) = COPY $a3 + %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s1) = G_ICMP intpred(eq), %0(s64), %1 + %7:_(s32) = G_ANYEXT %6(s1) + $v0 = COPY %7(s32) + RetRA implicit $v0 + +... 
+--- +name: ne_i64 +alignment: 2 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2, $a3 + + ; MIPS32-LABEL: name: ne_i64 + ; MIPS32: liveins: $a0, $a1, $a2, $a3 + ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[COPY2]] + ; MIPS32: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[COPY3]] + ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR]], [[XOR1]] + ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[OR]](s32), [[C]] + ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) + ; MIPS32: $v0 = COPY [[COPY4]](s32) + ; MIPS32: RetRA implicit $v0 + %2:_(s32) = COPY $a0 + %3:_(s32) = COPY $a1 + %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %4:_(s32) = COPY $a2 + %5:_(s32) = COPY $a3 + %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s1) = G_ICMP intpred(ne), %0(s64), %1 + %7:_(s32) = G_ANYEXT %6(s1) + $v0 = COPY %7(s32) + RetRA implicit $v0 + +... 
+--- +name: sgt_i64 +alignment: 2 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2, $a3 + + ; MIPS32-LABEL: name: sgt_i64 + ; MIPS32: liveins: $a0, $a1, $a2, $a3 + ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY1]](s32), [[COPY3]] + ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY2]] + ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32) + ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) + ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32) + ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] + ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY4]], [[COPY5]] + ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32) + ; MIPS32: $v0 = COPY [[COPY7]](s32) ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %2:_(s1) = G_ICMP intpred(sge), %0(s32), %1 - %3:_(s32) = G_ZEXT %2(s1) - $v0 = COPY %3(s32) + %2:_(s32) = COPY $a0 + %3:_(s32) = COPY $a1 + %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %4:_(s32) = COPY $a2 + %5:_(s32) = COPY $a3 + %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s1) = G_ICMP intpred(sgt), %0(s64), %1 + %7:_(s32) = G_ANYEXT %6(s1) + $v0 = COPY %7(s32) RetRA implicit $v0 ... 
--- -name: slt +name: sge_i64 alignment: 2 tracksRegLiveness: true body: | bb.1.entry: - liveins: $a0, $a1 + liveins: $a0, $a1, $a2, $a3 - ; MIPS32-LABEL: name: slt - ; MIPS32: liveins: $a0, $a1 + ; MIPS32-LABEL: name: sge_i64 + ; MIPS32: liveins: $a0, $a1, $a2, $a3 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]] + ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), [[COPY1]](s32), [[COPY3]] + ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY2]] + ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32) + ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) + ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32) + ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] + ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY4]], [[COPY5]] + ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32) + ; MIPS32: $v0 = COPY [[COPY7]](s32) ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %2:_(s1) = G_ICMP intpred(slt), %0(s32), %1 - %3:_(s32) = G_ZEXT %2(s1) - $v0 = COPY %3(s32) + %2:_(s32) = COPY $a0 + %3:_(s32) = COPY $a1 + %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %4:_(s32) = COPY $a2 + %5:_(s32) = COPY $a3 + %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s1) = G_ICMP intpred(sge), %0(s64), %1 + %7:_(s32) = G_ANYEXT %6(s1) + $v0 = COPY %7(s32) RetRA implicit $v0 ... 
--- -name: sle +name: slt_i64 alignment: 2 tracksRegLiveness: true body: | bb.1.entry: - liveins: $a0, $a1 + liveins: $a0, $a1, $a2, $a3 - ; MIPS32-LABEL: name: sle - ; MIPS32: liveins: $a0, $a1 + ; MIPS32-LABEL: name: slt_i64 + ; MIPS32: liveins: $a0, $a1, $a2, $a3 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sle), [[COPY]](s32), [[COPY1]] + ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(slt), [[COPY1]](s32), [[COPY3]] + ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]] + ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32) + ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) + ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32) + ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] + ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY4]], [[COPY5]] + ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32) + ; MIPS32: $v0 = COPY [[COPY7]](s32) ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %2:_(s1) = G_ICMP intpred(sle), %0(s32), %1 - %3:_(s32) = G_ZEXT %2(s1) - $v0 = COPY %3(s32) + %2:_(s32) = COPY $a0 + %3:_(s32) = COPY $a1 + %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %4:_(s32) = COPY $a2 + %5:_(s32) = COPY $a3 + %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s1) = G_ICMP intpred(slt), %0(s64), %1 + %7:_(s32) = G_ANYEXT %6(s1) + $v0 = COPY %7(s32) RetRA implicit $v0 ... 
--- -name: ugt +name: sle_i64 alignment: 2 tracksRegLiveness: true body: | bb.1.entry: - liveins: $a0, $a1 + liveins: $a0, $a1, $a2, $a3 - ; MIPS32-LABEL: name: ugt - ; MIPS32: liveins: $a0, $a1 + ; MIPS32-LABEL: name: sle_i64 + ; MIPS32: liveins: $a0, $a1, $a2, $a3 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]] + ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sle), [[COPY1]](s32), [[COPY3]] + ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY]](s32), [[COPY2]] + ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32) + ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) + ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32) + ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] + ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY4]], [[COPY5]] + ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32) + ; MIPS32: $v0 = COPY [[COPY7]](s32) ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %2:_(s1) = G_ICMP intpred(ugt), %0(s32), %1 - %3:_(s32) = G_ZEXT %2(s1) - $v0 = COPY %3(s32) + %2:_(s32) = COPY $a0 + %3:_(s32) = COPY $a1 + %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %4:_(s32) = COPY $a2 + %5:_(s32) = COPY $a3 + %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s1) = G_ICMP intpred(sle), %0(s64), %1 + %7:_(s32) = G_ANYEXT %6(s1) + $v0 = COPY %7(s32) RetRA implicit $v0 ... 
--- -name: uge +name: ugt_i64 alignment: 2 tracksRegLiveness: true body: | bb.1.entry: - liveins: $a0, $a1 + liveins: $a0, $a1, $a2, $a3 - ; MIPS32-LABEL: name: uge - ; MIPS32: liveins: $a0, $a1 + ; MIPS32-LABEL: name: ugt_i64 + ; MIPS32: liveins: $a0, $a1, $a2, $a3 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY1]] + ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY1]](s32), [[COPY3]] + ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY2]] + ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32) + ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) + ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32) + ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] + ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY4]], [[COPY5]] + ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32) + ; MIPS32: $v0 = COPY [[COPY7]](s32) ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %2:_(s1) = G_ICMP intpred(uge), %0(s32), %1 - %3:_(s32) = G_ZEXT %2(s1) - $v0 = COPY %3(s32) + %2:_(s32) = COPY $a0 + %3:_(s32) = COPY $a1 + %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %4:_(s32) = COPY $a2 + %5:_(s32) = COPY $a3 + %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s1) = G_ICMP intpred(ugt), %0(s64), %1 + %7:_(s32) = G_ANYEXT %6(s1) + $v0 = COPY %7(s32) RetRA implicit $v0 ... 
--- -name: ult +name: uge_i64 alignment: 2 tracksRegLiveness: true body: | bb.1.entry: - liveins: $a0, $a1 + liveins: $a0, $a1, $a2, $a3 - ; MIPS32-LABEL: name: ult - ; MIPS32: liveins: $a0, $a1 + ; MIPS32-LABEL: name: uge_i64 + ; MIPS32: liveins: $a0, $a1, $a2, $a3 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]] + ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY1]](s32), [[COPY3]] + ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY2]] + ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32) + ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) + ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32) + ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] + ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY4]], [[COPY5]] + ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32) + ; MIPS32: $v0 = COPY [[COPY7]](s32) ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %2:_(s1) = G_ICMP intpred(ult), %0(s32), %1 - %3:_(s32) = G_ZEXT %2(s1) - $v0 = COPY %3(s32) + %2:_(s32) = COPY $a0 + %3:_(s32) = COPY $a1 + %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %4:_(s32) = COPY $a2 + %5:_(s32) = COPY $a3 + %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s1) = G_ICMP intpred(uge), %0(s64), %1 + %7:_(s32) = G_ANYEXT %6(s1) + $v0 = COPY %7(s32) RetRA implicit $v0 ... 
--- -name: ule +name: ult_i64 alignment: 2 tracksRegLiveness: true body: | bb.1.entry: - liveins: $a0, $a1 + liveins: $a0, $a1, $a2, $a3 - ; MIPS32-LABEL: name: ule - ; MIPS32: liveins: $a0, $a1 + ; MIPS32-LABEL: name: ult_i64 + ; MIPS32: liveins: $a0, $a1, $a2, $a3 ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY]](s32), [[COPY1]] + ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY1]](s32), [[COPY3]] + ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY2]] + ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32) + ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) + ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32) + ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] + ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY4]], [[COPY5]] + ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32) + ; MIPS32: $v0 = COPY [[COPY7]](s32) ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %2:_(s1) = G_ICMP intpred(ule), %0(s32), %1 - %3:_(s32) = G_ZEXT %2(s1) - $v0 = COPY %3(s32) + %2:_(s32) = COPY $a0 + %3:_(s32) = COPY $a1 + %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %4:_(s32) = COPY $a2 + %5:_(s32) = COPY $a3 + %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s1) = G_ICMP intpred(ult), %0(s64), %1 + %7:_(s32) = G_ANYEXT %6(s1) + $v0 = COPY %7(s32) RetRA implicit $v0 ... 
+--- +name: ule_i64 +alignment: 2 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2, $a3 + ; MIPS32-LABEL: name: ule_i64 + ; MIPS32: liveins: $a0, $a1, $a2, $a3 + ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY1]](s32), [[COPY3]] + ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[COPY3]] + ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ule), [[COPY]](s32), [[COPY2]] + ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32) + ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) + ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32) + ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C]] + ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[COPY4]], [[COPY5]] + ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32) + ; MIPS32: $v0 = COPY [[COPY7]](s32) + ; MIPS32: RetRA implicit $v0 + %2:_(s32) = COPY $a0 + %3:_(s32) = COPY $a1 + %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %4:_(s32) = COPY $a2 + %5:_(s32) = COPY $a3 + %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s1) = G_ICMP intpred(ule), %0(s64), %1 + %7:_(s32) = G_ANYEXT %6(s1) + $v0 = COPY %7(s32) + RetRA implicit $v0 + +... 
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll index fc5f3971af1a9..bb098761f3348 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll @@ -1,148 +1,343 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=MIPS32 -define i32 @eq(i32 %a, i32 %b){ -; MIPS32-LABEL: eq: +define i1 @eq_i32(i32 %a, i32 %b){ +; MIPS32-LABEL: eq_i32: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: xor $1, $4, $5 -; MIPS32-NEXT: sltiu $1, $1, 1 -; MIPS32-NEXT: ori $2, $zero, 1 -; MIPS32-NEXT: and $2, $1, $2 +; MIPS32-NEXT: sltiu $2, $1, 1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: %cmp = icmp eq i32 %a, %b - %conv = zext i1 %cmp to i32 - ret i32 %conv + ret i1 %cmp } -define i32 @ne(i32 %a, i32 %b) { -; MIPS32-LABEL: ne: +define i1 @ne_i32(i32 %a, i32 %b) { +; MIPS32-LABEL: ne_i32: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: xor $1, $4, $5 -; MIPS32-NEXT: sltu $1, $zero, $1 -; MIPS32-NEXT: ori $2, $zero, 1 -; MIPS32-NEXT: and $2, $1, $2 +; MIPS32-NEXT: sltu $2, $zero, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: %cmp = icmp ne i32 %a, %b - %conv = zext i1 %cmp to i32 - ret i32 %conv + ret i1 %cmp } -define i32 @sgt(i32 %a, i32 %b) { -; MIPS32-LABEL: sgt: +define i1 @sgt_i32(i32 %a, i32 %b) { +; MIPS32-LABEL: sgt_i32: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: slt $1, $5, $4 -; MIPS32-NEXT: ori $2, $zero, 1 -; MIPS32-NEXT: and $2, $1, $2 +; MIPS32-NEXT: slt $2, $5, $4 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: %cmp = icmp sgt i32 %a, %b - %conv = zext i1 %cmp to i32 - ret i32 %conv + ret i1 %cmp } -define i32 @sge(i32 %a, i32 %b) { -; MIPS32-LABEL: sge: +define i1 @sge_i32(i32 %a, i32 %b) { +; MIPS32-LABEL: sge_i32: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: slt $1, $4, $5 -; MIPS32-NEXT: xori $1, $1, 1 -; 
MIPS32-NEXT: ori $2, $zero, 1 -; MIPS32-NEXT: and $2, $1, $2 +; MIPS32-NEXT: xori $2, $1, 1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: %cmp = icmp sge i32 %a, %b - %conv = zext i1 %cmp to i32 - ret i32 %conv + ret i1 %cmp } -define i32 @slt(i32 %a, i32 %b) { -; MIPS32-LABEL: slt: +define i1 @slt_i32(i32 %a, i32 %b) { +; MIPS32-LABEL: slt_i32: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: slt $1, $4, $5 -; MIPS32-NEXT: ori $2, $zero, 1 -; MIPS32-NEXT: and $2, $1, $2 +; MIPS32-NEXT: slt $2, $4, $5 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: %cmp = icmp slt i32 %a, %b - %conv = zext i1 %cmp to i32 - ret i32 %conv + ret i1 %cmp } -define i32 @sle(i32 %a, i32 %b) { -; MIPS32-LABEL: sle: +define i1 @sle_i32(i32 %a, i32 %b) { +; MIPS32-LABEL: sle_i32: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: slt $1, $5, $4 -; MIPS32-NEXT: xori $1, $1, 1 -; MIPS32-NEXT: ori $2, $zero, 1 -; MIPS32-NEXT: and $2, $1, $2 +; MIPS32-NEXT: xori $2, $1, 1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: %cmp = icmp sle i32 %a, %b - %conv = zext i1 %cmp to i32 - ret i32 %conv + ret i1 %cmp } -define i32 @ugt(i32 %a, i32 %b) { -; MIPS32-LABEL: ugt: +define i1 @ugt_i32(i32 %a, i32 %b) { +; MIPS32-LABEL: ugt_i32: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sltu $1, $5, $4 -; MIPS32-NEXT: ori $2, $zero, 1 -; MIPS32-NEXT: and $2, $1, $2 +; MIPS32-NEXT: sltu $2, $5, $4 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: %cmp = icmp ugt i32 %a, %b - %conv = zext i1 %cmp to i32 - ret i32 %conv + ret i1 %cmp } -define i32 @uge(i32 %a, i32 %b) { -; MIPS32-LABEL: uge: +define i1 @uge_i32(i32 %a, i32 %b) { +; MIPS32-LABEL: uge_i32: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: sltu $1, $4, $5 -; MIPS32-NEXT: xori $1, $1, 1 -; MIPS32-NEXT: ori $2, $zero, 1 -; MIPS32-NEXT: and $2, $1, $2 +; MIPS32-NEXT: xori $2, $1, 1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: %cmp = icmp uge i32 %a, %b - %conv = zext i1 %cmp to i32 - ret i32 %conv + ret i1 %cmp } -define i32 @ult(i32 %a, i32 %b) { -; MIPS32-LABEL: ult: 
+define i1 @ult_i32(i32 %a, i32 %b) { +; MIPS32-LABEL: ult_i32: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sltu $1, $4, $5 -; MIPS32-NEXT: ori $2, $zero, 1 -; MIPS32-NEXT: and $2, $1, $2 +; MIPS32-NEXT: sltu $2, $4, $5 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: %cmp = icmp ult i32 %a, %b - %conv = zext i1 %cmp to i32 - ret i32 %conv + ret i1 %cmp } -define i32 @ule(i32 %a, i32 %b) { -; MIPS32-LABEL: ule: +define i1 @ule_i32(i32 %a, i32 %b) { +; MIPS32-LABEL: ule_i32: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: sltu $1, $5, $4 -; MIPS32-NEXT: xori $1, $1, 1 -; MIPS32-NEXT: ori $2, $zero, 1 -; MIPS32-NEXT: and $2, $1, $2 +; MIPS32-NEXT: xori $2, $1, 1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: %cmp = icmp ule i32 %a, %b - %conv = zext i1 %cmp to i32 - ret i32 %conv + ret i1 %cmp +} + +define i1 @eq_ptr(i32* %a, i32* %b){ +; MIPS32-LABEL: eq_ptr: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: xor $1, $4, $5 +; MIPS32-NEXT: sltiu $2, $1, 1 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %cmp = icmp eq i32* %a, %b + ret i1 %cmp +} + +define i1 @ult_i8(i8 %a, i8 %b) { +; MIPS32-LABEL: ult_i8: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: ori $1, $zero, 255 +; MIPS32-NEXT: and $2, $4, $1 +; MIPS32-NEXT: and $1, $5, $1 +; MIPS32-NEXT: sltu $2, $2, $1 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %cmp = icmp ult i8 %a, %b + ret i1 %cmp +} + +define i1 @slt_i16(i16 %a, i16 %b) { +; MIPS32-LABEL: slt_i16: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: sll $1, $4, 16 +; MIPS32-NEXT: sra $1, $1, 16 +; MIPS32-NEXT: sll $2, $5, 16 +; MIPS32-NEXT: sra $2, $2, 16 +; MIPS32-NEXT: slt $2, $1, $2 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %cmp = icmp slt i16 %a, %b + ret i1 %cmp +} + +define i1 @eq_i64(i64 %a, i64 %b){ +; MIPS32-LABEL: eq_i64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: xor $1, $4, $6 +; MIPS32-NEXT: xor $2, $5, $7 +; MIPS32-NEXT: or $1, $1, $2 +; MIPS32-NEXT: ori $2, $zero, 0 +; MIPS32-NEXT: xor $1, $1, $2 +; MIPS32-NEXT: sltiu $2, $1, 1 
+; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %cmp = icmp eq i64 %a, %b + ret i1 %cmp +} + +define i1 @ne_i64(i64 %a, i64 %b) { +; MIPS32-LABEL: ne_i64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: xor $1, $4, $6 +; MIPS32-NEXT: xor $2, $5, $7 +; MIPS32-NEXT: or $1, $1, $2 +; MIPS32-NEXT: ori $2, $zero, 0 +; MIPS32-NEXT: xor $1, $1, $2 +; MIPS32-NEXT: sltu $2, $zero, $1 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %cmp = icmp ne i64 %a, %b + ret i1 %cmp +} + +define i1 @sgt_i64(i64 %a, i64 %b) { +; MIPS32-LABEL: sgt_i64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: slt $1, $7, $5 +; MIPS32-NEXT: xor $2, $5, $7 +; MIPS32-NEXT: sltiu $2, $2, 1 +; MIPS32-NEXT: sltu $3, $6, $4 +; MIPS32-NEXT: ori $4, $zero, 1 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: movn $1, $3, $2 +; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %cmp = icmp sgt i64 %a, %b + ret i1 %cmp +} + +define i1 @sge_i64(i64 %a, i64 %b) { +; MIPS32-LABEL: sge_i64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: slt $1, $5, $7 +; MIPS32-NEXT: xori $1, $1, 1 +; MIPS32-NEXT: xor $2, $5, $7 +; MIPS32-NEXT: sltiu $2, $2, 1 +; MIPS32-NEXT: sltu $3, $4, $6 +; MIPS32-NEXT: xori $3, $3, 1 +; MIPS32-NEXT: ori $4, $zero, 1 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: movn $1, $3, $2 +; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %cmp = icmp sge i64 %a, %b + ret i1 %cmp +} + +define i1 @slt_i64(i64 %a, i64 %b) { +; MIPS32-LABEL: slt_i64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: slt $1, $5, $7 +; MIPS32-NEXT: xor $2, $5, $7 +; MIPS32-NEXT: sltiu $2, $2, 1 +; MIPS32-NEXT: sltu $3, $4, $6 +; MIPS32-NEXT: ori $4, $zero, 1 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: movn $1, $3, $2 +; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %cmp = icmp slt i64 %a, %b + ret i1 %cmp +} + +define i1 @sle_i64(i64 %a, i64 %b) { +; MIPS32-LABEL: sle_i64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: slt $1, $7, $5 +; 
MIPS32-NEXT: xori $1, $1, 1 +; MIPS32-NEXT: xor $2, $5, $7 +; MIPS32-NEXT: sltiu $2, $2, 1 +; MIPS32-NEXT: sltu $3, $6, $4 +; MIPS32-NEXT: xori $3, $3, 1 +; MIPS32-NEXT: ori $4, $zero, 1 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: movn $1, $3, $2 +; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %cmp = icmp sle i64 %a, %b + ret i1 %cmp +} + +define i1 @ugt_i64(i64 %a, i64 %b) { +; MIPS32-LABEL: ugt_i64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: sltu $1, $7, $5 +; MIPS32-NEXT: xor $2, $5, $7 +; MIPS32-NEXT: sltiu $2, $2, 1 +; MIPS32-NEXT: sltu $3, $6, $4 +; MIPS32-NEXT: ori $4, $zero, 1 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: movn $1, $3, $2 +; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %cmp = icmp ugt i64 %a, %b + ret i1 %cmp +} + +define i1 @uge_i64(i64 %a, i64 %b) { +; MIPS32-LABEL: uge_i64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: sltu $1, $5, $7 +; MIPS32-NEXT: xori $1, $1, 1 +; MIPS32-NEXT: xor $2, $5, $7 +; MIPS32-NEXT: sltiu $2, $2, 1 +; MIPS32-NEXT: sltu $3, $4, $6 +; MIPS32-NEXT: xori $3, $3, 1 +; MIPS32-NEXT: ori $4, $zero, 1 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: movn $1, $3, $2 +; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %cmp = icmp uge i64 %a, %b + ret i1 %cmp +} + +define i1 @ult_i64(i64 %a, i64 %b) { +; MIPS32-LABEL: ult_i64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: sltu $1, $5, $7 +; MIPS32-NEXT: xor $2, $5, $7 +; MIPS32-NEXT: sltiu $2, $2, 1 +; MIPS32-NEXT: sltu $3, $4, $6 +; MIPS32-NEXT: ori $4, $zero, 1 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: movn $1, $3, $2 +; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %cmp = icmp ult i64 %a, %b + ret i1 %cmp +} + +define i1 @ule_i64(i64 %a, i64 %b) { +; MIPS32-LABEL: ule_i64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: sltu $1, $7, $5 +; MIPS32-NEXT: xori $1, $1, 1 +; MIPS32-NEXT: xor $2, $5, $7 +; MIPS32-NEXT: sltiu $2, $2, 1 +; 
MIPS32-NEXT: sltu $3, $6, $4 +; MIPS32-NEXT: xori $3, $3, 1 +; MIPS32-NEXT: ori $4, $zero, 1 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: movn $1, $3, $2 +; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %cmp = icmp ule i64 %a, %b + ret i1 %cmp } diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/icmp.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/icmp.mir index d03113a5be208..797c76a3cd61b 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/icmp.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/icmp.mir @@ -2,20 +2,12 @@ # RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32 --- | - define void @eq() {entry: ret void} - define void @ne() {entry: ret void} - define void @sgt() {entry: ret void} - define void @sge() {entry: ret void} - define void @slt() {entry: ret void} - define void @sle() {entry: ret void} - define void @ugt() {entry: ret void} - define void @uge() {entry: ret void} - define void @ult() {entry: ret void} - define void @ule() {entry: ret void} + define void @ne_i32() {entry: ret void} + define void @eq_ptr() {entry: ret void} ... 
--- -name: eq +name: ne_i32 alignment: 2 legalized: true tracksRegLiveness: true @@ -23,231 +15,24 @@ body: | bb.1.entry: liveins: $a0, $a1 - ; MIPS32-LABEL: name: eq - ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]] - ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %4:_(s32) = G_ICMP intpred(eq), %0(s32), %1 - %5:_(s32) = G_CONSTANT i32 1 - %6:_(s32) = COPY %4(s32) - %3:_(s32) = G_AND %6, %5 - $v0 = COPY %3(s32) - RetRA implicit $v0 - -... ---- -name: ne -alignment: 2 -legalized: true -tracksRegLiveness: true -body: | - bb.1.entry: - liveins: $a0, $a1 - - ; MIPS32-LABEL: name: ne + ; MIPS32-LABEL: name: ne_i32 ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1 ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]] - ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) + ; MIPS32: $v0 = COPY [[COPY2]](s32) ; MIPS32: RetRA implicit $v0 %0:_(s32) = COPY $a0 %1:_(s32) = COPY $a1 %4:_(s32) = G_ICMP intpred(ne), %0(s32), %1 - %5:_(s32) = G_CONSTANT i32 1 - %6:_(s32) = COPY %4(s32) - %3:_(s32) = G_AND %6, %5 - $v0 = COPY %3(s32) - RetRA implicit $v0 - -... 
---- -name: sgt -alignment: 2 -legalized: true -tracksRegLiveness: true -body: | - bb.1.entry: - liveins: $a0, $a1 - - ; MIPS32-LABEL: name: sgt - ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[COPY1]] - ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %4:_(s32) = G_ICMP intpred(sgt), %0(s32), %1 - %5:_(s32) = G_CONSTANT i32 1 - %6:_(s32) = COPY %4(s32) - %3:_(s32) = G_AND %6, %5 - $v0 = COPY %3(s32) - RetRA implicit $v0 - -... ---- -name: sge -alignment: 2 -legalized: true -tracksRegLiveness: true -body: | - bb.1.entry: - liveins: $a0, $a1 - - ; MIPS32-LABEL: name: sge - ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(sge), [[COPY]](s32), [[COPY1]] - ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %4:_(s32) = G_ICMP intpred(sge), %0(s32), %1 - %5:_(s32) = G_CONSTANT i32 1 - %6:_(s32) = COPY %4(s32) - %3:_(s32) = G_AND %6, %5 - $v0 = COPY %3(s32) - RetRA implicit $v0 - -... 
---- -name: slt -alignment: 2 -legalized: true -tracksRegLiveness: true -body: | - bb.1.entry: - liveins: $a0, $a1 - - ; MIPS32-LABEL: name: slt - ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(slt), [[COPY]](s32), [[COPY1]] - ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %4:_(s32) = G_ICMP intpred(slt), %0(s32), %1 - %5:_(s32) = G_CONSTANT i32 1 - %6:_(s32) = COPY %4(s32) - %3:_(s32) = G_AND %6, %5 - $v0 = COPY %3(s32) - RetRA implicit $v0 - -... ---- -name: sle -alignment: 2 -legalized: true -tracksRegLiveness: true -body: | - bb.1.entry: - liveins: $a0, $a1 - - ; MIPS32-LABEL: name: sle - ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(sle), [[COPY]](s32), [[COPY1]] - ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %4:_(s32) = G_ICMP intpred(sle), %0(s32), %1 - %5:_(s32) = G_CONSTANT i32 1 - %6:_(s32) = COPY %4(s32) - %3:_(s32) = G_AND %6, %5 - $v0 = COPY %3(s32) - RetRA implicit $v0 - -... 
---- -name: ugt -alignment: 2 -legalized: true -tracksRegLiveness: true -body: | - bb.1.entry: - liveins: $a0, $a1 - - ; MIPS32-LABEL: name: ugt - ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(ugt), [[COPY]](s32), [[COPY1]] - ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %4:_(s32) = G_ICMP intpred(ugt), %0(s32), %1 - %5:_(s32) = G_CONSTANT i32 1 - %6:_(s32) = COPY %4(s32) - %3:_(s32) = G_AND %6, %5 - $v0 = COPY %3(s32) - RetRA implicit $v0 - -... ---- -name: uge -alignment: 2 -legalized: true -tracksRegLiveness: true -body: | - bb.1.entry: - liveins: $a0, $a1 - - ; MIPS32-LABEL: name: uge - ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY1]] - ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %4:_(s32) = G_ICMP intpred(uge), %0(s32), %1 - %5:_(s32) = G_CONSTANT i32 1 - %6:_(s32) = COPY %4(s32) - %3:_(s32) = G_AND %6, %5 + %3:_(s32) = COPY %4(s32) $v0 = COPY %3(s32) RetRA implicit $v0 ... 
--- -name: ult +name: eq_ptr alignment: 2 legalized: true tracksRegLiveness: true @@ -255,51 +40,18 @@ body: | bb.1.entry: liveins: $a0, $a1 - ; MIPS32-LABEL: name: ult + ; MIPS32-LABEL: name: eq_ptr ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(ult), [[COPY]](s32), [[COPY1]] - ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 + ; MIPS32: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0 + ; MIPS32: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1 + ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(eq), [[COPY]](p0), [[COPY1]] ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) + ; MIPS32: $v0 = COPY [[COPY2]](s32) ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %4:_(s32) = G_ICMP intpred(ult), %0(s32), %1 - %5:_(s32) = G_CONSTANT i32 1 - %6:_(s32) = COPY %4(s32) - %3:_(s32) = G_AND %6, %5 - $v0 = COPY %3(s32) - RetRA implicit $v0 - -... 
---- -name: ule -alignment: 2 -legalized: true -tracksRegLiveness: true -body: | - bb.1.entry: - liveins: $a0, $a1 - - ; MIPS32-LABEL: name: ule - ; MIPS32: liveins: $a0, $a1 - ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 - ; MIPS32: [[COPY1:%[0-9]+]]:gprb(s32) = COPY $a1 - ; MIPS32: [[ICMP:%[0-9]+]]:gprb(s32) = G_ICMP intpred(ule), [[COPY]](s32), [[COPY1]] - ; MIPS32: [[C:%[0-9]+]]:gprb(s32) = G_CONSTANT i32 1 - ; MIPS32: [[COPY2:%[0-9]+]]:gprb(s32) = COPY [[ICMP]](s32) - ; MIPS32: [[AND:%[0-9]+]]:gprb(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: $v0 = COPY [[AND]](s32) - ; MIPS32: RetRA implicit $v0 - %0:_(s32) = COPY $a0 - %1:_(s32) = COPY $a1 - %4:_(s32) = G_ICMP intpred(ule), %0(s32), %1 - %5:_(s32) = G_CONSTANT i32 1 - %6:_(s32) = COPY %4(s32) - %3:_(s32) = G_AND %6, %5 + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %4:_(s32) = G_ICMP intpred(eq), %0(p0), %1 + %3:_(s32) = COPY %4(s32) $v0 = COPY %3(s32) RetRA implicit $v0 From 5214956eaaa10a92794514558525ef6934486e90 Mon Sep 17 00:00:00 2001 From: Justin Hibbits Date: Wed, 17 Jul 2019 12:30:04 +0000 Subject: [PATCH 336/451] PowerPC/SPE: Fix load/store handling for SPE Summary: Pointed out in a comment for D49754, register spilling will currently spill SPE registers at almost any offset. However, the instructions `evstdd` and `evldd` require a) 8-byte alignment, and b) a limit of 256 (unsigned) bytes from the base register, as the offset must fit into a 5-bit offset, which ranges from 0-31 (indexed in double-words). The update to the register spill test is taken partially from the test case shown in D49754. Additionally, pointed out by Kei Thomsen, globals will currently use evldd/evstdd, though the offset isn't known at compile time, so may exceed the 8-bit (unsigned) offset permitted. This fixes that as well, by forcing it to always use evlddx/evstddx when accessing globals. Part of the patch contributed by Kei Thomsen.
Reviewers: nemanjai, hfinkel, joerg Subscribers: kbarton, jsji, llvm-commits Differential Revision: https://reviews.llvm.org/D54409 llvm-svn: 366318 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 23 +++++++++++++++++++ llvm/lib/Target/PowerPC/PPCISelLowering.h | 5 +++++ llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 8 ++++++- llvm/test/CodeGen/PowerPC/spe.ll | 25 ++++++++++++++++++--- 4 files changed, 57 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index d014e0070950a..24d50074860d7 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2233,6 +2233,25 @@ bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) { return isIntS16Immediate(Op.getNode(), Imm); } + +/// SelectAddressEVXRegReg - Given the specified address, check to see if it can +/// be represented as an indexed [r+r] operation. +bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base, + SDValue &Index, + SelectionDAG &DAG) const { + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) { + if (MemSDNode *Memop = dyn_cast(*UI)) { + if (Memop->getMemoryVT() == MVT::f64) { + Base = N.getOperand(0); + Index = N.getOperand(1); + return true; + } + } + } + return false; +} + /// SelectAddressRegReg - Given the specified addressed, check to see if it /// can be represented as an indexed [r+r] operation. Returns false if it /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is @@ -2244,6 +2263,10 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, unsigned EncodingAlignment) const { int16_t imm = 0; if (N.getOpcode() == ISD::ADD) { + // Is there any SPE load/store (f64), which can't handle 16bit offset? + // SPE load/store can only handle 8-bit offsets. 
+ if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG)) + return true; if (isIntS16Immediate(N.getOperand(1), imm) && (!EncodingAlignment || !(imm % EncodingAlignment))) return false; // r+i diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 73c6dcd7c859e..97422c6eda360 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -669,6 +669,11 @@ namespace llvm { ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override; + /// SelectAddressEVXRegReg - Given the specified addressed, check to see if + /// it can be more efficiently represented as [r+imm]. + bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, + SelectionDAG &DAG) const; + /// SelectAddressRegReg - Given the specified addressed, check to see if it /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment /// is non-zero, only accept displacement which is not suitable for [r+imm]. diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 32b3d7e61d056..0498812050261 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -938,6 +938,9 @@ static unsigned offsetMinAlignForOpcode(unsigned OpC) { case PPC::STXSD: case PPC::STXSSP: return 4; + case PPC::EVLDD: + case PPC::EVSTDD: + return 8; case PPC::LXV: case PPC::STXV: return 16; @@ -1060,7 +1063,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // happen in invalid code. assert(OpC != PPC::DBG_VALUE && "This should be handled in a target-independent way"); - if (!noImmForm && ((isInt<16>(Offset) && + bool OffsetFitsMnemonic = (OpC == PPC::EVSTDD || OpC == PPC::EVLDD) ? 
+ isUInt<8>(Offset) : + isInt<16>(Offset); + if (!noImmForm && ((OffsetFitsMnemonic && ((Offset % offsetMinAlign(MI)) == 0)) || OpC == TargetOpcode::STACKMAP || OpC == TargetOpcode::PATCHPOINT)) { diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll index 06915ceb2dbab..bd03fa30e6a70 100644 --- a/llvm/test/CodeGen/PowerPC/spe.ll +++ b/llvm/test/CodeGen/PowerPC/spe.ll @@ -523,18 +523,37 @@ entry: ; CHECK: #NO_APP } -define double @test_spill(double %a) nounwind { +declare double @test_spill_spe_regs(double, double); +define dso_local void @test_func2() #0 { entry: + ret void +} + +declare void @test_memset(i8* nocapture writeonly, i8, i32, i1) +@global_var1 = global i32 0, align 4 +define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32* %a5) nounwind { +entry: + %v1 = alloca [13 x i32], align 4 + %v2 = alloca [11 x i32], align 4 %0 = fadd double %a, %a - call void asm sideeffect "","~{r0},~{r3},~{s4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind + call void asm sideeffect "","~{s0},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"() nounwind %1 = fadd double %0, 3.14159 + %2 = bitcast [13 x i32]* %v1 to i8* + call void @test_memset(i8* align 4 %2, i8 0, i32 24, i1 true) + store i32 0, i32* %a5, align 4 + call void @test_func2() + %3 = bitcast [11 x i32]* %v2 to i8* + call void @test_memset(i8* align 4 %3, i8 0, i32 20, i1 true) br label %return return: ret double %1 ; CHECK-LABEL: test_spill -; CHECK: efdadd +; CHECK: li [[VREG:[0-9]+]], 256 +; CHECK: evstddx {{[0-9]+}}, {{[0-9]+}}, [[VREG]] +; CHECK-NOT: evstdd {{[0-9]+}}, 256({{[0-9]+}} ; CHECK: evstdd +; CHECK: efdadd ; CHECK: evldd } From 
0257c6b659f1a81e6071b606da49c91d5d586c7d Mon Sep 17 00:00:00 2001 From: Justin Hibbits Date: Wed, 17 Jul 2019 12:30:48 +0000 Subject: [PATCH 337/451] PowerPC: Fix register spilling for SPE registers Summary: Missed in the original commit, use the correct callee-saved register list for spilling, instead of the standard SVR432 list. This avoids needlessly spilling the SPE non-volatile registers when they're not used. As part of this, also add where missing, and sort, the spill opcode checks for SPE and SPE4 register classes. Reviewers: nemanjai, hfinkel, joerg Subscribers: kbarton, jsji, llvm-commits Differential Revision: https://reviews.llvm.org/D56703 llvm-svn: 366319 --- llvm/lib/Target/PowerPC/PPCCallingConv.td | 15 ++++-- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 2 + llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 55 +++++++++++++-------- 3 files changed, 47 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td index ee367214dd7a9..369b9ce1a711e 100644 --- a/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -366,15 +366,22 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>; // and value may be altered by inter-library calls. // Do not include r12 as it is used as a scratch register. // Do not include return registers r3, f1, v2. 
-def CSR_SVR32_ColdCC : CalleeSavedRegs<(add (sequence "R%u", 4, 10), - (sequence "R%u", 14, 31), - F0, (sequence "F%u", 2, 31), - (sequence "CR%u", 0, 7))>; +def CSR_SVR32_ColdCC_Common : CalleeSavedRegs<(add (sequence "R%u", 4, 10), + (sequence "R%u", 14, 31), + (sequence "CR%u", 0, 7))>; + +def CSR_SVR32_ColdCC : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Common, + F0, (sequence "F%u", 2, 31))>; + def CSR_SVR32_ColdCC_Altivec : CalleeSavedRegs<(add CSR_SVR32_ColdCC, (sequence "V%u", 0, 1), (sequence "V%u", 3, 31))>; +def CSR_SVR32_ColdCC_SPE : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Common, + (sequence "S%u", 4, 10), + (sequence "S%u", 14, 31))>; + def CSR_SVR64_ColdCC : CalleeSavedRegs<(add (sequence "X%u", 4, 10), (sequence "X%u", 14, 31), F0, (sequence "F%u", 2, 31), diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index ea406a20df8ae..a787bdd56b9d7 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1009,6 +1009,8 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = PPC::QVFMRb; else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::CROR; + else if (PPC::SPE4RCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::OR; else if (PPC::SPERCRegClass.contains(DestReg, SrcReg)) Opc = PPC::EVOR; else diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp index 0498812050261..12554ea8d0797 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -159,30 +159,39 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (TM.isPPC64() && MF->getInfo()->isSplitCSR()) return CSR_SRV464_TLS_PE_SaveList; - if (Subtarget.hasSPE()) - return CSR_SVR432_SPE_SaveList; - // On PPC64, we might need to save r2 (but only if it is not reserved). bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2); + // Cold calling convention CSRs. 
if (MF->getFunction().getCallingConv() == CallingConv::Cold) { - return TM.isPPC64() - ? (Subtarget.hasAltivec() - ? (SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList - : CSR_SVR64_ColdCC_Altivec_SaveList) - : (SaveR2 ? CSR_SVR64_ColdCC_R2_SaveList - : CSR_SVR64_ColdCC_SaveList)) - : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_SaveList - : CSR_SVR32_ColdCC_SaveList); + if (TM.isPPC64()) { + if (Subtarget.hasAltivec()) + return SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList + : CSR_SVR64_ColdCC_Altivec_SaveList; + return SaveR2 ? CSR_SVR64_ColdCC_R2_SaveList + : CSR_SVR64_ColdCC_SaveList; + } + // 32-bit targets. + if (Subtarget.hasAltivec()) + return CSR_SVR32_ColdCC_Altivec_SaveList; + else if (Subtarget.hasSPE()) + return CSR_SVR32_ColdCC_SPE_SaveList; + return CSR_SVR32_ColdCC_SaveList; } - - return TM.isPPC64() - ? (Subtarget.hasAltivec() - ? (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList - : CSR_SVR464_Altivec_SaveList) - : (SaveR2 ? CSR_SVR464_R2_SaveList : CSR_SVR464_SaveList)) - : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_SaveList - : CSR_SVR432_SaveList); + // Standard calling convention CSRs. + if (TM.isPPC64()) { + if (Subtarget.hasAltivec()) + return SaveR2 ? CSR_SVR464_R2_Altivec_SaveList + : CSR_SVR464_Altivec_SaveList; + return SaveR2 ? CSR_SVR464_R2_SaveList + : CSR_SVR464_SaveList; + } + // 32-bit targets. + if (Subtarget.hasAltivec()) + return CSR_SVR432_Altivec_SaveList; + else if (Subtarget.hasSPE()) + return CSR_SVR432_SPE_SaveList; + return CSR_SVR432_SaveList; } const MCPhysReg * @@ -236,13 +245,17 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF, return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask : CSR_SVR64_ColdCC_RegMask) : (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask - : CSR_SVR32_ColdCC_RegMask); + : (Subtarget.hasSPE() + ? CSR_SVR32_ColdCC_SPE_RegMask + : CSR_SVR32_ColdCC_RegMask)); } return TM.isPPC64() ? (Subtarget.hasAltivec() ? 
CSR_SVR464_Altivec_RegMask : CSR_SVR464_RegMask) : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask - : CSR_SVR432_RegMask); + : (Subtarget.hasSPE() + ? CSR_SVR432_SPE_RegMask + : CSR_SVR432_RegMask)); } const uint32_t* From 6011a285edf2cf45ee6d660b6219abe3db9e7dc1 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Wed, 17 Jul 2019 13:14:02 +0000 Subject: [PATCH 338/451] [clangd] Handle windows line endings in QueryDriver Summary: fixes second case of https://github.com/clangd/clangd/issues/93 Reviewers: sammccall Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D64789 llvm-svn: 366320 --- clang-tools-extra/clangd/QueryDriverDatabase.cpp | 4 +++- clang-tools-extra/clangd/test/system-include-extractor.test | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clangd/QueryDriverDatabase.cpp b/clang-tools-extra/clangd/QueryDriverDatabase.cpp index d71ee3184cf5f..110b8fc43fc2b 100644 --- a/clang-tools-extra/clangd/QueryDriverDatabase.cpp +++ b/clang-tools-extra/clangd/QueryDriverDatabase.cpp @@ -63,7 +63,9 @@ std::vector parseDriverOutput(llvm::StringRef Output) { llvm::SmallVector Lines; Output.split(Lines, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false); - auto StartIt = std::find(Lines.begin(), Lines.end(), SIS); + auto StartIt = + std::find_if(Lines.begin(), Lines.end(), + [](llvm::StringRef Line) { return Line.trim() == SIS; }); if (StartIt == Lines.end()) { elog("System include extraction: start marker not found: {0}", Output); return {}; diff --git a/clang-tools-extra/clangd/test/system-include-extractor.test b/clang-tools-extra/clangd/test/system-include-extractor.test index 130afbd828c21..8f99db9122a6d 100644 --- a/clang-tools-extra/clangd/test/system-include-extractor.test +++ b/clang-tools-extra/clangd/test/system-include-extractor.test @@ -5,7 +5,7 @@ # RUN: echo '#!/bin/bash' >> %t.dir/my_driver.sh # RUN: echo '[ "$0" = "%t.dir/my_driver.sh" ] 
|| exit' >> %t.dir/my_driver.sh # RUN: echo 'echo line to ignore >&2' >> %t.dir/my_driver.sh -# RUN: echo 'echo \#include \<...\> search starts here: >&2' >> %t.dir/my_driver.sh +# RUN: echo 'echo -e "#include <...> search starts here:\r" >&2' >> %t.dir/my_driver.sh # RUN: echo 'echo %t.dir/my/dir/ >&2' >> %t.dir/my_driver.sh # RUN: echo 'echo %t.dir/my/dir2/ >&2' >> %t.dir/my_driver.sh # RUN: echo 'echo End of search list. >&2' >> %t.dir/my_driver.sh From 2889fe67691b4220e31bbf8c78b63474c973f26c Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 17 Jul 2019 13:21:25 +0000 Subject: [PATCH 339/451] [clangd] Force the required interpretation of #import on windows tests. Summary: NFC but should fix a bunch of tests. Reviewers: kadircet Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64857 llvm-svn: 366321 --- clang-tools-extra/clangd/unittests/TestTU.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang-tools-extra/clangd/unittests/TestTU.cpp b/clang-tools-extra/clangd/unittests/TestTU.cpp index 4747f5ea2e3d6..7e77160870663 100644 --- a/clang-tools-extra/clangd/unittests/TestTU.cpp +++ b/clang-tools-extra/clangd/unittests/TestTU.cpp @@ -38,6 +38,10 @@ ParsedAST TestTU::build() const { Cmd.push_back("-include"); Cmd.push_back(ImplicitHeaderGuard ? ImportThunk.c_str() : FullHeaderName.c_str()); + // ms-compatibility changes the meaning of #import. + // The default is OS-dependent (on on windows), ensure it's off. 
+ if (ImplicitHeaderGuard) + Cmd.push_back("-fno-ms-compatibility"); } Cmd.insert(Cmd.end(), ExtraArgs.begin(), ExtraArgs.end()); // Put the file name at the end -- this allows the extra arg (-xc++) to From 4b8da3a503e434ddbc08ecf66582475765f449bc Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Wed, 17 Jul 2019 13:23:27 +0000 Subject: [PATCH 340/451] [AArch64] Add support for Transactional Memory Extension (TME) TME is a future architecture technology, documented in https://developer.arm.com/architectures/cpu-architecture/a-profile/exploration-tools https://developer.arm.com/docs/ddi0601/a More about the future architectures: https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/new-technologies-for-the-arm-a-profile-architecture This patch adds support for the TME instructions TSTART, TTEST, TCOMMIT, and TCANCEL and the target feature/arch extension "tme". It also implements TME builtin functions, defined in ACLE Q2 2019 (https://developer.arm.com/docs/101028/latest) Patch by Javed Absar and Momchil Velikov Differential Revision: https://reviews.llvm.org/D64416 llvm-svn: 366322 --- clang/include/clang/Basic/BuiltinsAArch64.def | 6 ++ clang/lib/Basic/Targets/AArch64.cpp | 6 ++ clang/lib/Basic/Targets/AArch64.h | 1 + clang/lib/Headers/arm_acle.h | 24 +++++++- clang/lib/Sema/SemaChecking.cpp | 1 + .../test/CodeGen/aarch64-tme-tcancel-arg.cpp | 10 ++++ clang/test/CodeGen/aarch64-tme.c | 36 ++++++++++++ clang/test/Sema/aarch64-tme-errors.c | 8 +++ .../Sema/aarch64-tme-tcancel-const-error.c | 4 ++ .../Sema/aarch64-tme-tcancel-range-error.c | 4 ++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 17 ++++++ .../llvm/Support/AArch64TargetParser.def | 1 + .../llvm/Support/AArch64TargetParser.h | 1 + llvm/lib/Target/AArch64/AArch64.td | 3 + .../lib/Target/AArch64/AArch64InstrFormats.td | 55 +++++++++++++++++-- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 29 ++++++++-- llvm/lib/Target/AArch64/AArch64Subtarget.h | 2 + 
llvm/test/CodeGen/AArch64/tme-tcancel.ll | 16 ++++++ llvm/test/CodeGen/AArch64/tme-tcommit.ll | 16 ++++++ llvm/test/CodeGen/AArch64/tme-tstart.ll | 16 ++++++ llvm/test/CodeGen/AArch64/tme-ttest.ll | 16 ++++++ llvm/test/MC/AArch64/tme-error.s | 47 ++++++++++++++++ llvm/test/MC/AArch64/tme.s | 24 ++++++++ llvm/test/MC/Disassembler/AArch64/tme.txt | 19 +++++++ llvm/unittests/Support/TargetParserTest.cpp | 1 + 25 files changed, 350 insertions(+), 13 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-tme-tcancel-arg.cpp create mode 100644 clang/test/CodeGen/aarch64-tme.c create mode 100644 clang/test/Sema/aarch64-tme-errors.c create mode 100644 clang/test/Sema/aarch64-tme-tcancel-const-error.c create mode 100644 clang/test/Sema/aarch64-tme-tcancel-range-error.c create mode 100644 llvm/test/CodeGen/AArch64/tme-tcancel.ll create mode 100644 llvm/test/CodeGen/AArch64/tme-tcommit.ll create mode 100644 llvm/test/CodeGen/AArch64/tme-tstart.ll create mode 100644 llvm/test/CodeGen/AArch64/tme-ttest.ll create mode 100644 llvm/test/MC/AArch64/tme-error.s create mode 100644 llvm/test/MC/AArch64/tme.s create mode 100644 llvm/test/MC/Disassembler/AArch64/tme.txt diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index 7701ad98f4832..a144979acca6d 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -91,6 +91,12 @@ LANGBUILTIN(__sevl, "v", "", ALL_MS_LANGUAGES) // Misc BUILTIN(__builtin_sponentry, "v*", "c") +// Transactional Memory Extension +BUILTIN(__builtin_arm_tstart, "WUi", "nj") +BUILTIN(__builtin_arm_tcommit, "v", "n") +BUILTIN(__builtin_arm_tcancel, "vWUIi", "nr") +BUILTIN(__builtin_arm_ttest, "WUi", "nc") + TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_BitScanForward64, "UcUNi*ULLi", "nh", 
"intrin.h", ALL_MS_LANGUAGES, "") diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 74ac69ab8946a..2abca0a660ae4 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -219,6 +219,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasMTE) Builder.defineMacro("__ARM_FEATURE_MEMORY_TAGGING", "1"); + if (HasTME) + Builder.defineMacro("__ARM_FEATURE_TME", "1"); + if ((FPU & NeonMode) && HasFP16FML) Builder.defineMacro("__ARM_FEATURE_FP16FML", "1"); @@ -270,6 +273,7 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, HasDotProd = false; HasFP16FML = false; HasMTE = false; + HasTME = false; ArchKind = llvm::AArch64::ArchKind::ARMV8A; for (const auto &Feature : Features) { @@ -301,6 +305,8 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, HasFP16FML = true; if (Feature == "+mte") HasMTE = true; + if (Feature == "+tme") + HasTME = true; } setDataLayout(); diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index 5833c146003b0..b6aa07780edda 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -35,6 +35,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { bool HasDotProd; bool HasFP16FML; bool HasMTE; + bool HasTME; llvm::AArch64::ArchKind ArchKind; diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h index 096cc261af2c6..0510e6fd809f2 100644 --- a/clang/lib/Headers/arm_acle.h +++ b/clang/lib/Headers/arm_acle.h @@ -613,7 +613,7 @@ __jcvt(double __a) { #define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v) #define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v) -// Memory Tagging Extensions (MTE) Intrinsics +/* Memory Tagging Extensions (MTE) Intrinsics */ #if __ARM_FEATURE_MEMORY_TAGGING #define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask) #define __arm_mte_increment_tag(__ptr, 
__tag_offset) __builtin_arm_addg(__ptr, __tag_offset) @@ -623,6 +623,28 @@ __jcvt(double __a) { #define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb) #endif +/* Transactional Memory Extension (TME) Intrinsics */ +#if __ARM_FEATURE_TME + +#define _TMFAILURE_REASON 0x00007fffu +#define _TMFAILURE_RTRY 0x00008000u +#define _TMFAILURE_CNCL 0x00010000u +#define _TMFAILURE_MEM 0x00020000u +#define _TMFAILURE_IMP 0x00040000u +#define _TMFAILURE_ERR 0x00080000u +#define _TMFAILURE_SIZE 0x00100000u +#define _TMFAILURE_NEST 0x00200000u +#define _TMFAILURE_DBG 0x00400000u +#define _TMFAILURE_INT 0x00800000u +#define _TMFAILURE_TRIVIAL 0x01000000u + +#define __tstart() __builtin_arm_tstart() +#define __tcommit() __builtin_arm_tcommit() +#define __tcancel(__arg) __builtin_arm_tcancel(__arg) +#define __ttest() __builtin_arm_ttest() + +#endif /* __ARM_FEATURE_TME */ + #if defined(__cplusplus) } #endif diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index f9f82cdeef432..442cbcf1429b6 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1928,6 +1928,7 @@ bool Sema::CheckAArch64BuiltinFunctionCall(unsigned BuiltinID, case AArch64::BI__builtin_arm_dmb: case AArch64::BI__builtin_arm_dsb: case AArch64::BI__builtin_arm_isb: l = 0; u = 15; break; + case AArch64::BI__builtin_arm_tcancel: l = 0; u = 65535; break; } return SemaBuiltinConstantArgRange(TheCall, i, l, u + l); diff --git a/clang/test/CodeGen/aarch64-tme-tcancel-arg.cpp b/clang/test/CodeGen/aarch64-tme-tcancel-arg.cpp new file mode 100644 index 0000000000000..ae6694703c838 --- /dev/null +++ b/clang/test/CodeGen/aarch64-tme-tcancel-arg.cpp @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +tme -S -emit-llvm %s -o - | FileCheck %s + +#define A -1 +constexpr int f() { return 65536; } + +void t_cancel() { + __builtin_arm_tcancel(f() + A); +} + +// CHECK: call void @llvm.aarch64.tcancel(i64 65535) diff --git 
a/clang/test/CodeGen/aarch64-tme.c b/clang/test/CodeGen/aarch64-tme.c new file mode 100644 index 0000000000000..8f90fb8eb8297 --- /dev/null +++ b/clang/test/CodeGen/aarch64-tme.c @@ -0,0 +1,36 @@ +// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +tme -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -DUSE_ACLE -triple aarch64-eabi -target-feature +tme -S -emit-llvm %s -o - | FileCheck %s + +#ifdef USE_ACLE +#include "arm_acle.h" +void test_tme_funcs() { + __tstart(); + (void)__ttest(); + __tcommit(); + __tcancel(0x789a); +} +#else +void test_tme_funcs() { + __builtin_arm_tstart(); + (void)__builtin_arm_ttest(); + __builtin_arm_tcommit(); + __builtin_arm_tcancel(0x789a); +} +#endif +// CHECK: call i64 @llvm.aarch64.tstart() +// CHECK: call i64 @llvm.aarch64.ttest() +// CHECK: call void @llvm.aarch64.tcommit() +// CHECK: call void @llvm.aarch64.tcancel(i64 30874) + +// CHECK: declare i64 @llvm.aarch64.tstart() #1 +// CHECK: declare i64 @llvm.aarch64.ttest() #1 +// CHECK: declare void @llvm.aarch64.tcommit() #1 +// CHECK: declare void @llvm.aarch64.tcancel(i64 immarg) #2 + +#ifdef __ARM_FEATURE_TME +void arm_feature_tme_defined() {} +#endif +// CHECK: define void @arm_feature_tme_defined() + +// CHECK: attributes #1 = { nounwind } +// CHECK: attributes #2 = { noreturn nounwind } diff --git a/clang/test/Sema/aarch64-tme-errors.c b/clang/test/Sema/aarch64-tme-errors.c new file mode 100644 index 0000000000000..0e9c2a6beec0c --- /dev/null +++ b/clang/test/Sema/aarch64-tme-errors.c @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -triple aarch64-eabi -verify %s + +#include "arm_acle.h" + +void test_no_tme_funcs() { + __tstart(); // expected-warning{{implicit declaration of function '__tstart'}} + __builtin_tstart(); // expected-error{{use of unknown builtin '__builtin_tstart'}} +} diff --git a/clang/test/Sema/aarch64-tme-tcancel-const-error.c b/clang/test/Sema/aarch64-tme-tcancel-const-error.c new file mode 100644 index 0000000000000..f97ece59b660b --- /dev/null +++ 
b/clang/test/Sema/aarch64-tme-tcancel-const-error.c @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +tme -verify %s +void t_cancel(unsigned short u) { + __builtin_arm_tcancel(u); // expected-error{{argument to '__builtin_arm_tcancel' must be a constant integer}} +} diff --git a/clang/test/Sema/aarch64-tme-tcancel-range-error.c b/clang/test/Sema/aarch64-tme-tcancel-range-error.c new file mode 100644 index 0000000000000..c61ec90dfa501 --- /dev/null +++ b/clang/test/Sema/aarch64-tme-tcancel-range-error.c @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +tme -verify %s +void t_cancel() { + __builtin_arm_tcancel(0x12345u); // expected-error{{argument value 74565 is outside the valid range [0, 65535]}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 7616d6a90c1bc..ceec212b66303 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -703,3 +703,20 @@ def int_aarch64_stg : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], def int_aarch64_subp : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty], [IntrNoMem]>; } + +// Transactional Memory Extension (TME) Intrinsics +let TargetPrefix = "aarch64" in { +def int_aarch64_tstart : GCCBuiltin<"__builtin_arm_tstart">, + Intrinsic<[llvm_i64_ty]>; + +def int_aarch64_tcommit : GCCBuiltin<"__builtin_arm_tcommit">, Intrinsic<[]>; + +def int_aarch64_tcancel : GCCBuiltin<"__builtin_arm_tcancel">, + Intrinsic<[], [llvm_i64_ty], + [ImmArg<0>, IntrNoMem, IntrHasSideEffects, + IntrNoReturn]>; + +def int_aarch64_ttest : GCCBuiltin<"__builtin_arm_ttest">, + Intrinsic<[llvm_i64_ty], [], + [IntrNoMem, IntrHasSideEffects]>; +} diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index e152f383b3ec0..fd21e3615b71e 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ 
-79,6 +79,7 @@ AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte") AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs") AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb") AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres") +AARCH64_ARCH_EXT_NAME("tme", AArch64::AEK_TME, "+tme", "-tme") #undef AARCH64_ARCH_EXT_NAME #ifndef AARCH64_CPU_NAME diff --git a/llvm/include/llvm/Support/AArch64TargetParser.h b/llvm/include/llvm/Support/AArch64TargetParser.h index 965d38535e747..564f831b07069 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.h +++ b/llvm/include/llvm/Support/AArch64TargetParser.h @@ -54,6 +54,7 @@ enum ArchExtKind : unsigned { AEK_SVE2SM4 = 1 << 25, AEK_SVE2SHA3 = 1 << 26, AEK_BITPERM = 1 << 27, + AEK_TME = 1 << 28, }; enum class ArchKind { diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index e39c6995e3673..fcd5818727f12 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -345,6 +345,9 @@ def FeatureRandGen : SubtargetFeature<"rand", "HasRandGen", def FeatureMTE : SubtargetFeature<"mte", "HasMTE", "true", "Enable Memory Tagging Extension" >; +def FeatureTME : SubtargetFeature<"tme", "HasTME", + "true", "Enable Transactional Memory Extension" >; + //===----------------------------------------------------------------------===// // Architectures. // diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 74fa5ef713d9e..2af5726fc4f5e 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -714,12 +714,15 @@ def logical_imm64_not : Operand { let ParserMatchClass = LogicalImm64NotOperand; } -// imm0_65535 predicate - True if the immediate is in the range [0,65535]. 
-def imm0_65535 : Operand, ImmLeaf, PrintMethod = "printImmHex" in { +def i32_imm0_65535 : Operand, ImmLeaf { - let ParserMatchClass = AsmImmRange<0, 65535>; - let PrintMethod = "printImmHex"; +}]>; + +def i64_imm0_65535 : Operand, ImmLeaf; } // imm0_255 predicate - True if the immediate is in the range [0,255]. @@ -1082,6 +1085,46 @@ class RtSystemI let Inst{4-0} = Rt; } +// System instructions for transactional memory extension +class TMBaseSystemI CRm, bits<3> op2, dag oops, dag iops, + string asm, string operands, list pattern> + : BaseSystemI, + Sched<[WriteSys]> { + let Inst{20-12} = 0b000110011; + let Inst{11-8} = CRm; + let Inst{7-5} = op2; + let DecoderMethod = ""; + + let mayLoad = 1; + let mayStore = 1; +} + +// System instructions for transactional memory - single input operand +class TMSystemI CRm, string asm, list pattern> + : TMBaseSystemI<0b1, CRm, 0b011, + (outs GPR64:$Rt), (ins), asm, "\t$Rt", pattern> { + bits<5> Rt; + let Inst{4-0} = Rt; +} + +// System instructions for transactional memory - no operand +class TMSystemINoOperand CRm, string asm, list pattern> + : TMBaseSystemI<0b0, CRm, 0b011, (outs), (ins), asm, "", pattern> { + let Inst{4-0} = 0b11111; +} + +// System instructions for exit from transactions +let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in +class TMSystemException op1, string asm, list pattern> + : I<(outs), (ins i64_imm0_65535:$imm), asm, "\t$imm", "", pattern>, + Sched<[WriteSys]> { + bits<16> imm; + let Inst{31-24} = 0b11010100; + let Inst{23-21} = op1; + let Inst{20-5} = imm; + let Inst{4-0} = 0b00000; +} + // Hint instructions that take both a CRm and a 3-bit immediate. 
// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot // model patterns with sufficiently fine granularity @@ -4086,7 +4129,7 @@ multiclass MemTagStore opc1, string insn> { let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in class ExceptionGeneration op1, bits<2> ll, string asm> - : I<(outs), (ins imm0_65535:$imm), asm, "\t$imm", "", []>, + : I<(outs), (ins i32_imm0_65535:$imm), asm, "\t$imm", "", []>, Sched<[WriteSys]> { bits<16> imm; let Inst{31-24} = 0b11010100; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 897b3ebb3847f..74f07f569a518 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -133,6 +133,8 @@ def HasBTI : Predicate<"Subtarget->hasBTI()">, AssemblerPredicate<"FeatureBranchTargetId", "bti">; def HasMTE : Predicate<"Subtarget->hasMTE()">, AssemblerPredicate<"FeatureMTE", "mte">; +def HasTME : Predicate<"Subtarget->hasTME()">, + AssemblerPredicate<"FeatureTME", "tme">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsWindows : Predicate<"Subtarget->isTargetWindows()">; @@ -798,6 +800,21 @@ def : InstAlias<"sys $op1, $Cn, $Cm, $op2", (SYSxt imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>; + +let Predicates = [HasTME] in { + +def TSTART : TMSystemI<0b0000, "tstart", [(set GPR64:$Rt, (int_aarch64_tstart))]>; + +def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>; + +let mayLoad = 0, mayStore = 0 in { +def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]>; +def TCANCEL : TMSystemException<0b011, "tcancel", [(int_aarch64_tcancel i64_imm0_65535:$imm)]> { + let isBarrier = 1; +} +} +} // HasTME + //===----------------------------------------------------------------------===// // Move immediate instructions. 
//===----------------------------------------------------------------------===// @@ -809,12 +826,12 @@ let PostEncoderMethod = "fixMOVZ" in defm MOVZ : MoveImmediate<0b10, "movz">; // First group of aliases covers an implicit "lsl #0". -def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0), 0>; -def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0), 0>; -def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>; -def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>; +def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, i32_imm0_65535:$imm, 0), 0>; +def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, i32_imm0_65535:$imm, 0), 0>; +def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, i32_imm0_65535:$imm, 0)>; +def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, i32_imm0_65535:$imm, 0)>; +def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, i32_imm0_65535:$imm, 0)>; +def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, i32_imm0_65535:$imm, 0)>; // Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax. 
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 0c84cfb8329a6..ce829795309c2 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -134,6 +134,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool HasBTI = false; bool HasRandGen = false; bool HasMTE = false; + bool HasTME = false; // Arm SVE2 extensions bool HasSVE2AES = false; @@ -380,6 +381,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool hasBTI() const { return HasBTI; } bool hasRandGen() const { return HasRandGen; } bool hasMTE() const { return HasMTE; } + bool hasTME() const { return HasTME; } // Arm SVE2 extensions bool hasSVE2AES() const { return HasSVE2AES; } bool hasSVE2SM4() const { return HasSVE2SM4; } diff --git a/llvm/test/CodeGen/AArch64/tme-tcancel.ll b/llvm/test/CodeGen/AArch64/tme-tcancel.ll new file mode 100644 index 0000000000000..f4fb7b665de16 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/tme-tcancel.ll @@ -0,0 +1,16 @@ +; RUN: llc %s -o - | FileCheck %s + +target triple = "aarch64-unknown-unknown-eabi" + +define void @test_tcancel() #0 { + tail call void @llvm.aarch64.tcancel(i64 0) #1 + unreachable +} + +declare void @llvm.aarch64.tcancel(i64 immarg) #1 + +attributes #0 = { "target-features"="+tme" } +attributes #1 = { nounwind noreturn } + +; CHECK-LABEL: test_tcancel +; CHECK: tcancel diff --git a/llvm/test/CodeGen/AArch64/tme-tcommit.ll b/llvm/test/CodeGen/AArch64/tme-tcommit.ll new file mode 100644 index 0000000000000..cd85a3e5bd8ec --- /dev/null +++ b/llvm/test/CodeGen/AArch64/tme-tcommit.ll @@ -0,0 +1,16 @@ +; RUN: llc %s -o - | FileCheck %s + +target triple = "aarch64-unknown-unknown-eabi" + +define void @test_tcommit() #0 { + tail call void @llvm.aarch64.tcommit() + ret void +} + +declare void @llvm.aarch64.tcommit() #1 + +attributes #0 = { 
"target-features"="+tme" } +attributes #1 = { nounwind } + +; CHECK-LABEL: test_tcommit +; CHECK: tcommit diff --git a/llvm/test/CodeGen/AArch64/tme-tstart.ll b/llvm/test/CodeGen/AArch64/tme-tstart.ll new file mode 100644 index 0000000000000..c761842e7980d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/tme-tstart.ll @@ -0,0 +1,16 @@ +; RUN: llc %s -o - | FileCheck %s + +target triple = "aarch64-unknown-unknown-eabi" + +define i64 @test_tstart() #0 { + %r = tail call i64 @llvm.aarch64.tstart() + ret i64 %r +} + +declare i64 @llvm.aarch64.tstart() #1 + +attributes #0 = { "target-features"="+tme" } +attributes #1 = { nounwind } + +; CHECK-LABEL: test_tstart +; CHECK: tstart x diff --git a/llvm/test/CodeGen/AArch64/tme-ttest.ll b/llvm/test/CodeGen/AArch64/tme-ttest.ll new file mode 100644 index 0000000000000..597821081354f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/tme-ttest.ll @@ -0,0 +1,16 @@ +; RUN: llc %s -o - | FileCheck %s + +target triple = "aarch64-unknown-unknown-eabi" + +define i64 @test_ttest() #0 { + %r = tail call i64 @llvm.aarch64.ttest() + ret i64 %r +} + +declare i64 @llvm.aarch64.ttest() #1 + +attributes #0 = { "target-features"="+tme" } +attributes #1 = { nounwind } + +; CHECK-LABEL: test_ttest +; CHECK: ttest x diff --git a/llvm/test/MC/AArch64/tme-error.s b/llvm/test/MC/AArch64/tme-error.s new file mode 100644 index 0000000000000..f91f58fa3ef25 --- /dev/null +++ b/llvm/test/MC/AArch64/tme-error.s @@ -0,0 +1,47 @@ +// Tests for transactional memory extension instructions +// RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=+tme < %s 2>&1 | FileCheck %s + +tstart +// CHECK: error: too few operands for instruction +// CHECK-NEXT: tstart +tstart x4, x5 +// CHECK: error: invalid operand for instruction +// CHECK-NEXT: tstart x4, x5 +tstart x4, #1 +// CHECK: error: invalid operand for instruction +// CHECK-NEXT: tstart x4, #1 +tstart sp +// CHECK: error: invalid operand for instruction +// CHECK-NEXT: tstart sp + +ttest +// CHECK: error: too few 
operands for instruction +// CHECK-NEXT: ttest +ttest x4, x5 +// CHECK: error: invalid operand for instruction +// CHECK-NEXT: ttest x4, x5 +ttest x4, #1 +// CHECK: error: invalid operand for instruction +// CHECK-NEXT: ttest x4, #1 +ttest sp +// CHECK: error: invalid operand for instruction +// CHECK-NEXT: ttest sp + +tcommit x4 +// CHECK: error: invalid operand for instruction +// CHECK-NEXT: tcommit x4 +tcommit sp +// CHECK: error: invalid operand for instruction +// CHECK-NEXT: tcommit sp + + +tcancel +// CHECK: error: too few operands for instruction +// CHECK-NEXT tcancel +tcancel x0 +// CHECK: error: immediate must be an integer in range [0, 65535] +// CHECK-NEXT tcancel +tcancel #65536 +// CHECK: error: immediate must be an integer in range [0, 65535] +// CHECK-NEXT: tcancel #65536 + diff --git a/llvm/test/MC/AArch64/tme.s b/llvm/test/MC/AArch64/tme.s new file mode 100644 index 0000000000000..cd47274127649 --- /dev/null +++ b/llvm/test/MC/AArch64/tme.s @@ -0,0 +1,24 @@ +// Tests for transaction memory extension instructions +// +// RUN: llvm-mc -triple aarch64 -show-encoding -mattr=+tme < %s | FileCheck %s +// RUN: not llvm-mc -triple aarch64 -show-encoding -mattr=-tme < %s 2>&1 | FileCheck %s --check-prefix=NOTME + +tstart x3 +ttest x4 +tcommit +tcancel #0x1234 + +// CHECK: tstart x3 // encoding: [0x63,0x30,0x23,0xd5] +// CHECK: ttest x4 // encoding: [0x64,0x31,0x23,0xd5] +// CHECK: tcommit // encoding: [0x7f,0x30,0x03,0xd5] +// CHECK: tcancel #0x1234 // encoding: [0x80,0x46,0x62,0xd4] + + +// NOTME: instruction requires: tme +// NOTME-NEXT: tstart x3 +// NOTME: instruction requires: tme +// NOTME-NEXT: ttest x4 +// NOTME: instruction requires: tme +// NOTME-NEXT: tcommit +// NOTME: instruction requires: tme +// NOTME-NEXT: tcancel #0x1234 diff --git a/llvm/test/MC/Disassembler/AArch64/tme.txt b/llvm/test/MC/Disassembler/AArch64/tme.txt new file mode 100644 index 0000000000000..f250b33e0e1df --- /dev/null +++ b/llvm/test/MC/Disassembler/AArch64/tme.txt @@ 
-0,0 +1,19 @@ +# Tests for transaction memory extension instructions +# RUN: llvm-mc -triple=aarch64 -mattr=+tme -disassemble < %s | FileCheck %s +# RUN: not llvm-mc -triple=aarch64 -mattr=-tme -disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOTME + +[0x63,0x30,0x23,0xd5] +[0x64,0x31,0x23,0xd5] +[0x7f,0x30,0x03,0xd5] +[0x80,0x46,0x62,0xd4] + +# CHECK: tstart x3 +# CHECK: ttest x4 +# CHECK: tcommit +# CHECK: tcancel #0x1234 + +# NOTEME: mrs +# NOTEME-NEXT: mrs +# NOTEME-NEXT: msr +# NOTME: warning: invalid instruction encoding +# NOTME-NEXT: [0x80,0x46,0x62,0xd4] diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp index 34c7a8a4fd1c8..5ef8f2e4b5500 100644 --- a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -1119,6 +1119,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) { {"rcpc", "norcpc", "+rcpc", "-rcpc" }, {"rng", "norng", "+rand", "-rand"}, {"memtag", "nomemtag", "+mte", "-mte"}, + {"tme", "notme", "+tme", "-tme"}, {"ssbs", "nossbs", "+ssbs", "-ssbs"}, {"sb", "nosb", "+sb", "-sb"}, {"predres", "nopredres", "+predres", "-predres"} From 70235c642e66bdf4900aabd541fa9a1548f72d0e Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 17 Jul 2019 13:40:03 +0000 Subject: [PATCH 341/451] [AMDGPU] Optimize atomic AND/OR/XOR Summary: Extend the atomic optimizer to handle AND, OR and XOR. 
Reviewers: arsenm, sheredom Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, jfb, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D64809 llvm-svn: 366323 --- .../Target/AMDGPU/AMDGPUAtomicOptimizer.cpp | 71 ++++++++++++++----- .../atomic_optimizations_local_pointer.ll | 36 ++++++++++ 2 files changed, 91 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp index c65a49b7c5bc7..8a92e7d923fbc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp @@ -127,6 +127,9 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) { return; case AtomicRMWInst::Add: case AtomicRMWInst::Sub: + case AtomicRMWInst::And: + case AtomicRMWInst::Or: + case AtomicRMWInst::Xor: case AtomicRMWInst::Max: case AtomicRMWInst::Min: case AtomicRMWInst::UMax: @@ -177,6 +180,21 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) { case Intrinsic::amdgcn_raw_buffer_atomic_sub: Op = AtomicRMWInst::Sub; break; + case Intrinsic::amdgcn_buffer_atomic_and: + case Intrinsic::amdgcn_struct_buffer_atomic_and: + case Intrinsic::amdgcn_raw_buffer_atomic_and: + Op = AtomicRMWInst::And; + break; + case Intrinsic::amdgcn_buffer_atomic_or: + case Intrinsic::amdgcn_struct_buffer_atomic_or: + case Intrinsic::amdgcn_raw_buffer_atomic_or: + Op = AtomicRMWInst::Or; + break; + case Intrinsic::amdgcn_buffer_atomic_xor: + case Intrinsic::amdgcn_struct_buffer_atomic_xor: + case Intrinsic::amdgcn_raw_buffer_atomic_xor: + Op = AtomicRMWInst::Xor; + break; case Intrinsic::amdgcn_buffer_atomic_smin: case Intrinsic::amdgcn_struct_buffer_atomic_smin: case Intrinsic::amdgcn_raw_buffer_atomic_smin: @@ -240,6 +258,12 @@ static Value *buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op, return B.CreateBinOp(Instruction::Add, LHS, RHS); case AtomicRMWInst::Sub: return 
B.CreateBinOp(Instruction::Sub, LHS, RHS); + case AtomicRMWInst::And: + return B.CreateBinOp(Instruction::And, LHS, RHS); + case AtomicRMWInst::Or: + return B.CreateBinOp(Instruction::Or, LHS, RHS); + case AtomicRMWInst::Xor: + return B.CreateBinOp(Instruction::Xor, LHS, RHS); case AtomicRMWInst::Max: Pred = CmpInst::ICMP_SGT; @@ -265,8 +289,11 @@ static APInt getIdentityValueForAtomicOp(AtomicRMWInst::BinOp Op, llvm_unreachable("Unhandled atomic op"); case AtomicRMWInst::Add: case AtomicRMWInst::Sub: + case AtomicRMWInst::Or: + case AtomicRMWInst::Xor: case AtomicRMWInst::UMax: return APInt::getMinValue(BitWidth); + case AtomicRMWInst::And: case AtomicRMWInst::UMin: return APInt::getMaxValue(BitWidth); case AtomicRMWInst::Max: @@ -331,10 +358,10 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, Value *const ExtractHi = B.CreateExtractElement(BitCast, B.getInt32(1)); CallInst *const PartialMbcnt = B.CreateIntrinsic( Intrinsic::amdgcn_mbcnt_lo, {}, {ExtractLo, B.getInt32(0)}); - CallInst *const Mbcnt = B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {}, - {ExtractHi, PartialMbcnt}); - - Value *const MbcntCast = B.CreateIntCast(Mbcnt, Ty, false); + Value *const Mbcnt = + B.CreateIntCast(B.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_hi, {}, + {ExtractHi, PartialMbcnt}), + Ty, false); Value *const Identity = B.getInt(getIdentityValueForAtomicOp(Op, TyBitWidth)); @@ -408,32 +435,39 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, case AtomicRMWInst::Add: case AtomicRMWInst::Sub: { - // Get the total number of active lanes we have by using popcount. - Instruction *const Ctpop = - B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot); - Value *const CtpopCast = B.CreateIntCast(Ctpop, Ty, false); - - // Calculate the new value we will be contributing to the atomic operation - // for the entire wavefront. 
- NewV = B.CreateMul(V, CtpopCast); + // The new value we will be contributing to the atomic operation is the + // old value times the number of active lanes. + Value *const Ctpop = B.CreateIntCast( + B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false); + NewV = B.CreateMul(V, Ctpop); break; } + case AtomicRMWInst::And: + case AtomicRMWInst::Or: case AtomicRMWInst::Max: case AtomicRMWInst::Min: case AtomicRMWInst::UMax: case AtomicRMWInst::UMin: - // Max/min with a uniform value is idempotent: doing the atomic operation - // multiple times has the same effect as doing it once. + // These operations with a uniform value are idempotent: doing the atomic + // operation multiple times has the same effect as doing it once. NewV = V; break; + + case AtomicRMWInst::Xor: + // The new value we will be contributing to the atomic operation is the + // old value times the parity of the number of active lanes. + Value *const Ctpop = B.CreateIntCast( + B.CreateUnaryIntrinsic(Intrinsic::ctpop, Ballot), Ty, false); + NewV = B.CreateMul(V, B.CreateAnd(Ctpop, 1)); + break; } } // We only want a single lane to enter our new control flow, and we do this // by checking if there are any active lanes below us. Only one lane will // have 0 active lanes below us, so that will be the only one to progress. - Value *const Cond = B.CreateICmpEQ(MbcntCast, B.getIntN(TyBitWidth, 0)); + Value *const Cond = B.CreateICmpEQ(Mbcnt, B.getIntN(TyBitWidth, 0)); // Store I's original basic block before we split the block. 
BasicBlock *const EntryBB = I.getParent(); @@ -502,14 +536,19 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I, llvm_unreachable("Unhandled atomic op"); case AtomicRMWInst::Add: case AtomicRMWInst::Sub: - LaneOffset = B.CreateMul(V, MbcntCast); + LaneOffset = B.CreateMul(V, Mbcnt); break; + case AtomicRMWInst::And: + case AtomicRMWInst::Or: case AtomicRMWInst::Max: case AtomicRMWInst::Min: case AtomicRMWInst::UMax: case AtomicRMWInst::UMin: LaneOffset = B.CreateSelect(Cond, Identity, V); break; + case AtomicRMWInst::Xor: + LaneOffset = B.CreateMul(V, B.CreateAnd(Mbcnt, 1)); + break; } } Value *const Result = buildNonAtomicBinOp(B, Op, BroadcastI, LaneOffset); diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll index 5f7649c1c0ea5..05620789141cf 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -195,6 +195,42 @@ entry: ret void } +; GCN-LABEL: and_i32_varying: +; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63 +; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] +; GFX8MORE: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]] +define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) { +entry: + %lane = call i32 @llvm.amdgcn.workitem.id.x() + %old = atomicrmw and i32 addrspace(3)* @local_var32, i32 %lane acq_rel + store i32 %old, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: or_i32_varying: +; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63 +; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] +; GFX8MORE: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]] +define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) { +entry: + %lane = call i32 @llvm.amdgcn.workitem.id.x() + %old = atomicrmw or i32 addrspace(3)* @local_var32, i32 %lane acq_rel + store i32 %old, i32 
addrspace(1)* %out + ret void +} + +; GCN-LABEL: xor_i32_varying: +; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63 +; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] +; GFX8MORE: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]] +define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) { +entry: + %lane = call i32 @llvm.amdgcn.workitem.id.x() + %old = atomicrmw xor i32 addrspace(3)* @local_var32, i32 %lane acq_rel + store i32 %old, i32 addrspace(1)* %out + ret void +} + ; GCN-LABEL: max_i32_varying: ; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63 ; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]] From 62069ac310654f464e65d98e92d33d16faa738ee Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 17 Jul 2019 13:40:42 +0000 Subject: [PATCH 342/451] [llvm-ar][test] \r\n -> \n Also simplify some empty output tests with 'count 0' llvm-svn: 366324 --- llvm/test/tools/llvm-ar/create.test | 27 +- .../tools/llvm-ar/dash-before-letter.test | 24 +- llvm/test/tools/llvm-ar/delete.test | 134 +++---- llvm/test/tools/llvm-ar/insert-after.test | 118 +++--- llvm/test/tools/llvm-ar/insert-before.test | 118 +++--- llvm/test/tools/llvm-ar/move-after.test | 118 +++--- llvm/test/tools/llvm-ar/move-before.test | 138 +++---- llvm/test/tools/llvm-ar/move.test | 258 ++++++------ llvm/test/tools/llvm-ar/mri-addlib.test | 90 ++--- llvm/test/tools/llvm-ar/mri-addmod.test | 54 +-- llvm/test/tools/llvm-ar/mri-comments.test | 38 +- llvm/test/tools/llvm-ar/mri-end.test | 96 ++--- llvm/test/tools/llvm-ar/quick-append.test | 184 +++++---- .../test/tools/llvm-ar/read-only-archive.test | 60 +-- llvm/test/tools/llvm-ar/replace-update.test | 378 +++++++++--------- llvm/test/tools/llvm-ar/replace.test | 356 ++++++++--------- llvm/test/tools/llvm-ar/symtab.test | 140 +++---- 17 files changed, 1164 insertions(+), 1167 deletions(-) diff --git a/llvm/test/tools/llvm-ar/create.test 
b/llvm/test/tools/llvm-ar/create.test index 7e4951da5fa2b..a29b93ec5d6da 100644 --- a/llvm/test/tools/llvm-ar/create.test +++ b/llvm/test/tools/llvm-ar/create.test @@ -1,14 +1,13 @@ -## Test the creation warning and supression of that warning. - -RUN: touch %t1.txt -RUN: touch %t2.txt - -RUN: rm -f %t.warning.ar -RUN: llvm-ar r %t.warning.ar %t1.txt %t2.txt 2>&1 \ -RUN: | FileCheck %s -DOUTPUT=%t.warning.ar - -CHECK: warning: creating [[OUTPUT]] - -RUN: rm -f %t.supressed.ar -RUN: llvm-ar cr %t.supressed.ar %t1.txt %t2.txt 2>&1 \ -RUN: | FileCheck --allow-empty /dev/null --implicit-check-not={{.}} +## Test the creation warning and supression of that warning. + +RUN: touch %t1.txt +RUN: touch %t2.txt + +RUN: rm -f %t.warning.ar +RUN: llvm-ar r %t.warning.ar %t1.txt %t2.txt 2>&1 \ +RUN: | FileCheck %s -DOUTPUT=%t.warning.ar + +CHECK: warning: creating [[OUTPUT]] + +RUN: rm -f %t.supressed.ar +RUN: llvm-ar cr %t.supressed.ar %t1.txt %t2.txt 2>&1 | count 0 diff --git a/llvm/test/tools/llvm-ar/dash-before-letter.test b/llvm/test/tools/llvm-ar/dash-before-letter.test index 3002a6de7ef90..43930b1217367 100644 --- a/llvm/test/tools/llvm-ar/dash-before-letter.test +++ b/llvm/test/tools/llvm-ar/dash-before-letter.test @@ -1,12 +1,12 @@ -# Test the use of dash before key letters. - -RUN: touch %t1.txt -RUN: touch %t2.txt - -RUN: rm -f %t.ar -RUN: llvm-ar s -cr %t.ar %t1.txt -RUN: llvm-ar -r -s %t.ar %t2.txt -s -RUN: llvm-ar -t %t.ar | FileCheck %s - -CHECK: 1.txt -CHECK-NEXT: 2.txt +# Test the use of dash before key letters. 
+ +RUN: touch %t1.txt +RUN: touch %t2.txt + +RUN: rm -f %t.ar +RUN: llvm-ar s -cr %t.ar %t1.txt +RUN: llvm-ar -r -s %t.ar %t2.txt -s +RUN: llvm-ar -t %t.ar | FileCheck %s + +CHECK: 1.txt +CHECK-NEXT: 2.txt diff --git a/llvm/test/tools/llvm-ar/delete.test b/llvm/test/tools/llvm-ar/delete.test index d5ab797664173..20e58b9a45b57 100644 --- a/llvm/test/tools/llvm-ar/delete.test +++ b/llvm/test/tools/llvm-ar/delete.test @@ -1,67 +1,67 @@ -## Test the deletion of members and that symbols are removed from the symbol table. - -# RUN: yaml2obj %s -o %t-delete.o --docnum=1 -# RUN: yaml2obj %s -o %t-keep.o --docnum=2 -# RUN: touch %t1.txt -# RUN: touch %t2.txt - -## Add file: -# RUN: rm -f %t.a -# RUN: llvm-ar rc %t.a %t1.txt %t-delete.o %t-keep.o %t2.txt -# RUN: llvm-nm --print-armap %t.a \ -# RUN: | FileCheck %s --check-prefix=SYMBOL-ADDED -# RUN: llvm-ar t %t.a | FileCheck %s --check-prefix=FILE-ADDED - -# SYMBOL-ADDED: symbol1 -# SYMBOL-ADDED-NEXT: symbol2 - -# FILE-ADDED: 1.txt -# FILE-ADDED-NEXT: delete.o -# FILE-ADDED-NEXT: keep.o -# FILE-ADDED-NEXT: 2.txt - -## Delete file that is not a member: -# RUN: cp %t.a %t-archive-copy.a -# RUN: llvm-ar d %t.a t/missing.o -# RUN: cmp %t.a %t-archive-copy.a - -## Delete file: -# RUN: llvm-ar d %t.a %t-delete.o -# RUN: llvm-nm --print-armap %t.a \ -# RUN: | FileCheck %s --check-prefix=SYMBOL-DELETED --implicit-check-not symbol1 -# RUN: llvm-ar t %t.a \ -# RUN: | FileCheck %s --check-prefix=FILE-DELETED --implicit-check-not delete.o - -# SYMBOL-DELETED: symbol2 - -# FILE-DELETED: 1.txt -# FILE-DELETED-NEXT: keep.o -# FILE-DELETED-NEXT: 2.txt - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbol1 - Binding: STB_GLOBAL - Section: .text - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbol2 - 
Binding: STB_GLOBAL - Section: .text +## Test the deletion of members and that symbols are removed from the symbol table. + +# RUN: yaml2obj %s -o %t-delete.o --docnum=1 +# RUN: yaml2obj %s -o %t-keep.o --docnum=2 +# RUN: touch %t1.txt +# RUN: touch %t2.txt + +## Add file: +# RUN: rm -f %t.a +# RUN: llvm-ar rc %t.a %t1.txt %t-delete.o %t-keep.o %t2.txt +# RUN: llvm-nm --print-armap %t.a \ +# RUN: | FileCheck %s --check-prefix=SYMBOL-ADDED +# RUN: llvm-ar t %t.a | FileCheck %s --check-prefix=FILE-ADDED + +# SYMBOL-ADDED: symbol1 +# SYMBOL-ADDED-NEXT: symbol2 + +# FILE-ADDED: 1.txt +# FILE-ADDED-NEXT: delete.o +# FILE-ADDED-NEXT: keep.o +# FILE-ADDED-NEXT: 2.txt + +## Delete file that is not a member: +# RUN: cp %t.a %t-archive-copy.a +# RUN: llvm-ar d %t.a t/missing.o +# RUN: cmp %t.a %t-archive-copy.a + +## Delete file: +# RUN: llvm-ar d %t.a %t-delete.o +# RUN: llvm-nm --print-armap %t.a \ +# RUN: | FileCheck %s --check-prefix=SYMBOL-DELETED --implicit-check-not symbol1 +# RUN: llvm-ar t %t.a \ +# RUN: | FileCheck %s --check-prefix=FILE-DELETED --implicit-check-not delete.o + +# SYMBOL-DELETED: symbol2 + +# FILE-DELETED: 1.txt +# FILE-DELETED-NEXT: keep.o +# FILE-DELETED-NEXT: 2.txt + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol1 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol2 + Binding: STB_GLOBAL + Section: .text diff --git a/llvm/test/tools/llvm-ar/insert-after.test b/llvm/test/tools/llvm-ar/insert-after.test index cd8ee9409c6b4..7c01e736149aa 100644 --- a/llvm/test/tools/llvm-ar/insert-after.test +++ b/llvm/test/tools/llvm-ar/insert-after.test @@ -1,59 +1,59 @@ -## Test inserting files after a file. 
- -RUN: touch %t1.txt -RUN: touch %t2.txt -RUN: touch %t3.txt -RUN: touch %t4.txt - -# Insert one file: -RUN: rm -f %t-one.a -RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt -RUN: llvm-ar ra %t1.txt %t-one.a %t3.txt -RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE - -ONE: 1.txt -ONE-NEXT: 3.txt -ONE-NEXT: 2.txt - -# Insert file at back: -RUN: rm -f %t-back.a -RUN: llvm-ar rc %t-back.a %t1.txt %t2.txt -RUN: llvm-ar ra %t2.txt %t-back.a %t3.txt -RUN: llvm-ar t %t-back.a | FileCheck %s --check-prefix=BACK - -BACK: 1.txt -BACK-NEXT: 2.txt -BACK-NEXT: 3.txt - -# Insert multiple files: -RUN: rm -f %t-multiple.a -RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt -RUN: llvm-ar ra %t1.txt %t-multiple.a %t4.txt %t3.txt -RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE - -MULTIPLE: 1.txt -MULTIPLE-NEXT: 4.txt -MULTIPLE-NEXT: 3.txt -MULTIPLE-NEXT: 2.txt - -# Insert after invalid file: -RUN: rm -f %t-invalid.a -RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt -RUN: not llvm-ar ra invalid.txt %t-invalid.a %t2.txt 2>&1 \ -RUN: | FileCheck %s --check-prefix=ERROR -RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID - -ERROR: error: Insertion point not found. -INVALID: 1.txt -INVALID-NEXT: 2.txt -INVALID-NEXT: 3.txt - -# Insert file at the same position: -RUN: rm -f %t-position.a -RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt -RUN: llvm-ar ra %t1.txt %t-position.a %t2.txt -RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION - -POSITION: 1.txt -POSITION-NEXT: 2.txt -POSITION-NEXT: 3.txt +## Test inserting files after a file. 
+ +RUN: touch %t1.txt +RUN: touch %t2.txt +RUN: touch %t3.txt +RUN: touch %t4.txt + +# Insert one file: +RUN: rm -f %t-one.a +RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt +RUN: llvm-ar ra %t1.txt %t-one.a %t3.txt +RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE + +ONE: 1.txt +ONE-NEXT: 3.txt +ONE-NEXT: 2.txt + +# Insert file at back: +RUN: rm -f %t-back.a +RUN: llvm-ar rc %t-back.a %t1.txt %t2.txt +RUN: llvm-ar ra %t2.txt %t-back.a %t3.txt +RUN: llvm-ar t %t-back.a | FileCheck %s --check-prefix=BACK + +BACK: 1.txt +BACK-NEXT: 2.txt +BACK-NEXT: 3.txt + +# Insert multiple files: +RUN: rm -f %t-multiple.a +RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt +RUN: llvm-ar ra %t1.txt %t-multiple.a %t4.txt %t3.txt +RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE + +MULTIPLE: 1.txt +MULTIPLE-NEXT: 4.txt +MULTIPLE-NEXT: 3.txt +MULTIPLE-NEXT: 2.txt + +# Insert after invalid file: +RUN: rm -f %t-invalid.a +RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt +RUN: not llvm-ar ra invalid.txt %t-invalid.a %t2.txt 2>&1 \ +RUN: | FileCheck %s --check-prefix=ERROR +RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID + +ERROR: error: Insertion point not found. +INVALID: 1.txt +INVALID-NEXT: 2.txt +INVALID-NEXT: 3.txt + +# Insert file at the same position: +RUN: rm -f %t-position.a +RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt +RUN: llvm-ar ra %t1.txt %t-position.a %t2.txt +RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION + +POSITION: 1.txt +POSITION-NEXT: 2.txt +POSITION-NEXT: 3.txt diff --git a/llvm/test/tools/llvm-ar/insert-before.test b/llvm/test/tools/llvm-ar/insert-before.test index 61505d8162aad..4abf3a000287c 100644 --- a/llvm/test/tools/llvm-ar/insert-before.test +++ b/llvm/test/tools/llvm-ar/insert-before.test @@ -1,59 +1,59 @@ -## Test inserting files before a file. 
- -RUN: touch %t1.txt -RUN: touch %t2.txt -RUN: touch %t3.txt -RUN: touch %t4.txt - -# Insert one file: -RUN: rm -f %t-one.a -RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt -RUN: llvm-ar rb %t2.txt %t-one.a %t3.txt -RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE - -ONE: 1.txt -ONE-NEXT: 3.txt -ONE-NEXT: 2.txt - -# Insert file at front: -RUN: rm -f %t-front.a -RUN: llvm-ar rc %t-front.a %t1.txt %t2.txt -RUN: llvm-ar rb %t1.txt %t-front.a %t3.txt -RUN: llvm-ar t %t-front.a | FileCheck %s --check-prefix=FRONT - -FRONT: 3.txt -FRONT-NEXT: 1.txt -FRONT-NEXT: 2.txt - -# Insert multiple files: -RUN: rm -f %t-multiple.a -RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt -RUN: llvm-ar rb %t2.txt %t-multiple.a %t4.txt %t3.txt -RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE - -MULTIPLE: 1.txt -MULTIPLE-NEXT: 4.txt -MULTIPLE-NEXT: 3.txt -MULTIPLE-NEXT: 2.txt - -# Insert before an invalid file: -RUN: rm -f %t-invalid.a -RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt -RUN: not llvm-ar rb invalid.txt %t-invalid.a %t2.txt 2>&1 \ -RUN: | FileCheck %s --check-prefix=ERROR -RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID - -ERROR: error: Insertion point not found. -INVALID: 1.txt -INVALID-NEXT: 2.txt -INVALID-NEXT: 3.txt - -# Insert file at the same position: -RUN: rm -f %t-position.a -RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt -RUN: llvm-ar rb %t3.txt %t-position.a %t2.txt -RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION - -POSITION: 1.txt -POSITION-NEXT: 2.txt -POSITION-NEXT: 3.txt +## Test inserting files before a file. 
+ +RUN: touch %t1.txt +RUN: touch %t2.txt +RUN: touch %t3.txt +RUN: touch %t4.txt + +# Insert one file: +RUN: rm -f %t-one.a +RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt +RUN: llvm-ar rb %t2.txt %t-one.a %t3.txt +RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE + +ONE: 1.txt +ONE-NEXT: 3.txt +ONE-NEXT: 2.txt + +# Insert file at front: +RUN: rm -f %t-front.a +RUN: llvm-ar rc %t-front.a %t1.txt %t2.txt +RUN: llvm-ar rb %t1.txt %t-front.a %t3.txt +RUN: llvm-ar t %t-front.a | FileCheck %s --check-prefix=FRONT + +FRONT: 3.txt +FRONT-NEXT: 1.txt +FRONT-NEXT: 2.txt + +# Insert multiple files: +RUN: rm -f %t-multiple.a +RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt +RUN: llvm-ar rb %t2.txt %t-multiple.a %t4.txt %t3.txt +RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE + +MULTIPLE: 1.txt +MULTIPLE-NEXT: 4.txt +MULTIPLE-NEXT: 3.txt +MULTIPLE-NEXT: 2.txt + +# Insert before an invalid file: +RUN: rm -f %t-invalid.a +RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt +RUN: not llvm-ar rb invalid.txt %t-invalid.a %t2.txt 2>&1 \ +RUN: | FileCheck %s --check-prefix=ERROR +RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID + +ERROR: error: Insertion point not found. +INVALID: 1.txt +INVALID-NEXT: 2.txt +INVALID-NEXT: 3.txt + +# Insert file at the same position: +RUN: rm -f %t-position.a +RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt +RUN: llvm-ar rb %t3.txt %t-position.a %t2.txt +RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION + +POSITION: 1.txt +POSITION-NEXT: 2.txt +POSITION-NEXT: 3.txt diff --git a/llvm/test/tools/llvm-ar/move-after.test b/llvm/test/tools/llvm-ar/move-after.test index fb61f2aa118f6..6a4360c3cd0a4 100644 --- a/llvm/test/tools/llvm-ar/move-after.test +++ b/llvm/test/tools/llvm-ar/move-after.test @@ -1,59 +1,59 @@ -## Test moving files after a file. 
- -RUN: touch %t1.txt -RUN: touch %t2.txt -RUN: touch %t3.txt -RUN: touch %t4.txt - -# Move one file: -RUN: rm -f %t-one.ar -RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt %t3.txt -RUN: llvm-ar ma %t1.txt %t-one.a %t3.txt -RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE - -ONE: 1.txt -ONE-NEXT: 3.txt -ONE-NEXT: 2.txt - -# Move file to back: -RUN: rm -f %t-back.ar -RUN: llvm-ar rc %t-back.a %t1.txt %t2.txt %t3.txt -RUN: llvm-ar ma %t2.txt %t-back.a %t1.txt -RUN: llvm-ar t %t-back.a | FileCheck %s --check-prefix=BACK - -BACK: 2.txt -BACK-NEXT: 1.txt -BACK-NEXT: 3.txt - -# Move multiple files: -RUN: rm -f %t-multiple.ar -RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt %t3.txt %t4.txt -RUN: llvm-ar ma %t1.txt %t-multiple.a %t4.txt %t3.txt -RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE - -MULTIPLE: 1.txt -MULTIPLE-NEXT: 3.txt -MULTIPLE-NEXT: 4.txt -MULTIPLE-NEXT: 2.txt - -# Move after invalid file: -RUN: rm -f %t-invalid.ar -RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt -RUN: not llvm-ar ma invalid.txt %t-invalid.a %t2.txt 2>&1 \ -RUN: | FileCheck %s --check-prefix=ERROR -RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID - -ERROR: error: Insertion point not found. -INVALID: 1.txt -INVALID-NEXT: 2.txt -INVALID-NEXT: 3.txt - -# Move file to the same position: -RUN: rm -f %t-position.ar -RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt -RUN: llvm-ar ma %t1.txt %t-position.a %t2.txt -RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION - -POSITION: 1.txt -POSITION-NEXT: 2.txt -POSITION-NEXT: 3.txt +## Test moving files after a file. 
+ +RUN: touch %t1.txt +RUN: touch %t2.txt +RUN: touch %t3.txt +RUN: touch %t4.txt + +# Move one file: +RUN: rm -f %t-one.ar +RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt %t3.txt +RUN: llvm-ar ma %t1.txt %t-one.a %t3.txt +RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE + +ONE: 1.txt +ONE-NEXT: 3.txt +ONE-NEXT: 2.txt + +# Move file to back: +RUN: rm -f %t-back.ar +RUN: llvm-ar rc %t-back.a %t1.txt %t2.txt %t3.txt +RUN: llvm-ar ma %t2.txt %t-back.a %t1.txt +RUN: llvm-ar t %t-back.a | FileCheck %s --check-prefix=BACK + +BACK: 2.txt +BACK-NEXT: 1.txt +BACK-NEXT: 3.txt + +# Move multiple files: +RUN: rm -f %t-multiple.ar +RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt %t3.txt %t4.txt +RUN: llvm-ar ma %t1.txt %t-multiple.a %t4.txt %t3.txt +RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE + +MULTIPLE: 1.txt +MULTIPLE-NEXT: 3.txt +MULTIPLE-NEXT: 4.txt +MULTIPLE-NEXT: 2.txt + +# Move after invalid file: +RUN: rm -f %t-invalid.ar +RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt +RUN: not llvm-ar ma invalid.txt %t-invalid.a %t2.txt 2>&1 \ +RUN: | FileCheck %s --check-prefix=ERROR +RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID + +ERROR: error: Insertion point not found. +INVALID: 1.txt +INVALID-NEXT: 2.txt +INVALID-NEXT: 3.txt + +# Move file to the same position: +RUN: rm -f %t-position.ar +RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt +RUN: llvm-ar ma %t1.txt %t-position.a %t2.txt +RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION + +POSITION: 1.txt +POSITION-NEXT: 2.txt +POSITION-NEXT: 3.txt diff --git a/llvm/test/tools/llvm-ar/move-before.test b/llvm/test/tools/llvm-ar/move-before.test index b5fefec29557a..306a887e26b89 100644 --- a/llvm/test/tools/llvm-ar/move-before.test +++ b/llvm/test/tools/llvm-ar/move-before.test @@ -1,69 +1,69 @@ -## Test moving files after a file. 
- -RUN: touch %t1.txt -RUN: touch %t2.txt -RUN: touch %t3.txt -RUN: touch %t4.txt - -# Move one file: -RUN: rm -f %t-one.ar -RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt %t3.txt -RUN: llvm-ar mb %t2.txt %t-one.a %t3.txt -RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE - -ONE: 1.txt -ONE-NEXT: 3.txt -ONE-NEXT: 2.txt - -# Move file to front: -RUN: rm -f %t-front.ar -RUN: llvm-ar rc %t-front.ar %t1.txt %t2.txt %t3.txt -RUN: llvm-ar mb %t1.txt %t-front.ar %t3.txt -RUN: llvm-ar t %t-front.ar | FileCheck %s --check-prefix=FRONT - -FRONT: 3.txt -FRONT-NEXT: 1.txt -FRONT-NEXT: 2.txt - -# Move multiple files: -RUN: rm -f %t-multiple.ar -RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt %t3.txt %t4.txt -RUN: llvm-ar mb %t2.txt %t-multiple.a %t4.txt %t3.txt -RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE - -MULTIPLE: 1.txt -MULTIPLE-NEXT: 3.txt -MULTIPLE-NEXT: 4.txt -MULTIPLE-NEXT: 2.txt - -# Move before invalid file: -RUN: rm -f %t-invalid.ar -RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt -RUN: not llvm-ar mb invalid.txt %t-invalid.a %t2.txt 2>&1 \ -RUN: | FileCheck %s --check-prefix=ERROR -RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID - -ERROR: error: Insertion point not found. -INVALID: 1.txt -INVALID-NEXT: 2.txt -INVALID-NEXT: 3.txt - -# Move file to the same position: -RUN: rm -f %t-position.ar -RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt -RUN: llvm-ar mb %t3.txt %t-position.a %t2.txt -RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION - -POSITION: 1.txt -POSITION-NEXT: 2.txt -POSITION-NEXT: 3.txt - -# Move file after itself: -RUN: rm -f %t-same.ar -RUN: llvm-ar rc %t-same.ar %t1.txt %t2.txt %t3.txt -RUN: llvm-ar mb %t2.txt %t-same.ar %t2.txt -RUN: llvm-ar t %t-same.ar | FileCheck %s --check-prefix=SAME - -SAME: 1.txt -SAME-NEXT: 2.txt -SAME-NEXT: 3.txt +## Test moving files after a file. 
+ +RUN: touch %t1.txt +RUN: touch %t2.txt +RUN: touch %t3.txt +RUN: touch %t4.txt + +# Move one file: +RUN: rm -f %t-one.ar +RUN: llvm-ar rc %t-one.a %t1.txt %t2.txt %t3.txt +RUN: llvm-ar mb %t2.txt %t-one.a %t3.txt +RUN: llvm-ar t %t-one.a | FileCheck %s --check-prefix=ONE + +ONE: 1.txt +ONE-NEXT: 3.txt +ONE-NEXT: 2.txt + +# Move file to front: +RUN: rm -f %t-front.ar +RUN: llvm-ar rc %t-front.ar %t1.txt %t2.txt %t3.txt +RUN: llvm-ar mb %t1.txt %t-front.ar %t3.txt +RUN: llvm-ar t %t-front.ar | FileCheck %s --check-prefix=FRONT + +FRONT: 3.txt +FRONT-NEXT: 1.txt +FRONT-NEXT: 2.txt + +# Move multiple files: +RUN: rm -f %t-multiple.ar +RUN: llvm-ar rc %t-multiple.a %t1.txt %t2.txt %t3.txt %t4.txt +RUN: llvm-ar mb %t2.txt %t-multiple.a %t4.txt %t3.txt +RUN: llvm-ar t %t-multiple.a | FileCheck %s --check-prefix=MULTIPLE + +MULTIPLE: 1.txt +MULTIPLE-NEXT: 3.txt +MULTIPLE-NEXT: 4.txt +MULTIPLE-NEXT: 2.txt + +# Move before invalid file: +RUN: rm -f %t-invalid.ar +RUN: llvm-ar rc %t-invalid.a %t1.txt %t2.txt %t3.txt +RUN: not llvm-ar mb invalid.txt %t-invalid.a %t2.txt 2>&1 \ +RUN: | FileCheck %s --check-prefix=ERROR +RUN: llvm-ar t %t-invalid.a | FileCheck %s --check-prefix=INVALID + +ERROR: error: Insertion point not found. 
+INVALID: 1.txt +INVALID-NEXT: 2.txt +INVALID-NEXT: 3.txt + +# Move file to the same position: +RUN: rm -f %t-position.ar +RUN: llvm-ar rc %t-position.a %t1.txt %t2.txt %t3.txt +RUN: llvm-ar mb %t3.txt %t-position.a %t2.txt +RUN: llvm-ar t %t-position.a | FileCheck %s --check-prefix=POSITION + +POSITION: 1.txt +POSITION-NEXT: 2.txt +POSITION-NEXT: 3.txt + +# Move file after itself: +RUN: rm -f %t-same.ar +RUN: llvm-ar rc %t-same.ar %t1.txt %t2.txt %t3.txt +RUN: llvm-ar mb %t2.txt %t-same.ar %t2.txt +RUN: llvm-ar t %t-same.ar | FileCheck %s --check-prefix=SAME + +SAME: 1.txt +SAME-NEXT: 2.txt +SAME-NEXT: 3.txt diff --git a/llvm/test/tools/llvm-ar/move.test b/llvm/test/tools/llvm-ar/move.test index c5028f52e405c..268fbf94cf19f 100644 --- a/llvm/test/tools/llvm-ar/move.test +++ b/llvm/test/tools/llvm-ar/move.test @@ -1,129 +1,129 @@ -## Test the move command without modifiers moves members to the end - -# RUN: rm -rf %t && mkdir -p %t -# RUN: yaml2obj %s -o %t/1.o --docnum=1 -# RUN: yaml2obj %s -o %t/2.o --docnum=2 -# RUN: yaml2obj %s -o %t/3.o --docnum=3 - -## Move single member: -# RUN: llvm-ar rc %t/single.a %t/1.o %t/2.o %t/3.o -# RUN: llvm-ar m %t/single.a %t/1.o -# RUN: llvm-ar t %t/single.a \ -# RUN: | FileCheck %s --check-prefix=SINGLE --match-full-lines --implicit-check-not {{.}} - -# SINGLE: 2.o -# SINGLE-NEXT: 3.o -# SINGLE-NEXT: 1.o - -# RUN: llvm-nm --print-armap %t/single.a \ -# RUN: | FileCheck %s --check-prefix=SINGLE-SYM - -# SINGLE-SYM: symbol2 -# SINGLE-SYM-NEXT: symbol3 -# SINGLE-SYM-NEXT: symbol1 - -## Move multiple members: -# RUN: llvm-ar rc %t/multiple.a %t/1.o %t/2.o %t/3.o -# RUN: llvm-ar m %t/multiple.a %t/1.o %t/2.o -# RUN: llvm-ar t %t/multiple.a \ -# RUN: | FileCheck %s --check-prefix=MULTIPLE --match-full-lines --implicit-check-not {{.}} - -# MULTIPLE: 3.o -# MULTIPLE-NEXT: 1.o -# MULTIPLE-NEXT: 2.o - -# RUN: llvm-nm --print-armap %t/multiple.a \ -# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM - -# MULTIPLE-SYM: symbol3 -# 
MULTIPLE-SYM-NEXT: symbol1 -# MULTIPLE-SYM-NEXT: symbol2 - -## Move same member: -# RUN: llvm-ar rc %t/same.a %t/1.o %t/2.o %t/3.o -# RUN: llvm-ar m %t/same.a %t/1.o %t/1.o -# RUN: llvm-ar t %t/same.a \ -# RUN: | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o - -# SAME: 2.o -# SAME-NEXT: 3.o -# SAME-NEXT: 1.o - -# RUN: llvm-nm --print-armap %t/same.a \ -# RUN: | FileCheck %s --check-prefix=SAME-SYM - -# SAME-SYM: symbol2 -# SAME-SYM-NEXT: symbol3 -# SAME-SYM-NEXT: symbol1 - -## Move without member: -# RUN: llvm-ar rc %t/without.a %t/1.o %t/2.o %t/3.o -# RUN: llvm-ar m %t/without.a -# RUN: llvm-ar t %t/without.a \ -# RUN: | FileCheck %s --match-full-lines --check-prefix=WITHOUT --implicit-check-not {{.}} - -# WITHOUT: 1.o -# WITHOUT-NEXT: 2.o -# WITHOUT-NEXT: 3.o - -# RUN: llvm-nm --print-armap %t/without.a \ -# RUN: | FileCheck %s --check-prefix=WITHOUT-SYM - -# WITHOUT-SYM: symbol1 -# WITHOUT-SYM-NEXT: symbol2 -# WITHOUT-SYM-NEXT: symbol3 - -## No archive: -# RUN: not llvm-ar m 2>&1 \ -# RUN: | FileCheck %s --check-prefix=NO-ARCHIVE - -# NO-ARCHIVE: error: An archive name must be specified. - -## Member does not exist: -# RUN: llvm-ar rc %t/missing.a %t/1.o %t/2.o %t/3.o -# RUN: not llvm-ar m %t/missing.a %t/missing.txt 2>&1 \ -# RUN: | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt - -# MISSING-FILE: error: [[FILE]]: {{[nN]}}o such file or directory. 
- ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbol1 - Binding: STB_GLOBAL - Section: .text - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbol2 - Binding: STB_GLOBAL - Section: .text - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbol3 - Binding: STB_GLOBAL - Section: .text +## Test the move command without modifiers moves members to the end + +# RUN: rm -rf %t && mkdir -p %t +# RUN: yaml2obj %s -o %t/1.o --docnum=1 +# RUN: yaml2obj %s -o %t/2.o --docnum=2 +# RUN: yaml2obj %s -o %t/3.o --docnum=3 + +## Move single member: +# RUN: llvm-ar rc %t/single.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar m %t/single.a %t/1.o +# RUN: llvm-ar t %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE --match-full-lines --implicit-check-not {{.}} + +# SINGLE: 2.o +# SINGLE-NEXT: 3.o +# SINGLE-NEXT: 1.o + +# RUN: llvm-nm --print-armap %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE-SYM + +# SINGLE-SYM: symbol2 +# SINGLE-SYM-NEXT: symbol3 +# SINGLE-SYM-NEXT: symbol1 + +## Move multiple members: +# RUN: llvm-ar rc %t/multiple.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar m %t/multiple.a %t/1.o %t/2.o +# RUN: llvm-ar t %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE --match-full-lines --implicit-check-not {{.}} + +# MULTIPLE: 3.o +# MULTIPLE-NEXT: 1.o +# MULTIPLE-NEXT: 2.o + +# RUN: llvm-nm --print-armap %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM + +# MULTIPLE-SYM: symbol3 +# MULTIPLE-SYM-NEXT: symbol1 +# MULTIPLE-SYM-NEXT: symbol2 + +## Move same member: +# RUN: llvm-ar rc %t/same.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar m %t/same.a %t/1.o %t/1.o +# RUN: llvm-ar t %t/same.a \ +# 
RUN: | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o + +# SAME: 2.o +# SAME-NEXT: 3.o +# SAME-NEXT: 1.o + +# RUN: llvm-nm --print-armap %t/same.a \ +# RUN: | FileCheck %s --check-prefix=SAME-SYM + +# SAME-SYM: symbol2 +# SAME-SYM-NEXT: symbol3 +# SAME-SYM-NEXT: symbol1 + +## Move without member: +# RUN: llvm-ar rc %t/without.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar m %t/without.a +# RUN: llvm-ar t %t/without.a \ +# RUN: | FileCheck %s --match-full-lines --check-prefix=WITHOUT --implicit-check-not {{.}} + +# WITHOUT: 1.o +# WITHOUT-NEXT: 2.o +# WITHOUT-NEXT: 3.o + +# RUN: llvm-nm --print-armap %t/without.a \ +# RUN: | FileCheck %s --check-prefix=WITHOUT-SYM + +# WITHOUT-SYM: symbol1 +# WITHOUT-SYM-NEXT: symbol2 +# WITHOUT-SYM-NEXT: symbol3 + +## No archive: +# RUN: not llvm-ar m 2>&1 \ +# RUN: | FileCheck %s --check-prefix=NO-ARCHIVE + +# NO-ARCHIVE: error: An archive name must be specified. + +## Member does not exist: +# RUN: llvm-ar rc %t/missing.a %t/1.o %t/2.o %t/3.o +# RUN: not llvm-ar m %t/missing.a %t/missing.txt 2>&1 \ +# RUN: | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt + +# MISSING-FILE: error: [[FILE]]: {{[nN]}}o such file or directory. 
+ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol1 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol2 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol3 + Binding: STB_GLOBAL + Section: .text diff --git a/llvm/test/tools/llvm-ar/mri-addlib.test b/llvm/test/tools/llvm-ar/mri-addlib.test index c297653e2abb1..cf62acbf0b063 100644 --- a/llvm/test/tools/llvm-ar/mri-addlib.test +++ b/llvm/test/tools/llvm-ar/mri-addlib.test @@ -1,45 +1,45 @@ -## Test the ADDLIB MRI command. - -# RUN: rm -rf %t && mkdir -p %t -# RUN: yaml2obj %s -o %t/f.o -# RUN: llvm-ar r %t/f.a %t/f.o - -## Merge contents of archives. -# RUN: echo "CREATE %t/addlib.a" > %t/addlib.mri -# RUN: echo "ADDLIB %t/f.a" >> %t/addlib.mri -# RUN: echo "SAVE" >> %t/addlib.mri -# RUN: llvm-ar -M < %t/addlib.mri -# RUN: llvm-nm --print-armap %t/addlib.a | FileCheck --check-prefix=SYMS %s -# RUN: llvm-ar t %t/addlib.a | FileCheck --check-prefix=FILES %s - -# SYMS: f in {{.*}} -# FILES: f.o - -## ADDLIB with non-archive file. -# RUN: echo "CREATE %t/badlib.a" > %t/badlib.mri -# RUN: echo "ADDLIB %s" >> %t/badlib.mri -# RUN: echo "SAVE" >> %t/badlib.mri -# RUN: not llvm-ar -M < %t/badlib.mri 2>&1 | FileCheck --check-prefix=PARSE %s -# RUN: not ls %t/badlib.a - -# PARSE: Could not parse library - -## No create command. 
-# RUN: echo "ADDLIB %t/f.a" > %t/nocreate.mri -# RUN: echo "SAVE" >> %t/nocreate.mri -# RUN: not llvm-ar -M < %t/nocreate.mri - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: f - Binding: STB_GLOBAL - Section: .text -... +## Test the ADDLIB MRI command. + +# RUN: rm -rf %t && mkdir -p %t +# RUN: yaml2obj %s -o %t/f.o +# RUN: llvm-ar r %t/f.a %t/f.o + +## Merge contents of archives. +# RUN: echo "CREATE %t/addlib.a" > %t/addlib.mri +# RUN: echo "ADDLIB %t/f.a" >> %t/addlib.mri +# RUN: echo "SAVE" >> %t/addlib.mri +# RUN: llvm-ar -M < %t/addlib.mri +# RUN: llvm-nm --print-armap %t/addlib.a | FileCheck --check-prefix=SYMS %s +# RUN: llvm-ar t %t/addlib.a | FileCheck --check-prefix=FILES %s + +# SYMS: f in {{.*}} +# FILES: f.o + +## ADDLIB with non-archive file. +# RUN: echo "CREATE %t/badlib.a" > %t/badlib.mri +# RUN: echo "ADDLIB %s" >> %t/badlib.mri +# RUN: echo "SAVE" >> %t/badlib.mri +# RUN: not llvm-ar -M < %t/badlib.mri 2>&1 | FileCheck --check-prefix=PARSE %s +# RUN: not ls %t/badlib.a + +# PARSE: Could not parse library + +## No create command. +# RUN: echo "ADDLIB %t/f.a" > %t/nocreate.mri +# RUN: echo "SAVE" >> %t/nocreate.mri +# RUN: not llvm-ar -M < %t/nocreate.mri + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: f + Binding: STB_GLOBAL + Section: .text +... diff --git a/llvm/test/tools/llvm-ar/mri-addmod.test b/llvm/test/tools/llvm-ar/mri-addmod.test index 2b6e4dc3adbee..c26414fad0b8c 100644 --- a/llvm/test/tools/llvm-ar/mri-addmod.test +++ b/llvm/test/tools/llvm-ar/mri-addmod.test @@ -1,27 +1,27 @@ -## Test the ADDMOD MRI command. 
- -# RUN: rm -rf %t && mkdir -p %t -# RUN: yaml2obj %s -o %t/f.o - -# RUN: echo "CREATE %t/addmod.a" > %t/addmod.mri -# RUN: echo "ADDMOD %t/f.o" >> %t/addmod.mri -# RUN: echo "SAVE" >> %t/addmod.mri -# RUN: llvm-ar -M < %t/addmod.mri -# RUN: llvm-nm --print-armap %t/addmod.a | FileCheck %s - -# CHECK: f in f.o - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: f - Binding: STB_GLOBAL - Section: .text -... +## Test the ADDMOD MRI command. + +# RUN: rm -rf %t && mkdir -p %t +# RUN: yaml2obj %s -o %t/f.o + +# RUN: echo "CREATE %t/addmod.a" > %t/addmod.mri +# RUN: echo "ADDMOD %t/f.o" >> %t/addmod.mri +# RUN: echo "SAVE" >> %t/addmod.mri +# RUN: llvm-ar -M < %t/addmod.mri +# RUN: llvm-nm --print-armap %t/addmod.a | FileCheck %s + +# CHECK: f in f.o + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: f + Binding: STB_GLOBAL + Section: .text +... diff --git a/llvm/test/tools/llvm-ar/mri-comments.test b/llvm/test/tools/llvm-ar/mri-comments.test index f5fa61768aeb0..aac62dc00e699 100644 --- a/llvm/test/tools/llvm-ar/mri-comments.test +++ b/llvm/test/tools/llvm-ar/mri-comments.test @@ -1,19 +1,19 @@ -# Test different MRI comment formats and white space. - -RUN: rm -rf %t && mkdir -p %t -RUN: yaml2obj %S/Inputs/elf.yaml -o %t/elf.o - -RUN: echo "create %t/mri.ar;comment" > %t/script.mri -RUN: echo "addmod %t/elf.o * comment" >> %t/script.mri -RUN: echo "; comment" >> %t/script.mri -RUN: echo " ;comment" >> %t/script.mri -RUN: echo "* comment" >> %t/script.mri -RUN: echo " *comment" >> %t/script.mri -RUN: echo "" >> %t/script.mri -RUN: echo " " >> %t/script.mri -RUN: echo " save" >> %t/script.mri - -RUN: llvm-ar -M < %t/script.mri -RUN: llvm-ar t %t/mri.ar | FileCheck %s - -CHECK: elf.o +# Test different MRI comment formats and white space. 
+ +RUN: rm -rf %t && mkdir -p %t +RUN: yaml2obj %S/Inputs/elf.yaml -o %t/elf.o + +RUN: echo "create %t/mri.ar;comment" > %t/script.mri +RUN: echo "addmod %t/elf.o * comment" >> %t/script.mri +RUN: echo "; comment" >> %t/script.mri +RUN: echo " ;comment" >> %t/script.mri +RUN: echo "* comment" >> %t/script.mri +RUN: echo " *comment" >> %t/script.mri +RUN: echo "" >> %t/script.mri +RUN: echo " " >> %t/script.mri +RUN: echo " save" >> %t/script.mri + +RUN: llvm-ar -M < %t/script.mri +RUN: llvm-ar t %t/mri.ar | FileCheck %s + +CHECK: elf.o diff --git a/llvm/test/tools/llvm-ar/mri-end.test b/llvm/test/tools/llvm-ar/mri-end.test index db4abce7d180e..a00f2f53aee26 100644 --- a/llvm/test/tools/llvm-ar/mri-end.test +++ b/llvm/test/tools/llvm-ar/mri-end.test @@ -1,48 +1,48 @@ -# The END MRI command is optional. Scripts that omit -# or include END should be handled by llvm-ar. -RUN: rm -rf %t && mkdir -p %t - -# Empty File -RUN: yaml2obj %S/Inputs/elf.yaml -o %t/elf.o - -RUN: touch %t/empty.mri -RUN: llvm-ar -M < %t/empty.mri - -RUN: echo "END" > %t/empty-end.mri -RUN: llvm-ar -M < %t/empty-end.mri - -# Comment only -RUN: echo "; a comment" > %t/comment.mri -RUN: llvm-ar -M < %t/comment.mri - -RUN: echo "; a comment" > %t/comment-end.mri -RUN: echo "END" > %t/comment-end.mri -RUN: llvm-ar -M < %t/comment-end.mri - -# Without Save -RUN: echo "create %t/mri.ar" > %t/no-save.mri -RUN: echo "addmod %t/elf.o" >> %t/no-save.mri -RUN: llvm-ar -M < %t/no-save.mri -RUN: test ! -e %t/mri.ar - -RUN: echo "create %t/mri.ar" > %t/no-save-end.mri -RUN: echo "addmod %t/elf.o" >> %t/no-save-end.mri -RUN: echo "END" > %t/no-save-end.mri -RUN: llvm-ar -M < %t/no-save-end.mri -RUN: test ! 
-e %t/mri.ar - -# With Save -RUN: echo "create %t/mri.ar" > %t/save.mri -RUN: echo "addmod %t/elf.o" >> %t/save.mri -RUN: echo "save" >> %t/save.mri -RUN: llvm-ar -M < %t/save.mri -RUN: llvm-ar t %t/mri.ar | FileCheck %s - -RUN: echo "create %t/mri.ar" > %t/save-end.mri -RUN: echo "addmod %t/elf.o" >> %t/save-end.mri -RUN: echo "save" >> %t/save-end.mri -RUN: echo "END" > %t/no-save-end.mri -RUN: llvm-ar -M < %t/save-end.mri -RUN: llvm-ar t %t/mri.ar | FileCheck %s - -CHECK: elf.o +# The END MRI command is optional. Scripts that omit +# or include END should be handled by llvm-ar. +RUN: rm -rf %t && mkdir -p %t + +# Empty File +RUN: yaml2obj %S/Inputs/elf.yaml -o %t/elf.o + +RUN: touch %t/empty.mri +RUN: llvm-ar -M < %t/empty.mri + +RUN: echo "END" > %t/empty-end.mri +RUN: llvm-ar -M < %t/empty-end.mri + +# Comment only +RUN: echo "; a comment" > %t/comment.mri +RUN: llvm-ar -M < %t/comment.mri + +RUN: echo "; a comment" > %t/comment-end.mri +RUN: echo "END" > %t/comment-end.mri +RUN: llvm-ar -M < %t/comment-end.mri + +# Without Save +RUN: echo "create %t/mri.ar" > %t/no-save.mri +RUN: echo "addmod %t/elf.o" >> %t/no-save.mri +RUN: llvm-ar -M < %t/no-save.mri +RUN: test ! -e %t/mri.ar + +RUN: echo "create %t/mri.ar" > %t/no-save-end.mri +RUN: echo "addmod %t/elf.o" >> %t/no-save-end.mri +RUN: echo "END" > %t/no-save-end.mri +RUN: llvm-ar -M < %t/no-save-end.mri +RUN: test ! 
-e %t/mri.ar + +# With Save +RUN: echo "create %t/mri.ar" > %t/save.mri +RUN: echo "addmod %t/elf.o" >> %t/save.mri +RUN: echo "save" >> %t/save.mri +RUN: llvm-ar -M < %t/save.mri +RUN: llvm-ar t %t/mri.ar | FileCheck %s + +RUN: echo "create %t/mri.ar" > %t/save-end.mri +RUN: echo "addmod %t/elf.o" >> %t/save-end.mri +RUN: echo "save" >> %t/save-end.mri +RUN: echo "END" > %t/no-save-end.mri +RUN: llvm-ar -M < %t/save-end.mri +RUN: llvm-ar t %t/mri.ar | FileCheck %s + +CHECK: elf.o diff --git a/llvm/test/tools/llvm-ar/quick-append.test b/llvm/test/tools/llvm-ar/quick-append.test index ef2e5f74cc808..607ba413ae09d 100644 --- a/llvm/test/tools/llvm-ar/quick-append.test +++ b/llvm/test/tools/llvm-ar/quick-append.test @@ -1,93 +1,91 @@ -## Test quick append - -# RUN: rm -rf %t && mkdir -p %t -# RUN: yaml2obj %s -o %t/1.o --docnum=1 -# RUN: yaml2obj %s -o %t/2.o --docnum=2 - -## Append single member: -# RUN: llvm-ar qc %t/single.a %t/1.o -# RUN: llvm-ar t %t/single.a \ -# RUN: | FileCheck %s --check-prefix=SINGLE --match-full-lines --implicit-check-not {{.}} - -# SINGLE: 1.o - -# RUN: llvm-nm --print-armap %t/single.a \ -# RUN: | FileCheck %s --check-prefix=SINGLE-SYM - -# SINGLE-SYM: symbol1 - -## Append multiple members: -# RUN: llvm-ar qc %t/multiple.a %t/1.o %t/2.o -# RUN: llvm-ar t %t/multiple.a \ -# RUN: | FileCheck %s --check-prefix=MULTIPLE --match-full-lines --implicit-check-not {{.}} - -# MULTIPLE: 1.o -# MULTIPLE-NEXT: 2.o - -# RUN: llvm-nm --print-armap %t/multiple.a \ -# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM - -# MULTIPLE-SYM: symbol1 -# MULTIPLE-SYM-NEXT: symbol2 - -## Append same member: -# RUN: llvm-ar qc %t/same.a %t/1.o %t/1.o -# RUN: llvm-ar t %t/same.a \ -# RUN: | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o --match-full-lines --implicit-check-not {{.}} - -# SAME: 1.o -# SAME-NEXT: 1.o - -# RUN: llvm-nm --print-armap %t/same.a \ -# RUN: | FileCheck %s --check-prefix=SAME-SYM - -# SAME-SYM: symbol1 -# SAME-SYM-NEXT: symbol1 - -## Append 
without member: -# RUN: llvm-ar qc %t/without.a -# RUN: llvm-ar t %t/without.a \ -# RUN: | FileCheck /dev/null --allow-empty --implicit-check-not={{.}} - -# RUN: llvm-nm --print-armap %t/without.a \ -# RUN: | FileCheck /dev/null --allow-empty --implicit-check-not={{.}} - -## No archive: -# RUN: not llvm-ar qc 2>&1 \ -# RUN: | FileCheck %s --check-prefix=NO-ARCHIVE - -# NO-ARCHIVE: error: An archive name must be specified. - -## Member does not exist: -# RUN: not llvm-ar qc %t/missing.a %t/missing.txt 2>&1 \ -# RUN: | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt - -# MISSING-FILE: error: [[FILE]]: {{[nN]}}o such file or directory. - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbol1 - Binding: STB_GLOBAL - Section: .text - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbol2 - Binding: STB_GLOBAL - Section: .text +## Test quick append + +# RUN: rm -rf %t && mkdir -p %t +# RUN: yaml2obj %s -o %t/1.o --docnum=1 +# RUN: yaml2obj %s -o %t/2.o --docnum=2 + +## Append single member: +# RUN: llvm-ar qc %t/single.a %t/1.o +# RUN: llvm-ar t %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE --match-full-lines --implicit-check-not {{.}} + +# SINGLE: 1.o + +# RUN: llvm-nm --print-armap %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE-SYM + +# SINGLE-SYM: symbol1 + +## Append multiple members: +# RUN: llvm-ar qc %t/multiple.a %t/1.o %t/2.o +# RUN: llvm-ar t %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE --match-full-lines --implicit-check-not {{.}} + +# MULTIPLE: 1.o +# MULTIPLE-NEXT: 2.o + +# RUN: llvm-nm --print-armap %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM + +# MULTIPLE-SYM: symbol1 +# MULTIPLE-SYM-NEXT: symbol2 + +## Append same member: +# RUN: llvm-ar qc 
%t/same.a %t/1.o %t/1.o +# RUN: llvm-ar t %t/same.a \ +# RUN: | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o --match-full-lines --implicit-check-not {{.}} + +# SAME: 1.o +# SAME-NEXT: 1.o + +# RUN: llvm-nm --print-armap %t/same.a \ +# RUN: | FileCheck %s --check-prefix=SAME-SYM + +# SAME-SYM: symbol1 +# SAME-SYM-NEXT: symbol1 + +## Append without member: +# RUN: llvm-ar qc %t/without.a +# RUN: llvm-ar t %t/without.a | count 0 + +# RUN: llvm-nm --print-armap %t/without.a | count 0 + +## No archive: +# RUN: not llvm-ar qc 2>&1 \ +# RUN: | FileCheck %s --check-prefix=NO-ARCHIVE + +# NO-ARCHIVE: error: An archive name must be specified. + +## Member does not exist: +# RUN: not llvm-ar qc %t/missing.a %t/missing.txt 2>&1 \ +# RUN: | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt + +# MISSING-FILE: error: [[FILE]]: {{[nN]}}o such file or directory. + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol1 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol2 + Binding: STB_GLOBAL + Section: .text diff --git a/llvm/test/tools/llvm-ar/read-only-archive.test b/llvm/test/tools/llvm-ar/read-only-archive.test index f4adea20218c0..1d638f45cac36 100644 --- a/llvm/test/tools/llvm-ar/read-only-archive.test +++ b/llvm/test/tools/llvm-ar/read-only-archive.test @@ -1,30 +1,30 @@ -## Test that read-only archives cannot be edited - -# RUN: rm -rf %t && mkdir -p %t -# RUN: touch %t/1.txt -# RUN: touch %t/2.txt -# RUN: touch %t/3.txt - -# RUN: llvm-ar rc %t/archive.a %t/1.txt %t/2.txt - -## Make read only: -# RUN: chmod 444 %t/archive.a - -# RUN: not llvm-ar r %t/archive.a %t/3.txt \ -# RUN: FileCheck %s --check-prefix=ERROR -DFILE=%t/archive.a - -# ERROR: error: [[FILE]]: no such file or 
directory. - -# RUN: not llvm-ar q %t/archive.a %t/3.txt \ -# RUN: FileCheck %s --check-prefix=ERROR -DFILE=%t/archive.a - -# RUN: not llvm-ar m %t/archive.a t/1.txt \ -# RUN: FileCheck %s --check-prefix=ERROR-2 -DFILE=%t/archive.a - -# ERROR-2: error: [[FILE]]: permission denied. - -# RUN: llvm-ar t %t/archive.a \ -# RUN: | FileCheck %s --check-prefix=ARCHIVE --match-full-lines --implicit-check-not {{.}} - -# ARCHIVE: 1.txt -# ARCHIVE-NEXT: 2.txt +## Test that read-only archives cannot be edited + +# RUN: rm -rf %t && mkdir -p %t +# RUN: touch %t/1.txt +# RUN: touch %t/2.txt +# RUN: touch %t/3.txt + +# RUN: llvm-ar rc %t/archive.a %t/1.txt %t/2.txt + +## Make read only: +# RUN: chmod 444 %t/archive.a + +# RUN: not llvm-ar r %t/archive.a %t/3.txt \ +# RUN: FileCheck %s --check-prefix=ERROR -DFILE=%t/archive.a + +# ERROR: error: [[FILE]]: no such file or directory. + +# RUN: not llvm-ar q %t/archive.a %t/3.txt \ +# RUN: FileCheck %s --check-prefix=ERROR -DFILE=%t/archive.a + +# RUN: not llvm-ar m %t/archive.a t/1.txt \ +# RUN: FileCheck %s --check-prefix=ERROR-2 -DFILE=%t/archive.a + +# ERROR-2: error: [[FILE]]: permission denied. + +# RUN: llvm-ar t %t/archive.a \ +# RUN: | FileCheck %s --check-prefix=ARCHIVE --match-full-lines --implicit-check-not {{.}} + +# ARCHIVE: 1.txt +# ARCHIVE-NEXT: 2.txt diff --git a/llvm/test/tools/llvm-ar/replace-update.test b/llvm/test/tools/llvm-ar/replace-update.test index 1b51dba1bca00..c056565f144c5 100644 --- a/llvm/test/tools/llvm-ar/replace-update.test +++ b/llvm/test/tools/llvm-ar/replace-update.test @@ -1,189 +1,189 @@ -## Test that the replace command with "u" updates the relevant members. 
- -# RUN: rm -rf %t && mkdir -p %t/new/other - -# RUN: yaml2obj %s -o %t/1.o --docnum=1 -# RUN: yaml2obj %s -o %t/2.o --docnum=2 -# RUN: yaml2obj %s -o %t/3.o --docnum=3 - -# RUN: env TZ=GMT touch -t 200001020304 %t/1.o -# RUN: env TZ=GMT touch -t 200001020304 %t/2.o -# RUN: env TZ=GMT touch -t 200001020304 %t/3.o - -# RUN: yaml2obj %s -o %t/new/1.o --docnum=4 -# RUN: yaml2obj %s -o %t/new/3.o --docnum=5 - -# RUN: yaml2obj %s -o %t/new/other/1.o --docnum=6 - -# RUN: env TZ=GMT touch -t 200001020304 %t/new/other/1.o - -## Replace single member with newer file: -# RUN: llvm-ar rcU %t/single.a %t/1.o %t/2.o %t/3.o -# RUN: llvm-ar ruU %t/single.a %t/new/1.o -# RUN: llvm-ar t %t/single.a \ -# RUN: | FileCheck %s --check-prefix=SINGLE --implicit-check-not {{.}} - -# SINGLE: 1.o -# SINGLE-NEXT: 2.o -# SINGLE-NEXT: 3.o - -# RUN: llvm-nm --print-armap %t/single.a \ -# RUN: | FileCheck %s --check-prefix=SINGLE-SYM - -# SINGLE-SYM: symbolnew1 -# SINGLE-SYM-NEXT: symbol2 -# SINGLE-SYM-NEXT: symbol3 - -## Replace new single member with older file: -# RUN: llvm-ar ruU %t/single.a %t/1.o -# RUN: llvm-ar t %t/single.a \ -# RUN: | FileCheck %s --check-prefix=SINGLE --implicit-check-not {{.}} - -# RUN: llvm-nm --print-armap %t/single.a \ -# RUN: | FileCheck %s --check-prefix=SINGLE-SYM - -## Replace multiple members with newer files: -# RUN: llvm-ar rcuU %t/multiple.a %t/1.o %t/2.o %t/3.o -# RUN: llvm-ar ruU %t/multiple.a %t/new/1.o %t/new/3.o -# RUN: llvm-ar t %t/multiple.a \ -# RUN: | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}} - -# MULTIPLE: 1.o -# MULTIPLE-NEXT: 2.o -# MULTIPLE-NEXT: 3.o - -# RUN: llvm-nm --print-armap %t/multiple.a \ -# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM - -# MULTIPLE-SYM: symbolnew1 -# MULTIPLE-SYM-NEXT: symbol2 -# MULTIPLE-SYM-NEXTs: symbolnew3 - -## Replace newer members with multiple older files: -# RUN: llvm-ar ruU %t/multiple.a %t/1.o %t/2.o -# RUN: llvm-ar t %t/multiple.a \ -# RUN: | FileCheck %s --check-prefix=MULTIPLE 
--implicit-check-not {{.}} - -# RUN: llvm-nm --print-armap %t/multiple.a \ -# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM - -## Replace same member with newer files: -# RUN: llvm-ar rcuU %t/same.a %t/1.o %t/2.o %t/3.o -# RUN: llvm-ar ruU %t/same.a %t/new/1.o %t/new/other/1.o -# RUN: llvm-ar t %t/same.a \ -# RUN: | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o --implicit-check-not {{.}} - -# SAME: 1.o -# SAME-NEXT: 2.o -# SAME-NEXT: 3.o -# SAME-NEXT: 1.o - -# RUN: llvm-nm --print-armap %t/same.a \ -# RUN: | FileCheck %s --check-prefix=SAME-SYM - -# SAME-SYM: symbolnew1 -# SAME-SYM-NEXT: symbol2 -# SAME-SYM-NEXT: symbol3 -# SAME-SYM-NEXT: symbolother1 - -## Replace multiple members with an older file and a newer file: -# RUN: llvm-ar rcuU %t/old-new.a %t/new/1.o %t/2.o %t/3.o -# RUN: llvm-ar ruU %t/old-new.a %t/1.o %t/new/3.o -# RUN: llvm-ar t %t/old-new.a \ -# RUN: | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}} - -# RUN: llvm-nm --print-armap %t/old-new.a \ -# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM - -## Replace same member with an older file and a newer file: -# RUN: llvm-ar rcuU %t/old-new-same.a %t/new/1.o %t/2.o %t/3.o -# RUN: llvm-ar ruU %t/old-new-same.a %t/1.o %t/new/other/1.o -# RUN: llvm-ar t %t/old-new-same.a \ -# RUN: | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o --implicit-check-not {{.}} - -# RUN: llvm-nm --print-armap %t/same.a \ -# RUN: | FileCheck %s --check-prefix=SAME-SYM - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbol1 - Binding: STB_GLOBAL - Section: .text - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbol2 - Binding: STB_GLOBAL - Section: .text - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - 
Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbol3 - Binding: STB_GLOBAL - Section: .text - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbolnew1 - Binding: STB_GLOBAL - Section: .text - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbolnew3 - Binding: STB_GLOBAL - Section: .text - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbolother1 - Binding: STB_GLOBAL - Section: .text +## Test that the replace command with "u" updates the relevant members. + +# RUN: rm -rf %t && mkdir -p %t/new/other + +# RUN: yaml2obj %s -o %t/1.o --docnum=1 +# RUN: yaml2obj %s -o %t/2.o --docnum=2 +# RUN: yaml2obj %s -o %t/3.o --docnum=3 + +# RUN: env TZ=GMT touch -t 200001020304 %t/1.o +# RUN: env TZ=GMT touch -t 200001020304 %t/2.o +# RUN: env TZ=GMT touch -t 200001020304 %t/3.o + +# RUN: yaml2obj %s -o %t/new/1.o --docnum=4 +# RUN: yaml2obj %s -o %t/new/3.o --docnum=5 + +# RUN: yaml2obj %s -o %t/new/other/1.o --docnum=6 + +# RUN: env TZ=GMT touch -t 200001020304 %t/new/other/1.o + +## Replace single member with newer file: +# RUN: llvm-ar rcU %t/single.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar ruU %t/single.a %t/new/1.o +# RUN: llvm-ar t %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE --implicit-check-not {{.}} + +# SINGLE: 1.o +# SINGLE-NEXT: 2.o +# SINGLE-NEXT: 3.o + +# RUN: llvm-nm --print-armap %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE-SYM + +# SINGLE-SYM: symbolnew1 +# SINGLE-SYM-NEXT: symbol2 +# SINGLE-SYM-NEXT: symbol3 + +## Replace new single member with older file: +# RUN: llvm-ar ruU %t/single.a %t/1.o +# RUN: llvm-ar t %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE 
--implicit-check-not {{.}} + +# RUN: llvm-nm --print-armap %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE-SYM + +## Replace multiple members with newer files: +# RUN: llvm-ar rcuU %t/multiple.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar ruU %t/multiple.a %t/new/1.o %t/new/3.o +# RUN: llvm-ar t %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}} + +# MULTIPLE: 1.o +# MULTIPLE-NEXT: 2.o +# MULTIPLE-NEXT: 3.o + +# RUN: llvm-nm --print-armap %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM + +# MULTIPLE-SYM: symbolnew1 +# MULTIPLE-SYM-NEXT: symbol2 +# MULTIPLE-SYM-NEXTs: symbolnew3 + +## Replace newer members with multiple older files: +# RUN: llvm-ar ruU %t/multiple.a %t/1.o %t/2.o +# RUN: llvm-ar t %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}} + +# RUN: llvm-nm --print-armap %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM + +## Replace same member with newer files: +# RUN: llvm-ar rcuU %t/same.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar ruU %t/same.a %t/new/1.o %t/new/other/1.o +# RUN: llvm-ar t %t/same.a \ +# RUN: | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o --implicit-check-not {{.}} + +# SAME: 1.o +# SAME-NEXT: 2.o +# SAME-NEXT: 3.o +# SAME-NEXT: 1.o + +# RUN: llvm-nm --print-armap %t/same.a \ +# RUN: | FileCheck %s --check-prefix=SAME-SYM + +# SAME-SYM: symbolnew1 +# SAME-SYM-NEXT: symbol2 +# SAME-SYM-NEXT: symbol3 +# SAME-SYM-NEXT: symbolother1 + +## Replace multiple members with an older file and a newer file: +# RUN: llvm-ar rcuU %t/old-new.a %t/new/1.o %t/2.o %t/3.o +# RUN: llvm-ar ruU %t/old-new.a %t/1.o %t/new/3.o +# RUN: llvm-ar t %t/old-new.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}} + +# RUN: llvm-nm --print-armap %t/old-new.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM + +## Replace same member with an older file and a newer file: +# RUN: llvm-ar rcuU %t/old-new-same.a %t/new/1.o %t/2.o 
%t/3.o +# RUN: llvm-ar ruU %t/old-new-same.a %t/1.o %t/new/other/1.o +# RUN: llvm-ar t %t/old-new-same.a \ +# RUN: | FileCheck %s --check-prefix=SAME -DFILE=%t/2.o --implicit-check-not {{.}} + +# RUN: llvm-nm --print-armap %t/same.a \ +# RUN: | FileCheck %s --check-prefix=SAME-SYM + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol1 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol2 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol3 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbolnew1 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbolnew3 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbolother1 + Binding: STB_GLOBAL + Section: .text diff --git a/llvm/test/tools/llvm-ar/replace.test b/llvm/test/tools/llvm-ar/replace.test index 2a7cddfdeb535..0c1722a23ef5a 100644 --- a/llvm/test/tools/llvm-ar/replace.test +++ b/llvm/test/tools/llvm-ar/replace.test @@ -1,178 +1,178 @@ -## Test the replace command without modifiers replaces the relevant members. 
- -# RUN: rm -rf %t && mkdir -p %t/new/other -# RUN: yaml2obj %s -o %t/1.o --docnum=1 -# RUN: yaml2obj %s -o %t/2.o --docnum=2 -# RUN: yaml2obj %s -o %t/3.o --docnum=3 - -# RUN: yaml2obj %s -o %t/new/1.o --docnum=4 -# RUN: yaml2obj %s -o %t/new/3.o --docnum=5 - -# RUN: yaml2obj %s -o %t/new/other/1.o --docnum=6 - -## Replace single member: -# RUN: llvm-ar rc %t/single.a %t/1.o %t/2.o %t/3.o -# RUN: llvm-ar r %t/single.a %t/new/1.o -# RUN: llvm-ar t %t/single.a \ -# RUN: | FileCheck %s --check-prefix=SINGLE --implicit-check-not {{.}} - -# SINGLE: 1.o -# SINGLE-NEXT: 2.o -# SINGLE-NEXT: 3.o - -# RUN: llvm-nm --print-armap %t/single.a \ -# RUN: | FileCheck %s --check-prefix=SINGLE-SYM - -# SINGLE-SYM: symbolnew1 -# SINGLE-SYM-NEXT: symbol2 -# SINGLE-SYM-NEXT: symbol3 - -## Replace multiple members: -# RUN: llvm-ar rc %t/multiple.a %t/1.o %t/2.o %t/3.o -# RUN: llvm-ar r %t/multiple.a %t/new/1.o %t/new/3.o -# RUN: llvm-ar t %t/multiple.a \ -# RUN: | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}} - -# MULTIPLE: 1.o -# MULTIPLE-NEXT: 2.o -# MULTIPLE-NEXT: 3.o - -# RUN: llvm-nm --print-armap %t/multiple.a \ -# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM - -# MULTIPLE-SYM: symbolnew1 -# MULTIPLE-SYM-NEXT: symbol2 -# MULTIPLE-SYM-NEXT: symbolnew3 - -## Replace same member: -# RUN: llvm-ar rc %t/same.a %t/1.o %t/2.o %t/3.o -# RUN: llvm-ar r %t/same.a %t/new/1.o %t/new/other/1.o -# RUN: llvm-ar t %t/same.a \ -# RUN: | FileCheck %s --check-prefix=SAME --implicit-check-not {{.}} - -# SAME: 1.o -# SAME-NEXT: 2.o -# SAME-NEXT: 3.o -# SAME-NEXT: 1.o - -# RUN: llvm-nm --print-armap %t/same.a \ -# RUN: | FileCheck %s --check-prefix=SAME-SYM - -# SAME-SYM: symbolnew1 -# SAME-SYM-NEXT: symbol2 -# SAME-SYM-NEXT: symbol3 -# SAME-SYM-NEXT: symbolother1 - -## Replace without member: -# RUN: llvm-ar rc %t/without.a %t/1.o %t/2.o %t/3.o -# RUN: llvm-ar r %t/without.a -# RUN: llvm-ar t %t/without.a \ -# RUN: | FileCheck %s --check-prefix=WITHOUT --implicit-check-not {{.}} 
- -# WITHOUT: 1.o -# WITHOUT-NEXT: 2.o -# WITHOUT-NEXT: 3.o - -# RUN: llvm-nm --print-armap %t/without.a \ -# RUN: | FileCheck %s --check-prefix=WITHOUT-SYM - -# WITHOUT-SYM: symbol1 -# WITHOUT-SYM-NEXT: symbol2 -# WITHOUT-SYM-NEXT: symbol3 - -## No archive: -# RUN: not llvm-ar r 2>&1 \ -# RUN: | FileCheck %s --check-prefix=NO-ARCHIVE - -# NO-ARCHIVE: error: An archive name must be specified. - -## Member does not exist: -# RUN: llvm-ar rc %t/missing.a %t/1.o %t/2.o %t/3.o -# RUN: not llvm-ar r %t/missing.a %t/missing.txt 2>&1 \ -# RUN: | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt - -# MISSING-FILE: error: [[FILE]]: {{[Nn]}}o such file or directory. - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbol1 - Binding: STB_GLOBAL - Section: .text - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbol2 - Binding: STB_GLOBAL - Section: .text - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbol3 - Binding: STB_GLOBAL - Section: .text - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbolnew1 - Binding: STB_GLOBAL - Section: .text - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbolnew3 - Binding: STB_GLOBAL - Section: .text - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbolother1 - Binding: STB_GLOBAL - Section: .text +## Test the 
replace command without modifiers replaces the relevant members. + +# RUN: rm -rf %t && mkdir -p %t/new/other +# RUN: yaml2obj %s -o %t/1.o --docnum=1 +# RUN: yaml2obj %s -o %t/2.o --docnum=2 +# RUN: yaml2obj %s -o %t/3.o --docnum=3 + +# RUN: yaml2obj %s -o %t/new/1.o --docnum=4 +# RUN: yaml2obj %s -o %t/new/3.o --docnum=5 + +# RUN: yaml2obj %s -o %t/new/other/1.o --docnum=6 + +## Replace single member: +# RUN: llvm-ar rc %t/single.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar r %t/single.a %t/new/1.o +# RUN: llvm-ar t %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE --implicit-check-not {{.}} + +# SINGLE: 1.o +# SINGLE-NEXT: 2.o +# SINGLE-NEXT: 3.o + +# RUN: llvm-nm --print-armap %t/single.a \ +# RUN: | FileCheck %s --check-prefix=SINGLE-SYM + +# SINGLE-SYM: symbolnew1 +# SINGLE-SYM-NEXT: symbol2 +# SINGLE-SYM-NEXT: symbol3 + +## Replace multiple members: +# RUN: llvm-ar rc %t/multiple.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar r %t/multiple.a %t/new/1.o %t/new/3.o +# RUN: llvm-ar t %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE --implicit-check-not {{.}} + +# MULTIPLE: 1.o +# MULTIPLE-NEXT: 2.o +# MULTIPLE-NEXT: 3.o + +# RUN: llvm-nm --print-armap %t/multiple.a \ +# RUN: | FileCheck %s --check-prefix=MULTIPLE-SYM + +# MULTIPLE-SYM: symbolnew1 +# MULTIPLE-SYM-NEXT: symbol2 +# MULTIPLE-SYM-NEXT: symbolnew3 + +## Replace same member: +# RUN: llvm-ar rc %t/same.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar r %t/same.a %t/new/1.o %t/new/other/1.o +# RUN: llvm-ar t %t/same.a \ +# RUN: | FileCheck %s --check-prefix=SAME --implicit-check-not {{.}} + +# SAME: 1.o +# SAME-NEXT: 2.o +# SAME-NEXT: 3.o +# SAME-NEXT: 1.o + +# RUN: llvm-nm --print-armap %t/same.a \ +# RUN: | FileCheck %s --check-prefix=SAME-SYM + +# SAME-SYM: symbolnew1 +# SAME-SYM-NEXT: symbol2 +# SAME-SYM-NEXT: symbol3 +# SAME-SYM-NEXT: symbolother1 + +## Replace without member: +# RUN: llvm-ar rc %t/without.a %t/1.o %t/2.o %t/3.o +# RUN: llvm-ar r %t/without.a +# RUN: llvm-ar t %t/without.a \ +# RUN: 
| FileCheck %s --check-prefix=WITHOUT --implicit-check-not {{.}} + +# WITHOUT: 1.o +# WITHOUT-NEXT: 2.o +# WITHOUT-NEXT: 3.o + +# RUN: llvm-nm --print-armap %t/without.a \ +# RUN: | FileCheck %s --check-prefix=WITHOUT-SYM + +# WITHOUT-SYM: symbol1 +# WITHOUT-SYM-NEXT: symbol2 +# WITHOUT-SYM-NEXT: symbol3 + +## No archive: +# RUN: not llvm-ar r 2>&1 \ +# RUN: | FileCheck %s --check-prefix=NO-ARCHIVE + +# NO-ARCHIVE: error: An archive name must be specified. + +## Member does not exist: +# RUN: llvm-ar rc %t/missing.a %t/1.o %t/2.o %t/3.o +# RUN: not llvm-ar r %t/missing.a %t/missing.txt 2>&1 \ +# RUN: | FileCheck %s --check-prefix=MISSING-FILE -DFILE=%t/missing.txt + +# MISSING-FILE: error: [[FILE]]: {{[Nn]}}o such file or directory. + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol1 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol2 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol3 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbolnew1 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbolnew3 + Binding: STB_GLOBAL + Section: .text + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: 
symbolother1 + Binding: STB_GLOBAL + Section: .text diff --git a/llvm/test/tools/llvm-ar/symtab.test b/llvm/test/tools/llvm-ar/symtab.test index e59a468d3f2f0..8dde4ede58135 100644 --- a/llvm/test/tools/llvm-ar/symtab.test +++ b/llvm/test/tools/llvm-ar/symtab.test @@ -1,70 +1,70 @@ -## Test the s and S modifiers. Build and do not build a symbol table. - -# RUN: yaml2obj %s -o %t.o -# RUN: touch %t-other.txt - -## Default: -# RUN: rm -f %t-default.a -# RUN: llvm-ar rc %t-default.a %t.o -# RUN: llvm-nm --print-armap %t-default.a \ -# RUN: | FileCheck %s --check-prefix=SYMTAB - -## Use a modifer: -# RUN: rm -f %t-symtab.a -# RUN: llvm-ar rcs %t-symtab.a %t.o -# RUN: llvm-nm --print-armap %t-symtab.a \ -# RUN: | FileCheck %s --check-prefix=SYMTAB - -# RUN: rm -f %t-no-symtab.a -# RUN: llvm-ar rcS %t-no-symtab.a %t.o -# RUN: llvm-nm --print-armap %t-no-symtab.a \ -# RUN: | FileCheck %s --check-prefix=NO-SYMTAB - -## Use both modifers: -# RUN: rm -f %t-symtab-last.a -# RUN: llvm-ar rcSs %t-symtab-last.a %t.o -# RUN: llvm-nm --print-armap %t-symtab-last.a \ -# RUN: | FileCheck %s --check-prefix=SYMTAB - -# RUN: rm -f %t-no-symtab-last.a -# RUN: llvm-ar rcsS %t-no-symtab-last.a %t.o -# RUN: llvm-nm --print-armap %t-no-symtab-last.a \ -# RUN: | FileCheck %s --check-prefix=NO-SYMTAB - -## Use an existing archive: -# RUN: rm -f %t-to-symtab.a -# RUN: llvm-ar rcS %t-to-symtab.a %t.o -# RUN: llvm-ar rs %t-to-symtab.a %t-other.txt -# RUN: llvm-nm --print-armap %t-to-symtab.a \ -# RUN: | FileCheck %s --check-prefix=SYMTAB - -# RUN: llvm-ar rs %t-to-symtab.a %t-other.txt -# RUN: llvm-nm --print-armap %t-to-symtab.a \ -# RUN: | FileCheck %s --check-prefix=SYMTAB - -# RUN: rm -f %t-to-no-symtab.a -# RUN: llvm-ar rcs %t-to-no-symtab.a %t.o -# RUN: llvm-ar rS %t-to-no-symtab.a %t-other.txt -# RUN: llvm-nm --print-armap %t-to-no-symtab.a \ -# RUN: | FileCheck %s --check-prefix=NO-SYMTAB - -# RUN: llvm-ar rS %t-to-no-symtab.a %t-other.txt -# RUN: llvm-nm --print-armap %t-to-no-symtab.a 
\ -# RUN: | FileCheck %s --check-prefix=NO-SYMTAB - -# SYMTAB: symbol in -# NO-SYMTAB-NOT: symbol in - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 -Sections: - - Name: .text - Type: SHT_PROGBITS -Symbols: - - Name: symbol - Binding: STB_GLOBAL - Section: .text +## Test the s and S modifiers. Build and do not build a symbol table. + +# RUN: yaml2obj %s -o %t.o +# RUN: touch %t-other.txt + +## Default: +# RUN: rm -f %t-default.a +# RUN: llvm-ar rc %t-default.a %t.o +# RUN: llvm-nm --print-armap %t-default.a \ +# RUN: | FileCheck %s --check-prefix=SYMTAB + +## Use a modifer: +# RUN: rm -f %t-symtab.a +# RUN: llvm-ar rcs %t-symtab.a %t.o +# RUN: llvm-nm --print-armap %t-symtab.a \ +# RUN: | FileCheck %s --check-prefix=SYMTAB + +# RUN: rm -f %t-no-symtab.a +# RUN: llvm-ar rcS %t-no-symtab.a %t.o +# RUN: llvm-nm --print-armap %t-no-symtab.a \ +# RUN: | FileCheck %s --check-prefix=NO-SYMTAB + +## Use both modifers: +# RUN: rm -f %t-symtab-last.a +# RUN: llvm-ar rcSs %t-symtab-last.a %t.o +# RUN: llvm-nm --print-armap %t-symtab-last.a \ +# RUN: | FileCheck %s --check-prefix=SYMTAB + +# RUN: rm -f %t-no-symtab-last.a +# RUN: llvm-ar rcsS %t-no-symtab-last.a %t.o +# RUN: llvm-nm --print-armap %t-no-symtab-last.a \ +# RUN: | FileCheck %s --check-prefix=NO-SYMTAB + +## Use an existing archive: +# RUN: rm -f %t-to-symtab.a +# RUN: llvm-ar rcS %t-to-symtab.a %t.o +# RUN: llvm-ar rs %t-to-symtab.a %t-other.txt +# RUN: llvm-nm --print-armap %t-to-symtab.a \ +# RUN: | FileCheck %s --check-prefix=SYMTAB + +# RUN: llvm-ar rs %t-to-symtab.a %t-other.txt +# RUN: llvm-nm --print-armap %t-to-symtab.a \ +# RUN: | FileCheck %s --check-prefix=SYMTAB + +# RUN: rm -f %t-to-no-symtab.a +# RUN: llvm-ar rcs %t-to-no-symtab.a %t.o +# RUN: llvm-ar rS %t-to-no-symtab.a %t-other.txt +# RUN: llvm-nm --print-armap %t-to-no-symtab.a \ +# RUN: | FileCheck %s --check-prefix=NO-SYMTAB + +# RUN: llvm-ar rS %t-to-no-symtab.a %t-other.txt +# RUN: llvm-nm 
--print-armap %t-to-no-symtab.a \ +# RUN: | FileCheck %s --check-prefix=NO-SYMTAB + +# SYMTAB: symbol in +# NO-SYMTAB-NOT: symbol in + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS +Symbols: + - Name: symbol + Binding: STB_GLOBAL + Section: .text From aefcf5100aae57ed2ff6a15356bd25d74e8fb27e Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Wed, 17 Jul 2019 13:47:46 +0000 Subject: [PATCH 343/451] [ASTImporter] Fix LLDB lookup in transparent ctx and with ext src Summary: With LLDB we use localUncachedLookup(), however, that fails to find Decls when a transparent context is involved and the given DC has external lexical storage. The solution is to use noload_lookup, which works well with transparent contexts. But, we cannot use only the noload_lookup since the slow case of localUncachedLookup is still needed in some other cases. These other cases are handled in ASTImporterLookupTable, but we cannot use that with LLDB since that traverses through the AST which initiates the load of external decls again via DC::decls(). We must avoid loading external decls during the import because ExternalASTSource is implemented with ASTImporter, so external loads during import result in uncontrolled and faulty import.
Reviewers: shafik, teemperor, jingham, clayborg, a_sidorin, a.sidorin Subscribers: rnkovacs, dkrupp, Szelethus, gamesh411, cfe-commits, lldb-commits Tags: #clang, #lldb Differential Revision: https://reviews.llvm.org/D61333 llvm-svn: 366325 --- clang/lib/AST/ASTImporter.cpp | 35 +++++++++++--- clang/unittests/AST/ASTImporterTest.cpp | 48 +++++++++++++++++++ .../test/lang/c/modules/TestCModules.py | 14 ++++++ .../lldbsuite/test/lang/c/modules/main.c | 4 +- .../ExpressionParser/Clang/ClangASTSource.cpp | 30 +++++++----- 5 files changed, 110 insertions(+), 21 deletions(-) diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 8a59c3a76990d..9d5dd84161dec 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -1707,6 +1707,17 @@ static Error setTypedefNameForAnonDecl(TagDecl *From, TagDecl *To, Error ASTNodeImporter::ImportDefinition( RecordDecl *From, RecordDecl *To, ImportDefinitionKind Kind) { + auto DefinitionCompleter = [To]() { + // There are cases in LLDB when we first import a class without its + // members. The class will have DefinitionData, but no members. Then, + // importDefinition is called from LLDB, which tries to get the members, so + // when we get here, the class already has the DefinitionData set, so we + // must unset the CompleteDefinition here to be able to complete again the + // definition. + To->setCompleteDefinition(false); + To->completeDefinition(); + }; + if (To->getDefinition() || To->isBeingDefined()) { if (Kind == IDK_Everything || // In case of lambdas, the class already has a definition ptr set, but @@ -1717,7 +1728,7 @@ Error ASTNodeImporter::ImportDefinition( Error Result = ImportDeclContext(From, /*ForceImport=*/true); // Finish the definition of the lambda, set isBeingDefined to false. 
if (To->isLambda()) - To->completeDefinition(); + DefinitionCompleter(); return Result; } @@ -1728,8 +1739,8 @@ Error ASTNodeImporter::ImportDefinition( // Complete the definition even if error is returned. // The RecordDecl may be already part of the AST so it is better to // have it in complete state even if something is wrong with it. - auto DefinitionCompleter = - llvm::make_scope_exit([To]() { To->completeDefinition(); }); + auto DefinitionCompleterScopeExit = + llvm::make_scope_exit(DefinitionCompleter); if (Error Err = setTypedefNameForAnonDecl(From, To, Importer)) return Err; @@ -7757,10 +7768,20 @@ ASTImporter::findDeclsInToCtx(DeclContext *DC, DeclarationName Name) { SharedState->getLookupTable()->lookup(ReDC, Name); return FoundDeclsTy(LookupResult.begin(), LookupResult.end()); } else { - // FIXME Can we remove this kind of lookup? - // Or lldb really needs this C/C++ lookup? - FoundDeclsTy Result; - ReDC->localUncachedLookup(Name, Result); + DeclContext::lookup_result NoloadLookupResult = ReDC->noload_lookup(Name); + FoundDeclsTy Result(NoloadLookupResult.begin(), NoloadLookupResult.end()); + // We must search by the slow case of localUncachedLookup because that is + // working even if there is no LookupPtr for the DC. We could use + // DC::buildLookup() to create the LookupPtr, but that would load external + // decls again, we must avoid that case. + // Also, even if we had the LookupPtr, we must find Decls which are not + // in the LookupPtr, so we need the slow case. + // These cases are handled in ASTImporterLookupTable, but we cannot use + // that with LLDB since that traverses through the AST which initiates the + // load of external decls again via DC::decls(). And again, we must avoid + // loading external decls during the import. 
+ if (Result.empty()) + ReDC->localUncachedLookup(Name, Result); return Result; } } diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp index 6b8315f2b9ead..8b2f7c5b240e4 100644 --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -5122,6 +5122,51 @@ TEST_P(ASTImporterOptionSpecificTestBase, LambdaInFunctionParam) { EXPECT_EQ(ToLSize, FromLSize); } +struct LLDBLookupTest : ASTImporterOptionSpecificTestBase { + LLDBLookupTest() { + Creator = [](ASTContext &ToContext, FileManager &ToFileManager, + ASTContext &FromContext, FileManager &FromFileManager, + bool MinimalImport, + const std::shared_ptr &SharedState) { + return new ASTImporter(ToContext, ToFileManager, FromContext, + FromFileManager, MinimalImport, + // We use the regular lookup. + /*SharedState=*/nullptr); + }; + } +}; + +TEST_P(LLDBLookupTest, ImporterShouldFindInTransparentContext) { + TranslationUnitDecl *ToTU = getToTuDecl( + R"( + extern "C" { + class X{}; + }; + )", + Lang_CXX); + auto *ToX = FirstDeclMatcher().match( + ToTU, cxxRecordDecl(hasName("X"))); + + // Set up a stub external storage. + ToTU->setHasExternalLexicalStorage(true); + // Set up DeclContextBits.HasLazyExternalLexicalLookups to true. + ToTU->setMustBuildLookupTable(); + struct TestExternalASTSource : ExternalASTSource {}; + ToTU->getASTContext().setExternalSource(new TestExternalASTSource()); + + Decl *FromTU = getTuDecl( + R"( + class X; + )", + Lang_CXX); + auto *FromX = FirstDeclMatcher().match( + FromTU, cxxRecordDecl(hasName("X"))); + auto *ImportedX = Import(FromX, Lang_CXX); + // The lookup must find the existing class definition in the LinkageSpecDecl. + // Then the importer renders the existing and the new decl into one chain. 
+ EXPECT_EQ(ImportedX->getCanonicalDecl(), ToX->getCanonicalDecl()); +} + INSTANTIATE_TEST_CASE_P(ParameterizedTests, ASTImporterLookupTableTest, DefaultTestValuesForRunOptions, ); @@ -5168,5 +5213,8 @@ INSTANTIATE_TEST_CASE_P(ParameterizedTests, ImportImplicitMethods, INSTANTIATE_TEST_CASE_P(ParameterizedTests, ImportVariables, DefaultTestValuesForRunOptions, ); +INSTANTIATE_TEST_CASE_P(ParameterizedTests, LLDBLookupTest, + DefaultTestValuesForRunOptions, ); + } // end namespace ast_matchers } // end namespace clang diff --git a/lldb/packages/Python/lldbsuite/test/lang/c/modules/TestCModules.py b/lldb/packages/Python/lldbsuite/test/lang/c/modules/TestCModules.py index 455704280d143..857223b5ed109 100644 --- a/lldb/packages/Python/lldbsuite/test/lang/c/modules/TestCModules.py +++ b/lldb/packages/Python/lldbsuite/test/lang/c/modules/TestCModules.py @@ -47,6 +47,10 @@ def test_expr(self): self.expect("breakpoint list -f", BREAKPOINT_HIT_ONCE, substrs=[' resolved, hit count = 1']) + # Enable logging of the imported AST. + log_file = os.path.join(self.getBuildDir(), "lldb-ast-log.txt") + self.runCmd("log enable lldb ast -f '%s'" % log_file) + self.expect( "expr -l objc++ -- @import Darwin; 3", VARIABLES_DISPLAYED_CORRECTLY, @@ -54,6 +58,8 @@ def test_expr(self): "int", "3"]) + # This expr command imports __sFILE with definition + # (FILE is a typedef to __sFILE.) self.expect( "expr *fopen(\"/dev/zero\", \"w\")", VARIABLES_DISPLAYED_CORRECTLY, @@ -61,6 +67,14 @@ def test_expr(self): "FILE", "_close"]) + # Check that the AST log contains exactly one definition of __sFILE. 
+ f = open(log_file) + log_lines = f.readlines() + f.close() + os.remove(log_file) + self.assertEqual(" ".join(log_lines).count("struct __sFILE definition"), + 1) + self.expect("expr *myFile", VARIABLES_DISPLAYED_CORRECTLY, substrs=["a", "5", "b", "9"]) diff --git a/lldb/packages/Python/lldbsuite/test/lang/c/modules/main.c b/lldb/packages/Python/lldbsuite/test/lang/c/modules/main.c index 2b244bc38d02f..df321a75faaf1 100644 --- a/lldb/packages/Python/lldbsuite/test/lang/c/modules/main.c +++ b/lldb/packages/Python/lldbsuite/test/lang/c/modules/main.c @@ -5,11 +5,11 @@ int printf(const char * __restrict format, ...); typedef struct { int a; int b; -} FILE; +} MYFILE; int main() { - FILE *myFile = malloc(sizeof(FILE)); + MYFILE *myFile = malloc(sizeof(MYFILE)); myFile->a = 5; myFile->b = 9; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp index 8d29df9dde2db..c5778f86bb62d 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp @@ -612,10 +612,15 @@ void ClangASTSource::FindExternalLexicalDecls( if (!original_decl_context) return; + // Indicates whether we skipped any Decls of the original DeclContext. + bool SkippedDecls = false; for (TagDecl::decl_iterator iter = original_decl_context->decls_begin(); iter != original_decl_context->decls_end(); ++iter) { Decl *decl = *iter; + // The predicate function returns true if the passed declaration kind is + // the one we are looking for. 
+ // See clang::ExternalASTSource::FindExternalLexicalDecls() if (predicate(decl->getKind())) { if (log) { ASTDumper ast_dumper(decl); @@ -640,21 +645,22 @@ void ClangASTSource::FindExternalLexicalDecls( m_ast_importer_sp->RequireCompleteType(copied_field_type); } - - DeclContext *decl_context_non_const = - const_cast(decl_context); - - if (copied_decl->getDeclContext() != decl_context) { - if (copied_decl->getDeclContext()->containsDecl(copied_decl)) - copied_decl->getDeclContext()->removeDecl(copied_decl); - copied_decl->setDeclContext(decl_context_non_const); - } - - if (!decl_context_non_const->containsDecl(copied_decl)) - decl_context_non_const->addDeclInternal(copied_decl); + } else { + SkippedDecls = true; } } + // CopyDecl may build a lookup table which may set up ExternalLexicalStorage + // to false. However, since we skipped some of the external Decls we must + // set it back! + if (SkippedDecls) { + decl_context->setHasExternalLexicalStorage(true); + // This sets HasLazyExternalLexicalLookups to true. By setting this bit we + // ensure that the lookup table is rebuilt, which means the external source + // is consulted again when a clang::DeclContext::lookup is called. + const_cast(decl_context)->setMustBuildLookupTable(); + } + return; } From aa55124eca680e164956c9654a8f2a2fc60b1146 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Wed, 17 Jul 2019 13:48:49 +0000 Subject: [PATCH 344/451] [RISCV][NFC] Add tests that capture current encodings for DWARF EH Items which are known to be wrong/different vs GCC are marked as TODO and will be addressed in follow-up patches.
llvm-svn: 366326 --- llvm/test/CodeGen/RISCV/dwarf-eh.ll | 68 ++++++++++++++++++++++++++++ llvm/test/DebugInfo/RISCV/eh-frame.s | 34 ++++++++++++++ llvm/test/MC/RISCV/fde-reloc.s | 31 +++++++++++++ 3 files changed, 133 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/dwarf-eh.ll create mode 100644 llvm/test/DebugInfo/RISCV/eh-frame.s create mode 100644 llvm/test/MC/RISCV/fde-reloc.s diff --git a/llvm/test/CodeGen/RISCV/dwarf-eh.ll b/llvm/test/CodeGen/RISCV/dwarf-eh.ll new file mode 100644 index 0000000000000..6abea5c2a655f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/dwarf-eh.ll @@ -0,0 +1,68 @@ +; RUN: llc -march=riscv32 --code-model=small < %s \ +; RUN: | FileCheck --check-prefixes=CHECK,SMALL %s +; RUN: llc -march=riscv32 --code-model=medium < %s \ +; RUN: | FileCheck --check-prefixes=CHECK,MED %s +; RUN: llc -march=riscv32 --code-model=small -relocation-model=pic < %s \ +; RUN: | FileCheck --check-prefixes=CHECK,PIC %s +; RUN: llc -march=riscv32 --code-model=medium -relocation-model=pic < %s \ +; RUN: | FileCheck --check-prefixes=CHECK,PIC %s +; RUN: llc -march=riscv64 --code-model=small < %s \ +; RUN: | FileCheck --check-prefixes=CHECK,SMALL %s +; RUN: llc -march=riscv64 --code-model=medium < %s \ +; RUN: | FileCheck --check-prefixes=CHECK,MED %s +; RUN: llc -march=riscv64 --code-model=small -relocation-model=pic < %s \ +; RUN: | FileCheck --check-prefixes=CHECK,PIC %s +; RUN: llc -march=riscv64 --code-model=medium -relocation-model=pic < %s \ +; RUN: | FileCheck --check-prefixes=CHECK,PIC %s + +declare void @throw_exception() + +declare i32 @__gxx_personality_v0(...) 
+ +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + +; CHECK-LABEL: test1: +; CHECK: .cfi_startproc +; TODO: Personality encoding should be DW_EH_PE_indirect | DW_EH_PE_pcrel | +; DW_EH_PE_sdata4 +; CHECK-NEXT: .cfi_personality 0, __gxx_personality_v0 +; TODO: LSDA encoding should be DW_EH_PE_pcrel | DW_EH_PE_sdata4 +; CHECK-NEXT: .cfi_lsda 0, .Lexception0 + +define void @test1() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + invoke void @throw_exception() to label %try.cont unwind label %lpad + +lpad: + %0 = landingpad { i8*, i32 } + catch i8* null + %1 = extractvalue { i8*, i32 } %0, 0 + %2 = tail call i8* @__cxa_begin_catch(i8* %1) + tail call void @__cxa_end_catch() + br label %try.cont + +try.cont: + ret void +} + +; CHECK-LABEL: GCC_except_table0: +; CHECK-NEXT: .Lexception0: +; CHECK-NEXT: .byte 255 # @LPStart Encoding = omit +; TODO: TTypeEncoding encoding should be DW_EH_PE_indirect | DW_EH_PE_pcrel | +; DW_EH_PE_sdata4 +; CHECK-NEXT: .byte 0 # @TType Encoding = absptr +; TODO: call site encoding should be DW_EH_PE_udata4 +; CHECK: .Lttbaseref0: +; CHECK-NEXT: .byte 1 # Call site Encoding = uleb128 +; CHECK-NEXT: .uleb128 .Lcst_end0-.Lcst_begin0 +; CHECK-NEXT: cst_begin0: +; CHECK-NEXT: .uleb128 .Ltmp0-.Lfunc_begin0 # >> Call Site 1 << +; CHECK-NEXT: .uleb128 .Ltmp1-.Ltmp0 # Call between .Ltmp0 and .Ltmp1 +; CHECK-NEXT: .uleb128 .Ltmp2-.Lfunc_begin0 # jumps to .Ltmp2 +; CHECK-NEXT: .byte 1 # On action: 1 +; CHECK-NEXT: .uleb128 .Ltmp1-.Lfunc_begin0 # >> Call Site 2 << +; CHECK-NEXT: .uleb128 .Lfunc_end0-.Ltmp1 # Call between .Ltmp1 and .Lfunc_end0 +; CHECK-NEXT: .byte 0 # has no landing pad +; CHECK-NEXT: .byte 0 # On action: cleanup diff --git a/llvm/test/DebugInfo/RISCV/eh-frame.s b/llvm/test/DebugInfo/RISCV/eh-frame.s new file mode 100644 index 0000000000000..fcff68c09085e --- /dev/null +++ b/llvm/test/DebugInfo/RISCV/eh-frame.s @@ -0,0 +1,34 @@ +# RUN: llvm-mc -filetype=obj -triple=riscv32 < %s | 
llvm-dwarfdump -eh-frame - \ +# RUN: | FileCheck --check-prefixes=CHECK,RV32 %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 < %s | llvm-dwarfdump -eh-frame - \ +# RUN: | FileCheck --check-prefixes=CHECK,RV64 %s + +func: + .cfi_startproc + ret + .cfi_endproc + +# CHECK: 00000000 00000010 ffffffff CIE +# CHECK: Version: 1 +# CHECK: Augmentation: "zR" +# CHECK: Code alignment factor: 1 + +# TODO: gas uses -4 for the data alignment factor for both RV32 and RV64. They +# do so on the basis that on RV64F, F registers may only be 4 bytes +# (DWARF2_CIE_DATA_ALIGNMENT). + +# RV32: Data alignment factor: -4 +# RV64: Data alignment factor: -8 + +# CHECK: Return address column: 1 + +# Check the pointer encoding for address pointers used in FDE. This is set by +# FDECFIEncoding and should be DW_EH_PE_pcrel | DW_EH_PE_sdata4 (0x1b). + +# CHECK: Augmentation data: 1B +# CHECK: DW_CFA_def_cfa: reg2 +0 +# +# CHECK: 00000014 00000010 00000018 FDE cie=00000018 pc=00000000...00000004 +# CHECK: DW_CFA_nop: +# CHECK: DW_CFA_nop: +# CHECK: DW_CFA_nop: diff --git a/llvm/test/MC/RISCV/fde-reloc.s b/llvm/test/MC/RISCV/fde-reloc.s new file mode 100644 index 0000000000000..0794ad4d22e74 --- /dev/null +++ b/llvm/test/MC/RISCV/fde-reloc.s @@ -0,0 +1,31 @@ +# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+relax < %s \ +# RUN: | llvm-readobj -r | FileCheck -check-prefix=RELAX-RELOC %s +# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=-relax < %s \ +# RUN: | llvm-readobj -r | FileCheck -check-prefix=NORELAX-RELOC %s + +func: + .cfi_startproc + ret + .cfi_endproc + +# TODO: Should produce R_RISCV_32_PCREL for the FDE pc relocation. Many of the +# ADD32/SUB32 relocations also can be safely resolved even with linker +# relaxation enabled. This test is written to capture current behaviour, in +# preparation for follow-on patches to fix it. 
+ +# RELAX-RELOC: Section (4) .rela.eh_frame { +# RELAX-RELOC-NEXT: 0x0 R_RISCV_ADD32 - 0xFFFFFFFC +# RELAX-RELOC-NEXT: 0x0 R_RISCV_SUB32 - 0x0 +# RELAX-RELOC-NEXT: 0x14 R_RISCV_ADD32 - 0x0 +# RELAX-RELOC-NEXT: 0x14 R_RISCV_SUB32 - 0x0 +# RELAX-RELOC-NEXT: 0x18 R_RISCV_ADD32 - 0x0 +# RELAX-RELOC-NEXT: 0x18 R_RISCV_SUB32 - 0x0 +# RELAX-RELOC-NEXT: 0x1C R_RISCV_ADD32 - 0x0 +# RELAX-RELOC-NEXT: 0x1C R_RISCV_SUB32 - 0x0 +# RELAX-RELOC-NEXT: 0x20 R_RISCV_ADD32 - 0x0 +# RELAX-RELOC-NEXT: 0x20 R_RISCV_SUB32 - 0x0 +# RELAX-RELOC-NEXT: } + +# NORELAX-RELOC: Section (4) .rela.eh_frame { +# NORELAX-RELOC-NEXT: 0x1C R_RISCV_32_PCREL - 0x0 +# NORELAX-RELOC-NEXT: } From b94c233d06731b09d842ed86c5a72c44b40c65bb Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Wed, 17 Jul 2019 13:54:38 +0000 Subject: [PATCH 345/451] [RISCV] Set correct encodings for DWARF exception handling This patch sets correct encodings for DWARF exception handling for RISC-V (other than call site encoding, which must be udata4 rather than uleb128 and is handled by D63415). This has the same intent as D63409, except this version matches GCC/binutils behaviour which uses the same encodings regardless of PIC/non-PIC and medlow/medany code model.
llvm-svn: 366327 --- .../CodeGen/TargetLoweringObjectFileImpl.cpp | 8 +++++ llvm/test/CodeGen/RISCV/dwarf-eh.ll | 30 +++++++++---------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 4c18152ea8d81..d8e6b3ef93a34 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -219,6 +219,14 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, TTypeEncoding = dwarf::DW_EH_PE_absptr; } break; + case Triple::riscv32: + case Triple::riscv64: + LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; + break; case Triple::sparcv9: LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; if (isPositionIndependent()) { diff --git a/llvm/test/CodeGen/RISCV/dwarf-eh.ll b/llvm/test/CodeGen/RISCV/dwarf-eh.ll index 6abea5c2a655f..67236d7da668e 100644 --- a/llvm/test/CodeGen/RISCV/dwarf-eh.ll +++ b/llvm/test/CodeGen/RISCV/dwarf-eh.ll @@ -1,19 +1,19 @@ ; RUN: llc -march=riscv32 --code-model=small < %s \ -; RUN: | FileCheck --check-prefixes=CHECK,SMALL %s +; RUN: | FileCheck %s ; RUN: llc -march=riscv32 --code-model=medium < %s \ -; RUN: | FileCheck --check-prefixes=CHECK,MED %s +; RUN: | FileCheck %s ; RUN: llc -march=riscv32 --code-model=small -relocation-model=pic < %s \ -; RUN: | FileCheck --check-prefixes=CHECK,PIC %s +; RUN: | FileCheck %s ; RUN: llc -march=riscv32 --code-model=medium -relocation-model=pic < %s \ -; RUN: | FileCheck --check-prefixes=CHECK,PIC %s +; RUN: | FileCheck %s ; RUN: llc -march=riscv64 --code-model=small < %s \ -; RUN: | FileCheck --check-prefixes=CHECK,SMALL %s +; RUN: | FileCheck %s ; RUN: llc -march=riscv64 --code-model=medium < %s \ -; RUN: | 
FileCheck --check-prefixes=CHECK,MED %s +; RUN: | FileCheck %s ; RUN: llc -march=riscv64 --code-model=small -relocation-model=pic < %s \ -; RUN: | FileCheck --check-prefixes=CHECK,PIC %s +; RUN: | FileCheck %s ; RUN: llc -march=riscv64 --code-model=medium -relocation-model=pic < %s \ -; RUN: | FileCheck --check-prefixes=CHECK,PIC %s +; RUN: | FileCheck %s declare void @throw_exception() @@ -25,11 +25,10 @@ declare void @__cxa_end_catch() ; CHECK-LABEL: test1: ; CHECK: .cfi_startproc -; TODO: Personality encoding should be DW_EH_PE_indirect | DW_EH_PE_pcrel | -; DW_EH_PE_sdata4 -; CHECK-NEXT: .cfi_personality 0, __gxx_personality_v0 -; TODO: LSDA encoding should be DW_EH_PE_pcrel | DW_EH_PE_sdata4 -; CHECK-NEXT: .cfi_lsda 0, .Lexception0 +; PersonalityEncoding = DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4 +; CHECK-NEXT: .cfi_personality 155, DW.ref.__gxx_personality_v0 +; LSDAEncoding = DW_EH_PE_pcrel | DW_EH_PE_sdata4 +; CHECK-NEXT: .cfi_lsda 27, .Lexception0 define void @test1() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: @@ -50,9 +49,8 @@ try.cont: ; CHECK-LABEL: GCC_except_table0: ; CHECK-NEXT: .Lexception0: ; CHECK-NEXT: .byte 255 # @LPStart Encoding = omit -; TODO: TTypeEncoding encoding should be DW_EH_PE_indirect | DW_EH_PE_pcrel | -; DW_EH_PE_sdata4 -; CHECK-NEXT: .byte 0 # @TType Encoding = absptr +; TTypeEncoding = DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4 +; CHECK-NEXT: .byte 155 # @TType Encoding = indirect pcrel sdata4 ; TODO: call site encoding should be DW_EH_PE_udata4 ; CHECK: .Lttbaseref0: ; CHECK-NEXT: .byte 1 # Call site Encoding = uleb128 From 6584c4842fc3f5b6786114eb2bb1b7266beef0ee Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 17 Jul 2019 13:55:01 +0000 Subject: [PATCH 346/451] Mips: Remove immarg from copy and insert intrinsics These intrinsics do in fact work with non-constant index arguments. 
These are lowered to either the generic ISD::INSERT_VECTOR_ELT/ISD::EXTRACT_VECTOR_ELT, or to VEXTRACT_SEXT_ELT. The handling of these all accept variable indexes. Turning these into generic instructions which do allow variables introduces complications in a future change to immarg handling. Since these just turn into generic instructions, these are kind of pointless and should probably just be autoupgraded to extractelement/insertelement. llvm-svn: 366328 --- llvm/include/llvm/IR/IntrinsicsMips.td | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsMips.td b/llvm/include/llvm/IR/IntrinsicsMips.td index 308bec9128993..6393a9ca35d55 100644 --- a/llvm/include/llvm/IR/IntrinsicsMips.td +++ b/llvm/include/llvm/IR/IntrinsicsMips.td @@ -798,22 +798,22 @@ def int_mips_clti_u_d : GCCBuiltin<"__builtin_msa_clti_u_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; def int_mips_copy_s_b : GCCBuiltin<"__builtin_msa_copy_s_b">, - Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_copy_s_h : GCCBuiltin<"__builtin_msa_copy_s_h">, - Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_copy_s_w : GCCBuiltin<"__builtin_msa_copy_s_w">, - Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_copy_s_d : GCCBuiltin<"__builtin_msa_copy_s_d">, - Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_copy_u_b : GCCBuiltin<"__builtin_msa_copy_u_b">, - Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + 
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_copy_u_h : GCCBuiltin<"__builtin_msa_copy_u_h">, - Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_copy_u_w : GCCBuiltin<"__builtin_msa_copy_u_w">, - Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_copy_u_d : GCCBuiltin<"__builtin_msa_copy_u_d">, - Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<1>]>; + Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_mips_ctcmsa : GCCBuiltin<"__builtin_msa_ctcmsa">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [ImmArg<0>]>; @@ -1230,16 +1230,16 @@ def int_mips_ilvr_d : GCCBuiltin<"__builtin_msa_ilvr_d">, def int_mips_insert_b : GCCBuiltin<"__builtin_msa_insert_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem]>; def int_mips_insert_h : GCCBuiltin<"__builtin_msa_insert_h">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem]>; def int_mips_insert_w : GCCBuiltin<"__builtin_msa_insert_w">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem]>; def int_mips_insert_d : GCCBuiltin<"__builtin_msa_insert_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty, llvm_i64_ty], - [IntrNoMem, ImmArg<1>]>; + [IntrNoMem]>; def int_mips_insve_b : GCCBuiltin<"__builtin_msa_insve_b">, Intrinsic<[llvm_v16i8_ty], From ab009a602e96b238000d9e20e5c54b078d08aad3 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Wed, 17 Jul 2019 14:00:35 +0000 Subject: [PATCH 347/451] [AsmPrinter] Make the encoding of call sites in .gcc_except_table configurable and use for RISC-V The original behavior was to always emit the 
offsets to each call site in the call site table as uleb128 values, however on some architectures (eg RISCV) these uleb128 offsets into the code cannot always be resolved until link time (because relaxation will invalidate any calculated offsets), and there are no appropriate relocations for uleb128 values. As a consequence it needs to be possible to specify an alternative. This also switches RISCV to use DW_EH_PE_udata4 for call site encodings in .gcc_except_table Differential Revision: https://reviews.llvm.org/D63415 Patch by Edward Jones. llvm-svn: 366329 --- llvm/include/llvm/CodeGen/AsmPrinter.h | 6 ++++++ .../llvm/Target/TargetLoweringObjectFile.h | 2 ++ .../CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 19 +++++++++++++++++++ llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp | 13 +++++++------ .../CodeGen/TargetLoweringObjectFileImpl.cpp | 2 ++ llvm/lib/Target/TargetLoweringObjectFile.cpp | 1 + llvm/test/CodeGen/RISCV/dwarf-eh.ll | 14 +++++++------- 7 files changed, 44 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 9c2097b299520..d110f8b01cb5b 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -543,6 +543,12 @@ class AsmPrinter : public MachineFunctionPass { emitDwarfStringOffset(S.getEntry()); } + /// Emit reference to a call site with a specified encoding + void EmitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo, + unsigned Encoding) const; + /// Emit an integer value corresponding to the call site encoding + void EmitCallSiteValue(uint64_t Value, unsigned Encoding) const; + /// Get the value for DW_AT_APPLE_isa. Zero if no isa encoding specified.
virtual unsigned getISAEncoding() { return 0; } diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h index 7e094a1738b71..3a2497bff11e8 100644 --- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h +++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h @@ -51,6 +51,7 @@ class TargetLoweringObjectFile : public MCObjectFileInfo { unsigned PersonalityEncoding = 0; unsigned LSDAEncoding = 0; unsigned TTypeEncoding = 0; + unsigned CallSiteEncoding = 0; /// This section contains the static constructor pointer list. MCSection *StaticCtorSection = nullptr; @@ -147,6 +148,7 @@ class TargetLoweringObjectFile : public MCObjectFileInfo { unsigned getPersonalityEncoding() const { return PersonalityEncoding; } unsigned getLSDAEncoding() const { return LSDAEncoding; } unsigned getTTypeEncoding() const { return TTypeEncoding; } + unsigned getCallSiteEncoding() const { return CallSiteEncoding; } const MCExpr *getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding, MCStreamer &Streamer) const; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 7bc64d091a0d6..992e44d953062 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -183,6 +183,25 @@ void AsmPrinter::EmitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const { EmitLabelPlusOffset(Label, Offset, MAI->getCodePointerSize()); } +void AsmPrinter::EmitCallSiteOffset(const MCSymbol *Hi, + const MCSymbol *Lo, + unsigned Encoding) const { + // The least significant 3 bits specify the width of the encoding + if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128) + EmitLabelDifferenceAsULEB128(Hi, Lo); + else + EmitLabelDifference(Hi, Lo, GetSizeOfEncodedValue(Encoding)); +} + +void AsmPrinter::EmitCallSiteValue(uint64_t Value, + unsigned Encoding) const { + // The least significant 3 bits specify the width of the encoding + if 
((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128) + EmitULEB128(Value); + else + OutStreamer->EmitIntValue(Value, GetSizeOfEncodedValue(Encoding)); +} + //===----------------------------------------------------------------------===// // Dwarf Lowering Routines //===----------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 2786f8d9f6a40..99e3687b36b8a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -378,7 +378,8 @@ MCSymbol *EHStreamer::emitExceptionTable() { bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; bool IsWasm = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Wasm; unsigned CallSiteEncoding = - IsSJLJ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_uleb128; + IsSJLJ ? static_cast(dwarf::DW_EH_PE_udata4) : + Asm->getObjFileLowering().getCallSiteEncoding(); bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty(); // Type infos. @@ -523,24 +524,24 @@ MCSymbol *EHStreamer::emitExceptionTable() { // Offset of the call site relative to the start of the procedure. if (VerboseAsm) Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<"); - Asm->EmitLabelDifferenceAsULEB128(BeginLabel, EHFuncBeginSym); + Asm->EmitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding); if (VerboseAsm) Asm->OutStreamer->AddComment(Twine(" Call between ") + BeginLabel->getName() + " and " + EndLabel->getName()); - Asm->EmitLabelDifferenceAsULEB128(EndLabel, BeginLabel); + Asm->EmitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding); // Offset of the landing pad relative to the start of the procedure. 
if (!S.LPad) { if (VerboseAsm) Asm->OutStreamer->AddComment(" has no landing pad"); - Asm->EmitULEB128(0); + Asm->EmitCallSiteValue(0, CallSiteEncoding); } else { if (VerboseAsm) Asm->OutStreamer->AddComment(Twine(" jumps to ") + S.LPad->LandingPadLabel->getName()); - Asm->EmitLabelDifferenceAsULEB128(S.LPad->LandingPadLabel, - EHFuncBeginSym); + Asm->EmitCallSiteOffset(S.LPad->LandingPadLabel, EHFuncBeginSym, + CallSiteEncoding); } // Offset of the first associated action record, relative to the start of diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index d8e6b3ef93a34..4c8f75b237aa8 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -218,6 +218,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, PersonalityEncoding = dwarf::DW_EH_PE_absptr; TTypeEncoding = dwarf::DW_EH_PE_absptr; } + CallSiteEncoding = dwarf::DW_EH_PE_udata4; break; case Triple::riscv32: case Triple::riscv64: @@ -226,6 +227,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, dwarf::DW_EH_PE_sdata4; TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + CallSiteEncoding = dwarf::DW_EH_PE_udata4; break; case Triple::sparcv9: LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; diff --git a/llvm/lib/Target/TargetLoweringObjectFile.cpp b/llvm/lib/Target/TargetLoweringObjectFile.cpp index ee32d01572461..17274e1c2c6eb 100644 --- a/llvm/lib/Target/TargetLoweringObjectFile.cpp +++ b/llvm/lib/Target/TargetLoweringObjectFile.cpp @@ -47,6 +47,7 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx, // Reset various EH DWARF encodings. 
PersonalityEncoding = LSDAEncoding = TTypeEncoding = dwarf::DW_EH_PE_absptr; + CallSiteEncoding = dwarf::DW_EH_PE_uleb128; } TargetLoweringObjectFile::~TargetLoweringObjectFile() { diff --git a/llvm/test/CodeGen/RISCV/dwarf-eh.ll b/llvm/test/CodeGen/RISCV/dwarf-eh.ll index 67236d7da668e..ecca81783e56c 100644 --- a/llvm/test/CodeGen/RISCV/dwarf-eh.ll +++ b/llvm/test/CodeGen/RISCV/dwarf-eh.ll @@ -53,14 +53,14 @@ try.cont: ; CHECK-NEXT: .byte 155 # @TType Encoding = indirect pcrel sdata4 ; TODO: call site encoding should be DW_EH_PE_udata4 ; CHECK: .Lttbaseref0: -; CHECK-NEXT: .byte 1 # Call site Encoding = uleb128 +; CHECK-NEXT: .byte 3 # Call site Encoding = udata4 ; CHECK-NEXT: .uleb128 .Lcst_end0-.Lcst_begin0 ; CHECK-NEXT: cst_begin0: -; CHECK-NEXT: .uleb128 .Ltmp0-.Lfunc_begin0 # >> Call Site 1 << -; CHECK-NEXT: .uleb128 .Ltmp1-.Ltmp0 # Call between .Ltmp0 and .Ltmp1 -; CHECK-NEXT: .uleb128 .Ltmp2-.Lfunc_begin0 # jumps to .Ltmp2 +; CHECK-NEXT: .word .Ltmp0-.Lfunc_begin0 # >> Call Site 1 << +; CHECK-NEXT: .word .Ltmp1-.Ltmp0 # Call between .Ltmp0 and .Ltmp1 +; CHECK-NEXT: .word .Ltmp2-.Lfunc_begin0 # jumps to .Ltmp2 ; CHECK-NEXT: .byte 1 # On action: 1 -; CHECK-NEXT: .uleb128 .Ltmp1-.Lfunc_begin0 # >> Call Site 2 << -; CHECK-NEXT: .uleb128 .Lfunc_end0-.Ltmp1 # Call between .Ltmp1 and .Lfunc_end0 -; CHECK-NEXT: .byte 0 # has no landing pad +; CHECK-NEXT: .word .Ltmp1-.Lfunc_begin0 # >> Call Site 2 << +; CHECK-NEXT: .word .Lfunc_end0-.Ltmp1 # Call between .Ltmp1 and .Lfunc_end0 +; CHECK-NEXT: .word 0 # has no landing pad ; CHECK-NEXT: .byte 0 # On action: cleanup From c74fded05ff3d392d1190e39195f1e8d375660d1 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Wed, 17 Jul 2019 14:04:48 +0000 Subject: [PATCH 348/451] [RISCV][NFC] Remove outdated TODO from test/CodeGen/RISCV/dwarf-eh.ll llvm-svn: 366330 --- llvm/test/CodeGen/RISCV/dwarf-eh.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/RISCV/dwarf-eh.ll 
b/llvm/test/CodeGen/RISCV/dwarf-eh.ll index ecca81783e56c..5b192ea516708 100644 --- a/llvm/test/CodeGen/RISCV/dwarf-eh.ll +++ b/llvm/test/CodeGen/RISCV/dwarf-eh.ll @@ -51,8 +51,8 @@ try.cont: ; CHECK-NEXT: .byte 255 # @LPStart Encoding = omit ; TTypeEncoding = DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4 ; CHECK-NEXT: .byte 155 # @TType Encoding = indirect pcrel sdata4 -; TODO: call site encoding should be DW_EH_PE_udata4 ; CHECK: .Lttbaseref0: +; CallSiteEncoding = dwarf::DW_EH_PE_udata4 ; CHECK-NEXT: .byte 3 # Call site Encoding = udata4 ; CHECK-NEXT: .uleb128 .Lcst_end0-.Lcst_begin0 ; CHECK-NEXT: cst_begin0: From 80de11ed0216a232e224f4b01ab4eb818401f829 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Wed, 17 Jul 2019 14:32:25 +0000 Subject: [PATCH 349/451] [RISCV] Add RISCV to LLVM_ALL_TARGETS so it's built by default This follows the RFC . Follow-on commits will add appropriate release notes changes etc. Pushing this now and in a minimal form so there is reasonable time before 9.0 branches to resolve any issues arising from e.g. the backend being exposed on different sanitizer setups. The current builder for RISC-V is on the staging build-bot , however with the RISCV backend being built by default it won't provide any real additional coverage. We will shortly set up a builder that runs the test-suite in qemu-user.
llvm-svn: 366331 --- llvm/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index acebd3feb8bf8..b8eb19848bc58 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -285,6 +285,7 @@ set(LLVM_ALL_TARGETS MSP430 NVPTX PowerPC + RISCV Sparc SystemZ WebAssembly From ae512b83d5fc79fb96844ce7cfe4c6a4fee96c9a Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Wed, 17 Jul 2019 14:40:09 +0000 Subject: [PATCH 350/451] [ASTImporter] Fix structural eq of lambdas Summary: The structural equivalence check reported false eq between lambda classes with different parameters in their call signature. The solution is to check the methods for equality too in case of lambda classes. Reviewers: a_sidorin, a.sidorin Subscribers: rnkovacs, dkrupp, Szelethus, gamesh411, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D64075 llvm-svn: 366332 --- clang/lib/AST/ASTStructuralEquivalence.cpp | 20 +++++++ clang/unittests/AST/ASTImporterTest.cpp | 16 ++++++ .../AST/StructuralEquivalenceTest.cpp | 52 +++++++++++++++++++ 3 files changed, 88 insertions(+) diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp index bb2e353eeef2b..912db3c130c51 100644 --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -1085,6 +1085,19 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, return true; } +/// Determine structural equivalence of two lambda classes. +static bool +IsStructurallyEquivalentLambdas(StructuralEquivalenceContext &Context, + CXXRecordDecl *D1, CXXRecordDecl *D2) { + assert(D1->isLambda() && D2->isLambda() && + "Must be called on lambda classes"); + if (!IsStructurallyEquivalent(Context, D1->getLambdaCallOperator(), + D2->getLambdaCallOperator())) + return false; + + return true; +} + /// Determine structural equivalence of two records. 
static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, RecordDecl *D1, RecordDecl *D2) { @@ -1166,6 +1179,13 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, D1CXX->getASTContext().getExternalSource()->CompleteType(D1CXX); } + if (D1CXX->isLambda() != D2CXX->isLambda()) + return false; + if (D1CXX->isLambda()) { + if (!IsStructurallyEquivalentLambdas(Context, D1CXX, D2CXX)) + return false; + } + if (D1CXX->getNumBases() != D2CXX->getNumBases()) { if (Context.Complain) { Context.Diag2(D2->getLocation(), diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp index 8b2f7c5b240e4..6ea350cc72657 100644 --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -5122,6 +5122,22 @@ TEST_P(ASTImporterOptionSpecificTestBase, LambdaInFunctionParam) { EXPECT_EQ(ToLSize, FromLSize); } +TEST_P(ASTImporterOptionSpecificTestBase, LambdaInGlobalScope) { + Decl *FromTU = getTuDecl( + R"( + auto l1 = [](unsigned lp) { return 1; }; + auto l2 = [](int lp) { return 2; }; + int f(int p) { + return l1(p) + l2(p); + } + )", + Lang_CXX11, "input0.cc"); + FunctionDecl *FromF = FirstDeclMatcher().match( + FromTU, functionDecl(hasName("f"))); + FunctionDecl *ToF = Import(FromF, Lang_CXX11); + EXPECT_TRUE(ToF); +} + struct LLDBLookupTest : ASTImporterOptionSpecificTestBase { LLDBLookupTest() { Creator = [](ASTContext &ToContext, FileManager &ToFileManager, diff --git a/clang/unittests/AST/StructuralEquivalenceTest.cpp b/clang/unittests/AST/StructuralEquivalenceTest.cpp index 63757987e113c..cdb55d90b9dfb 100644 --- a/clang/unittests/AST/StructuralEquivalenceTest.cpp +++ b/clang/unittests/AST/StructuralEquivalenceTest.cpp @@ -797,6 +797,58 @@ TEST_F(StructuralEquivalenceRecordTest, RecordsWithDifferentBody) { EXPECT_FALSE(testStructuralMatch(t)); } +struct StructuralEquivalenceLambdaTest : StructuralEquivalenceTest {}; + +TEST_F(StructuralEquivalenceLambdaTest, 
LambdaClassesWithDifferentMethods) { + // Get the LambdaExprs, unfortunately we can't match directly the underlying + // implicit CXXRecordDecl of the Lambda classes. + auto t = makeDecls( + "void f() { auto L0 = [](int){}; }", + "void f() { auto L1 = [](){}; }", + Lang_CXX11, + lambdaExpr(), + lambdaExpr()); + CXXRecordDecl *L0 = get<0>(t)->getLambdaClass(); + CXXRecordDecl *L1 = get<1>(t)->getLambdaClass(); + EXPECT_FALSE(testStructuralMatch(L0, L1)); +} + +TEST_F(StructuralEquivalenceLambdaTest, LambdaClassesWithEqMethods) { + auto t = makeDecls( + "void f() { auto L0 = [](int){}; }", + "void f() { auto L1 = [](int){}; }", + Lang_CXX11, + lambdaExpr(), + lambdaExpr()); + CXXRecordDecl *L0 = get<0>(t)->getLambdaClass(); + CXXRecordDecl *L1 = get<1>(t)->getLambdaClass(); + EXPECT_TRUE(testStructuralMatch(L0, L1)); +} + +TEST_F(StructuralEquivalenceLambdaTest, LambdaClassesWithDifferentFields) { + auto t = makeDecls( + "void f() { char* X; auto L0 = [X](){}; }", + "void f() { float X; auto L1 = [X](){}; }", + Lang_CXX11, + lambdaExpr(), + lambdaExpr()); + CXXRecordDecl *L0 = get<0>(t)->getLambdaClass(); + CXXRecordDecl *L1 = get<1>(t)->getLambdaClass(); + EXPECT_FALSE(testStructuralMatch(L0, L1)); +} + +TEST_F(StructuralEquivalenceLambdaTest, LambdaClassesWithEqFields) { + auto t = makeDecls( + "void f() { float X; auto L0 = [X](){}; }", + "void f() { float X; auto L1 = [X](){}; }", + Lang_CXX11, + lambdaExpr(), + lambdaExpr()); + CXXRecordDecl *L0 = get<0>(t)->getLambdaClass(); + CXXRecordDecl *L1 = get<1>(t)->getLambdaClass(); + EXPECT_TRUE(testStructuralMatch(L0, L1)); +} + TEST_F(StructuralEquivalenceTest, CompareSameDeclWithMultiple) { auto t = makeNamedDecls( "struct A{ }; struct B{ }; void foo(A a, A b);", From 87886299b468ccaa0f07d6ee0b237e25c4c35b96 Mon Sep 17 00:00:00 2001 From: Chris Jackson Date: Wed, 17 Jul 2019 14:54:02 +0000 Subject: [PATCH 351/451] [lld] Add Visual Studio compatible diagnostics Summary: Add a --vs-diagnostics flag that alters the 
format of diagnostic output to enable source hyperlinks in Visual Studio. Differential Revision: https://reviews.llvm.org/D58484 Reviewed by: ruiu llvm-svn: 366333 --- lld/Common/ErrorHandler.cpp | 45 +++++++++++-- lld/ELF/Driver.cpp | 2 + lld/ELF/Options.td | 3 + lld/include/lld/Common/ErrorHandler.h | 3 +- .../ELF/Inputs/vs-diagnostics-duplicate2.s | 31 +++++++++ .../ELF/Inputs/vs-diagnostics-duplicate3.s | 6 ++ lld/test/ELF/vs-diagnostics-duplicate.s | 63 +++++++++++++++++++ .../ELF/vs-diagnostics-dynamic-relocation.s | 35 +++++++++++ .../ELF/vs-diagnostics-undefined-symbol-1.s | 15 +++++ .../ELF/vs-diagnostics-undefined-symbol-2.s | 18 ++++++ .../ELF/vs-diagnostics-undefined-symbol-3.s | 40 ++++++++++++ lld/test/ELF/vs-diagnostics-versionscript.s | 7 +++ 12 files changed, 262 insertions(+), 6 deletions(-) create mode 100644 lld/test/ELF/Inputs/vs-diagnostics-duplicate2.s create mode 100644 lld/test/ELF/Inputs/vs-diagnostics-duplicate3.s create mode 100644 lld/test/ELF/vs-diagnostics-duplicate.s create mode 100644 lld/test/ELF/vs-diagnostics-dynamic-relocation.s create mode 100644 lld/test/ELF/vs-diagnostics-undefined-symbol-1.s create mode 100644 lld/test/ELF/vs-diagnostics-undefined-symbol-2.s create mode 100644 lld/test/ELF/vs-diagnostics-undefined-symbol-3.s create mode 100644 lld/test/ELF/vs-diagnostics-versionscript.s diff --git a/lld/Common/ErrorHandler.cpp b/lld/Common/ErrorHandler.cpp index f5d3eb44848c5..c87c0609b2601 100644 --- a/lld/Common/ErrorHandler.cpp +++ b/lld/Common/ErrorHandler.cpp @@ -16,6 +16,7 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" #include +#include #if !defined(_MSC_VER) && !defined(__MINGW32__) #include @@ -84,8 +85,42 @@ void lld::checkError(Error e) { [&](ErrorInfoBase &eib) { error(eib.message()); }); } -void ErrorHandler::print(StringRef s, raw_ostream::Colors c) { - *errorOS << logName << ": "; +static std::string getLocation(std::string msg, std::string defaultMsg) { + static std::vector 
Regexes{ + std::regex(R"(^undefined symbol:.*\n>>> referenced by (\S+):(\d+)\n.*)"), + std::regex(R"(^undefined symbol:.*\n>>> referenced by (.*):)"), + std::regex( + R"(^duplicate symbol: .*\n>>> defined in (\S+)\n>>> defined in.*)"), + std::regex( + R"(^duplicate symbol: .*\n>>> defined at (\S+):(\d+).*)"), + std::regex( + R"(.*\n>>> defined in .*\n>>> referenced by (\S+):(\d+))"), + std::regex( + R"(^undefined (internal|hidden|protected) symbol: .*\n>>> referenced by (\S+):(\d+)\n.*)"), + std::regex(R"((\S+):(\d+): unclosed quote)"), + }; + + std::smatch Match; + for (std::regex &Re : Regexes) { + if (std::regex_search(msg, Match, Re)) { + return Match.size() > 2 ? Match.str(1) + "(" + Match.str(2) + ")" + : Match.str(1); + } + } + return defaultMsg; +} + +void ErrorHandler::printHeader(StringRef s, raw_ostream::Colors c, + const Twine &msg) { + + if (vsDiagnostics) { + // A Visual Studio-style error message starts with an error location. + // If a location cannot be extracted then we default to LogName. 
+ *errorOS << getLocation(msg.str(), logName) << ": "; + } else { + *errorOS << logName << ": "; + } + if (colorDiagnostics) { errorOS->changeColor(c, true); *errorOS << s; @@ -116,7 +151,7 @@ void ErrorHandler::warn(const Twine &msg) { std::lock_guard lock(mu); newline(errorOS, msg); - print("warning: ", raw_ostream::MAGENTA); + printHeader("warning: ", raw_ostream::MAGENTA, msg); *errorOS << msg << "\n"; } @@ -125,10 +160,10 @@ void ErrorHandler::error(const Twine &msg) { newline(errorOS, msg); if (errorLimit == 0 || errorCount < errorLimit) { - print("error: ", raw_ostream::RED); + printHeader("error: ", raw_ostream::RED, msg); *errorOS << msg << "\n"; } else if (errorCount == errorLimit) { - print("error: ", raw_ostream::RED); + printHeader("error: ", raw_ostream::RED, msg); *errorOS << errorLimitExceededMsg << "\n"; if (exitEarly) exitLld(1); diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 98551d2cb34dc..fbfc71d22b7e5 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -786,6 +786,8 @@ static void readConfigs(opt::InputArgList &args) { errorHandler().verbose = args.hasArg(OPT_verbose); errorHandler().fatalWarnings = args.hasFlag(OPT_fatal_warnings, OPT_no_fatal_warnings, false); + errorHandler().vsDiagnostics = + args.hasArg(OPT_visual_studio_diagnostics_format, false); threadsEnabled = args.hasFlag(OPT_threads, OPT_no_threads, true); config->allowMultipleDefinition = diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index d45d9aaad9af9..3ebb46f2e1b2d 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -416,6 +416,9 @@ defm wrap: Eq<"wrap", "Use wrapper functions for symbol">, def z: JoinedOrSeparate<["-"], "z">, MetaVarName<"